1 //=- AArch64LoadStoreOptimizer.cpp - AArch64 load/store opt. pass -*- C++ -*-=// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This file contains a pass that performs load / store related peephole 11 // optimizations. This pass should be run after register allocation. 12 // 13 //===----------------------------------------------------------------------===// 14 15 #include "AArch64InstrInfo.h" 16 #include "AArch64Subtarget.h" 17 #include "MCTargetDesc/AArch64AddressingModes.h" 18 #include "llvm/ADT/BitVector.h" 19 #include "llvm/ADT/SmallVector.h" 20 #include "llvm/ADT/Statistic.h" 21 #include "llvm/CodeGen/MachineBasicBlock.h" 22 #include "llvm/CodeGen/MachineFunctionPass.h" 23 #include "llvm/CodeGen/MachineInstr.h" 24 #include "llvm/CodeGen/MachineInstrBuilder.h" 25 #include "llvm/Support/CommandLine.h" 26 #include "llvm/Support/Debug.h" 27 #include "llvm/Support/ErrorHandling.h" 28 #include "llvm/Support/raw_ostream.h" 29 #include "llvm/Target/TargetInstrInfo.h" 30 #include "llvm/Target/TargetMachine.h" 31 #include "llvm/Target/TargetRegisterInfo.h" 32 using namespace llvm; 33 34 #define DEBUG_TYPE "aarch64-ldst-opt" 35 36 /// AArch64AllocLoadStoreOpt - Post-register allocation pass to combine 37 /// load / store instructions to form ldp / stp instructions. 
38 39 STATISTIC(NumPairCreated, "Number of load/store pair instructions generated"); 40 STATISTIC(NumPostFolded, "Number of post-index updates folded"); 41 STATISTIC(NumPreFolded, "Number of pre-index updates folded"); 42 STATISTIC(NumUnscaledPairCreated, 43 "Number of load/store from unscaled generated"); 44 45 static cl::opt<unsigned> ScanLimit("aarch64-load-store-scan-limit", 46 cl::init(20), cl::Hidden); 47 48 namespace llvm { 49 void initializeAArch64LoadStoreOptPass(PassRegistry &); 50 } 51 52 #define AARCH64_LOAD_STORE_OPT_NAME "AArch64 load / store optimization pass" 53 54 namespace { 55 56 typedef struct LdStPairFlags { 57 // If a matching instruction is found, MergeForward is set to true if the 58 // merge is to remove the first instruction and replace the second with 59 // a pair-wise insn, and false if the reverse is true. 60 bool MergeForward; 61 62 // SExtIdx gives the index of the result of the load pair that must be 63 // extended. The value of SExtIdx assumes that the paired load produces the 64 // value in this order: (I, returned iterator), i.e., -1 means no value has 65 // to be extended, 0 means I, and 1 means the returned iterator. 66 int SExtIdx; 67 68 LdStPairFlags() : MergeForward(false), SExtIdx(-1) {} 69 70 void setMergeForward(bool V = true) { MergeForward = V; } 71 bool getMergeForward() const { return MergeForward; } 72 73 void setSExtIdx(int V) { SExtIdx = V; } 74 int getSExtIdx() const { return SExtIdx; } 75 76 } LdStPairFlags; 77 78 struct AArch64LoadStoreOpt : public MachineFunctionPass { 79 static char ID; 80 AArch64LoadStoreOpt() : MachineFunctionPass(ID) { 81 initializeAArch64LoadStoreOptPass(*PassRegistry::getPassRegistry()); 82 } 83 84 const AArch64InstrInfo *TII; 85 const TargetRegisterInfo *TRI; 86 87 // Scan the instructions looking for a load/store that can be combined 88 // with the current instruction into a load/store pair. 89 // Return the matching instruction if one is found, else MBB->end(). 
90 MachineBasicBlock::iterator findMatchingInsn(MachineBasicBlock::iterator I, 91 LdStPairFlags &Flags, 92 unsigned Limit); 93 // Merge the two instructions indicated into a single pair-wise instruction. 94 // If MergeForward is true, erase the first instruction and fold its 95 // operation into the second. If false, the reverse. Return the instruction 96 // following the first instruction (which may change during processing). 97 MachineBasicBlock::iterator 98 mergePairedInsns(MachineBasicBlock::iterator I, 99 MachineBasicBlock::iterator Paired, 100 const LdStPairFlags &Flags); 101 102 // Scan the instruction list to find a base register update that can 103 // be combined with the current instruction (a load or store) using 104 // pre or post indexed addressing with writeback. Scan forwards. 105 MachineBasicBlock::iterator 106 findMatchingUpdateInsnForward(MachineBasicBlock::iterator I, unsigned Limit, 107 int UnscaledOffset); 108 109 // Scan the instruction list to find a base register update that can 110 // be combined with the current instruction (a load or store) using 111 // pre or post indexed addressing with writeback. Scan backwards. 112 MachineBasicBlock::iterator 113 findMatchingUpdateInsnBackward(MachineBasicBlock::iterator I, unsigned Limit); 114 115 // Find an instruction that updates the base register of the ld/st 116 // instruction. 117 bool isMatchingUpdateInsn(MachineInstr *MemMI, MachineInstr *MI, 118 unsigned BaseReg, int Offset); 119 120 // Merge a pre- or post-index base register update into a ld/st instruction. 
121 MachineBasicBlock::iterator 122 mergeUpdateInsn(MachineBasicBlock::iterator I, 123 MachineBasicBlock::iterator Update, bool IsPreIdx); 124 125 bool optimizeBlock(MachineBasicBlock &MBB); 126 127 bool runOnMachineFunction(MachineFunction &Fn) override; 128 129 const char *getPassName() const override { 130 return AARCH64_LOAD_STORE_OPT_NAME; 131 } 132 }; 133 char AArch64LoadStoreOpt::ID = 0; 134 } // namespace 135 136 INITIALIZE_PASS(AArch64LoadStoreOpt, "aarch64-ldst-opt", 137 AARCH64_LOAD_STORE_OPT_NAME, false, false) 138 139 static bool isUnscaledLdSt(unsigned Opc) { 140 switch (Opc) { 141 default: 142 return false; 143 case AArch64::STURSi: 144 case AArch64::STURDi: 145 case AArch64::STURQi: 146 case AArch64::STURWi: 147 case AArch64::STURXi: 148 case AArch64::LDURSi: 149 case AArch64::LDURDi: 150 case AArch64::LDURQi: 151 case AArch64::LDURWi: 152 case AArch64::LDURXi: 153 case AArch64::LDURSWi: 154 return true; 155 } 156 } 157 158 static bool isUnscaledLdSt(MachineInstr *MI) { 159 return isUnscaledLdSt(MI->getOpcode()); 160 } 161 162 // Scaling factor for unscaled load or store. 
static int getMemScale(MachineInstr *MI) {
  switch (MI->getOpcode()) {
  default:
    llvm_unreachable("Opcode has unknown scale!");
  case AArch64::LDRBBui:
  case AArch64::STRBBui:
    return 1;
  case AArch64::LDRHHui:
  case AArch64::STRHHui:
    return 2;
  case AArch64::LDRSui:
  case AArch64::LDURSi:
  case AArch64::LDRSWui:
  case AArch64::LDURSWi:
  case AArch64::LDRWui:
  case AArch64::LDURWi:
  case AArch64::STRSui:
  case AArch64::STURSi:
  case AArch64::STRWui:
  case AArch64::STURWi:
  case AArch64::LDPSi:
  case AArch64::LDPSWi:
  case AArch64::LDPWi:
  case AArch64::STPSi:
  case AArch64::STPWi:
    return 4;
  case AArch64::LDRDui:
  case AArch64::LDURDi:
  case AArch64::LDRXui:
  case AArch64::LDURXi:
  case AArch64::STRDui:
  case AArch64::STURDi:
  case AArch64::STRXui:
  case AArch64::STURXi:
  case AArch64::LDPDi:
  case AArch64::LDPXi:
  case AArch64::STPDi:
  case AArch64::STPXi:
    return 8;
  case AArch64::LDRQui:
  case AArch64::LDURQi:
  case AArch64::STRQui:
  case AArch64::STURQi:
  case AArch64::LDPQi:
  case AArch64::STPQi:
    return 16;
  }
}

// Map a sign-extending load (LDRSW*) to its plain-load counterpart so it can
// be considered for pairing with a non-extending load. Pairable non-extending
// opcodes map to themselves. If \p IsValidLdStrOpc is non-null it is set to
// false when \p Opc is not a load/store this pass understands at all.
static unsigned getMatchingNonSExtOpcode(unsigned Opc,
                                         bool *IsValidLdStrOpc = nullptr) {
  if (IsValidLdStrOpc)
    *IsValidLdStrOpc = true;
  switch (Opc) {
  default:
    if (IsValidLdStrOpc)
      *IsValidLdStrOpc = false;
    return UINT_MAX;
  case AArch64::STRDui:
  case AArch64::STURDi:
  case AArch64::STRQui:
  case AArch64::STURQi:
  case AArch64::STRWui:
  case AArch64::STURWi:
  case AArch64::STRXui:
  case AArch64::STURXi:
  case AArch64::LDRDui:
  case AArch64::LDURDi:
  case AArch64::LDRQui:
  case AArch64::LDURQi:
  case AArch64::LDRWui:
  case AArch64::LDURWi:
  case AArch64::LDRXui:
  case AArch64::LDURXi:
  case AArch64::STRSui:
  case AArch64::STURSi:
  case AArch64::LDRSui:
  case AArch64::LDURSi:
    return Opc;
  case AArch64::LDRSWui:
    return AArch64::LDRWui;
  case AArch64::LDURSWi:
    return AArch64::LDURWi;
  }
}

// Map a single load/store opcode (scaled or unscaled) to the corresponding
// pair-wise (LDP/STP) opcode. Note both the scaled and unscaled forms map to
// the same scaled pair opcode.
static unsigned getMatchingPairOpcode(unsigned Opc) {
  switch (Opc) {
  default:
    llvm_unreachable("Opcode has no pairwise equivalent!");
  case AArch64::STRSui:
  case AArch64::STURSi:
    return AArch64::STPSi;
  case AArch64::STRDui:
  case AArch64::STURDi:
    return AArch64::STPDi;
  case AArch64::STRQui:
  case AArch64::STURQi:
    return AArch64::STPQi;
  case AArch64::STRWui:
  case AArch64::STURWi:
    return AArch64::STPWi;
  case AArch64::STRXui:
  case AArch64::STURXi:
    return AArch64::STPXi;
  case AArch64::LDRSui:
  case AArch64::LDURSi:
    return AArch64::LDPSi;
  case AArch64::LDRDui:
  case AArch64::LDURDi:
    return AArch64::LDPDi;
  case AArch64::LDRQui:
  case AArch64::LDURQi:
    return AArch64::LDPQi;
  case AArch64::LDRWui:
  case AArch64::LDURWi:
    return AArch64::LDPWi;
  case AArch64::LDRXui:
  case AArch64::LDURXi:
    return AArch64::LDPXi;
  case AArch64::LDRSWui:
  case AArch64::LDURSWi:
    return AArch64::LDPSWi;
  }
}

// Map a scaled load/store (or pair) opcode to its pre-indexed (writeback)
// equivalent.
static unsigned getPreIndexedOpcode(unsigned Opc) {
  switch (Opc) {
  default:
    llvm_unreachable("Opcode has no pre-indexed equivalent!");
  case AArch64::STRSui:
    return AArch64::STRSpre;
  case AArch64::STRDui:
    return AArch64::STRDpre;
  case AArch64::STRQui:
    return AArch64::STRQpre;
  case AArch64::STRBBui:
    return AArch64::STRBBpre;
  case AArch64::STRHHui:
    return AArch64::STRHHpre;
  case AArch64::STRWui:
    return AArch64::STRWpre;
  case AArch64::STRXui:
    return AArch64::STRXpre;
  case AArch64::LDRSui:
    return AArch64::LDRSpre;
  case AArch64::LDRDui:
    return AArch64::LDRDpre;
  case AArch64::LDRQui:
    return AArch64::LDRQpre;
  case AArch64::LDRBBui:
    return AArch64::LDRBBpre;
  case AArch64::LDRHHui:
    return AArch64::LDRHHpre;
  case AArch64::LDRWui:
    return AArch64::LDRWpre;
  case AArch64::LDRXui:
    return AArch64::LDRXpre;
  case AArch64::LDRSWui:
    return AArch64::LDRSWpre;
  case AArch64::LDPSi:
    return AArch64::LDPSpre;
  case AArch64::LDPSWi:
    return AArch64::LDPSWpre;
  case AArch64::LDPDi:
    return AArch64::LDPDpre;
  case AArch64::LDPQi:
    return AArch64::LDPQpre;
  case AArch64::LDPWi:
    return AArch64::LDPWpre;
  case AArch64::LDPXi:
    return AArch64::LDPXpre;
  case AArch64::STPSi:
    return AArch64::STPSpre;
  case AArch64::STPDi:
    return AArch64::STPDpre;
  case AArch64::STPQi:
    return AArch64::STPQpre;
  case AArch64::STPWi:
    return AArch64::STPWpre;
  case AArch64::STPXi:
    return AArch64::STPXpre;
  }
}

// Map a scaled load/store (or pair) opcode to its post-indexed (writeback)
// equivalent.
static unsigned getPostIndexedOpcode(unsigned Opc) {
  switch (Opc) {
  default:
    llvm_unreachable("Opcode has no post-indexed wise equivalent!");
  case AArch64::STRSui:
    return AArch64::STRSpost;
  case AArch64::STRDui:
    return AArch64::STRDpost;
  case AArch64::STRQui:
    return AArch64::STRQpost;
  case AArch64::STRBBui:
    return AArch64::STRBBpost;
  case AArch64::STRHHui:
    return AArch64::STRHHpost;
  case AArch64::STRWui:
    return AArch64::STRWpost;
  case AArch64::STRXui:
    return AArch64::STRXpost;
  case AArch64::LDRSui:
    return AArch64::LDRSpost;
  case AArch64::LDRDui:
    return AArch64::LDRDpost;
  case AArch64::LDRQui:
    return AArch64::LDRQpost;
  case AArch64::LDRBBui:
    return AArch64::LDRBBpost;
  case AArch64::LDRHHui:
    return AArch64::LDRHHpost;
  case AArch64::LDRWui:
    return AArch64::LDRWpost;
  case AArch64::LDRXui:
    return AArch64::LDRXpost;
  case AArch64::LDRSWui:
    return AArch64::LDRSWpost;
  case AArch64::LDPSi:
    return AArch64::LDPSpost;
  case AArch64::LDPSWi:
    return AArch64::LDPSWpost;
  case AArch64::LDPDi:
    return AArch64::LDPDpost;
  case AArch64::LDPQi:
    return AArch64::LDPQpost;
  case AArch64::LDPWi:
    return AArch64::LDPWpost;
  case AArch64::LDPXi:
    return AArch64::LDPXpost;
  case AArch64::STPSi:
    return AArch64::STPSpost;
  case AArch64::STPDi:
    return AArch64::STPDpost;
  case AArch64::STPQi:
    return AArch64::STPQpost;
  case AArch64::STPWi:
    return AArch64::STPWpost;
  case AArch64::STPXi:
    return AArch64::STPXpost;
  }
}

// Return true if \p MI is one of the pair-wise (LDP/STP) instructions handled
// here; used to select operand indices below.
static bool isPairedLdSt(const MachineInstr *MI) {
  switch (MI->getOpcode()) {
  default:
    return false;
  case AArch64::LDPSi:
  case AArch64::LDPSWi:
  case AArch64::LDPDi:
  case AArch64::LDPQi:
  case AArch64::LDPWi:
  case AArch64::LDPXi:
  case AArch64::STPSi:
  case AArch64::STPDi:
  case AArch64::STPQi:
  case AArch64::STPWi:
  case AArch64::STPXi:
    return true;
  }
}

// Return the data register operand: operand 0 for a single load/store, or
// operand \p PairedRegOp (0 or 1) for a pair.
static const MachineOperand &getLdStRegOp(const MachineInstr *MI,
                                          unsigned PairedRegOp = 0) {
  assert(PairedRegOp < 2 && "Unexpected register operand idx.");
  unsigned Idx = isPairedLdSt(MI) ? PairedRegOp : 0;
  return MI->getOperand(Idx);
}

// Return the base register operand (operand 1 for singles, 2 for pairs).
static const MachineOperand &getLdStBaseOp(const MachineInstr *MI) {
  unsigned Idx = isPairedLdSt(MI) ? 2 : 1;
  return MI->getOperand(Idx);
}

// Return the immediate offset operand (operand 2 for singles, 3 for pairs).
static const MachineOperand &getLdStOffsetOp(const MachineInstr *MI) {
  unsigned Idx = isPairedLdSt(MI) ? 3 : 2;
  return MI->getOperand(Idx);
}

MachineBasicBlock::iterator
AArch64LoadStoreOpt::mergePairedInsns(MachineBasicBlock::iterator I,
                                      MachineBasicBlock::iterator Paired,
                                      const LdStPairFlags &Flags) {
  MachineBasicBlock::iterator NextI = I;
  ++NextI;
  // If NextI is the second of the two instructions to be merged, we need
  // to skip one further. Either way we merge will invalidate the iterator,
  // and we don't need to scan the new instruction, as it's a pairwise
  // instruction, which we're not considering for further action anyway.
  if (NextI == Paired)
    ++NextI;

  int SExtIdx = Flags.getSExtIdx();
  // If one of the two is an LDRSW, pair using the non-extending opcode and
  // patch up the sign extension afterwards (see the SExtIdx block below).
  unsigned Opc =
      SExtIdx == -1 ? I->getOpcode() : getMatchingNonSExtOpcode(I->getOpcode());
  bool IsUnscaled = isUnscaledLdSt(Opc);
  int OffsetStride = IsUnscaled ? getMemScale(I) : 1;

  bool MergeForward = Flags.getMergeForward();
  unsigned NewOpc = getMatchingPairOpcode(Opc);
  // Insert our new paired instruction after whichever of the paired
  // instructions MergeForward indicates.
  MachineBasicBlock::iterator InsertionPoint = MergeForward ? Paired : I;
  // Also based on MergeForward is from where we copy the base register operand
  // so we get the flags compatible with the input code.
  const MachineOperand &BaseRegOp =
      MergeForward ? getLdStBaseOp(Paired) : getLdStBaseOp(I);

  // Which register is Rt and which is Rt2 depends on the offset order.
  MachineInstr *RtMI, *Rt2MI;
  if (getLdStOffsetOp(I).getImm() ==
      getLdStOffsetOp(Paired).getImm() + OffsetStride) {
    RtMI = Paired;
    Rt2MI = I;
    // Here we swapped the assumption made for SExtIdx.
    // I.e., we turn ldp I, Paired into ldp Paired, I.
    // Update the index accordingly.
    if (SExtIdx != -1)
      SExtIdx = (SExtIdx + 1) % 2;
  } else {
    RtMI = I;
    Rt2MI = Paired;
  }
  // Handle Unscaled: the pair instruction takes a scaled (element) offset.
  int OffsetImm = getLdStOffsetOp(RtMI).getImm();
  if (IsUnscaled)
    OffsetImm /= OffsetStride;

  // Construct the new instruction.
  MachineInstrBuilder MIB = BuildMI(*I->getParent(), InsertionPoint,
                                    I->getDebugLoc(), TII->get(NewOpc))
                                .addOperand(getLdStRegOp(RtMI))
                                .addOperand(getLdStRegOp(Rt2MI))
                                .addOperand(BaseRegOp)
                                .addImm(OffsetImm);
  (void)MIB;

  // FIXME: Do we need/want to copy the mem operands from the source
  // instructions? Probably. What uses them after this?

  DEBUG(dbgs() << "Creating pair load/store. Replacing instructions:\n    ");
  DEBUG(I->print(dbgs()));
  DEBUG(dbgs() << "    ");
  DEBUG(Paired->print(dbgs()));
  DEBUG(dbgs() << "  with instruction:\n    ");

  if (SExtIdx != -1) {
    // Generate the sign extension for the proper result of the ldp.
    // I.e., with X1, that would be:
    // %W1<def> = KILL %W1, %X1<imp-def>
    // %X1<def> = SBFMXri %X1<kill>, 0, 31
    MachineOperand &DstMO = MIB->getOperand(SExtIdx);
    // Right now, DstMO has the extended register, since it comes from an
    // extended opcode.
    unsigned DstRegX = DstMO.getReg();
    // Get the W variant of that register.
    unsigned DstRegW = TRI->getSubReg(DstRegX, AArch64::sub_32);
    // Update the result of LDP to use the W instead of the X variant.
    DstMO.setReg(DstRegW);
    DEBUG(((MachineInstr *)MIB)->print(dbgs()));
    DEBUG(dbgs() << "\n");
    // Make the machine verifier happy by providing a definition for
    // the X register.
    // Insert this definition right after the generated LDP, i.e., before
    // InsertionPoint.
    MachineInstrBuilder MIBKill =
        BuildMI(*I->getParent(), InsertionPoint, I->getDebugLoc(),
                TII->get(TargetOpcode::KILL), DstRegW)
            .addReg(DstRegW)
            .addReg(DstRegX, RegState::Define);
    MIBKill->getOperand(2).setImplicit();
    // Create the sign extension.
    MachineInstrBuilder MIBSXTW =
        BuildMI(*I->getParent(), InsertionPoint, I->getDebugLoc(),
                TII->get(AArch64::SBFMXri), DstRegX)
            .addReg(DstRegX)
            .addImm(0)
            .addImm(31);
    (void)MIBSXTW;
    DEBUG(dbgs() << "  Extend operand:\n    ");
    DEBUG(((MachineInstr *)MIBSXTW)->print(dbgs()));
    DEBUG(dbgs() << "\n");
  } else {
    DEBUG(((MachineInstr *)MIB)->print(dbgs()));
    DEBUG(dbgs() << "\n");
  }

  // Erase the old instructions.
  I->eraseFromParent();
  Paired->eraseFromParent();

  return NextI;
}

/// trackRegDefsUses - Remember what registers the specified instruction uses
/// and modifies.
560 static void trackRegDefsUses(const MachineInstr *MI, BitVector &ModifiedRegs, 561 BitVector &UsedRegs, 562 const TargetRegisterInfo *TRI) { 563 for (const MachineOperand &MO : MI->operands()) { 564 if (MO.isRegMask()) 565 ModifiedRegs.setBitsNotInMask(MO.getRegMask()); 566 567 if (!MO.isReg()) 568 continue; 569 unsigned Reg = MO.getReg(); 570 if (MO.isDef()) { 571 for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) 572 ModifiedRegs.set(*AI); 573 } else { 574 assert(MO.isUse() && "Reg operand not a def and not a use?!?"); 575 for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) 576 UsedRegs.set(*AI); 577 } 578 } 579 } 580 581 static bool inBoundsForPair(bool IsUnscaled, int Offset, int OffsetStride) { 582 // Convert the byte-offset used by unscaled into an "element" offset used 583 // by the scaled pair load/store instructions. 584 if (IsUnscaled) 585 Offset /= OffsetStride; 586 587 return Offset <= 63 && Offset >= -64; 588 } 589 590 // Do alignment, specialized to power of 2 and for signed ints, 591 // avoiding having to do a C-style cast from uint_64t to int when 592 // using RoundUpToAlignment from include/llvm/Support/MathExtras.h. 593 // FIXME: Move this function to include/MathExtras.h? 594 static int alignTo(int Num, int PowOf2) { 595 return (Num + PowOf2 - 1) & ~(PowOf2 - 1); 596 } 597 598 static bool mayAlias(MachineInstr *MIa, MachineInstr *MIb, 599 const AArch64InstrInfo *TII) { 600 // One of the instructions must modify memory. 601 if (!MIa->mayStore() && !MIb->mayStore()) 602 return false; 603 604 // Both instructions must be memory operations. 
605 if (!MIa->mayLoadOrStore() && !MIb->mayLoadOrStore()) 606 return false; 607 608 return !TII->areMemAccessesTriviallyDisjoint(MIa, MIb); 609 } 610 611 static bool mayAlias(MachineInstr *MIa, 612 SmallVectorImpl<MachineInstr *> &MemInsns, 613 const AArch64InstrInfo *TII) { 614 for (auto &MIb : MemInsns) 615 if (mayAlias(MIa, MIb, TII)) 616 return true; 617 618 return false; 619 } 620 621 /// findMatchingInsn - Scan the instructions looking for a load/store that can 622 /// be combined with the current instruction into a load/store pair. 623 MachineBasicBlock::iterator 624 AArch64LoadStoreOpt::findMatchingInsn(MachineBasicBlock::iterator I, 625 LdStPairFlags &Flags, 626 unsigned Limit) { 627 MachineBasicBlock::iterator E = I->getParent()->end(); 628 MachineBasicBlock::iterator MBBI = I; 629 MachineInstr *FirstMI = I; 630 ++MBBI; 631 632 unsigned Opc = FirstMI->getOpcode(); 633 bool MayLoad = FirstMI->mayLoad(); 634 bool IsUnscaled = isUnscaledLdSt(FirstMI); 635 unsigned Reg = getLdStRegOp(FirstMI).getReg(); 636 unsigned BaseReg = getLdStBaseOp(FirstMI).getReg(); 637 int Offset = getLdStOffsetOp(FirstMI).getImm(); 638 639 // Early exit if the first instruction modifies the base register. 640 // e.g., ldr x0, [x0] 641 if (FirstMI->modifiesRegister(BaseReg, TRI)) 642 return E; 643 644 // Early exit if the offset if not possible to match. (6 bits of positive 645 // range, plus allow an extra one in case we find a later insn that matches 646 // with Offset-1) 647 int OffsetStride = IsUnscaled ? getMemScale(FirstMI) : 1; 648 if (!inBoundsForPair(IsUnscaled, Offset, OffsetStride)) 649 return E; 650 651 // Track which registers have been modified and used between the first insn 652 // (inclusive) and the second insn. 653 BitVector ModifiedRegs, UsedRegs; 654 ModifiedRegs.resize(TRI->getNumRegs()); 655 UsedRegs.resize(TRI->getNumRegs()); 656 657 // Remember any instructions that read/write memory between FirstMI and MI. 
658 SmallVector<MachineInstr *, 4> MemInsns; 659 660 for (unsigned Count = 0; MBBI != E && Count < Limit; ++MBBI) { 661 MachineInstr *MI = MBBI; 662 // Skip DBG_VALUE instructions. Otherwise debug info can affect the 663 // optimization by changing how far we scan. 664 if (MI->isDebugValue()) 665 continue; 666 667 // Now that we know this is a real instruction, count it. 668 ++Count; 669 670 bool CanMergeOpc = Opc == MI->getOpcode(); 671 Flags.setSExtIdx(-1); 672 if (!CanMergeOpc) { 673 bool IsValidLdStrOpc; 674 unsigned NonSExtOpc = getMatchingNonSExtOpcode(Opc, &IsValidLdStrOpc); 675 assert(IsValidLdStrOpc && 676 "Given Opc should be a Load or Store with an immediate"); 677 // Opc will be the first instruction in the pair. 678 Flags.setSExtIdx(NonSExtOpc == (unsigned)Opc ? 1 : 0); 679 CanMergeOpc = NonSExtOpc == getMatchingNonSExtOpcode(MI->getOpcode()); 680 } 681 682 if (CanMergeOpc && getLdStOffsetOp(MI).isImm()) { 683 assert(MI->mayLoadOrStore() && "Expected memory operation."); 684 // If we've found another instruction with the same opcode, check to see 685 // if the base and offset are compatible with our starting instruction. 686 // These instructions all have scaled immediate operands, so we just 687 // check for +1/-1. Make sure to check the new instruction offset is 688 // actually an immediate and not a symbolic reference destined for 689 // a relocation. 690 // 691 // Pairwise instructions have a 7-bit signed offset field. Single insns 692 // have a 12-bit unsigned offset field. To be a valid combine, the 693 // final offset must be in range. 694 unsigned MIBaseReg = getLdStBaseOp(MI).getReg(); 695 int MIOffset = getLdStOffsetOp(MI).getImm(); 696 if (BaseReg == MIBaseReg && ((Offset == MIOffset + OffsetStride) || 697 (Offset + OffsetStride == MIOffset))) { 698 int MinOffset = Offset < MIOffset ? 
Offset : MIOffset; 699 // If this is a volatile load/store that otherwise matched, stop looking 700 // as something is going on that we don't have enough information to 701 // safely transform. Similarly, stop if we see a hint to avoid pairs. 702 if (MI->hasOrderedMemoryRef() || TII->isLdStPairSuppressed(MI)) 703 return E; 704 // If the resultant immediate offset of merging these instructions 705 // is out of range for a pairwise instruction, bail and keep looking. 706 bool MIIsUnscaled = isUnscaledLdSt(MI); 707 if (!inBoundsForPair(MIIsUnscaled, MinOffset, OffsetStride)) { 708 trackRegDefsUses(MI, ModifiedRegs, UsedRegs, TRI); 709 MemInsns.push_back(MI); 710 continue; 711 } 712 // If the alignment requirements of the paired (scaled) instruction 713 // can't express the offset of the unscaled input, bail and keep 714 // looking. 715 if (IsUnscaled && (alignTo(MinOffset, OffsetStride) != MinOffset)) { 716 trackRegDefsUses(MI, ModifiedRegs, UsedRegs, TRI); 717 MemInsns.push_back(MI); 718 continue; 719 } 720 // If the destination register of the loads is the same register, bail 721 // and keep looking. A load-pair instruction with both destination 722 // registers the same is UNPREDICTABLE and will result in an exception. 723 if (MayLoad && Reg == getLdStRegOp(MI).getReg()) { 724 trackRegDefsUses(MI, ModifiedRegs, UsedRegs, TRI); 725 MemInsns.push_back(MI); 726 continue; 727 } 728 729 // If the Rt of the second instruction was not modified or used between 730 // the two instructions and none of the instructions between the second 731 // and first alias with the second, we can combine the second into the 732 // first. 
733 if (!ModifiedRegs[getLdStRegOp(MI).getReg()] && 734 !(MI->mayLoad() && UsedRegs[getLdStRegOp(MI).getReg()]) && 735 !mayAlias(MI, MemInsns, TII)) { 736 Flags.setMergeForward(false); 737 return MBBI; 738 } 739 740 // Likewise, if the Rt of the first instruction is not modified or used 741 // between the two instructions and none of the instructions between the 742 // first and the second alias with the first, we can combine the first 743 // into the second. 744 if (!ModifiedRegs[getLdStRegOp(FirstMI).getReg()] && 745 !(MayLoad && UsedRegs[getLdStRegOp(FirstMI).getReg()]) && 746 !mayAlias(FirstMI, MemInsns, TII)) { 747 Flags.setMergeForward(true); 748 return MBBI; 749 } 750 // Unable to combine these instructions due to interference in between. 751 // Keep looking. 752 } 753 } 754 755 // If the instruction wasn't a matching load or store. Stop searching if we 756 // encounter a call instruction that might modify memory. 757 if (MI->isCall()) 758 return E; 759 760 // Update modified / uses register lists. 761 trackRegDefsUses(MI, ModifiedRegs, UsedRegs, TRI); 762 763 // Otherwise, if the base register is modified, we have no match, so 764 // return early. 765 if (ModifiedRegs[BaseReg]) 766 return E; 767 768 // Update list of instructions that read/write memory. 769 if (MI->mayLoadOrStore()) 770 MemInsns.push_back(MI); 771 } 772 return E; 773 } 774 775 MachineBasicBlock::iterator 776 AArch64LoadStoreOpt::mergeUpdateInsn(MachineBasicBlock::iterator I, 777 MachineBasicBlock::iterator Update, 778 bool IsPreIdx) { 779 assert((Update->getOpcode() == AArch64::ADDXri || 780 Update->getOpcode() == AArch64::SUBXri) && 781 "Unexpected base register update instruction to merge!"); 782 MachineBasicBlock::iterator NextI = I; 783 // Return the instruction following the merged instruction, which is 784 // the instruction following our unmerged load. Unless that's the add/sub 785 // instruction we're merging, in which case it's the one after that. 
786 if (++NextI == Update) 787 ++NextI; 788 789 int Value = Update->getOperand(2).getImm(); 790 assert(AArch64_AM::getShiftValue(Update->getOperand(3).getImm()) == 0 && 791 "Can't merge 1 << 12 offset into pre-/post-indexed load / store"); 792 if (Update->getOpcode() == AArch64::SUBXri) 793 Value = -Value; 794 795 unsigned NewOpc = IsPreIdx ? getPreIndexedOpcode(I->getOpcode()) 796 : getPostIndexedOpcode(I->getOpcode()); 797 MachineInstrBuilder MIB; 798 if (!isPairedLdSt(I)) { 799 // Non-paired instruction. 800 MIB = BuildMI(*I->getParent(), I, I->getDebugLoc(), TII->get(NewOpc)) 801 .addOperand(getLdStRegOp(Update)) 802 .addOperand(getLdStRegOp(I)) 803 .addOperand(getLdStBaseOp(I)) 804 .addImm(Value); 805 } else { 806 // Paired instruction. 807 int Scale = getMemScale(I); 808 MIB = BuildMI(*I->getParent(), I, I->getDebugLoc(), TII->get(NewOpc)) 809 .addOperand(getLdStRegOp(Update)) 810 .addOperand(getLdStRegOp(I, 0)) 811 .addOperand(getLdStRegOp(I, 1)) 812 .addOperand(getLdStBaseOp(I)) 813 .addImm(Value / Scale); 814 } 815 (void)MIB; 816 817 if (IsPreIdx) 818 DEBUG(dbgs() << "Creating pre-indexed load/store."); 819 else 820 DEBUG(dbgs() << "Creating post-indexed load/store."); 821 DEBUG(dbgs() << " Replacing instructions:\n "); 822 DEBUG(I->print(dbgs())); 823 DEBUG(dbgs() << " "); 824 DEBUG(Update->print(dbgs())); 825 DEBUG(dbgs() << " with instruction:\n "); 826 DEBUG(((MachineInstr *)MIB)->print(dbgs())); 827 DEBUG(dbgs() << "\n"); 828 829 // Erase the old instructions for the block. 830 I->eraseFromParent(); 831 Update->eraseFromParent(); 832 833 return NextI; 834 } 835 836 bool AArch64LoadStoreOpt::isMatchingUpdateInsn(MachineInstr *MemMI, 837 MachineInstr *MI, 838 unsigned BaseReg, int Offset) { 839 switch (MI->getOpcode()) { 840 default: 841 break; 842 case AArch64::SUBXri: 843 // Negate the offset for a SUB instruction. 
844 Offset *= -1; 845 // FALLTHROUGH 846 case AArch64::ADDXri: 847 // Make sure it's a vanilla immediate operand, not a relocation or 848 // anything else we can't handle. 849 if (!MI->getOperand(2).isImm()) 850 break; 851 // Watch out for 1 << 12 shifted value. 852 if (AArch64_AM::getShiftValue(MI->getOperand(3).getImm())) 853 break; 854 855 // The update instruction source and destination register must be the 856 // same as the load/store base register. 857 if (MI->getOperand(0).getReg() != BaseReg || 858 MI->getOperand(1).getReg() != BaseReg) 859 break; 860 861 bool IsPairedInsn = isPairedLdSt(MemMI); 862 int UpdateOffset = MI->getOperand(2).getImm(); 863 // For non-paired load/store instructions, the immediate must fit in a 864 // signed 9-bit integer. 865 if (!IsPairedInsn && (UpdateOffset > 255 || UpdateOffset < -256)) 866 break; 867 868 // For paired load/store instructions, the immediate must be a multiple of 869 // the scaling factor. The scaled offset must also fit into a signed 7-bit 870 // integer. 871 if (IsPairedInsn) { 872 int Scale = getMemScale(MemMI); 873 if (UpdateOffset % Scale != 0) 874 break; 875 876 int ScaledOffset = UpdateOffset / Scale; 877 if (ScaledOffset > 64 || ScaledOffset < -64) 878 break; 879 } 880 881 // If we have a non-zero Offset, we check that it matches the amount 882 // we're adding to the register. 883 if (!Offset || Offset == MI->getOperand(2).getImm()) 884 return true; 885 break; 886 } 887 return false; 888 } 889 890 MachineBasicBlock::iterator AArch64LoadStoreOpt::findMatchingUpdateInsnForward( 891 MachineBasicBlock::iterator I, unsigned Limit, int UnscaledOffset) { 892 MachineBasicBlock::iterator E = I->getParent()->end(); 893 MachineInstr *MemMI = I; 894 MachineBasicBlock::iterator MBBI = I; 895 896 unsigned BaseReg = getLdStBaseOp(MemMI).getReg(); 897 int MIUnscaledOffset = getLdStOffsetOp(MemMI).getImm() * getMemScale(MemMI); 898 899 // Scan forward looking for post-index opportunities. 
Updating instructions 900 // can't be formed if the memory instruction doesn't have the offset we're 901 // looking for. 902 if (MIUnscaledOffset != UnscaledOffset) 903 return E; 904 905 // If the base register overlaps a destination register, we can't 906 // merge the update. 907 bool IsPairedInsn = isPairedLdSt(MemMI); 908 for (unsigned i = 0, e = IsPairedInsn ? 2 : 1; i != e; ++i) { 909 unsigned DestReg = getLdStRegOp(MemMI, i).getReg(); 910 if (DestReg == BaseReg || TRI->isSubRegister(BaseReg, DestReg)) 911 return E; 912 } 913 914 // Track which registers have been modified and used between the first insn 915 // (inclusive) and the second insn. 916 BitVector ModifiedRegs, UsedRegs; 917 ModifiedRegs.resize(TRI->getNumRegs()); 918 UsedRegs.resize(TRI->getNumRegs()); 919 ++MBBI; 920 for (unsigned Count = 0; MBBI != E; ++MBBI) { 921 MachineInstr *MI = MBBI; 922 // Skip DBG_VALUE instructions. Otherwise debug info can affect the 923 // optimization by changing how far we scan. 924 if (MI->isDebugValue()) 925 continue; 926 927 // Now that we know this is a real instruction, count it. 928 ++Count; 929 930 // If we found a match, return it. 931 if (isMatchingUpdateInsn(I, MI, BaseReg, UnscaledOffset)) 932 return MBBI; 933 934 // Update the status of what the instruction clobbered and used. 935 trackRegDefsUses(MI, ModifiedRegs, UsedRegs, TRI); 936 937 // Otherwise, if the base register is used or modified, we have no match, so 938 // return early. 
939 if (ModifiedRegs[BaseReg] || UsedRegs[BaseReg]) 940 return E; 941 } 942 return E; 943 } 944 945 MachineBasicBlock::iterator AArch64LoadStoreOpt::findMatchingUpdateInsnBackward( 946 MachineBasicBlock::iterator I, unsigned Limit) { 947 MachineBasicBlock::iterator B = I->getParent()->begin(); 948 MachineBasicBlock::iterator E = I->getParent()->end(); 949 MachineInstr *MemMI = I; 950 MachineBasicBlock::iterator MBBI = I; 951 952 unsigned BaseReg = getLdStBaseOp(MemMI).getReg(); 953 int Offset = getLdStOffsetOp(MemMI).getImm(); 954 955 // If the load/store is the first instruction in the block, there's obviously 956 // not any matching update. Ditto if the memory offset isn't zero. 957 if (MBBI == B || Offset != 0) 958 return E; 959 // If the base register overlaps a destination register, we can't 960 // merge the update. 961 bool IsPairedInsn = isPairedLdSt(MemMI); 962 for (unsigned i = 0, e = IsPairedInsn ? 2 : 1; i != e; ++i) { 963 unsigned DestReg = getLdStRegOp(MemMI, i).getReg(); 964 if (DestReg == BaseReg || TRI->isSubRegister(BaseReg, DestReg)) 965 return E; 966 } 967 968 // Track which registers have been modified and used between the first insn 969 // (inclusive) and the second insn. 970 BitVector ModifiedRegs, UsedRegs; 971 ModifiedRegs.resize(TRI->getNumRegs()); 972 UsedRegs.resize(TRI->getNumRegs()); 973 --MBBI; 974 for (unsigned Count = 0; MBBI != B; --MBBI) { 975 MachineInstr *MI = MBBI; 976 // Skip DBG_VALUE instructions. Otherwise debug info can affect the 977 // optimization by changing how far we scan. 978 if (MI->isDebugValue()) 979 continue; 980 981 // Now that we know this is a real instruction, count it. 982 ++Count; 983 984 // If we found a match, return it. 985 if (isMatchingUpdateInsn(I, MI, BaseReg, Offset)) 986 return MBBI; 987 988 // Update the status of what the instruction clobbered and used. 
    trackRegDefsUses(MI, ModifiedRegs, UsedRegs, TRI);

    // Otherwise, if the base register is used or modified, we have no match, so
    // return early.
    if (ModifiedRegs[BaseReg] || UsedRegs[BaseReg])
      return E;
  }
  return E;
}

/// optimizeBlock - Run both peepholes over a single basic block: first merge
/// adjacent loads/stores into ldp/stp pairs, then fold base register updates
/// into pre-/post-indexed forms. Returns true if any instruction was changed.
bool AArch64LoadStoreOpt::optimizeBlock(MachineBasicBlock &MBB) {
  bool Modified = false;
  // Two transformations to do here:
  // 1) Find loads and stores that can be merged into a single load or store
  //    pair instruction.
  //      e.g.,
  //        ldr x0, [x2]
  //        ldr x1, [x2, #8]
  //        ; becomes
  //        ldp x0, x1, [x2]
  // 2) Find base register updates that can be merged into the load or store
  //    as a base-reg writeback.
  //      e.g.,
  //        ldr x0, [x2]
  //        add x2, x2, #4
  //        ; becomes
  //        ldr x0, [x2], #4

  for (MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
       MBBI != E;) {
    MachineInstr *MI = MBBI;
    switch (MI->getOpcode()) {
    default:
      // Just move on to the next instruction.
      ++MBBI;
      break;
    // Scaled instructions.
    case AArch64::STRSui:
    case AArch64::STRDui:
    case AArch64::STRQui:
    case AArch64::STRXui:
    case AArch64::STRWui:
    case AArch64::LDRSui:
    case AArch64::LDRDui:
    case AArch64::LDRQui:
    case AArch64::LDRXui:
    case AArch64::LDRWui:
    case AArch64::LDRSWui:
    // Unscaled instructions.
    case AArch64::STURSi:
    case AArch64::STURDi:
    case AArch64::STURQi:
    case AArch64::STURWi:
    case AArch64::STURXi:
    case AArch64::LDURSi:
    case AArch64::LDURDi:
    case AArch64::LDURQi:
    case AArch64::LDURWi:
    case AArch64::LDURXi:
    case AArch64::LDURSWi: {
      // If this is a volatile load/store, don't mess with it.
      if (MI->hasOrderedMemoryRef()) {
        ++MBBI;
        break;
      }
      // Make sure this is a reg+imm (as opposed to an address reloc).
      if (!getLdStOffsetOp(MI).isImm()) {
        ++MBBI;
        break;
      }
      // Check if this load/store has a hint to avoid pair formation.
      // MachineMemOperands hints are set by the AArch64StorePairSuppress pass.
      if (TII->isLdStPairSuppressed(MI)) {
        ++MBBI;
        break;
      }
      // Look ahead up to ScanLimit instructions for a pairable instruction.
      LdStPairFlags Flags;
      MachineBasicBlock::iterator Paired =
          findMatchingInsn(MBBI, Flags, ScanLimit);
      if (Paired != E) {
        ++NumPairCreated;
        if (isUnscaledLdSt(MI))
          ++NumUnscaledPairCreated;

        // Merge the loads into a pair. Keeping the iterator straight is a
        // pain, so we let the merge routine tell us what the next instruction
        // is after it's done mucking about.
        MBBI = mergePairedInsns(MBBI, Paired, Flags);
        Modified = true;
        break;
      }
      ++MBBI;
      break;
    }
      // FIXME: Do the other instructions.
    }
  }

  for (MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
       MBBI != E;) {
    MachineInstr *MI = MBBI;
    // Do update merging. It's simpler to keep this separate from the above
    // switch, though not strictly necessary.
    unsigned Opc = MI->getOpcode();
    switch (Opc) {
    default:
      // Just move on to the next instruction.
      ++MBBI;
      break;
    // Scaled instructions.
    case AArch64::STRSui:
    case AArch64::STRDui:
    case AArch64::STRQui:
    case AArch64::STRXui:
    case AArch64::STRWui:
    case AArch64::STRHHui:
    case AArch64::STRBBui:
    case AArch64::LDRSui:
    case AArch64::LDRDui:
    case AArch64::LDRQui:
    case AArch64::LDRXui:
    case AArch64::LDRWui:
    case AArch64::LDRHHui:
    case AArch64::LDRBBui:
    // Unscaled instructions.
    case AArch64::STURSi:
    case AArch64::STURDi:
    case AArch64::STURQi:
    case AArch64::STURWi:
    case AArch64::STURXi:
    case AArch64::LDURSi:
    case AArch64::LDURDi:
    case AArch64::LDURQi:
    case AArch64::LDURWi:
    case AArch64::LDURXi:
    // Paired instructions.
    case AArch64::LDPSi:
    case AArch64::LDPSWi:
    case AArch64::LDPDi:
    case AArch64::LDPQi:
    case AArch64::LDPWi:
    case AArch64::LDPXi:
    case AArch64::STPSi:
    case AArch64::STPDi:
    case AArch64::STPQi:
    case AArch64::STPWi:
    case AArch64::STPXi: {
      // Make sure this is a reg+imm (as opposed to an address reloc).
      if (!getLdStOffsetOp(MI).isImm()) {
        ++MBBI;
        break;
      }
      // Look forward to try to form a post-index instruction. For example,
      // ldr x0, [x20]
      // add x20, x20, #32
      // merged into:
      // ldr x0, [x20], #32
      MachineBasicBlock::iterator Update =
          findMatchingUpdateInsnForward(MBBI, ScanLimit, 0);
      if (Update != E) {
        // Merge the update into the ld/st.
        MBBI = mergeUpdateInsn(MBBI, Update, /*IsPreIdx=*/false);
        Modified = true;
        ++NumPostFolded;
        break;
      }
      // Don't know how to handle pre/post-index versions, so move to the next
      // instruction.
      if (isUnscaledLdSt(Opc)) {
        ++MBBI;
        break;
      }

      // Look back to try to find a pre-index instruction. For example,
      // add x0, x0, #8
      // ldr x1, [x0]
      // merged into:
      // ldr x1, [x0, #8]!
      Update = findMatchingUpdateInsnBackward(MBBI, ScanLimit);
      if (Update != E) {
        // Merge the update into the ld/st.
        MBBI = mergeUpdateInsn(MBBI, Update, /*IsPreIdx=*/true);
        Modified = true;
        ++NumPreFolded;
        break;
      }
      // The immediate in the load/store is scaled by the size of the memory
      // operation. The immediate in the add we're looking for,
      // however, is not, so adjust here.
      int UnscaledOffset = getLdStOffsetOp(MI).getImm() * getMemScale(MI);

      // Look forward to try to find a pre-index instruction. For example,
      // (note: despite scanning forward, the update is merged as a pre-index
      // writeback, since the add's offset equals the load/store's offset)
      // ldr x1, [x0, #64]
      // add x0, x0, #64
      // merged into:
      // ldr x1, [x0, #64]!
      Update = findMatchingUpdateInsnForward(MBBI, ScanLimit, UnscaledOffset);
      if (Update != E) {
        // Merge the update into the ld/st.
        MBBI = mergeUpdateInsn(MBBI, Update, /*IsPreIdx=*/true);
        Modified = true;
        ++NumPreFolded;
        break;
      }

      // Nothing found. Just move to the next instruction.
      ++MBBI;
      break;
    }
      // FIXME: Do the other instructions.
    }
  }

  return Modified;
}

/// runOnMachineFunction - Entry point for the pass: cache the target's
/// instruction and register info, then optimize every basic block in turn.
bool AArch64LoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
  TII = static_cast<const AArch64InstrInfo *>(Fn.getSubtarget().getInstrInfo());
  TRI = Fn.getSubtarget().getRegisterInfo();

  bool Modified = false;
  for (auto &MBB : Fn)
    Modified |= optimizeBlock(MBB);

  return Modified;
}

// FIXME: Do we need/want a pre-alloc pass like ARM has to try to keep
// loads and stores near one another?

/// createAArch64LoadStoreOptimizationPass - returns an instance of the
/// load / store optimization pass.
FunctionPass *llvm::createAArch64LoadStoreOptimizationPass() {
  return new AArch64LoadStoreOpt();
}