//===- AArch64MIPeepholeOpt.cpp - AArch64 MI peephole optimization pass ---===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This pass performs the peephole optimizations below on MIR level.
//
// 1. MOVi32imm + ANDWrr ==> ANDWri + ANDWri
//    MOVi64imm + ANDXrr ==> ANDXri + ANDXri
//
// 2. MOVi32imm + ADDWrr ==> ADDWri + ADDWri
//    MOVi64imm + ADDXrr ==> ADDXri + ADDXri
//
// 3. MOVi32imm + SUBWrr ==> SUBWri + SUBWri
//    MOVi64imm + SUBXrr ==> SUBXri + SUBXri
//
//    The mov pseudo instruction could be expanded to multiple mov instructions
//    later. In this case, we could try to split the constant operand of mov
//    instruction into two immediates which can be directly encoded into
//    *Wri/*Xri instructions. It makes two AND/ADD/SUB instructions instead of
//    multiple `mov` + `and/add/sub` instructions.
//
// 4. Remove redundant ORRWrs which is generated by zero-extend.
//
//    %3:gpr32 = ORRWrs $wzr, %2, 0
//    %4:gpr64 = SUBREG_TO_REG 0, %3, %subreg.sub_32
//
//    If AArch64's 32-bit form of instruction defines the source operand of
//    ORRWrs, we can remove the ORRWrs because the upper 32 bits of the source
//    operand are set to zero.
34 // 35 //===----------------------------------------------------------------------===// 36 37 #include "AArch64ExpandImm.h" 38 #include "AArch64InstrInfo.h" 39 #include "MCTargetDesc/AArch64AddressingModes.h" 40 #include "llvm/ADT/Optional.h" 41 #include "llvm/ADT/SetVector.h" 42 #include "llvm/CodeGen/MachineDominators.h" 43 #include "llvm/CodeGen/MachineLoopInfo.h" 44 45 using namespace llvm; 46 47 #define DEBUG_TYPE "aarch64-mi-peephole-opt" 48 49 namespace { 50 51 struct AArch64MIPeepholeOpt : public MachineFunctionPass { 52 static char ID; 53 54 AArch64MIPeepholeOpt() : MachineFunctionPass(ID) { 55 initializeAArch64MIPeepholeOptPass(*PassRegistry::getPassRegistry()); 56 } 57 58 const AArch64InstrInfo *TII; 59 const AArch64RegisterInfo *TRI; 60 MachineLoopInfo *MLI; 61 MachineRegisterInfo *MRI; 62 63 using OpcodePair = std::pair<unsigned, unsigned>; 64 template <typename T> 65 using SplitAndOpcFunc = 66 std::function<Optional<OpcodePair>(T, unsigned, T &, T &)>; 67 using BuildMIFunc = 68 std::function<void(MachineInstr &, OpcodePair, unsigned, unsigned, 69 Register, Register, Register)>; 70 71 /// For instructions where an immediate operand could be split into two 72 /// separate immediate instructions, use the splitTwoPartImm two handle the 73 /// optimization. 74 /// 75 /// To implement, the following function types must be passed to 76 /// splitTwoPartImm. A SplitAndOpcFunc must be implemented that determines if 77 /// splitting the immediate is valid and returns the associated new opcode. A 78 /// BuildMIFunc must be implemented to build the two immediate instructions. 79 /// 80 /// Example Pattern (where IMM would require 2+ MOV instructions): 81 /// %dst = <Instr>rr %src IMM [...] 82 /// becomes: 83 /// %tmp = <Instr>ri %src (encode half IMM) [...] 84 /// %dst = <Instr>ri %tmp (encode half IMM) [...] 
85 template <typename T> 86 bool splitTwoPartImm(MachineInstr &MI, 87 SmallSetVector<MachineInstr *, 8> &ToBeRemoved, 88 SplitAndOpcFunc<T> SplitAndOpc, BuildMIFunc BuildInstr); 89 90 bool checkMovImmInstr(MachineInstr &MI, MachineInstr *&MovMI, 91 MachineInstr *&SubregToRegMI); 92 93 template <typename T> 94 bool visitADDSUB(unsigned PosOpc, unsigned NegOpc, MachineInstr &MI, 95 SmallSetVector<MachineInstr *, 8> &ToBeRemoved); 96 template <typename T> 97 bool visitADDSSUBS(OpcodePair PosOpcs, OpcodePair NegOpcs, MachineInstr &MI, 98 SmallSetVector<MachineInstr *, 8> &ToBeRemoved); 99 100 template <typename T> 101 bool visitAND(unsigned Opc, MachineInstr &MI, 102 SmallSetVector<MachineInstr *, 8> &ToBeRemoved); 103 bool visitORR(MachineInstr &MI, 104 SmallSetVector<MachineInstr *, 8> &ToBeRemoved); 105 bool runOnMachineFunction(MachineFunction &MF) override; 106 107 StringRef getPassName() const override { 108 return "AArch64 MI Peephole Optimization pass"; 109 } 110 111 void getAnalysisUsage(AnalysisUsage &AU) const override { 112 AU.setPreservesCFG(); 113 AU.addRequired<MachineLoopInfo>(); 114 MachineFunctionPass::getAnalysisUsage(AU); 115 } 116 }; 117 118 char AArch64MIPeepholeOpt::ID = 0; 119 120 } // end anonymous namespace 121 122 INITIALIZE_PASS(AArch64MIPeepholeOpt, "aarch64-mi-peephole-opt", 123 "AArch64 MI Peephole Optimization", false, false) 124 125 template <typename T> 126 static bool splitBitmaskImm(T Imm, unsigned RegSize, T &Imm1Enc, T &Imm2Enc) { 127 T UImm = static_cast<T>(Imm); 128 if (AArch64_AM::isLogicalImmediate(UImm, RegSize)) 129 return false; 130 131 // If this immediate can be handled by one instruction, do not split it. 132 SmallVector<AArch64_IMM::ImmInsnModel, 4> Insn; 133 AArch64_IMM::expandMOVImm(UImm, RegSize, Insn); 134 if (Insn.size() == 1) 135 return false; 136 137 // The bitmask immediate consists of consecutive ones. 
Let's say there is 138 // constant 0b00000000001000000000010000000000 which does not consist of 139 // consecutive ones. We can split it in to two bitmask immediate like 140 // 0b00000000001111111111110000000000 and 0b11111111111000000000011111111111. 141 // If we do AND with these two bitmask immediate, we can see original one. 142 unsigned LowestBitSet = countTrailingZeros(UImm); 143 unsigned HighestBitSet = Log2_64(UImm); 144 145 // Create a mask which is filled with one from the position of lowest bit set 146 // to the position of highest bit set. 147 T NewImm1 = (static_cast<T>(2) << HighestBitSet) - 148 (static_cast<T>(1) << LowestBitSet); 149 // Create a mask which is filled with one outside the position of lowest bit 150 // set and the position of highest bit set. 151 T NewImm2 = UImm | ~NewImm1; 152 153 // If the split value is not valid bitmask immediate, do not split this 154 // constant. 155 if (!AArch64_AM::isLogicalImmediate(NewImm2, RegSize)) 156 return false; 157 158 Imm1Enc = AArch64_AM::encodeLogicalImmediate(NewImm1, RegSize); 159 Imm2Enc = AArch64_AM::encodeLogicalImmediate(NewImm2, RegSize); 160 return true; 161 } 162 163 template <typename T> 164 bool AArch64MIPeepholeOpt::visitAND( 165 unsigned Opc, MachineInstr &MI, 166 SmallSetVector<MachineInstr *, 8> &ToBeRemoved) { 167 // Try below transformation. 168 // 169 // MOVi32imm + ANDWrr ==> ANDWri + ANDWri 170 // MOVi64imm + ANDXrr ==> ANDXri + ANDXri 171 // 172 // The mov pseudo instruction could be expanded to multiple mov instructions 173 // later. Let's try to split the constant operand of mov instruction into two 174 // bitmask immediates. It makes only two AND instructions intead of multiple 175 // mov + and instructions. 
176 177 return splitTwoPartImm<T>( 178 MI, ToBeRemoved, 179 [Opc](T Imm, unsigned RegSize, T &Imm0, T &Imm1) -> Optional<OpcodePair> { 180 if (splitBitmaskImm(Imm, RegSize, Imm0, Imm1)) 181 return std::make_pair(Opc, Opc); 182 return None; 183 }, 184 [&TII = TII](MachineInstr &MI, OpcodePair Opcode, unsigned Imm0, 185 unsigned Imm1, Register SrcReg, Register NewTmpReg, 186 Register NewDstReg) { 187 DebugLoc DL = MI.getDebugLoc(); 188 MachineBasicBlock *MBB = MI.getParent(); 189 BuildMI(*MBB, MI, DL, TII->get(Opcode.first), NewTmpReg) 190 .addReg(SrcReg) 191 .addImm(Imm0); 192 BuildMI(*MBB, MI, DL, TII->get(Opcode.second), NewDstReg) 193 .addReg(NewTmpReg) 194 .addImm(Imm1); 195 }); 196 } 197 198 bool AArch64MIPeepholeOpt::visitORR( 199 MachineInstr &MI, SmallSetVector<MachineInstr *, 8> &ToBeRemoved) { 200 // Check this ORR comes from below zero-extend pattern. 201 // 202 // def : Pat<(i64 (zext GPR32:$src)), 203 // (SUBREG_TO_REG (i32 0), (ORRWrs WZR, GPR32:$src, 0), sub_32)>; 204 if (MI.getOperand(3).getImm() != 0) 205 return false; 206 207 if (MI.getOperand(1).getReg() != AArch64::WZR) 208 return false; 209 210 MachineInstr *SrcMI = MRI->getUniqueVRegDef(MI.getOperand(2).getReg()); 211 if (!SrcMI) 212 return false; 213 214 // From https://developer.arm.com/documentation/dui0801/b/BABBGCAC 215 // 216 // When you use the 32-bit form of an instruction, the upper 32 bits of the 217 // source registers are ignored and the upper 32 bits of the destination 218 // register are set to zero. 219 // 220 // If AArch64's 32-bit form of instruction defines the source operand of 221 // zero-extend, we do not need the zero-extend. Let's check the MI's opcode is 222 // real AArch64 instruction and if it is not, do not process the opcode 223 // conservatively. 
224 if (SrcMI->getOpcode() <= TargetOpcode::GENERIC_OP_END) 225 return false; 226 227 Register DefReg = MI.getOperand(0).getReg(); 228 Register SrcReg = MI.getOperand(2).getReg(); 229 MRI->replaceRegWith(DefReg, SrcReg); 230 MRI->clearKillFlags(SrcReg); 231 // replaceRegWith changes MI's definition register. Keep it for SSA form until 232 // deleting MI. 233 MI.getOperand(0).setReg(DefReg); 234 ToBeRemoved.insert(&MI); 235 236 LLVM_DEBUG(dbgs() << "Removed: " << MI << "\n"); 237 238 return true; 239 } 240 241 template <typename T> 242 static bool splitAddSubImm(T Imm, unsigned RegSize, T &Imm0, T &Imm1) { 243 // The immediate must be in the form of ((imm0 << 12) + imm1), in which both 244 // imm0 and imm1 are non-zero 12-bit unsigned int. 245 if ((Imm & 0xfff000) == 0 || (Imm & 0xfff) == 0 || 246 (Imm & ~static_cast<T>(0xffffff)) != 0) 247 return false; 248 249 // The immediate can not be composed via a single instruction. 250 SmallVector<AArch64_IMM::ImmInsnModel, 4> Insn; 251 AArch64_IMM::expandMOVImm(Imm, RegSize, Insn); 252 if (Insn.size() == 1) 253 return false; 254 255 // Split Imm into (Imm0 << 12) + Imm1; 256 Imm0 = (Imm >> 12) & 0xfff; 257 Imm1 = Imm & 0xfff; 258 return true; 259 } 260 261 template <typename T> 262 bool AArch64MIPeepholeOpt::visitADDSUB( 263 unsigned PosOpc, unsigned NegOpc, MachineInstr &MI, 264 SmallSetVector<MachineInstr *, 8> &ToBeRemoved) { 265 // Try below transformation. 266 // 267 // MOVi32imm + ADDWrr ==> ADDWri + ADDWri 268 // MOVi64imm + ADDXrr ==> ADDXri + ADDXri 269 // 270 // MOVi32imm + SUBWrr ==> SUBWri + SUBWri 271 // MOVi64imm + SUBXrr ==> SUBXri + SUBXri 272 // 273 // The mov pseudo instruction could be expanded to multiple mov instructions 274 // later. Let's try to split the constant operand of mov instruction into two 275 // legal add/sub immediates. It makes only two ADD/SUB instructions intead of 276 // multiple `mov` + `and/sub` instructions. 
277 278 return splitTwoPartImm<T>( 279 MI, ToBeRemoved, 280 [PosOpc, NegOpc](T Imm, unsigned RegSize, T &Imm0, 281 T &Imm1) -> Optional<OpcodePair> { 282 if (splitAddSubImm(Imm, RegSize, Imm0, Imm1)) 283 return std::make_pair(PosOpc, PosOpc); 284 if (splitAddSubImm(-Imm, RegSize, Imm0, Imm1)) 285 return std::make_pair(NegOpc, NegOpc); 286 return None; 287 }, 288 [&TII = TII](MachineInstr &MI, OpcodePair Opcode, unsigned Imm0, 289 unsigned Imm1, Register SrcReg, Register NewTmpReg, 290 Register NewDstReg) { 291 DebugLoc DL = MI.getDebugLoc(); 292 MachineBasicBlock *MBB = MI.getParent(); 293 BuildMI(*MBB, MI, DL, TII->get(Opcode.first), NewTmpReg) 294 .addReg(SrcReg) 295 .addImm(Imm0) 296 .addImm(12); 297 BuildMI(*MBB, MI, DL, TII->get(Opcode.second), NewDstReg) 298 .addReg(NewTmpReg) 299 .addImm(Imm1) 300 .addImm(0); 301 }); 302 } 303 304 template <typename T> 305 bool AArch64MIPeepholeOpt::visitADDSSUBS( 306 OpcodePair PosOpcs, OpcodePair NegOpcs, MachineInstr &MI, 307 SmallSetVector<MachineInstr *, 8> &ToBeRemoved) { 308 // Try the same transformation as ADDSUB but with additional requirement 309 // that the condition code usages are only for Equal and Not Equal 310 return splitTwoPartImm<T>( 311 MI, ToBeRemoved, 312 [PosOpcs, NegOpcs, &MI, &TRI = TRI, &MRI = MRI]( 313 T Imm, unsigned RegSize, T &Imm0, T &Imm1) -> Optional<OpcodePair> { 314 OpcodePair OP; 315 if (splitAddSubImm(Imm, RegSize, Imm0, Imm1)) 316 OP = PosOpcs; 317 else if (splitAddSubImm(-Imm, RegSize, Imm0, Imm1)) 318 OP = NegOpcs; 319 else 320 return None; 321 // Check conditional uses last since it is expensive for scanning 322 // proceeding instructions 323 MachineInstr &SrcMI = *MRI->getUniqueVRegDef(MI.getOperand(1).getReg()); 324 Optional<UsedNZCV> NZCVUsed = examineCFlagsUse(SrcMI, MI, *TRI); 325 if (!NZCVUsed || NZCVUsed->C || NZCVUsed->V) 326 return None; 327 return OP; 328 }, 329 [&TII = TII](MachineInstr &MI, OpcodePair Opcode, unsigned Imm0, 330 unsigned Imm1, Register SrcReg, Register 
NewTmpReg, 331 Register NewDstReg) { 332 DebugLoc DL = MI.getDebugLoc(); 333 MachineBasicBlock *MBB = MI.getParent(); 334 BuildMI(*MBB, MI, DL, TII->get(Opcode.first), NewTmpReg) 335 .addReg(SrcReg) 336 .addImm(Imm0) 337 .addImm(12); 338 BuildMI(*MBB, MI, DL, TII->get(Opcode.second), NewDstReg) 339 .addReg(NewTmpReg) 340 .addImm(Imm1) 341 .addImm(0); 342 }); 343 } 344 345 // Checks if the corresponding MOV immediate instruction is applicable for 346 // this peephole optimization. 347 bool AArch64MIPeepholeOpt::checkMovImmInstr(MachineInstr &MI, 348 MachineInstr *&MovMI, 349 MachineInstr *&SubregToRegMI) { 350 // Check whether current MBB is in loop and the AND is loop invariant. 351 MachineBasicBlock *MBB = MI.getParent(); 352 MachineLoop *L = MLI->getLoopFor(MBB); 353 if (L && !L->isLoopInvariant(MI)) 354 return false; 355 356 // Check whether current MI's operand is MOV with immediate. 357 MovMI = MRI->getUniqueVRegDef(MI.getOperand(2).getReg()); 358 if (!MovMI) 359 return false; 360 361 // If it is SUBREG_TO_REG, check its operand. 362 SubregToRegMI = nullptr; 363 if (MovMI->getOpcode() == TargetOpcode::SUBREG_TO_REG) { 364 SubregToRegMI = MovMI; 365 MovMI = MRI->getUniqueVRegDef(MovMI->getOperand(2).getReg()); 366 if (!MovMI) 367 return false; 368 } 369 370 if (MovMI->getOpcode() != AArch64::MOVi32imm && 371 MovMI->getOpcode() != AArch64::MOVi64imm) 372 return false; 373 374 // If the MOV has multiple uses, do not split the immediate because it causes 375 // more instructions. 376 if (!MRI->hasOneUse(MovMI->getOperand(0).getReg())) 377 return false; 378 if (SubregToRegMI && !MRI->hasOneUse(SubregToRegMI->getOperand(0).getReg())) 379 return false; 380 381 // It is OK to perform this peephole optimization. 
382 return true; 383 } 384 385 template <typename T> 386 bool AArch64MIPeepholeOpt::splitTwoPartImm( 387 MachineInstr &MI, SmallSetVector<MachineInstr *, 8> &ToBeRemoved, 388 SplitAndOpcFunc<T> SplitAndOpc, BuildMIFunc BuildInstr) { 389 unsigned RegSize = sizeof(T) * 8; 390 assert((RegSize == 32 || RegSize == 64) && 391 "Invalid RegSize for legal immediate peephole optimization"); 392 393 // Perform several essential checks against current MI. 394 MachineInstr *MovMI, *SubregToRegMI; 395 if (!checkMovImmInstr(MI, MovMI, SubregToRegMI)) 396 return false; 397 398 // Split the immediate to Imm0 and Imm1, and calculate the Opcode. 399 T Imm = static_cast<T>(MovMI->getOperand(1).getImm()), Imm0, Imm1; 400 // For the 32 bit form of instruction, the upper 32 bits of the destination 401 // register are set to zero. If there is SUBREG_TO_REG, set the upper 32 bits 402 // of Imm to zero. This is essential if the Immediate value was a negative 403 // number since it was sign extended when we assign to the 64-bit Imm. 404 if (SubregToRegMI) 405 Imm &= 0xFFFFFFFF; 406 OpcodePair Opcode; 407 if (auto R = SplitAndOpc(Imm, RegSize, Imm0, Imm1)) 408 Opcode = R.getValue(); 409 else 410 return false; 411 412 // Create new MIs using the first and second opcodes. Opcodes might differ for 413 // flag setting operations that should only set flags on second instruction. 414 // NewTmpReg = Opcode.first SrcReg Imm0 415 // NewDstReg = Opcode.second NewTmpReg Imm1 416 417 // Determine register classes for destinations and register operands 418 MachineFunction *MF = MI.getMF(); 419 const TargetRegisterClass *FirstInstrDstRC = 420 TII->getRegClass(TII->get(Opcode.first), 0, TRI, *MF); 421 const TargetRegisterClass *FirstInstrOperandRC = 422 TII->getRegClass(TII->get(Opcode.first), 1, TRI, *MF); 423 const TargetRegisterClass *SecondInstrDstRC = 424 (Opcode.first == Opcode.second) 425 ? 
FirstInstrDstRC 426 : TII->getRegClass(TII->get(Opcode.second), 0, TRI, *MF); 427 const TargetRegisterClass *SecondInstrOperandRC = 428 (Opcode.first == Opcode.second) 429 ? FirstInstrOperandRC 430 : TII->getRegClass(TII->get(Opcode.second), 1, TRI, *MF); 431 432 // Get old registers destinations and new register destinations 433 Register DstReg = MI.getOperand(0).getReg(); 434 Register SrcReg = MI.getOperand(1).getReg(); 435 Register NewTmpReg = MRI->createVirtualRegister(FirstInstrDstRC); 436 // In the situation that DstReg is not Virtual (likely WZR or XZR), we want to 437 // reuse that same destination register. 438 Register NewDstReg = DstReg.isVirtual() 439 ? MRI->createVirtualRegister(SecondInstrDstRC) 440 : DstReg; 441 442 // Constrain registers based on their new uses 443 MRI->constrainRegClass(SrcReg, FirstInstrOperandRC); 444 MRI->constrainRegClass(NewTmpReg, SecondInstrOperandRC); 445 if (DstReg != NewDstReg) 446 MRI->constrainRegClass(NewDstReg, MRI->getRegClass(DstReg)); 447 448 // Call the delegating operation to build the instruction 449 BuildInstr(MI, Opcode, Imm0, Imm1, SrcReg, NewTmpReg, NewDstReg); 450 451 // replaceRegWith changes MI's definition register. Keep it for SSA form until 452 // deleting MI. Only if we made a new destination register. 453 if (DstReg != NewDstReg) { 454 MRI->replaceRegWith(DstReg, NewDstReg); 455 MI.getOperand(0).setReg(DstReg); 456 } 457 458 // Record the MIs need to be removed. 
459 ToBeRemoved.insert(&MI); 460 if (SubregToRegMI) 461 ToBeRemoved.insert(SubregToRegMI); 462 ToBeRemoved.insert(MovMI); 463 464 return true; 465 } 466 467 bool AArch64MIPeepholeOpt::runOnMachineFunction(MachineFunction &MF) { 468 if (skipFunction(MF.getFunction())) 469 return false; 470 471 TII = static_cast<const AArch64InstrInfo *>(MF.getSubtarget().getInstrInfo()); 472 TRI = static_cast<const AArch64RegisterInfo *>( 473 MF.getSubtarget().getRegisterInfo()); 474 MLI = &getAnalysis<MachineLoopInfo>(); 475 MRI = &MF.getRegInfo(); 476 477 assert(MRI->isSSA() && "Expected to be run on SSA form!"); 478 479 bool Changed = false; 480 SmallSetVector<MachineInstr *, 8> ToBeRemoved; 481 482 for (MachineBasicBlock &MBB : MF) { 483 for (MachineInstr &MI : MBB) { 484 switch (MI.getOpcode()) { 485 default: 486 break; 487 case AArch64::ANDWrr: 488 Changed = visitAND<uint32_t>(AArch64::ANDWri, MI, ToBeRemoved); 489 break; 490 case AArch64::ANDXrr: 491 Changed = visitAND<uint64_t>(AArch64::ANDXri, MI, ToBeRemoved); 492 break; 493 case AArch64::ORRWrs: 494 Changed = visitORR(MI, ToBeRemoved); 495 break; 496 case AArch64::ADDWrr: 497 Changed = visitADDSUB<uint32_t>(AArch64::ADDWri, AArch64::SUBWri, MI, 498 ToBeRemoved); 499 break; 500 case AArch64::SUBWrr: 501 Changed = visitADDSUB<uint32_t>(AArch64::SUBWri, AArch64::ADDWri, MI, 502 ToBeRemoved); 503 break; 504 case AArch64::ADDXrr: 505 Changed = visitADDSUB<uint64_t>(AArch64::ADDXri, AArch64::SUBXri, MI, 506 ToBeRemoved); 507 break; 508 case AArch64::SUBXrr: 509 Changed = visitADDSUB<uint64_t>(AArch64::SUBXri, AArch64::ADDXri, MI, 510 ToBeRemoved); 511 break; 512 case AArch64::ADDSWrr: 513 Changed = visitADDSSUBS<uint32_t>({AArch64::ADDWri, AArch64::ADDSWri}, 514 {AArch64::SUBWri, AArch64::SUBSWri}, 515 MI, ToBeRemoved); 516 break; 517 case AArch64::SUBSWrr: 518 Changed = visitADDSSUBS<uint32_t>({AArch64::SUBWri, AArch64::SUBSWri}, 519 {AArch64::ADDWri, AArch64::ADDSWri}, 520 MI, ToBeRemoved); 521 break; 522 case 
AArch64::ADDSXrr: 523 Changed = visitADDSSUBS<uint64_t>({AArch64::ADDXri, AArch64::ADDSXri}, 524 {AArch64::SUBXri, AArch64::SUBSXri}, 525 MI, ToBeRemoved); 526 break; 527 case AArch64::SUBSXrr: 528 Changed = visitADDSSUBS<uint64_t>({AArch64::SUBXri, AArch64::SUBSXri}, 529 {AArch64::ADDXri, AArch64::ADDSXri}, 530 MI, ToBeRemoved); 531 break; 532 } 533 } 534 } 535 536 for (MachineInstr *MI : ToBeRemoved) 537 MI->eraseFromParent(); 538 539 return Changed; 540 } 541 542 FunctionPass *llvm::createAArch64MIPeepholeOptPass() { 543 return new AArch64MIPeepholeOpt(); 544 } 545