//===- AArch64InstrInfo.cpp - AArch64 Instruction Information ------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file contains the AArch64 implementation of the TargetInstrInfo class.
//
//===----------------------------------------------------------------------===//

#include "AArch64InstrInfo.h"
#include "AArch64MachineFunctionInfo.h"
#include "AArch64Subtarget.h"
#include "MCTargetDesc/AArch64AddressingModes.h"
#include "Utils/AArch64BaseInfo.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/StackMaps.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstBuilder.h"
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CodeGen.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
#include <cassert>
#include <cstdint>
#include <iterator>
#include <utility>

using namespace llvm;

#define GET_INSTRINFO_CTOR_DTOR
#include "AArch64GenInstrInfo.inc"

static cl::opt<unsigned> TBZDisplacementBits(
    "aarch64-tbz-offset-bits", cl::Hidden, cl::init(14),
    cl::desc("Restrict range of TB[N]Z instructions (DEBUG)"));

static cl::opt<unsigned> CBZDisplacementBits(
    "aarch64-cbz-offset-bits", cl::Hidden, cl::init(19),
    cl::desc("Restrict range of CB[N]Z instructions (DEBUG)"));

static cl::opt<unsigned>
    BCCDisplacementBits("aarch64-bcc-offset-bits", cl::Hidden, cl::init(19),
                        cl::desc("Restrict range of Bcc instructions (DEBUG)"));

AArch64InstrInfo::AArch64InstrInfo(const AArch64Subtarget &STI)
    : AArch64GenInstrInfo(AArch64::ADJCALLSTACKDOWN, AArch64::ADJCALLSTACKUP,
                          AArch64::CATCHRET),
      RI(STI.getTargetTriple()), Subtarget(STI) {}

/// GetInstSize - Return the number of bytes of code the specified
/// instruction may be. This returns the maximum number of bytes.
unsigned AArch64InstrInfo::getInstSizeInBytes(const MachineInstr &MI) const {
  const MachineBasicBlock &MBB = *MI.getParent();
  const MachineFunction *MF = MBB.getParent();
  const MCAsmInfo *MAI = MF->getTarget().getMCAsmInfo();

  {
    auto Op = MI.getOpcode();
    if (Op == AArch64::INLINEASM || Op == AArch64::INLINEASM_BR)
      return getInlineAsmLength(MI.getOperand(0).getSymbolName(), *MAI);
  }

  // Meta-instructions emit no code.
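  // (These are bookkeeping-only instructions such as DBG_VALUE,
  // CFI_INSTRUCTION, IMPLICIT_DEF and KILL, which never reach the encoder.)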
  if (MI.isMetaInstruction())
    return 0;

  // FIXME: We currently only handle pseudoinstructions that don't get expanded
  // before the assembly printer.
  unsigned NumBytes = 0;
  const MCInstrDesc &Desc = MI.getDesc();
  switch (Desc.getOpcode()) {
  default:
    // Anything not explicitly designated otherwise is a normal 4-byte insn.
    NumBytes = 4;
    break;
  case TargetOpcode::STACKMAP:
    // The upper bound for a stackmap intrinsic is the full length of its
    // shadow.
    NumBytes = StackMapOpers(&MI).getNumPatchBytes();
    assert(NumBytes % 4 == 0 && "Invalid number of NOP bytes requested!");
    break;
  case TargetOpcode::PATCHPOINT:
    // The size of the patchpoint intrinsic is the number of bytes requested.
    NumBytes = PatchPointOpers(&MI).getNumPatchBytes();
    assert(NumBytes % 4 == 0 && "Invalid number of NOP bytes requested!");
    break;
  case TargetOpcode::STATEPOINT:
    NumBytes = StatepointOpers(&MI).getNumPatchBytes();
    assert(NumBytes % 4 == 0 && "Invalid number of NOP bytes requested!");
    // No patch bytes means a normal call inst is emitted.
    if (NumBytes == 0)
      NumBytes = 4;
    break;
  case AArch64::TLSDESC_CALLSEQ:
    // This gets lowered to an instruction sequence which takes 16 bytes.
    NumBytes = 16;
    break;
  case AArch64::SpeculationBarrierISBDSBEndBB:
    // This gets lowered to 2 4-byte instructions.
    NumBytes = 8;
    break;
  case AArch64::SpeculationBarrierSBEndBB:
    // This gets lowered to a single 4-byte instruction.
    NumBytes = 4;
    break;
  case AArch64::JumpTableDest32:
  case AArch64::JumpTableDest16:
  case AArch64::JumpTableDest8:
    NumBytes = 12;
    break;
  case AArch64::SPACE:
    NumBytes = MI.getOperand(1).getImm();
    break;
  case AArch64::StoreSwiftAsyncContext:
    NumBytes = 20;
    break;
  case TargetOpcode::BUNDLE:
    NumBytes = getInstBundleLength(MI);
    break;
  }

  return NumBytes;
}

unsigned AArch64InstrInfo::getInstBundleLength(const MachineInstr &MI) const {
  unsigned Size = 0;
  MachineBasicBlock::const_instr_iterator I = MI.getIterator();
  MachineBasicBlock::const_instr_iterator E = MI.getParent()->instr_end();
  while (++I != E && I->isInsideBundle()) {
    assert(!I->isBundle() && "No nested bundle!");
    Size += getInstSizeInBytes(*I);
  }
  return Size;
}

static void parseCondBranch(MachineInstr *LastInst, MachineBasicBlock *&Target,
                            SmallVectorImpl<MachineOperand> &Cond) {
  // Block ends with fall-through condbranch.
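  // The Cond vector encodes which kind of conditional branch ended the block:
  //   Bcc:          {cc}
  //   CB(N)Z[WX]:   {-1, opcode, reg}
  //   TB(N)Z[WX]:   {-1, opcode, reg, bit#}
  // Consumers such as insertBranch() and insertSelect() rely on this layout.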
  switch (LastInst->getOpcode()) {
  default:
    llvm_unreachable("Unknown branch instruction?");
  case AArch64::Bcc:
    Target = LastInst->getOperand(1).getMBB();
    Cond.push_back(LastInst->getOperand(0));
    break;
  case AArch64::CBZW:
  case AArch64::CBZX:
  case AArch64::CBNZW:
  case AArch64::CBNZX:
    Target = LastInst->getOperand(1).getMBB();
    Cond.push_back(MachineOperand::CreateImm(-1));
    Cond.push_back(MachineOperand::CreateImm(LastInst->getOpcode()));
    Cond.push_back(LastInst->getOperand(0));
    break;
  case AArch64::TBZW:
  case AArch64::TBZX:
  case AArch64::TBNZW:
  case AArch64::TBNZX:
    Target = LastInst->getOperand(2).getMBB();
    Cond.push_back(MachineOperand::CreateImm(-1));
    Cond.push_back(MachineOperand::CreateImm(LastInst->getOpcode()));
    Cond.push_back(LastInst->getOperand(0));
    Cond.push_back(LastInst->getOperand(1));
  }
}

static unsigned getBranchDisplacementBits(unsigned Opc) {
  switch (Opc) {
  default:
    llvm_unreachable("unexpected opcode!");
  case AArch64::B:
    return 64;
  case AArch64::TBNZW:
  case AArch64::TBZW:
  case AArch64::TBNZX:
  case AArch64::TBZX:
    return TBZDisplacementBits;
  case AArch64::CBNZW:
  case AArch64::CBZW:
  case AArch64::CBNZX:
  case AArch64::CBZX:
    return CBZDisplacementBits;
  case AArch64::Bcc:
    return BCCDisplacementBits;
  }
}

bool AArch64InstrInfo::isBranchOffsetInRange(unsigned BranchOp,
                                             int64_t BrOffset) const {
  unsigned Bits = getBranchDisplacementBits(BranchOp);
  assert(Bits >= 3 && "max branch displacement must be enough to jump "
                      "over conditional branch expansion");
  // Branch displacements are encoded in units of 4-byte instructions.
  return isIntN(Bits, BrOffset / 4);
}

MachineBasicBlock *
AArch64InstrInfo::getBranchDestBlock(const MachineInstr &MI) const {
  switch (MI.getOpcode()) {
  default:
    llvm_unreachable("unexpected opcode!");
  case AArch64::B:
    return MI.getOperand(0).getMBB();
  case AArch64::TBZW:
  case AArch64::TBNZW:
  case AArch64::TBZX:
  case AArch64::TBNZX:
    return MI.getOperand(2).getMBB();
  case AArch64::CBZW:
  case AArch64::CBNZW:
  case AArch64::CBZX:
  case AArch64::CBNZX:
  case AArch64::Bcc:
    return MI.getOperand(1).getMBB();
  }
}

// Branch analysis.
bool AArch64InstrInfo::analyzeBranch(MachineBasicBlock &MBB,
                                     MachineBasicBlock *&TBB,
                                     MachineBasicBlock *&FBB,
                                     SmallVectorImpl<MachineOperand> &Cond,
                                     bool AllowModify) const {
  // If the block has no terminators, it just falls into the block after it.
  MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr();
  if (I == MBB.end())
    return false;

  // Skip over SpeculationBarrierEndBB terminators.
  if (I->getOpcode() == AArch64::SpeculationBarrierISBDSBEndBB ||
      I->getOpcode() == AArch64::SpeculationBarrierSBEndBB) {
    --I;
  }

  if (!isUnpredicatedTerminator(*I))
    return false;

  // Get the last instruction in the block.
  MachineInstr *LastInst = &*I;

  // If there is only one terminator instruction, process it.
  unsigned LastOpc = LastInst->getOpcode();
  if (I == MBB.begin() || !isUnpredicatedTerminator(*--I)) {
    if (isUncondBranchOpcode(LastOpc)) {
      TBB = LastInst->getOperand(0).getMBB();
      return false;
    }
    if (isCondBranchOpcode(LastOpc)) {
      // Block ends with fall-through condbranch.
      parseCondBranch(LastInst, TBB, Cond);
      return false;
    }
    return true; // Can't handle indirect branch.
  }

  // Get the instruction before it if it is a terminator.
  MachineInstr *SecondLastInst = &*I;
  unsigned SecondLastOpc = SecondLastInst->getOpcode();

  // If AllowModify is true and the block ends with two or more unconditional
  // branches, delete all but the first unconditional branch.
  if (AllowModify && isUncondBranchOpcode(LastOpc)) {
    while (isUncondBranchOpcode(SecondLastOpc)) {
      LastInst->eraseFromParent();
      LastInst = SecondLastInst;
      LastOpc = LastInst->getOpcode();
      if (I == MBB.begin() || !isUnpredicatedTerminator(*--I)) {
        // Return now; the only remaining terminator is an unconditional branch.
        TBB = LastInst->getOperand(0).getMBB();
        return false;
      } else {
        SecondLastInst = &*I;
        SecondLastOpc = SecondLastInst->getOpcode();
      }
    }
  }

  // If we're allowed to modify and the block ends in an unconditional branch
  // which could simply fall through, remove the branch. (Note: This case only
  // matters when we can't understand the whole sequence, otherwise it's also
  // handled by BranchFolding.cpp.)
  if (AllowModify && isUncondBranchOpcode(LastOpc) &&
      MBB.isLayoutSuccessor(getBranchDestBlock(*LastInst))) {
    LastInst->eraseFromParent();
    LastInst = SecondLastInst;
    LastOpc = LastInst->getOpcode();
    if (I == MBB.begin() || !isUnpredicatedTerminator(*--I)) {
      assert(!isUncondBranchOpcode(LastOpc) &&
             "unreachable unconditional branches removed above");

      if (isCondBranchOpcode(LastOpc)) {
        // Block ends with fall-through condbranch.
        parseCondBranch(LastInst, TBB, Cond);
        return false;
      }
      return true; // Can't handle indirect branch.
    } else {
      SecondLastInst = &*I;
      SecondLastOpc = SecondLastInst->getOpcode();
    }
  }

  // If there are three terminators, we don't know what sort of block this is.
  if (SecondLastInst && I != MBB.begin() && isUnpredicatedTerminator(*--I))
    return true;

  // If the block ends with a B and a Bcc, handle it.
  if (isCondBranchOpcode(SecondLastOpc) && isUncondBranchOpcode(LastOpc)) {
    parseCondBranch(SecondLastInst, TBB, Cond);
    FBB = LastInst->getOperand(0).getMBB();
    return false;
  }

  // If the block ends with two unconditional branches, handle it. The second
  // one is not executed, so remove it.
  if (isUncondBranchOpcode(SecondLastOpc) && isUncondBranchOpcode(LastOpc)) {
    TBB = SecondLastInst->getOperand(0).getMBB();
    I = LastInst;
    if (AllowModify)
      I->eraseFromParent();
    return false;
  }

  // ...likewise if it ends with an indirect branch followed by an
  // unconditional branch.
  if (isIndirectBranchOpcode(SecondLastOpc) && isUncondBranchOpcode(LastOpc)) {
    I = LastInst;
    if (AllowModify)
      I->eraseFromParent();
    return true;
  }

  // Otherwise, can't handle this.
  return true;
}

bool AArch64InstrInfo::analyzeBranchPredicate(MachineBasicBlock &MBB,
                                              MachineBranchPredicate &MBP,
                                              bool AllowModify) const {
  // For the moment, handle only a block which ends with a cb(n)zx followed by
  // a fallthrough. Why this? Because it is a common form.
  // TODO: Should we handle b.cc?
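  // For example, a block ending in
  //   cbnz x0, %truebb
  //   ; fall through to %falsebb
  // is reported as { LHS = x0, RHS = #0, Predicate = PRED_NE }.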

  MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr();
  if (I == MBB.end())
    return true;

  // Skip over SpeculationBarrierEndBB terminators.
  if (I->getOpcode() == AArch64::SpeculationBarrierISBDSBEndBB ||
      I->getOpcode() == AArch64::SpeculationBarrierSBEndBB) {
    --I;
  }

  if (!isUnpredicatedTerminator(*I))
    return true;

  // Get the last instruction in the block.
  MachineInstr *LastInst = &*I;
  unsigned LastOpc = LastInst->getOpcode();
  if (!isCondBranchOpcode(LastOpc))
    return true;

  switch (LastOpc) {
  default:
    return true;
  case AArch64::CBZW:
  case AArch64::CBZX:
  case AArch64::CBNZW:
  case AArch64::CBNZX:
    break;
  }

  MBP.TrueDest = LastInst->getOperand(1).getMBB();
  assert(MBP.TrueDest && "expected!");
  MBP.FalseDest = MBB.getNextNode();

  MBP.ConditionDef = nullptr;
  MBP.SingleUseCondition = false;

  MBP.LHS = LastInst->getOperand(0);
  MBP.RHS = MachineOperand::CreateImm(0);
  MBP.Predicate = LastOpc == AArch64::CBNZX ? MachineBranchPredicate::PRED_NE
                                            : MachineBranchPredicate::PRED_EQ;
  return false;
}

bool AArch64InstrInfo::reverseBranchCondition(
    SmallVectorImpl<MachineOperand> &Cond) const {
  if (Cond[0].getImm() != -1) {
    // Regular Bcc
    AArch64CC::CondCode CC = (AArch64CC::CondCode)(int)Cond[0].getImm();
    Cond[0].setImm(AArch64CC::getInvertedCondCode(CC));
  } else {
    // Folded compare-and-branch
    switch (Cond[1].getImm()) {
    default:
      llvm_unreachable("Unknown conditional branch!");
    case AArch64::CBZW:
      Cond[1].setImm(AArch64::CBNZW);
      break;
    case AArch64::CBNZW:
      Cond[1].setImm(AArch64::CBZW);
      break;
    case AArch64::CBZX:
      Cond[1].setImm(AArch64::CBNZX);
      break;
    case AArch64::CBNZX:
      Cond[1].setImm(AArch64::CBZX);
      break;
    case AArch64::TBZW:
      Cond[1].setImm(AArch64::TBNZW);
      break;
    case AArch64::TBNZW:
      Cond[1].setImm(AArch64::TBZW);
      break;
    case AArch64::TBZX:
      Cond[1].setImm(AArch64::TBNZX);
      break;
    case AArch64::TBNZX:
      Cond[1].setImm(AArch64::TBZX);
      break;
    }
  }

  return false;
}

unsigned AArch64InstrInfo::removeBranch(MachineBasicBlock &MBB,
                                        int *BytesRemoved) const {
  MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr();
  if (I == MBB.end())
    return 0;

  if (!isUncondBranchOpcode(I->getOpcode()) &&
      !isCondBranchOpcode(I->getOpcode()))
    return 0;

  // Remove the branch.
  I->eraseFromParent();

  I = MBB.end();

  if (I == MBB.begin()) {
    if (BytesRemoved)
      *BytesRemoved = 4;
    return 1;
  }
  --I;
  if (!isCondBranchOpcode(I->getOpcode())) {
    if (BytesRemoved)
      *BytesRemoved = 4;
    return 1;
  }

  // Remove the branch.
  I->eraseFromParent();
  if (BytesRemoved)
    *BytesRemoved = 8;

  return 2;
}

void AArch64InstrInfo::instantiateCondBranch(
    MachineBasicBlock &MBB, const DebugLoc &DL, MachineBasicBlock *TBB,
    ArrayRef<MachineOperand> Cond) const {
  if (Cond[0].getImm() != -1) {
    // Regular Bcc
    BuildMI(&MBB, DL, get(AArch64::Bcc)).addImm(Cond[0].getImm()).addMBB(TBB);
  } else {
    // Folded compare-and-branch
    // Note that we use addOperand instead of addReg to keep the flags.
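    // For example, Cond = {-1, TBNZW, %reg, 3} is re-materialized as
    //   TBNZW %reg, 3, <TBB>
    // preserving any register flags carried on the original operand.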
    const MachineInstrBuilder MIB =
        BuildMI(&MBB, DL, get(Cond[1].getImm())).add(Cond[2]);
    if (Cond.size() > 3)
      MIB.addImm(Cond[3].getImm());
    MIB.addMBB(TBB);
  }
}

unsigned AArch64InstrInfo::insertBranch(
    MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB,
    ArrayRef<MachineOperand> Cond, const DebugLoc &DL, int *BytesAdded) const {
  // Shouldn't be a fall through.
  assert(TBB && "insertBranch must not be told to insert a fallthrough");

  if (!FBB) {
    if (Cond.empty()) // Unconditional branch?
      BuildMI(&MBB, DL, get(AArch64::B)).addMBB(TBB);
    else
      instantiateCondBranch(MBB, DL, TBB, Cond);

    if (BytesAdded)
      *BytesAdded = 4;

    return 1;
  }

  // Two-way conditional branch.
  instantiateCondBranch(MBB, DL, TBB, Cond);
  BuildMI(&MBB, DL, get(AArch64::B)).addMBB(FBB);

  if (BytesAdded)
    *BytesAdded = 8;

  return 2;
}

// Find the original register that VReg is copied from.
static unsigned removeCopies(const MachineRegisterInfo &MRI, unsigned VReg) {
  while (Register::isVirtualRegister(VReg)) {
    const MachineInstr *DefMI = MRI.getVRegDef(VReg);
    if (!DefMI->isFullCopy())
      return VReg;
    VReg = DefMI->getOperand(1).getReg();
  }
  return VReg;
}

// Determine if VReg is defined by an instruction that can be folded into a
// csel instruction. If so, return the folded opcode, and the replacement
// register.
static unsigned canFoldIntoCSel(const MachineRegisterInfo &MRI, unsigned VReg,
                                unsigned *NewVReg = nullptr) {
  VReg = removeCopies(MRI, VReg);
  if (!Register::isVirtualRegister(VReg))
    return 0;

  bool Is64Bit = AArch64::GPR64allRegClass.hasSubClassEq(MRI.getRegClass(VReg));
  const MachineInstr *DefMI = MRI.getVRegDef(VReg);
  unsigned Opc = 0;
  unsigned SrcOpNum = 0;
  switch (DefMI->getOpcode()) {
  case AArch64::ADDSXri:
  case AArch64::ADDSWri:
    // if NZCV is used, do not fold.
    if (DefMI->findRegisterDefOperandIdx(AArch64::NZCV, true) == -1)
      return 0;
    // fall-through to ADDXri and ADDWri.
    LLVM_FALLTHROUGH;
  case AArch64::ADDXri:
  case AArch64::ADDWri:
    // add x, 1 -> csinc.
    if (!DefMI->getOperand(2).isImm() || DefMI->getOperand(2).getImm() != 1 ||
        DefMI->getOperand(3).getImm() != 0)
      return 0;
    SrcOpNum = 1;
    Opc = Is64Bit ? AArch64::CSINCXr : AArch64::CSINCWr;
    break;

  case AArch64::ORNXrr:
  case AArch64::ORNWrr: {
    // not x -> csinv, represented as orn dst, xzr, src.
    unsigned ZReg = removeCopies(MRI, DefMI->getOperand(1).getReg());
    if (ZReg != AArch64::XZR && ZReg != AArch64::WZR)
      return 0;
    SrcOpNum = 2;
    Opc = Is64Bit ? AArch64::CSINVXr : AArch64::CSINVWr;
    break;
  }

  case AArch64::SUBSXrr:
  case AArch64::SUBSWrr:
    // if NZCV is used, do not fold.
    if (DefMI->findRegisterDefOperandIdx(AArch64::NZCV, true) == -1)
      return 0;
    // fall-through to SUBXrr and SUBWrr.
    LLVM_FALLTHROUGH;
  case AArch64::SUBXrr:
  case AArch64::SUBWrr: {
    // neg x -> csneg, represented as sub dst, xzr, src.
    unsigned ZReg = removeCopies(MRI, DefMI->getOperand(1).getReg());
    if (ZReg != AArch64::XZR && ZReg != AArch64::WZR)
      return 0;
    SrcOpNum = 2;
    Opc = Is64Bit ? AArch64::CSNEGXr : AArch64::CSNEGWr;
    break;
  }
  default:
    return 0;
  }
  assert(Opc && SrcOpNum && "Missing parameters");

  if (NewVReg)
    *NewVReg = DefMI->getOperand(SrcOpNum).getReg();
  return Opc;
}

bool AArch64InstrInfo::canInsertSelect(const MachineBasicBlock &MBB,
                                       ArrayRef<MachineOperand> Cond,
                                       Register DstReg, Register TrueReg,
                                       Register FalseReg, int &CondCycles,
                                       int &TrueCycles,
                                       int &FalseCycles) const {
  // Check register classes.
  const MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
  const TargetRegisterClass *RC =
      RI.getCommonSubClass(MRI.getRegClass(TrueReg), MRI.getRegClass(FalseReg));
  if (!RC)
    return false;

  // Also need to check the dest regclass, in case we're trying to optimize
  // something like:
  //   %1(gpr) = PHI %2(fpr), bb1, %3(fpr), bb2
  if (!RI.getCommonSubClass(RC, MRI.getRegClass(DstReg)))
    return false;

  // Expanding cbz/tbz requires an extra cycle of latency on the condition.
  unsigned ExtraCondLat = Cond.size() != 1;

  // GPRs are handled by csel.
  // FIXME: Fold in x+1, -x, and ~x when applicable.
  if (AArch64::GPR64allRegClass.hasSubClassEq(RC) ||
      AArch64::GPR32allRegClass.hasSubClassEq(RC)) {
    // Single-cycle csel, csinc, csinv, and csneg.
    CondCycles = 1 + ExtraCondLat;
    TrueCycles = FalseCycles = 1;
    if (canFoldIntoCSel(MRI, TrueReg))
      TrueCycles = 0;
    else if (canFoldIntoCSel(MRI, FalseReg))
      FalseCycles = 0;
    return true;
  }

  // Scalar floating point is handled by fcsel.
  // FIXME: Form fabs, fmin, and fmax when applicable.
  if (AArch64::FPR64RegClass.hasSubClassEq(RC) ||
      AArch64::FPR32RegClass.hasSubClassEq(RC)) {
    CondCycles = 5 + ExtraCondLat;
    TrueCycles = FalseCycles = 2;
    return true;
  }

  // Can't do vectors.
  return false;
}

void AArch64InstrInfo::insertSelect(MachineBasicBlock &MBB,
                                    MachineBasicBlock::iterator I,
                                    const DebugLoc &DL, Register DstReg,
                                    ArrayRef<MachineOperand> Cond,
                                    Register TrueReg, Register FalseReg) const {
  MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();

  // Parse the condition code, see parseCondBranch() above.
  AArch64CC::CondCode CC;
  switch (Cond.size()) {
  default:
    llvm_unreachable("Unknown condition opcode in Cond");
  case 1: // b.cc
    CC = AArch64CC::CondCode(Cond[0].getImm());
    break;
  case 3: { // cbz/cbnz
    // We must insert a compare against 0.
    bool Is64Bit;
    switch (Cond[1].getImm()) {
    default:
      llvm_unreachable("Unknown branch opcode in Cond");
    case AArch64::CBZW:
      Is64Bit = false;
      CC = AArch64CC::EQ;
      break;
    case AArch64::CBZX:
      Is64Bit = true;
      CC = AArch64CC::EQ;
      break;
    case AArch64::CBNZW:
      Is64Bit = false;
      CC = AArch64CC::NE;
      break;
    case AArch64::CBNZX:
      Is64Bit = true;
      CC = AArch64CC::NE;
      break;
    }
    Register SrcReg = Cond[2].getReg();
    if (Is64Bit) {
      // cmp reg, #0 is actually subs xzr, reg, #0.
      MRI.constrainRegClass(SrcReg, &AArch64::GPR64spRegClass);
      BuildMI(MBB, I, DL, get(AArch64::SUBSXri), AArch64::XZR)
          .addReg(SrcReg)
          .addImm(0)
          .addImm(0);
    } else {
      MRI.constrainRegClass(SrcReg, &AArch64::GPR32spRegClass);
      BuildMI(MBB, I, DL, get(AArch64::SUBSWri), AArch64::WZR)
          .addReg(SrcReg)
          .addImm(0)
          .addImm(0);
    }
    break;
  }
  case 4: { // tbz/tbnz
    // We must insert a tst instruction.
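    // For example, materializing the condition of "tbz w8, #3" emits
    //   ands wzr, w8, #0x8
    // and selects on CC = EQ (the tested bit was clear).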
    switch (Cond[1].getImm()) {
    default:
      llvm_unreachable("Unknown branch opcode in Cond");
    case AArch64::TBZW:
    case AArch64::TBZX:
      CC = AArch64CC::EQ;
      break;
    case AArch64::TBNZW:
    case AArch64::TBNZX:
      CC = AArch64CC::NE;
      break;
    }
    // cmp reg, #foo is actually ands xzr, reg, #1<<foo.
    if (Cond[1].getImm() == AArch64::TBZW || Cond[1].getImm() == AArch64::TBNZW)
      BuildMI(MBB, I, DL, get(AArch64::ANDSWri), AArch64::WZR)
          .addReg(Cond[2].getReg())
          .addImm(
              AArch64_AM::encodeLogicalImmediate(1ull << Cond[3].getImm(), 32));
    else
      BuildMI(MBB, I, DL, get(AArch64::ANDSXri), AArch64::XZR)
          .addReg(Cond[2].getReg())
          .addImm(
              AArch64_AM::encodeLogicalImmediate(1ull << Cond[3].getImm(), 64));
    break;
  }
  }

  unsigned Opc = 0;
  const TargetRegisterClass *RC = nullptr;
  bool TryFold = false;
  if (MRI.constrainRegClass(DstReg, &AArch64::GPR64RegClass)) {
    RC = &AArch64::GPR64RegClass;
    Opc = AArch64::CSELXr;
    TryFold = true;
  } else if (MRI.constrainRegClass(DstReg, &AArch64::GPR32RegClass)) {
    RC = &AArch64::GPR32RegClass;
    Opc = AArch64::CSELWr;
    TryFold = true;
  } else if (MRI.constrainRegClass(DstReg, &AArch64::FPR64RegClass)) {
    RC = &AArch64::FPR64RegClass;
    Opc = AArch64::FCSELDrrr;
  } else if (MRI.constrainRegClass(DstReg, &AArch64::FPR32RegClass)) {
    RC = &AArch64::FPR32RegClass;
    Opc = AArch64::FCSELSrrr;
  }
  assert(RC && "Unsupported regclass");

  // Try folding simple instructions into the csel.
  if (TryFold) {
    unsigned NewVReg = 0;
    unsigned FoldedOpc = canFoldIntoCSel(MRI, TrueReg, &NewVReg);
    if (FoldedOpc) {
      // The folded opcodes csinc, csinv and csneg apply the operation to
      // FalseReg, so we need to invert the condition.
      CC = AArch64CC::getInvertedCondCode(CC);
      TrueReg = FalseReg;
    } else
      FoldedOpc = canFoldIntoCSel(MRI, FalseReg, &NewVReg);

    // Fold the operation. Leave any dead instructions for DCE to clean up.
    if (FoldedOpc) {
      FalseReg = NewVReg;
      Opc = FoldedOpc;
      // This extends the live range of NewVReg.
      MRI.clearKillFlags(NewVReg);
    }
  }

  // Pull all virtual registers into the appropriate class.
  MRI.constrainRegClass(TrueReg, RC);
  MRI.constrainRegClass(FalseReg, RC);

  // Insert the csel.
  BuildMI(MBB, I, DL, get(Opc), DstReg)
      .addReg(TrueReg)
      .addReg(FalseReg)
      .addImm(CC);
}

/// Returns true if a MOVi32imm or MOVi64imm can be expanded to an ORRxx.
static bool canBeExpandedToORR(const MachineInstr &MI, unsigned BitSize) {
  uint64_t Imm = MI.getOperand(1).getImm();
  uint64_t UImm = Imm << (64 - BitSize) >> (64 - BitSize);
  uint64_t Encoding;
  return AArch64_AM::processLogicalImmediate(UImm, BitSize, Encoding);
}

// FIXME: this implementation should be micro-architecture dependent, so a
// micro-architecture target hook should be introduced here in future.
bool AArch64InstrInfo::isAsCheapAsAMove(const MachineInstr &MI) const {
  if (!Subtarget.hasCustomCheapAsMoveHandling())
    return MI.isAsCheapAsAMove();

  const unsigned Opcode = MI.getOpcode();

  // Firstly, check cases gated by features.
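  // For example, on cores with zero-cycle FP zeroing the FMOV*0 zeroing
  // pseudos are free, and with zero-cycle GP zeroing a COPY from WZR/XZR
  // costs nothing.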

  if (Subtarget.hasZeroCycleZeroingFP()) {
    if (Opcode == AArch64::FMOVH0 ||
        Opcode == AArch64::FMOVS0 ||
        Opcode == AArch64::FMOVD0)
      return true;
  }

  if (Subtarget.hasZeroCycleZeroingGP()) {
    if (Opcode == TargetOpcode::COPY &&
        (MI.getOperand(1).getReg() == AArch64::WZR ||
         MI.getOperand(1).getReg() == AArch64::XZR))
      return true;
  }

  // Secondly, check cases specific to sub-targets.

  if (Subtarget.hasExynosCheapAsMoveHandling()) {
    if (isExynosCheapAsMove(MI))
      return true;

    return MI.isAsCheapAsAMove();
  }

  // Finally, check generic cases.

  switch (Opcode) {
  default:
    return false;

  // add/sub on register without shift
  case AArch64::ADDWri:
  case AArch64::ADDXri:
  case AArch64::SUBWri:
  case AArch64::SUBXri:
    return (MI.getOperand(3).getImm() == 0);

  // logical ops on immediate
  case AArch64::ANDWri:
  case AArch64::ANDXri:
  case AArch64::EORWri:
  case AArch64::EORXri:
  case AArch64::ORRWri:
  case AArch64::ORRXri:
    return true;

  // logical ops on register without shift
  case AArch64::ANDWrr:
  case AArch64::ANDXrr:
  case AArch64::BICWrr:
  case AArch64::BICXrr:
  case AArch64::EONWrr:
  case AArch64::EONXrr:
  case AArch64::EORWrr:
  case AArch64::EORXrr:
  case AArch64::ORNWrr:
  case AArch64::ORNXrr:
  case AArch64::ORRWrr:
  case AArch64::ORRXrr:
    return true;

  // If MOVi32imm or MOVi64imm can be expanded into ORRWri or
  // ORRXri, it is as cheap as MOV.
  case AArch64::MOVi32imm:
    return canBeExpandedToORR(MI, 32);
  case AArch64::MOVi64imm:
    return canBeExpandedToORR(MI, 64);
  }

  llvm_unreachable("Unknown opcode to check as cheap as a move!");
}

bool AArch64InstrInfo::isFalkorShiftExtFast(const MachineInstr &MI) {
  switch (MI.getOpcode()) {
  default:
    return false;

  case AArch64::ADDWrs:
  case AArch64::ADDXrs:
  case AArch64::ADDSWrs:
  case AArch64::ADDSXrs: {
    unsigned Imm = MI.getOperand(3).getImm();
    unsigned ShiftVal = AArch64_AM::getShiftValue(Imm);
    if (ShiftVal == 0)
      return true;
    return AArch64_AM::getShiftType(Imm) == AArch64_AM::LSL && ShiftVal <= 5;
  }

  case AArch64::ADDWrx:
  case AArch64::ADDXrx:
  case AArch64::ADDXrx64:
  case AArch64::ADDSWrx:
  case AArch64::ADDSXrx:
  case AArch64::ADDSXrx64: {
    unsigned Imm = MI.getOperand(3).getImm();
    switch (AArch64_AM::getArithExtendType(Imm)) {
    default:
      return false;
    case AArch64_AM::UXTB:
    case AArch64_AM::UXTH:
    case AArch64_AM::UXTW:
    case AArch64_AM::UXTX:
      return AArch64_AM::getArithShiftValue(Imm) <= 4;
    }
  }

  case AArch64::SUBWrs:
  case AArch64::SUBSWrs: {
    unsigned Imm = MI.getOperand(3).getImm();
    unsigned ShiftVal = AArch64_AM::getShiftValue(Imm);
    return ShiftVal == 0 ||
           (AArch64_AM::getShiftType(Imm) == AArch64_AM::ASR && ShiftVal == 31);
  }

  case AArch64::SUBXrs:
  case AArch64::SUBSXrs: {
    unsigned Imm = MI.getOperand(3).getImm();
    unsigned ShiftVal = AArch64_AM::getShiftValue(Imm);
    return ShiftVal == 0 ||
           (AArch64_AM::getShiftType(Imm) == AArch64_AM::ASR && ShiftVal == 63);
  }

  case AArch64::SUBWrx:
  case AArch64::SUBXrx:
  case AArch64::SUBXrx64:
  case AArch64::SUBSWrx:
  case AArch64::SUBSXrx:
  case AArch64::SUBSXrx64: {
    unsigned Imm = MI.getOperand(3).getImm();
    switch (AArch64_AM::getArithExtendType(Imm)) {
    default:
      return false;
    case AArch64_AM::UXTB:
    case AArch64_AM::UXTH:
    case AArch64_AM::UXTW:
    case AArch64_AM::UXTX:
      return AArch64_AM::getArithShiftValue(Imm) == 0;
    }
  }

  case AArch64::LDRBBroW:
  case AArch64::LDRBBroX:
  case AArch64::LDRBroW:
  case AArch64::LDRBroX:
  case AArch64::LDRDroW:
  case AArch64::LDRDroX:
  case AArch64::LDRHHroW:
  case AArch64::LDRHHroX:
  case AArch64::LDRHroW:
  case AArch64::LDRHroX:
  case AArch64::LDRQroW:
  case AArch64::LDRQroX:
  case AArch64::LDRSBWroW:
  case AArch64::LDRSBWroX:
  case AArch64::LDRSBXroW:
  case AArch64::LDRSBXroX:
  case AArch64::LDRSHWroW:
  case AArch64::LDRSHWroX:
  case AArch64::LDRSHXroW:
  case AArch64::LDRSHXroX:
  case AArch64::LDRSWroW:
  case AArch64::LDRSWroX:
  case AArch64::LDRSroW:
  case AArch64::LDRSroX:
  case AArch64::LDRWroW:
  case AArch64::LDRWroX:
  case AArch64::LDRXroW:
  case AArch64::LDRXroX:
  case AArch64::PRFMroW:
  case AArch64::PRFMroX:
  case AArch64::STRBBroW:
  case AArch64::STRBBroX:
  case AArch64::STRBroW:
  case AArch64::STRBroX:
  case AArch64::STRDroW:
  case AArch64::STRDroX:
  case AArch64::STRHHroW:
  case AArch64::STRHHroX:
  case AArch64::STRHroW:
  case AArch64::STRHroX:
  case AArch64::STRQroW:
  case AArch64::STRQroX:
  case AArch64::STRSroW:
  case AArch64::STRSroX:
  case AArch64::STRWroW:
  case AArch64::STRWroX:
  case AArch64::STRXroW:
  case AArch64::STRXroX: {
    unsigned IsSigned = MI.getOperand(3).getImm();
    return !IsSigned;
  }
  }
}

bool AArch64InstrInfo::isSEHInstruction(const MachineInstr &MI) {
  unsigned Opc = MI.getOpcode();
  switch (Opc) {
  default:
    return false;
  case AArch64::SEH_StackAlloc:
  case AArch64::SEH_SaveFPLR:
  case AArch64::SEH_SaveFPLR_X:
  case AArch64::SEH_SaveReg:
  case AArch64::SEH_SaveReg_X:
  case AArch64::SEH_SaveRegP:
  case AArch64::SEH_SaveRegP_X:
  case AArch64::SEH_SaveFReg:
  case AArch64::SEH_SaveFReg_X:
  case AArch64::SEH_SaveFRegP:
  case AArch64::SEH_SaveFRegP_X:
  case AArch64::SEH_SetFP:
  case AArch64::SEH_AddFP:
  case AArch64::SEH_Nop:
  case AArch64::SEH_PrologEnd:
  case AArch64::SEH_EpilogStart:
  case AArch64::SEH_EpilogEnd:
    return true;
  }
}

bool AArch64InstrInfo::isCoalescableExtInstr(const MachineInstr &MI,
                                             Register &SrcReg, Register &DstReg,
                                             unsigned &SubIdx) const {
  switch (MI.getOpcode()) {
  default:
    return false;
  case AArch64::SBFMXri: // aka sxtw
  case AArch64::UBFMXri: // aka uxtw
    // Check for the 32 -> 64 bit extension case; these instructions can do
    // much more.
    if (MI.getOperand(2).getImm() != 0 || MI.getOperand(3).getImm() != 31)
      return false;
    // This is a signed or unsigned 32 -> 64 bit extension.
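    // i.e. an sxtw/uxtw of the low 32 bits, so the register coalescer may
    // treat it as a copy with a sub_32 sub-register index.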
    SrcReg = MI.getOperand(1).getReg();
    DstReg = MI.getOperand(0).getReg();
    SubIdx = AArch64::sub_32;
    return true;
  }
}

bool AArch64InstrInfo::areMemAccessesTriviallyDisjoint(
    const MachineInstr &MIa, const MachineInstr &MIb) const {
  const TargetRegisterInfo *TRI = &getRegisterInfo();
  const MachineOperand *BaseOpA = nullptr, *BaseOpB = nullptr;
  int64_t OffsetA = 0, OffsetB = 0;
  unsigned WidthA = 0, WidthB = 0;
  bool OffsetAIsScalable = false, OffsetBIsScalable = false;

  assert(MIa.mayLoadOrStore() && "MIa must be a load or store.");
  assert(MIb.mayLoadOrStore() && "MIb must be a load or store.");

  if (MIa.hasUnmodeledSideEffects() || MIb.hasUnmodeledSideEffects() ||
      MIa.hasOrderedMemoryRef() || MIb.hasOrderedMemoryRef())
    return false;

  // Retrieve the base, the offset from the base, and the width. Width
  // is the size of memory that is being loaded/stored (e.g. 1, 2, 4, 8). If
  // the bases are identical, and the offset of the lower memory access plus
  // its width doesn't overlap the offset of the higher memory access,
  // then the memory accesses are different.
  // If OffsetAIsScalable and OffsetBIsScalable are both true, they
  // are assumed to have the same scale (vscale).
  if (getMemOperandWithOffsetWidth(MIa, BaseOpA, OffsetA, OffsetAIsScalable,
                                   WidthA, TRI) &&
      getMemOperandWithOffsetWidth(MIb, BaseOpB, OffsetB, OffsetBIsScalable,
                                   WidthB, TRI)) {
    if (BaseOpA->isIdenticalTo(*BaseOpB) &&
        OffsetAIsScalable == OffsetBIsScalable) {
      int LowOffset = OffsetA < OffsetB ? OffsetA : OffsetB;
      int HighOffset = OffsetA < OffsetB ? OffsetB : OffsetA;
      int LowWidth = (LowOffset == OffsetA) ? WidthA : WidthB;
      if (LowOffset + LowWidth <= HighOffset)
        return true;
    }
  }
  return false;
}

bool AArch64InstrInfo::isSchedulingBoundary(const MachineInstr &MI,
                                            const MachineBasicBlock *MBB,
                                            const MachineFunction &MF) const {
  if (TargetInstrInfo::isSchedulingBoundary(MI, MBB, MF))
    return true;
  switch (MI.getOpcode()) {
  case AArch64::HINT:
    // CSDB hints are scheduling barriers.
    if (MI.getOperand(0).getImm() == 0x14)
      return true;
    break;
  case AArch64::DSB:
  case AArch64::ISB:
    // DSB and ISB also are scheduling barriers.
    return true;
  default:;
  }
  return isSEHInstruction(MI);
}

/// analyzeCompare - For a comparison instruction, return the source registers
/// in SrcReg and SrcReg2, and the value it compares against in CmpValue.
/// Return true if the comparison instruction can be analyzed.
bool AArch64InstrInfo::analyzeCompare(const MachineInstr &MI, Register &SrcReg,
                                      Register &SrcReg2, int64_t &CmpMask,
                                      int64_t &CmpValue) const {
  // The first operand can be a frame index where we'd normally expect a
  // register.
  assert(MI.getNumOperands() >= 2 && "All AArch64 cmps should have 2 operands");
  if (!MI.getOperand(1).isReg())
    return false;

  switch (MI.getOpcode()) {
  default:
    break;
  case AArch64::PTEST_PP:
    SrcReg = MI.getOperand(0).getReg();
    SrcReg2 = MI.getOperand(1).getReg();
    // Not sure about the mask and value for now...
    CmpMask = ~0;
    CmpValue = 0;
    return true;
  case AArch64::SUBSWrr:
  case AArch64::SUBSWrs:
  case AArch64::SUBSWrx:
  case AArch64::SUBSXrr:
  case AArch64::SUBSXrs:
  case AArch64::SUBSXrx:
  case AArch64::ADDSWrr:
  case AArch64::ADDSWrs:
  case AArch64::ADDSWrx:
  case AArch64::ADDSXrr:
  case AArch64::ADDSXrs:
  case AArch64::ADDSXrx:
    // Replace SUBSWrr with SUBWrr if NZCV is not used.
    SrcReg = MI.getOperand(1).getReg();
    SrcReg2 = MI.getOperand(2).getReg();
    CmpMask = ~0;
    CmpValue = 0;
    return true;
  case AArch64::SUBSWri:
  case AArch64::ADDSWri:
  case AArch64::SUBSXri:
  case AArch64::ADDSXri:
    SrcReg = MI.getOperand(1).getReg();
    SrcReg2 = 0;
    CmpMask = ~0;
    CmpValue = MI.getOperand(2).getImm();
    return true;
  case AArch64::ANDSWri:
  case AArch64::ANDSXri:
    // ANDS does not use the same encoding scheme as the other xxxS
    // instructions.
    SrcReg = MI.getOperand(1).getReg();
    SrcReg2 = 0;
    CmpMask = ~0;
    CmpValue = AArch64_AM::decodeLogicalImmediate(
        MI.getOperand(2).getImm(),
        MI.getOpcode() == AArch64::ANDSWri ? 32 : 64);
    return true;
  }

  return false;
}

static bool UpdateOperandRegClass(MachineInstr &Instr) {
  MachineBasicBlock *MBB = Instr.getParent();
  assert(MBB && "Can't get MachineBasicBlock here");
  MachineFunction *MF = MBB->getParent();
  assert(MF && "Can't get MachineFunction here");
  const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
  const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();
  MachineRegisterInfo *MRI = &MF->getRegInfo();

  for (unsigned OpIdx = 0, EndIdx = Instr.getNumOperands(); OpIdx < EndIdx;
       ++OpIdx) {
    MachineOperand &MO = Instr.getOperand(OpIdx);
    const TargetRegisterClass *OpRegCstraints =
        Instr.getRegClassConstraint(OpIdx, TII, TRI);

    // If there's no constraint, there's nothing to do.
    if (!OpRegCstraints)
      continue;
    // If the operand is a frame index, there's nothing to do here.
    // A frame index operand will resolve correctly during PEI.
    if (MO.isFI())
      continue;

    assert(MO.isReg() &&
           "Operand has register constraints without being a register!");

    Register Reg = MO.getReg();
    if (Register::isPhysicalRegister(Reg)) {
      if (!OpRegCstraints->contains(Reg))
        return false;
    } else if (!OpRegCstraints->hasSubClassEq(MRI->getRegClass(Reg)) &&
               !MRI->constrainRegClass(Reg, OpRegCstraints))
      return false;
  }

  return true;
}

/// Return the opcode that does not set flags when possible - otherwise
/// return the original opcode. The caller is responsible to do the actual
/// substitution and legality checking.
static unsigned convertToNonFlagSettingOpc(const MachineInstr &MI) {
  // Don't convert all compare instructions, because for some the zero register
  // encoding becomes the sp register.
  bool MIDefinesZeroReg = false;
  if (MI.definesRegister(AArch64::WZR) || MI.definesRegister(AArch64::XZR))
    MIDefinesZeroReg = true;

  switch (MI.getOpcode()) {
  default:
    return MI.getOpcode();
  case AArch64::ADDSWrr:
    return AArch64::ADDWrr;
  case AArch64::ADDSWri:
    return MIDefinesZeroReg ? AArch64::ADDSWri : AArch64::ADDWri;
  case AArch64::ADDSWrs:
    return MIDefinesZeroReg ? AArch64::ADDSWrs : AArch64::ADDWrs;
  case AArch64::ADDSWrx:
    return AArch64::ADDWrx;
  case AArch64::ADDSXrr:
    return AArch64::ADDXrr;
  case AArch64::ADDSXri:
    return MIDefinesZeroReg ? AArch64::ADDSXri : AArch64::ADDXri;
  case AArch64::ADDSXrs:
    return MIDefinesZeroReg ? AArch64::ADDSXrs : AArch64::ADDXrs;
  case AArch64::ADDSXrx:
    return AArch64::ADDXrx;
  case AArch64::SUBSWrr:
    return AArch64::SUBWrr;
  case AArch64::SUBSWri:
    return MIDefinesZeroReg ? AArch64::SUBSWri : AArch64::SUBWri;
  case AArch64::SUBSWrs:
    return MIDefinesZeroReg ? AArch64::SUBSWrs : AArch64::SUBWrs;
  case AArch64::SUBSWrx:
    return AArch64::SUBWrx;
  case AArch64::SUBSXrr:
    return AArch64::SUBXrr;
  case AArch64::SUBSXri:
    return MIDefinesZeroReg ? AArch64::SUBSXri : AArch64::SUBXri;
  case AArch64::SUBSXrs:
    return MIDefinesZeroReg ? AArch64::SUBSXrs : AArch64::SUBXrs;
  case AArch64::SUBSXrx:
    return AArch64::SUBXrx;
  }
}

enum AccessKind { AK_Write = 0x01, AK_Read = 0x10, AK_All = 0x11 };

/// True when condition flags are accessed (either by writing or reading)
/// on the instruction trace starting at From and ending at To.
///
/// Note: If From and To are from different blocks it's assumed the condition
/// flags are accessed on the path.
static bool areCFlagsAccessedBetweenInstrs(
    MachineBasicBlock::iterator From, MachineBasicBlock::iterator To,
    const TargetRegisterInfo *TRI, const AccessKind AccessToCheck = AK_All) {
  // Early exit if To is at the beginning of the BB.
  if (To == To->getParent()->begin())
    return true;

  // Check whether the instructions are in the same basic block.
  // If not, assume the condition flags might get modified somewhere.
  if (To->getParent() != From->getParent())
    return true;

  // From must be above To.
  assert(std::any_of(
      ++To.getReverse(), To->getParent()->rend(),
      [From](MachineInstr &MI) { return MI.getIterator() == From; }));

  // We iterate backward starting at \p To until we hit \p From.
  for (const MachineInstr &Instr :
       instructionsWithoutDebug(++To.getReverse(), From.getReverse())) {
    if (((AccessToCheck & AK_Write) &&
         Instr.modifiesRegister(AArch64::NZCV, TRI)) ||
        ((AccessToCheck & AK_Read) && Instr.readsRegister(AArch64::NZCV, TRI)))
      return true;
  }
  return false;
}

/// optimizePTestInstr - Attempt to remove a ptest of a predicate-generating
/// operation which could set the flags in an identical manner.
bool AArch64InstrInfo::optimizePTestInstr(
    MachineInstr *PTest, unsigned MaskReg, unsigned PredReg,
    const MachineRegisterInfo *MRI) const {
  auto *Mask = MRI->getUniqueVRegDef(MaskReg);
  auto *Pred = MRI->getUniqueVRegDef(PredReg);
  auto NewOp = Pred->getOpcode();
  bool OpChanged = false;

  unsigned MaskOpcode = Mask->getOpcode();
  unsigned PredOpcode = Pred->getOpcode();
  bool PredIsPTestLike = isPTestLikeOpcode(PredOpcode);
  bool PredIsWhileLike = isWhileOpcode(PredOpcode);

  if (isPTrueOpcode(MaskOpcode) && (PredIsPTestLike || PredIsWhileLike)) {
    // For PTEST(PTRUE, OTHER_INST), PTEST is redundant when PTRUE doesn't
    // deactivate any lanes OTHER_INST might set.
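    // For example:
    //   ptrue   p0.s, all
    //   whilelo p1.s, x0, x1
    //   ptest   p0, p1.b
    // Here the while instruction already sets NZCV the way the ptest would,
    // so the ptest can be dropped (subject to the element-size check below).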
    uint64_t MaskElementSize = getElementSizeForOpcode(MaskOpcode);
    uint64_t PredElementSize = getElementSizeForOpcode(PredOpcode);

    // Must be an all active predicate of matching element size.
    if ((PredElementSize != MaskElementSize) ||
        (Mask->getOperand(1).getImm() != 31))
      return false;

    // Fallthrough to simply remove the PTEST.
  } else if ((Mask == Pred) && (PredIsPTestLike || PredIsWhileLike)) {
    // For PTEST(PG, PG), PTEST is redundant when PG is the result of an
    // instruction that sets the flags as PTEST would.

    // Fallthrough to simply remove the PTEST.
  } else if (PredIsPTestLike) {
    // For PTEST(PG_1, PTEST_LIKE(PG2, ...)), PTEST is redundant when both
    // instructions use the same predicate.
    auto PTestLikeMask = MRI->getUniqueVRegDef(Pred->getOperand(1).getReg());
    if (Mask != PTestLikeMask)
      return false;

    // Fallthrough to simply remove the PTEST.
  } else {
    switch (Pred->getOpcode()) {
    case AArch64::BRKB_PPzP:
    case AArch64::BRKPB_PPzPP: {
      // Op 0 is chain, 1 is the mask, 2 the previous predicate to
      // propagate, 3 the new predicate.

      // Check to see if our mask is the same as the brkpb's. If
      // not the resulting flag bits may be different and we
      // can't remove the ptest.
      auto *PredMask = MRI->getUniqueVRegDef(Pred->getOperand(1).getReg());
      if (Mask != PredMask)
        return false;

      // Switch to the new opcode.
      NewOp = Pred->getOpcode() == AArch64::BRKB_PPzP ? AArch64::BRKBS_PPzP
                                                      : AArch64::BRKPBS_PPzPP;
      OpChanged = true;
      break;
    }
    case AArch64::BRKN_PPzP: {
      auto *PredMask = MRI->getUniqueVRegDef(Pred->getOperand(1).getReg());
      if (Mask != PredMask)
        return false;

      NewOp = AArch64::BRKNS_PPzP;
      OpChanged = true;
      break;
    }
    case AArch64::RDFFR_PPz: {
      // rdffr p1.b, PredMask=p0/z <--- Definition of Pred
      // ptest Mask=p0, Pred=p1.b  <--- If equal masks, remove this and use
      //                                `rdffrs p1.b, p0/z` above.
      auto *PredMask = MRI->getUniqueVRegDef(Pred->getOperand(1).getReg());
      if (Mask != PredMask)
        return false;

      NewOp = AArch64::RDFFRS_PPz;
      OpChanged = true;
      break;
    }
    default:
      // Bail out if we don't recognize the input.
      return false;
    }
  }

  const TargetRegisterInfo *TRI = &getRegisterInfo();

  // If another instruction between Pred and PTest accesses flags, don't remove
  // the ptest or update the earlier instruction to modify them.
  if (areCFlagsAccessedBetweenInstrs(Pred, PTest, TRI))
    return false;

  // If we pass all the checks, it's safe to remove the PTEST and use the flags
  // as they are prior to PTEST. Sometimes this requires the tested PTEST
  // operand to be replaced with an equivalent instruction that also sets the
  // flags.
  Pred->setDesc(get(NewOp));
  PTest->eraseFromParent();
  if (OpChanged) {
    bool succeeded = UpdateOperandRegClass(*Pred);
    (void)succeeded;
    assert(succeeded && "Operands have incompatible register classes!");
    Pred->addRegisterDefined(AArch64::NZCV, TRI);
  }

  // Ensure that the flags def is live.
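  // (If the flag-setting def was previously unused it may still be marked
  // dead; clear that flag so later passes keep it alive.)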
  if (Pred->registerDefIsDead(AArch64::NZCV, TRI)) {
    unsigned i = 0, e = Pred->getNumOperands();
    for (; i != e; ++i) {
      MachineOperand &MO = Pred->getOperand(i);
      if (MO.isReg() && MO.isDef() && MO.getReg() == AArch64::NZCV) {
        MO.setIsDead(false);
        break;
      }
    }
  }
  return true;
}

/// Try to optimize a compare instruction. A compare instruction is an
/// instruction which produces AArch64::NZCV. It is truly a compare instruction
/// only when there are no uses of its destination register.
///
/// The following steps are tried in order:
/// 1. Convert CmpInstr into an unconditional version.
/// 2. Remove CmpInstr if, above it, there is an instruction producing a needed
///    condition code or an instruction which can be converted into such an
///    instruction.
/// Only comparison with zero is supported.
bool AArch64InstrInfo::optimizeCompareInstr(
    MachineInstr &CmpInstr, Register SrcReg, Register SrcReg2, int64_t CmpMask,
    int64_t CmpValue, const MachineRegisterInfo *MRI) const {
  assert(CmpInstr.getParent());
  assert(MRI);

  // Replace SUBSWrr with SUBWrr if NZCV is not used.
  int DeadNZCVIdx = CmpInstr.findRegisterDefOperandIdx(AArch64::NZCV, true);
  if (DeadNZCVIdx != -1) {
    if (CmpInstr.definesRegister(AArch64::WZR) ||
        CmpInstr.definesRegister(AArch64::XZR)) {
      CmpInstr.eraseFromParent();
      return true;
    }
    unsigned Opc = CmpInstr.getOpcode();
    unsigned NewOpc = convertToNonFlagSettingOpc(CmpInstr);
    if (NewOpc == Opc)
      return false;
    const MCInstrDesc &MCID = get(NewOpc);
    CmpInstr.setDesc(MCID);
    CmpInstr.RemoveOperand(DeadNZCVIdx);
    bool succeeded = UpdateOperandRegClass(CmpInstr);
    (void)succeeded;
    assert(succeeded && "Some operands reg class are incompatible!");
    return true;
  }

  if (CmpInstr.getOpcode() == AArch64::PTEST_PP)
    return optimizePTestInstr(&CmpInstr, SrcReg, SrcReg2, MRI);

  if (SrcReg2 != 0)
    return false;

  // CmpInstr is a Compare instruction if destination register is not used.
  if (!MRI->use_nodbg_empty(CmpInstr.getOperand(0).getReg()))
    return false;

  if (CmpValue == 0 && substituteCmpToZero(CmpInstr, SrcReg, *MRI))
    return true;
  return (CmpValue == 0 || CmpValue == 1) &&
         removeCmpToZeroOrOne(CmpInstr, SrcReg, CmpValue, *MRI);
}

/// Get opcode of S version of Instr.
/// If Instr is S version its opcode is returned.
/// AArch64::INSTRUCTION_LIST_END is returned if Instr does not have S version
/// or we are not interested in it.
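/// For example, ADDWri maps to ADDSWri and ANDXri maps to ANDSXri, while
/// opcodes with no flag-setting twin handled here yield INSTRUCTION_LIST_END.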
static unsigned sForm(MachineInstr &Instr) {
  switch (Instr.getOpcode()) {
  default:
    return AArch64::INSTRUCTION_LIST_END;

  case AArch64::ADDSWrr:
  case AArch64::ADDSWri:
  case AArch64::ADDSXrr:
  case AArch64::ADDSXri:
  case AArch64::SUBSWrr:
  case AArch64::SUBSWri:
  case AArch64::SUBSXrr:
  case AArch64::SUBSXri:
    return Instr.getOpcode();

  case AArch64::ADDWrr:
    return AArch64::ADDSWrr;
  case AArch64::ADDWri:
    return AArch64::ADDSWri;
  case AArch64::ADDXrr:
    return AArch64::ADDSXrr;
  case AArch64::ADDXri:
    return AArch64::ADDSXri;
  case AArch64::ADCWr:
    return AArch64::ADCSWr;
  case AArch64::ADCXr:
    return AArch64::ADCSXr;
  case AArch64::SUBWrr:
    return AArch64::SUBSWrr;
  case AArch64::SUBWri:
    return AArch64::SUBSWri;
  case AArch64::SUBXrr:
    return AArch64::SUBSXrr;
  case AArch64::SUBXri:
    return AArch64::SUBSXri;
  case AArch64::SBCWr:
    return AArch64::SBCSWr;
  case AArch64::SBCXr:
    return AArch64::SBCSXr;
  case AArch64::ANDWri:
    return AArch64::ANDSWri;
  case AArch64::ANDXri:
    return AArch64::ANDSXri;
  }
}

/// Check if AArch64::NZCV should be alive in successors of MBB.
static bool areCFlagsAliveInSuccessors(const MachineBasicBlock *MBB) {
  for (auto *BB : MBB->successors())
    if (BB->isLiveIn(AArch64::NZCV))
      return true;
  return false;
}

/// \returns The condition code operand index for \p Instr if it is a branch
/// or select and -1 otherwise.
static int
findCondCodeUseOperandIdxForBranchOrSelect(const MachineInstr &Instr) {
  switch (Instr.getOpcode()) {
  default:
    return -1;

  case AArch64::Bcc: {
    int Idx = Instr.findRegisterUseOperandIdx(AArch64::NZCV);
    assert(Idx >= 2);
    return Idx - 2;
  }

  case AArch64::CSINVWr:
  case AArch64::CSINVXr:
  case AArch64::CSINCWr:
  case AArch64::CSINCXr:
  case AArch64::CSELWr:
  case AArch64::CSELXr:
  case AArch64::CSNEGWr:
  case AArch64::CSNEGXr:
  case AArch64::FCSELSrrr:
  case AArch64::FCSELDrrr: {
    int Idx = Instr.findRegisterUseOperandIdx(AArch64::NZCV);
    assert(Idx >= 1);
    return Idx - 1;
  }
  }
}

namespace {

struct UsedNZCV {
  bool N = false;
  bool Z = false;
  bool C = false;
  bool V = false;

  UsedNZCV() = default;

  UsedNZCV &operator|=(const UsedNZCV &UsedFlags) {
    this->N |= UsedFlags.N;
    this->Z |= UsedFlags.Z;
    this->C |= UsedFlags.C;
    this->V |= UsedFlags.V;
    return *this;
  }
};

} // end anonymous namespace

/// Find a condition code used by the instruction.
/// Returns AArch64CC::Invalid if either the instruction does not use condition
/// codes or we don't optimize CmpInstr in the presence of such instructions.
static AArch64CC::CondCode findCondCodeUsedByInstr(const MachineInstr &Instr) {
  int CCIdx = findCondCodeUseOperandIdxForBranchOrSelect(Instr);
  return CCIdx >= 0 ? static_cast<AArch64CC::CondCode>(
                          Instr.getOperand(CCIdx).getImm())
                    : AArch64CC::Invalid;
}

static UsedNZCV getUsedNZCV(AArch64CC::CondCode CC) {
  assert(CC != AArch64CC::Invalid);
  UsedNZCV UsedFlags;
  switch (CC) {
  default:
    break;

  case AArch64CC::EQ: // Z set
  case AArch64CC::NE: // Z clear
    UsedFlags.Z = true;
    break;

  case AArch64CC::HI: // Z clear and C set
  case AArch64CC::LS: // Z set or C clear
    UsedFlags.Z = true;
    LLVM_FALLTHROUGH;
  case AArch64CC::HS: // C set
  case AArch64CC::LO: // C clear
    UsedFlags.C = true;
    break;

  case AArch64CC::MI: // N set
  case AArch64CC::PL: // N clear
    UsedFlags.N = true;
    break;

  case AArch64CC::VS: // V set
  case AArch64CC::VC: // V clear
    UsedFlags.V = true;
    break;

  case AArch64CC::GT: // Z clear, N and V the same
  case AArch64CC::LE: // Z set, N and V differ
    UsedFlags.Z = true;
    LLVM_FALLTHROUGH;
  case AArch64CC::GE: // N and V the same
  case AArch64CC::LT: // N and V differ
    UsedFlags.N = true;
    UsedFlags.V = true;
    break;
  }
  return UsedFlags;
}

/// \returns The condition flags used after \p CmpInstr in its MachineBB if
/// they do not include the C or V flags and NZCV flags are not alive in the
/// successors of the block containing \p CmpInstr and \p MI. \returns None
/// otherwise.
///
/// Collect instructions using those flags in \p CCUseInstrs if provided.
static Optional<UsedNZCV>
examineCFlagsUse(MachineInstr &MI, MachineInstr &CmpInstr,
                 const TargetRegisterInfo &TRI,
                 SmallVectorImpl<MachineInstr *> *CCUseInstrs = nullptr) {
  MachineBasicBlock *CmpParent = CmpInstr.getParent();
  if (MI.getParent() != CmpParent)
    return None;

  if (areCFlagsAliveInSuccessors(CmpParent))
    return None;

  UsedNZCV NZCVUsedAfterCmp;
  for (MachineInstr &Instr : instructionsWithoutDebug(
           std::next(CmpInstr.getIterator()), CmpParent->instr_end())) {
    if (Instr.readsRegister(AArch64::NZCV, &TRI)) {
      AArch64CC::CondCode CC = findCondCodeUsedByInstr(Instr);
      if (CC == AArch64CC::Invalid) // Unsupported conditional instruction
        return None;
      NZCVUsedAfterCmp |= getUsedNZCV(CC);
      if (CCUseInstrs)
        CCUseInstrs->push_back(&Instr);
    }
    if (Instr.modifiesRegister(AArch64::NZCV, &TRI))
      break;
  }
  if (NZCVUsedAfterCmp.C || NZCVUsedAfterCmp.V)
    return None;
  return NZCVUsedAfterCmp;
}

static bool isADDSRegImm(unsigned Opcode) {
  return Opcode == AArch64::ADDSWri || Opcode == AArch64::ADDSXri;
}

static bool isSUBSRegImm(unsigned Opcode) {
  return Opcode == AArch64::SUBSWri || Opcode == AArch64::SUBSXri;
}

/// Check if CmpInstr can be substituted by MI.
///
/// CmpInstr can be substituted:
/// - CmpInstr is either 'ADDS %vreg, 0' or 'SUBS %vreg, 0'
/// - and, MI and CmpInstr are from the same MachineBB
/// - and, condition flags are not alive in successors of the CmpInstr parent
/// - and, if MI opcode is the S form there must be no defs of flags between
///        MI and CmpInstr
///        or if MI opcode is not the S form there must be neither defs of flags
///        nor uses of flags between MI and CmpInstr.
/// - and, C/V flags are not used after CmpInstr
static bool canInstrSubstituteCmpInstr(MachineInstr &MI, MachineInstr &CmpInstr,
                                       const TargetRegisterInfo &TRI) {
  assert(sForm(MI) != AArch64::INSTRUCTION_LIST_END);

  const unsigned CmpOpcode = CmpInstr.getOpcode();
  if (!isADDSRegImm(CmpOpcode) && !isSUBSRegImm(CmpOpcode))
    return false;

  if (!examineCFlagsUse(MI, CmpInstr, TRI))
    return false;

  AccessKind AccessToCheck = AK_Write;
  if (sForm(MI) != MI.getOpcode())
    AccessToCheck = AK_All;
  return !areCFlagsAccessedBetweenInstrs(&MI, &CmpInstr, &TRI, AccessToCheck);
}

/// Substitute an instruction comparing to zero with another instruction
/// which produces the needed condition flags.
///
/// Return true on success.
bool AArch64InstrInfo::substituteCmpToZero(
    MachineInstr &CmpInstr, unsigned SrcReg,
    const MachineRegisterInfo &MRI) const {
  // Get the unique definition of SrcReg.
  MachineInstr *MI = MRI.getUniqueVRegDef(SrcReg);
  if (!MI)
    return false;

  const TargetRegisterInfo &TRI = getRegisterInfo();

  unsigned NewOpc = sForm(*MI);
  if (NewOpc == AArch64::INSTRUCTION_LIST_END)
    return false;

  if (!canInstrSubstituteCmpInstr(*MI, CmpInstr, TRI))
    return false;

  // Update the instruction to set NZCV.
  MI->setDesc(get(NewOpc));
  CmpInstr.eraseFromParent();
  bool succeeded = UpdateOperandRegClass(*MI);
  (void)succeeded;
  assert(succeeded && "Some operands reg class are incompatible!");
  MI->addRegisterDefined(AArch64::NZCV, &TRI);
  return true;
}

/// \returns True if \p CmpInstr can be removed.
///
/// \p IsInvertCC is true if, after removing \p CmpInstr, condition
/// codes used in \p CCUseInstrs must be inverted.
static bool canCmpInstrBeRemoved(MachineInstr &MI, MachineInstr &CmpInstr,
                                 int CmpValue, const TargetRegisterInfo &TRI,
                                 SmallVectorImpl<MachineInstr *> &CCUseInstrs,
                                 bool &IsInvertCC) {
  assert((CmpValue == 0 || CmpValue == 1) &&
         "Only comparisons to 0 or 1 considered for removal!");

  // MI is 'CSINCWr %vreg, wzr, wzr, <cc>' or 'CSINCXr %vreg, xzr, xzr, <cc>'
  unsigned MIOpc = MI.getOpcode();
  if (MIOpc == AArch64::CSINCWr) {
    if (MI.getOperand(1).getReg() != AArch64::WZR ||
        MI.getOperand(2).getReg() != AArch64::WZR)
      return false;
  } else if (MIOpc == AArch64::CSINCXr) {
    if (MI.getOperand(1).getReg() != AArch64::XZR ||
        MI.getOperand(2).getReg() != AArch64::XZR)
      return false;
  } else {
    return false;
  }
  AArch64CC::CondCode MICC = findCondCodeUsedByInstr(MI);
  if (MICC == AArch64CC::Invalid)
    return false;

  // NZCV needs to be defined
  if (MI.findRegisterDefOperandIdx(AArch64::NZCV, true) != -1)
    return false;

  // CmpInstr is 'ADDS %vreg, 0' or 'SUBS %vreg, 0' or 'SUBS %vreg, 1'
  const unsigned CmpOpcode = CmpInstr.getOpcode();
  bool IsSubsRegImm = isSUBSRegImm(CmpOpcode);
  if (CmpValue && !IsSubsRegImm)
    return false;
  if (!CmpValue && !IsSubsRegImm && !isADDSRegImm(CmpOpcode))
    return false;

  // MI conditions allowed: eq, ne, mi, pl
  UsedNZCV MIUsedNZCV = getUsedNZCV(MICC);
  if (MIUsedNZCV.C || MIUsedNZCV.V)
    return false;

  Optional<UsedNZCV> NZCVUsedAfterCmp =
      examineCFlagsUse(MI, CmpInstr, TRI, &CCUseInstrs);
  // Condition flags are not used in CmpInstr basic block successors, and only
  // the Z or N flags are allowed to be used after CmpInstr within its block.
  if (!NZCVUsedAfterCmp)
    return false;
  // Z or N flag used after CmpInstr must correspond to the flag used in MI
  if ((MIUsedNZCV.Z && NZCVUsedAfterCmp->N) ||
      (MIUsedNZCV.N && NZCVUsedAfterCmp->Z))
    return false;
  // If CmpInstr is a comparison to zero, MI conditions are limited to eq, ne
  if (MIUsedNZCV.N && !CmpValue)
    return false;

  // There must be no defs of flags between MI and CmpInstr
  if (areCFlagsAccessedBetweenInstrs(&MI, &CmpInstr, &TRI, AK_Write))
    return false;

  // Condition code is inverted in the following cases:
  // 1. MI condition is ne; CmpInstr is 'ADDS %vreg, 0' or 'SUBS %vreg, 0'
  // 2. MI condition is eq, pl; CmpInstr is 'SUBS %vreg, 1'
  IsInvertCC = (CmpValue && (MICC == AArch64CC::EQ || MICC == AArch64CC::PL)) ||
               (!CmpValue && MICC == AArch64CC::NE);
  return true;
}

/// Remove comparison in csinc-cmp sequence
///
/// Examples:
/// 1. \code
///      csinc w9, wzr, wzr, ne
///      cmp   w9, #0
///      b.eq
///    \endcode
/// to
///    \code
///      csinc w9, wzr, wzr, ne
///      b.ne
///    \endcode
///
/// 2.
\code 1824 /// csinc x2, xzr, xzr, mi 1825 /// cmp x2, #1 1826 /// b.pl 1827 /// \endcode 1828 /// to 1829 /// \code 1830 /// csinc x2, xzr, xzr, mi 1831 /// b.pl 1832 /// \endcode 1833 /// 1834 /// \param CmpInstr comparison instruction 1835 /// \return True when comparison removed 1836 bool AArch64InstrInfo::removeCmpToZeroOrOne( 1837 MachineInstr &CmpInstr, unsigned SrcReg, int CmpValue, 1838 const MachineRegisterInfo &MRI) const { 1839 MachineInstr *MI = MRI.getUniqueVRegDef(SrcReg); 1840 if (!MI) 1841 return false; 1842 const TargetRegisterInfo &TRI = getRegisterInfo(); 1843 SmallVector<MachineInstr *, 4> CCUseInstrs; 1844 bool IsInvertCC = false; 1845 if (!canCmpInstrBeRemoved(*MI, CmpInstr, CmpValue, TRI, CCUseInstrs, 1846 IsInvertCC)) 1847 return false; 1848 // Make transformation 1849 CmpInstr.eraseFromParent(); 1850 if (IsInvertCC) { 1851 // Invert condition codes in CmpInstr CC users 1852 for (MachineInstr *CCUseInstr : CCUseInstrs) { 1853 int Idx = findCondCodeUseOperandIdxForBranchOrSelect(*CCUseInstr); 1854 assert(Idx >= 0 && "Unexpected instruction using CC."); 1855 MachineOperand &CCOperand = CCUseInstr->getOperand(Idx); 1856 AArch64CC::CondCode CCUse = AArch64CC::getInvertedCondCode( 1857 static_cast<AArch64CC::CondCode>(CCOperand.getImm())); 1858 CCOperand.setImm(CCUse); 1859 } 1860 } 1861 return true; 1862 } 1863 1864 bool AArch64InstrInfo::expandPostRAPseudo(MachineInstr &MI) const { 1865 if (MI.getOpcode() != TargetOpcode::LOAD_STACK_GUARD && 1866 MI.getOpcode() != AArch64::CATCHRET) 1867 return false; 1868 1869 MachineBasicBlock &MBB = *MI.getParent(); 1870 auto &Subtarget = MBB.getParent()->getSubtarget<AArch64Subtarget>(); 1871 auto TRI = Subtarget.getRegisterInfo(); 1872 DebugLoc DL = MI.getDebugLoc(); 1873 1874 if (MI.getOpcode() == AArch64::CATCHRET) { 1875 // Skip to the first instruction before the epilog. 
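    // (Illustrative note, not an exhaustive description.) The CATCHRET
    // pseudo is expanded into an adrp/add pair that materializes the address
    // of TargetMBB in x0, inserted just before the epilogue's FrameDestroy
    // instructions located by the loop below.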
    const TargetInstrInfo *TII =
        MBB.getParent()->getSubtarget().getInstrInfo();
    MachineBasicBlock *TargetMBB = MI.getOperand(0).getMBB();
    auto MBBI = MachineBasicBlock::iterator(MI);
    MachineBasicBlock::iterator FirstEpilogSEH = std::prev(MBBI);
    while (FirstEpilogSEH->getFlag(MachineInstr::FrameDestroy) &&
           FirstEpilogSEH != MBB.begin())
      FirstEpilogSEH = std::prev(FirstEpilogSEH);
    if (FirstEpilogSEH != MBB.begin())
      FirstEpilogSEH = std::next(FirstEpilogSEH);
    BuildMI(MBB, FirstEpilogSEH, DL, TII->get(AArch64::ADRP))
        .addReg(AArch64::X0, RegState::Define)
        .addMBB(TargetMBB);
    BuildMI(MBB, FirstEpilogSEH, DL, TII->get(AArch64::ADDXri))
        .addReg(AArch64::X0, RegState::Define)
        .addReg(AArch64::X0)
        .addMBB(TargetMBB)
        .addImm(0);
    return true;
  }

  Register Reg = MI.getOperand(0).getReg();
  Module &M = *MBB.getParent()->getFunction().getParent();
  if (M.getStackProtectorGuard() == "sysreg") {
    const AArch64SysReg::SysReg *SrcReg =
        AArch64SysReg::lookupSysRegByName(M.getStackProtectorGuardReg());
    if (!SrcReg)
      report_fatal_error("Unknown SysReg for Stack Protector Guard Register");

    // mrs xN, sysreg
    BuildMI(MBB, MI, DL, get(AArch64::MRS))
        .addDef(Reg, RegState::Renamable)
        .addImm(SrcReg->Encoding);
    int Offset = M.getStackProtectorGuardOffset();
    if (Offset >= 0 && Offset <= 32760 && Offset % 8 == 0) {
      // ldr xN, [xN, #offset]
      BuildMI(MBB, MI, DL, get(AArch64::LDRXui))
          .addDef(Reg)
          .addUse(Reg, RegState::Kill)
          .addImm(Offset / 8);
    } else if (Offset >= -256 && Offset <= 255) {
      // ldur xN, [xN, #offset]
      BuildMI(MBB, MI, DL, get(AArch64::LDURXi))
          .addDef(Reg)
          .addUse(Reg, RegState::Kill)
          .addImm(Offset);
    } else if (Offset >= -4095 && Offset <= 4095) {
      if (Offset > 0) {
        // add xN, xN, #offset
        BuildMI(MBB, MI, DL, get(AArch64::ADDXri))
            .addDef(Reg)
            .addUse(Reg, RegState::Kill)
            .addImm(Offset)
            .addImm(0);
      } else {
        // sub xN, xN, #offset
        BuildMI(MBB, MI, DL, get(AArch64::SUBXri))
            .addDef(Reg)
            .addUse(Reg, RegState::Kill)
            .addImm(-Offset)
            .addImm(0);
      }
      // ldr xN, [xN]
      BuildMI(MBB, MI, DL, get(AArch64::LDRXui))
          .addDef(Reg)
          .addUse(Reg, RegState::Kill)
          .addImm(0);
    } else {
      // Cases that are larger than +/- 4095 and not a multiple of 8, or larger
      // than 32760.
      // It might be nice to use AArch64::MOVi32imm here, which would get
      // expanded in PreSched2 after PostRA, but our lone scratch Reg already
      // contains the MRS result. findScratchNonCalleeSaveRegister() in
      // AArch64FrameLowering might help us find such a scratch register
      // though. If we failed to find a scratch register, we could emit a
      // stream of add instructions to build up the immediate. Or, we could try
      // to insert an AArch64::MOVi32imm before register allocation so that we
      // didn't need to scavenge for a scratch register.
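      // For instance (illustrative values): a guard offset of 4100 (above
      // 4095 and not a multiple of 8) or of 1 << 20 (above 32760) falls
      // through to this path.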
1954 report_fatal_error("Unable to encode Stack Protector Guard Offset"); 1955 } 1956 MBB.erase(MI); 1957 return true; 1958 } 1959 1960 const GlobalValue *GV = 1961 cast<GlobalValue>((*MI.memoperands_begin())->getValue()); 1962 const TargetMachine &TM = MBB.getParent()->getTarget(); 1963 unsigned OpFlags = Subtarget.ClassifyGlobalReference(GV, TM); 1964 const unsigned char MO_NC = AArch64II::MO_NC; 1965 1966 if ((OpFlags & AArch64II::MO_GOT) != 0) { 1967 BuildMI(MBB, MI, DL, get(AArch64::LOADgot), Reg) 1968 .addGlobalAddress(GV, 0, OpFlags); 1969 if (Subtarget.isTargetILP32()) { 1970 unsigned Reg32 = TRI->getSubReg(Reg, AArch64::sub_32); 1971 BuildMI(MBB, MI, DL, get(AArch64::LDRWui)) 1972 .addDef(Reg32, RegState::Dead) 1973 .addUse(Reg, RegState::Kill) 1974 .addImm(0) 1975 .addMemOperand(*MI.memoperands_begin()) 1976 .addDef(Reg, RegState::Implicit); 1977 } else { 1978 BuildMI(MBB, MI, DL, get(AArch64::LDRXui), Reg) 1979 .addReg(Reg, RegState::Kill) 1980 .addImm(0) 1981 .addMemOperand(*MI.memoperands_begin()); 1982 } 1983 } else if (TM.getCodeModel() == CodeModel::Large) { 1984 assert(!Subtarget.isTargetILP32() && "how can large exist in ILP32?"); 1985 BuildMI(MBB, MI, DL, get(AArch64::MOVZXi), Reg) 1986 .addGlobalAddress(GV, 0, AArch64II::MO_G0 | MO_NC) 1987 .addImm(0); 1988 BuildMI(MBB, MI, DL, get(AArch64::MOVKXi), Reg) 1989 .addReg(Reg, RegState::Kill) 1990 .addGlobalAddress(GV, 0, AArch64II::MO_G1 | MO_NC) 1991 .addImm(16); 1992 BuildMI(MBB, MI, DL, get(AArch64::MOVKXi), Reg) 1993 .addReg(Reg, RegState::Kill) 1994 .addGlobalAddress(GV, 0, AArch64II::MO_G2 | MO_NC) 1995 .addImm(32); 1996 BuildMI(MBB, MI, DL, get(AArch64::MOVKXi), Reg) 1997 .addReg(Reg, RegState::Kill) 1998 .addGlobalAddress(GV, 0, AArch64II::MO_G3) 1999 .addImm(48); 2000 BuildMI(MBB, MI, DL, get(AArch64::LDRXui), Reg) 2001 .addReg(Reg, RegState::Kill) 2002 .addImm(0) 2003 .addMemOperand(*MI.memoperands_begin()); 2004 } else if (TM.getCodeModel() == CodeModel::Tiny) { 2005 BuildMI(MBB, MI, DL, get(AArch64::ADR), Reg) 2006 .addGlobalAddress(GV, 0, OpFlags); 2007 } else { 2008 BuildMI(MBB, MI, DL, get(AArch64::ADRP), Reg) 2009 .addGlobalAddress(GV, 0, OpFlags | AArch64II::MO_PAGE); 2010 unsigned char LoFlags = OpFlags | AArch64II::MO_PAGEOFF | MO_NC; 2011 if (Subtarget.isTargetILP32()) { 2012 unsigned Reg32 = TRI->getSubReg(Reg, AArch64::sub_32); 2013 BuildMI(MBB, MI, DL, get(AArch64::LDRWui)) 2014 .addDef(Reg32, RegState::Dead) 2015 .addUse(Reg, RegState::Kill) 2016 .addGlobalAddress(GV, 0, LoFlags) 2017 .addMemOperand(*MI.memoperands_begin()) 2018 .addDef(Reg, RegState::Implicit); 2019 } else { 2020 BuildMI(MBB, MI, DL, get(AArch64::LDRXui), Reg) 2021 .addReg(Reg, RegState::Kill) 2022 .addGlobalAddress(GV, 0, LoFlags) 2023 .addMemOperand(*MI.memoperands_begin()); 2024 } 2025 } 2026 2027 MBB.erase(MI); 2028 2029 return true; 2030 } 2031 2032 // Return true if this instruction simply sets its single destination register 2033 // to zero. This is equivalent to a register rename of the zero-register. 
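// For example (illustrative): 'movz w0, #0', 'and w0, wzr, #0xff', or a COPY
// from WZR all leave the destination register equal to zero.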
bool AArch64InstrInfo::isGPRZero(const MachineInstr &MI) {
  switch (MI.getOpcode()) {
  default:
    break;
  case AArch64::MOVZWi:
  case AArch64::MOVZXi: // movz Rd, #0 (LSL #0)
    if (MI.getOperand(1).isImm() && MI.getOperand(1).getImm() == 0) {
      assert(MI.getDesc().getNumOperands() == 3 &&
             MI.getOperand(2).getImm() == 0 && "invalid MOVZi operands");
      return true;
    }
    break;
  case AArch64::ANDWri: // and Rd, Rzr, #imm
    return MI.getOperand(1).getReg() == AArch64::WZR;
  case AArch64::ANDXri:
    return MI.getOperand(1).getReg() == AArch64::XZR;
  case TargetOpcode::COPY:
    return MI.getOperand(1).getReg() == AArch64::WZR;
  }
  return false;
}

// Return true if this instruction simply renames a general register without
// modifying bits.
bool AArch64InstrInfo::isGPRCopy(const MachineInstr &MI) {
  switch (MI.getOpcode()) {
  default:
    break;
  case TargetOpcode::COPY: {
    // GPR32 copies will be lowered to ORRXrs
    Register DstReg = MI.getOperand(0).getReg();
    return (AArch64::GPR32RegClass.contains(DstReg) ||
            AArch64::GPR64RegClass.contains(DstReg));
  }
  case AArch64::ORRXrs: // orr Xd, Xzr, Xm (LSL #0)
    if (MI.getOperand(1).getReg() == AArch64::XZR) {
      assert(MI.getDesc().getNumOperands() == 4 &&
             MI.getOperand(3).getImm() == 0 && "invalid ORRrs operands");
      return true;
    }
    break;
  case AArch64::ADDXri: // add Xd, Xn, #0 (LSL #0)
    if (MI.getOperand(2).getImm() == 0) {
      assert(MI.getDesc().getNumOperands() == 4 &&
             MI.getOperand(3).getImm() == 0 && "invalid ADDXri operands");
      return true;
    }
    break;
  }
  return false;
}

// Return true if this instruction simply renames an FP register without
// modifying bits.
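// For example (illustrative): a COPY between FPR128 registers, or
// 'orr v0.16b, v1.16b, v1.16b' (ORRv16i8 with identical source registers).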
2088 bool AArch64InstrInfo::isFPRCopy(const MachineInstr &MI) { 2089 switch (MI.getOpcode()) { 2090 default: 2091 break; 2092 case TargetOpcode::COPY: { 2093 Register DstReg = MI.getOperand(0).getReg(); 2094 return AArch64::FPR128RegClass.contains(DstReg); 2095 } 2096 case AArch64::ORRv16i8: 2097 if (MI.getOperand(1).getReg() == MI.getOperand(2).getReg()) { 2098 assert(MI.getDesc().getNumOperands() == 3 && MI.getOperand(0).isReg() && 2099 "invalid ORRv16i8 operands"); 2100 return true; 2101 } 2102 break; 2103 } 2104 return false; 2105 } 2106 2107 unsigned AArch64InstrInfo::isLoadFromStackSlot(const MachineInstr &MI, 2108 int &FrameIndex) const { 2109 switch (MI.getOpcode()) { 2110 default: 2111 break; 2112 case AArch64::LDRWui: 2113 case AArch64::LDRXui: 2114 case AArch64::LDRBui: 2115 case AArch64::LDRHui: 2116 case AArch64::LDRSui: 2117 case AArch64::LDRDui: 2118 case AArch64::LDRQui: 2119 if (MI.getOperand(0).getSubReg() == 0 && MI.getOperand(1).isFI() && 2120 MI.getOperand(2).isImm() && MI.getOperand(2).getImm() == 0) { 2121 FrameIndex = MI.getOperand(1).getIndex(); 2122 return MI.getOperand(0).getReg(); 2123 } 2124 break; 2125 } 2126 2127 return 0; 2128 } 2129 2130 unsigned AArch64InstrInfo::isStoreToStackSlot(const MachineInstr &MI, 2131 int &FrameIndex) const { 2132 switch (MI.getOpcode()) { 2133 default: 2134 break; 2135 case AArch64::STRWui: 2136 case AArch64::STRXui: 2137 case AArch64::STRBui: 2138 case AArch64::STRHui: 2139 case AArch64::STRSui: 2140 case AArch64::STRDui: 2141 case AArch64::STRQui: 2142 case AArch64::LDR_PXI: 2143 case AArch64::STR_PXI: 2144 if (MI.getOperand(0).getSubReg() == 0 && MI.getOperand(1).isFI() && 2145 MI.getOperand(2).isImm() && MI.getOperand(2).getImm() == 0) { 2146 FrameIndex = MI.getOperand(1).getIndex(); 2147 return MI.getOperand(0).getReg(); 2148 } 2149 break; 2150 } 2151 return 0; 2152 } 2153 2154 /// Check all MachineMemOperands for a hint to suppress pairing. 2155 bool AArch64InstrInfo::isLdStPairSuppressed(const MachineInstr &MI) { 2156 return llvm::any_of(MI.memoperands(), [](MachineMemOperand *MMO) { 2157 return MMO->getFlags() & MOSuppressPair; 2158 }); 2159 } 2160 2161 /// Set a flag on the first MachineMemOperand to suppress pairing. 2162 void AArch64InstrInfo::suppressLdStPair(MachineInstr &MI) { 2163 if (MI.memoperands_empty()) 2164 return; 2165 (*MI.memoperands_begin())->setFlags(MOSuppressPair); 2166 } 2167 2168 /// Check all MachineMemOperands for a hint that the load/store is strided. 
2169 bool AArch64InstrInfo::isStridedAccess(const MachineInstr &MI) { 2170 return llvm::any_of(MI.memoperands(), [](MachineMemOperand *MMO) { 2171 return MMO->getFlags() & MOStridedAccess; 2172 }); 2173 } 2174 2175 bool AArch64InstrInfo::hasUnscaledLdStOffset(unsigned Opc) { 2176 switch (Opc) { 2177 default: 2178 return false; 2179 case AArch64::STURSi: 2180 case AArch64::STRSpre: 2181 case AArch64::STURDi: 2182 case AArch64::STRDpre: 2183 case AArch64::STURQi: 2184 case AArch64::STRQpre: 2185 case AArch64::STURBBi: 2186 case AArch64::STURHHi: 2187 case AArch64::STURWi: 2188 case AArch64::STRWpre: 2189 case AArch64::STURXi: 2190 case AArch64::STRXpre: 2191 case AArch64::LDURSi: 2192 case AArch64::LDRSpre: 2193 case AArch64::LDURDi: 2194 case AArch64::LDRDpre: 2195 case AArch64::LDURQi: 2196 case AArch64::LDRQpre: 2197 case AArch64::LDURWi: 2198 case AArch64::LDRWpre: 2199 case AArch64::LDURXi: 2200 case AArch64::LDRXpre: 2201 case AArch64::LDURSWi: 2202 case AArch64::LDURHHi: 2203 case AArch64::LDURBBi: 2204 case AArch64::LDURSBWi: 2205 case AArch64::LDURSHWi: 2206 return true; 2207 } 2208 } 2209 2210 Optional<unsigned> AArch64InstrInfo::getUnscaledLdSt(unsigned Opc) { 2211 switch (Opc) { 2212 default: return {}; 2213 case AArch64::PRFMui: return AArch64::PRFUMi; 2214 case AArch64::LDRXui: return AArch64::LDURXi; 2215 case AArch64::LDRWui: return AArch64::LDURWi; 2216 case AArch64::LDRBui: return AArch64::LDURBi; 2217 case AArch64::LDRHui: return AArch64::LDURHi; 2218 case AArch64::LDRSui: return AArch64::LDURSi; 2219 case AArch64::LDRDui: return AArch64::LDURDi; 2220 case AArch64::LDRQui: return AArch64::LDURQi; 2221 case AArch64::LDRBBui: return AArch64::LDURBBi; 2222 case AArch64::LDRHHui: return AArch64::LDURHHi; 2223 case AArch64::LDRSBXui: return AArch64::LDURSBXi; 2224 case AArch64::LDRSBWui: return AArch64::LDURSBWi; 2225 case AArch64::LDRSHXui: return AArch64::LDURSHXi; 2226 case AArch64::LDRSHWui: return AArch64::LDURSHWi; 2227 case AArch64::LDRSWui: return AArch64::LDURSWi; 2228 case AArch64::STRXui: return AArch64::STURXi; 2229 case AArch64::STRWui: return AArch64::STURWi; 2230 case AArch64::STRBui: return AArch64::STURBi; 2231 case AArch64::STRHui: return AArch64::STURHi; 2232 case AArch64::STRSui: return AArch64::STURSi; 2233 case AArch64::STRDui: return AArch64::STURDi; 2234 case AArch64::STRQui: return AArch64::STURQi; 2235 case AArch64::STRBBui: return AArch64::STURBBi; 2236 case AArch64::STRHHui: return AArch64::STURHHi; 2237 } 2238 } 2239 2240 unsigned AArch64InstrInfo::getLoadStoreImmIdx(unsigned Opc) { 2241 switch (Opc) { 2242 default: 2243 return 2; 2244 case AArch64::LDPXi: 2245 case AArch64::LDPDi: 2246 case AArch64::STPXi: 2247 case AArch64::STPDi: 2248 case AArch64::LDNPXi: 2249 case AArch64::LDNPDi: 2250 case AArch64::STNPXi: 2251 case AArch64::STNPDi: 2252 case AArch64::LDPQi: 2253 case AArch64::STPQi: 2254 case AArch64::LDNPQi: 2255 case AArch64::STNPQi: 2256 case AArch64::LDPWi: 2257 case AArch64::LDPSi: 2258 case AArch64::STPWi: 2259 case AArch64::STPSi: 2260 case AArch64::LDNPWi: 2261 case AArch64::LDNPSi: 2262 case AArch64::STNPWi: 2263 case AArch64::STNPSi: 2264 case AArch64::LDG: 2265 case AArch64::STGPi: 2266 2267 case AArch64::LD1B_IMM: 2268 case AArch64::LD1B_H_IMM: 2269 case AArch64::LD1B_S_IMM: 2270 case AArch64::LD1B_D_IMM: 2271 case AArch64::LD1SB_H_IMM: 2272 case AArch64::LD1SB_S_IMM: 2273 case AArch64::LD1SB_D_IMM: 2274 case AArch64::LD1H_IMM: 2275 case AArch64::LD1H_S_IMM: 2276 case AArch64::LD1H_D_IMM: 2277 case AArch64::LD1SH_S_IMM: 2278 case 
AArch64::LD1SH_D_IMM: 2279 case AArch64::LD1W_IMM: 2280 case AArch64::LD1W_D_IMM: 2281 case AArch64::LD1SW_D_IMM: 2282 case AArch64::LD1D_IMM: 2283 2284 case AArch64::ST1B_IMM: 2285 case AArch64::ST1B_H_IMM: 2286 case AArch64::ST1B_S_IMM: 2287 case AArch64::ST1B_D_IMM: 2288 case AArch64::ST1H_IMM: 2289 case AArch64::ST1H_S_IMM: 2290 case AArch64::ST1H_D_IMM: 2291 case AArch64::ST1W_IMM: 2292 case AArch64::ST1W_D_IMM: 2293 case AArch64::ST1D_IMM: 2294 2295 case AArch64::LD1RB_IMM: 2296 case AArch64::LD1RB_H_IMM: 2297 case AArch64::LD1RB_S_IMM: 2298 case AArch64::LD1RB_D_IMM: 2299 case AArch64::LD1RSB_H_IMM: 2300 case AArch64::LD1RSB_S_IMM: 2301 case AArch64::LD1RSB_D_IMM: 2302 case AArch64::LD1RH_IMM: 2303 case AArch64::LD1RH_S_IMM: 2304 case AArch64::LD1RH_D_IMM: 2305 case AArch64::LD1RSH_S_IMM: 2306 case AArch64::LD1RSH_D_IMM: 2307 case AArch64::LD1RW_IMM: 2308 case AArch64::LD1RW_D_IMM: 2309 case AArch64::LD1RSW_IMM: 2310 case AArch64::LD1RD_IMM: 2311 2312 case AArch64::LDNT1B_ZRI: 2313 case AArch64::LDNT1H_ZRI: 2314 case AArch64::LDNT1W_ZRI: 2315 case AArch64::LDNT1D_ZRI: 2316 case AArch64::STNT1B_ZRI: 2317 case AArch64::STNT1H_ZRI: 2318 case AArch64::STNT1W_ZRI: 2319 case AArch64::STNT1D_ZRI: 2320 2321 case AArch64::LDNF1B_IMM: 2322 case AArch64::LDNF1B_H_IMM: 2323 case AArch64::LDNF1B_S_IMM: 2324 case AArch64::LDNF1B_D_IMM: 2325 case AArch64::LDNF1SB_H_IMM: 2326 case AArch64::LDNF1SB_S_IMM: 2327 case AArch64::LDNF1SB_D_IMM: 2328 case AArch64::LDNF1H_IMM: 2329 case AArch64::LDNF1H_S_IMM: 2330 case AArch64::LDNF1H_D_IMM: 2331 case AArch64::LDNF1SH_S_IMM: 2332 case AArch64::LDNF1SH_D_IMM: 2333 case AArch64::LDNF1W_IMM: 2334 case AArch64::LDNF1W_D_IMM: 2335 case AArch64::LDNF1SW_D_IMM: 2336 case AArch64::LDNF1D_IMM: 2337 return 3; 2338 case AArch64::ADDG: 2339 case AArch64::STGOffset: 2340 case AArch64::LDR_PXI: 2341 case AArch64::STR_PXI: 2342 return 2; 2343 } 2344 } 2345 2346 bool AArch64InstrInfo::isPairableLdStInst(const MachineInstr &MI) { 2347 switch (MI.getOpcode()) { 2348 default: 2349 return false; 2350 // Scaled instructions. 2351 case AArch64::STRSui: 2352 case AArch64::STRDui: 2353 case AArch64::STRQui: 2354 case AArch64::STRXui: 2355 case AArch64::STRWui: 2356 case AArch64::LDRSui: 2357 case AArch64::LDRDui: 2358 case AArch64::LDRQui: 2359 case AArch64::LDRXui: 2360 case AArch64::LDRWui: 2361 case AArch64::LDRSWui: 2362 // Unscaled instructions. 
2363 case AArch64::STURSi: 2364 case AArch64::STRSpre: 2365 case AArch64::STURDi: 2366 case AArch64::STRDpre: 2367 case AArch64::STURQi: 2368 case AArch64::STRQpre: 2369 case AArch64::STURWi: 2370 case AArch64::STRWpre: 2371 case AArch64::STURXi: 2372 case AArch64::STRXpre: 2373 case AArch64::LDURSi: 2374 case AArch64::LDRSpre: 2375 case AArch64::LDURDi: 2376 case AArch64::LDRDpre: 2377 case AArch64::LDURQi: 2378 case AArch64::LDRQpre: 2379 case AArch64::LDURWi: 2380 case AArch64::LDRWpre: 2381 case AArch64::LDURXi: 2382 case AArch64::LDRXpre: 2383 case AArch64::LDURSWi: 2384 return true; 2385 } 2386 } 2387 2388 unsigned AArch64InstrInfo::convertToFlagSettingOpc(unsigned Opc, 2389 bool &Is64Bit) { 2390 switch (Opc) { 2391 default: 2392 llvm_unreachable("Opcode has no flag setting equivalent!"); 2393 // 32-bit cases: 2394 case AArch64::ADDWri: 2395 Is64Bit = false; 2396 return AArch64::ADDSWri; 2397 case AArch64::ADDWrr: 2398 Is64Bit = false; 2399 return AArch64::ADDSWrr; 2400 case AArch64::ADDWrs: 2401 Is64Bit = false; 2402 return AArch64::ADDSWrs; 2403 case AArch64::ADDWrx: 2404 Is64Bit = false; 2405 return AArch64::ADDSWrx; 2406 case AArch64::ANDWri: 2407 Is64Bit = false; 2408 return AArch64::ANDSWri; 2409 case AArch64::ANDWrr: 2410 Is64Bit = false; 2411 return AArch64::ANDSWrr; 2412 case AArch64::ANDWrs: 2413 Is64Bit = false; 2414 return AArch64::ANDSWrs; 2415 case AArch64::BICWrr: 2416 Is64Bit = false; 2417 return AArch64::BICSWrr; 2418 case AArch64::BICWrs: 2419 Is64Bit = false; 2420 return AArch64::BICSWrs; 2421 case AArch64::SUBWri: 2422 Is64Bit = false; 2423 return AArch64::SUBSWri; 2424 case AArch64::SUBWrr: 2425 Is64Bit = false; 2426 return AArch64::SUBSWrr; 2427 case AArch64::SUBWrs: 2428 Is64Bit = false; 2429 return AArch64::SUBSWrs; 2430 case AArch64::SUBWrx: 2431 Is64Bit = false; 2432 return AArch64::SUBSWrx; 2433 // 64-bit cases: 2434 case AArch64::ADDXri: 2435 Is64Bit = true; 2436 return AArch64::ADDSXri; 2437 case AArch64::ADDXrr: 2438 Is64Bit = true; 2439 return AArch64::ADDSXrr; 2440 case AArch64::ADDXrs: 2441 Is64Bit = true; 2442 return AArch64::ADDSXrs; 2443 case AArch64::ADDXrx: 2444 Is64Bit = true; 2445 return AArch64::ADDSXrx; 2446 case AArch64::ANDXri: 2447 Is64Bit = true; 2448 return AArch64::ANDSXri; 2449 case AArch64::ANDXrr: 2450 Is64Bit = true; 2451 return AArch64::ANDSXrr; 2452 case AArch64::ANDXrs: 2453 Is64Bit = true; 2454 return AArch64::ANDSXrs; 2455 case AArch64::BICXrr: 2456 Is64Bit = true; 2457 return AArch64::BICSXrr; 2458 case AArch64::BICXrs: 2459 Is64Bit = true; 2460 return AArch64::BICSXrs; 2461 case AArch64::SUBXri: 2462 Is64Bit = true; 2463 return AArch64::SUBSXri; 2464 case AArch64::SUBXrr: 2465 Is64Bit = true; 2466 return AArch64::SUBSXrr; 2467 case AArch64::SUBXrs: 2468 Is64Bit = true; 2469 return AArch64::SUBSXrs; 2470 case AArch64::SUBXrx: 2471 Is64Bit = true; 2472 return AArch64::SUBSXrx; 2473 } 2474 } 2475 2476 // Is this a candidate for ld/st merging or pairing? For example, we don't 2477 // touch volatiles or load/stores that have a hint to avoid pair formation. 2478 bool AArch64InstrInfo::isCandidateToMergeOrPair(const MachineInstr &MI) const { 2479 2480 bool IsPreLdSt = isPreLdSt(MI); 2481 2482 // If this is a volatile load/store, don't mess with it. 2483 if (MI.hasOrderedMemoryRef()) 2484 return false; 2485 2486 // Make sure this is a reg/fi+imm (as opposed to an address reloc). 2487 // For Pre-inc LD/ST, the operand is shifted by one. 2488 assert((MI.getOperand(IsPreLdSt ? 2 : 1).isReg() || 2489 MI.getOperand(IsPreLdSt ? 
2 : 1).isFI()) && 2490 "Expected a reg or frame index operand."); 2491 2492 // For Pre-indexed addressing quadword instructions, the third operand is the 2493 // immediate value. 2494 bool IsImmPreLdSt = IsPreLdSt && MI.getOperand(3).isImm(); 2495 2496 if (!MI.getOperand(2).isImm() && !IsImmPreLdSt) 2497 return false; 2498 2499 // Can't merge/pair if the instruction modifies the base register. 2500 // e.g., ldr x0, [x0] 2501 // This case will never occur with an FI base. 2502 // However, if the instruction is an LDR/STR<S,D,Q,W,X>pre, it can be merged. 2503 // For example: 2504 // ldr q0, [x11, #32]! 2505 // ldr q1, [x11, #16] 2506 // to 2507 // ldp q0, q1, [x11, #32]! 2508 if (MI.getOperand(1).isReg() && !IsPreLdSt) { 2509 Register BaseReg = MI.getOperand(1).getReg(); 2510 const TargetRegisterInfo *TRI = &getRegisterInfo(); 2511 if (MI.modifiesRegister(BaseReg, TRI)) 2512 return false; 2513 } 2514 2515 // Check if this load/store has a hint to avoid pair formation. 2516 // MachineMemOperands hints are set by the AArch64StorePairSuppress pass. 2517 if (isLdStPairSuppressed(MI)) 2518 return false; 2519 2520 // Do not pair any callee-save store/reload instructions in the 2521 // prologue/epilogue if the CFI information encoded the operations as separate 2522 // instructions, as that will cause the size of the actual prologue to mismatch 2523 // with the prologue size recorded in the Windows CFI. 2524 const MCAsmInfo *MAI = MI.getMF()->getTarget().getMCAsmInfo(); 2525 bool NeedsWinCFI = MAI->usesWindowsCFI() && 2526 MI.getMF()->getFunction().needsUnwindTableEntry(); 2527 if (NeedsWinCFI && (MI.getFlag(MachineInstr::FrameSetup) || 2528 MI.getFlag(MachineInstr::FrameDestroy))) 2529 return false; 2530 2531 // On some CPUs quad load/store pairs are slower than two single load/stores. 2532 if (Subtarget.isPaired128Slow()) { 2533 switch (MI.getOpcode()) { 2534 default: 2535 break; 2536 case AArch64::LDURQi: 2537 case AArch64::STURQi: 2538 case AArch64::LDRQui: 2539 case AArch64::STRQui: 2540 return false; 2541 } 2542 } 2543 2544 return true; 2545 } 2546 2547 bool AArch64InstrInfo::getMemOperandsWithOffsetWidth( 2548 const MachineInstr &LdSt, SmallVectorImpl<const MachineOperand *> &BaseOps, 2549 int64_t &Offset, bool &OffsetIsScalable, unsigned &Width, 2550 const TargetRegisterInfo *TRI) const { 2551 if (!LdSt.mayLoadOrStore()) 2552 return false; 2553 2554 const MachineOperand *BaseOp; 2555 if (!getMemOperandWithOffsetWidth(LdSt, BaseOp, Offset, OffsetIsScalable, 2556 Width, TRI)) 2557 return false; 2558 BaseOps.push_back(BaseOp); 2559 return true; 2560 } 2561 2562 Optional<ExtAddrMode> 2563 AArch64InstrInfo::getAddrModeFromMemoryOp(const MachineInstr &MemI, 2564 const TargetRegisterInfo *TRI) const { 2565 const MachineOperand *Base; // Filled with the base operand of MI. 2566 int64_t Offset; // Filled with the offset of MI. 
2567 bool OffsetIsScalable; 2568 if (!getMemOperandWithOffset(MemI, Base, Offset, OffsetIsScalable, TRI)) 2569 return None; 2570 2571 if (!Base->isReg()) 2572 return None; 2573 ExtAddrMode AM; 2574 AM.BaseReg = Base->getReg(); 2575 AM.Displacement = Offset; 2576 AM.ScaledReg = 0; 2577 AM.Scale = 0; 2578 return AM; 2579 } 2580 2581 bool AArch64InstrInfo::getMemOperandWithOffsetWidth( 2582 const MachineInstr &LdSt, const MachineOperand *&BaseOp, int64_t &Offset, 2583 bool &OffsetIsScalable, unsigned &Width, 2584 const TargetRegisterInfo *TRI) const { 2585 assert(LdSt.mayLoadOrStore() && "Expected a memory operation."); 2586 // Handle only loads/stores with base register followed by immediate offset. 2587 if (LdSt.getNumExplicitOperands() == 3) { 2588 // Non-paired instruction (e.g., ldr x1, [x0, #8]). 2589 if ((!LdSt.getOperand(1).isReg() && !LdSt.getOperand(1).isFI()) || 2590 !LdSt.getOperand(2).isImm()) 2591 return false; 2592 } else if (LdSt.getNumExplicitOperands() == 4) { 2593 // Paired instruction (e.g., ldp x1, x2, [x0, #8]). 2594 if (!LdSt.getOperand(1).isReg() || 2595 (!LdSt.getOperand(2).isReg() && !LdSt.getOperand(2).isFI()) || 2596 !LdSt.getOperand(3).isImm()) 2597 return false; 2598 } else 2599 return false; 2600 2601 // Get the scaling factor for the instruction and set the width for the 2602 // instruction. 2603 TypeSize Scale(0U, false); 2604 int64_t Dummy1, Dummy2; 2605 2606 // If this returns false, then it's an instruction we don't want to handle. 2607 if (!getMemOpInfo(LdSt.getOpcode(), Scale, Width, Dummy1, Dummy2)) 2608 return false; 2609 2610 // Compute the offset. Offset is calculated as the immediate operand 2611 // multiplied by the scaling factor. Unscaled instructions have scaling factor 2612 // set to 1. 2613 if (LdSt.getNumExplicitOperands() == 3) { 2614 BaseOp = &LdSt.getOperand(1); 2615 Offset = LdSt.getOperand(2).getImm() * Scale.getKnownMinSize(); 2616 } else { 2617 assert(LdSt.getNumExplicitOperands() == 4 && "invalid number of operands"); 2618 BaseOp = &LdSt.getOperand(2); 2619 Offset = LdSt.getOperand(3).getImm() * Scale.getKnownMinSize(); 2620 } 2621 OffsetIsScalable = Scale.isScalable(); 2622 2623 if (!BaseOp->isReg() && !BaseOp->isFI()) 2624 return false; 2625 2626 return true; 2627 } 2628 2629 MachineOperand & 2630 AArch64InstrInfo::getMemOpBaseRegImmOfsOffsetOperand(MachineInstr &LdSt) const { 2631 assert(LdSt.mayLoadOrStore() && "Expected a memory operation."); 2632 MachineOperand &OfsOp = LdSt.getOperand(LdSt.getNumExplicitOperands() - 1); 2633 assert(OfsOp.isImm() && "Offset operand wasn't immediate."); 2634 return OfsOp; 2635 } 2636 2637 bool AArch64InstrInfo::getMemOpInfo(unsigned Opcode, TypeSize &Scale, 2638 unsigned &Width, int64_t &MinOffset, 2639 int64_t &MaxOffset) { 2640 const unsigned SVEMaxBytesPerVector = AArch64::SVEMaxBitsPerVector / 8; 2641 switch (Opcode) { 2642 // Not a memory operation or something we want to handle. 
2643 default: 2644 Scale = TypeSize::Fixed(0); 2645 Width = 0; 2646 MinOffset = MaxOffset = 0; 2647 return false; 2648 case AArch64::STRWpost: 2649 case AArch64::LDRWpost: 2650 Width = 32; 2651 Scale = TypeSize::Fixed(4); 2652 MinOffset = -256; 2653 MaxOffset = 255; 2654 break; 2655 case AArch64::LDURQi: 2656 case AArch64::STURQi: 2657 Width = 16; 2658 Scale = TypeSize::Fixed(1); 2659 MinOffset = -256; 2660 MaxOffset = 255; 2661 break; 2662 case AArch64::PRFUMi: 2663 case AArch64::LDURXi: 2664 case AArch64::LDURDi: 2665 case AArch64::STURXi: 2666 case AArch64::STURDi: 2667 Width = 8; 2668 Scale = TypeSize::Fixed(1); 2669 MinOffset = -256; 2670 MaxOffset = 255; 2671 break; 2672 case AArch64::LDURWi: 2673 case AArch64::LDURSi: 2674 case AArch64::LDURSWi: 2675 case AArch64::STURWi: 2676 case AArch64::STURSi: 2677 Width = 4; 2678 Scale = TypeSize::Fixed(1); 2679 MinOffset = -256; 2680 MaxOffset = 255; 2681 break; 2682 case AArch64::LDURHi: 2683 case AArch64::LDURHHi: 2684 case AArch64::LDURSHXi: 2685 case AArch64::LDURSHWi: 2686 case AArch64::STURHi: 2687 case AArch64::STURHHi: 2688 Width = 2; 2689 Scale = TypeSize::Fixed(1); 2690 MinOffset = -256; 2691 MaxOffset = 255; 2692 break; 2693 case AArch64::LDURBi: 2694 case AArch64::LDURBBi: 2695 case AArch64::LDURSBXi: 2696 case AArch64::LDURSBWi: 2697 case AArch64::STURBi: 2698 case AArch64::STURBBi: 2699 Width = 1; 2700 Scale = TypeSize::Fixed(1); 2701 MinOffset = -256; 2702 MaxOffset = 255; 2703 break; 2704 case AArch64::LDPQi: 2705 case AArch64::LDNPQi: 2706 case AArch64::STPQi: 2707 case AArch64::STNPQi: 2708 Scale = TypeSize::Fixed(16); 2709 Width = 32; 2710 MinOffset = -64; 2711 MaxOffset = 63; 2712 break; 2713 case AArch64::LDRQui: 2714 case AArch64::STRQui: 2715 Scale = TypeSize::Fixed(16); 2716 Width = 16; 2717 MinOffset = 0; 2718 MaxOffset = 4095; 2719 break; 2720 case AArch64::LDPXi: 2721 case AArch64::LDPDi: 2722 case AArch64::LDNPXi: 2723 case AArch64::LDNPDi: 2724 case AArch64::STPXi: 2725 case AArch64::STPDi: 2726 case AArch64::STNPXi: 2727 case AArch64::STNPDi: 2728 Scale = TypeSize::Fixed(8); 2729 Width = 16; 2730 MinOffset = -64; 2731 MaxOffset = 63; 2732 break; 2733 case AArch64::PRFMui: 2734 case AArch64::LDRXui: 2735 case AArch64::LDRDui: 2736 case AArch64::STRXui: 2737 case AArch64::STRDui: 2738 Scale = TypeSize::Fixed(8); 2739 Width = 8; 2740 MinOffset = 0; 2741 MaxOffset = 4095; 2742 break; 2743 case AArch64::StoreSwiftAsyncContext: 2744 // Store is an STRXui, but there might be an ADDXri in the expansion too. 
2745 Scale = TypeSize::Fixed(1); 2746 Width = 8; 2747 MinOffset = 0; 2748 MaxOffset = 4095; 2749 break; 2750 case AArch64::LDPWi: 2751 case AArch64::LDPSi: 2752 case AArch64::LDNPWi: 2753 case AArch64::LDNPSi: 2754 case AArch64::STPWi: 2755 case AArch64::STPSi: 2756 case AArch64::STNPWi: 2757 case AArch64::STNPSi: 2758 Scale = TypeSize::Fixed(4); 2759 Width = 8; 2760 MinOffset = -64; 2761 MaxOffset = 63; 2762 break; 2763 case AArch64::LDRWui: 2764 case AArch64::LDRSui: 2765 case AArch64::LDRSWui: 2766 case AArch64::STRWui: 2767 case AArch64::STRSui: 2768 Scale = TypeSize::Fixed(4); 2769 Width = 4; 2770 MinOffset = 0; 2771 MaxOffset = 4095; 2772 break; 2773 case AArch64::LDRHui: 2774 case AArch64::LDRHHui: 2775 case AArch64::LDRSHWui: 2776 case AArch64::LDRSHXui: 2777 case AArch64::STRHui: 2778 case AArch64::STRHHui: 2779 Scale = TypeSize::Fixed(2); 2780 Width = 2; 2781 MinOffset = 0; 2782 MaxOffset = 4095; 2783 break; 2784 case AArch64::LDRBui: 2785 case AArch64::LDRBBui: 2786 case AArch64::LDRSBWui: 2787 case AArch64::LDRSBXui: 2788 case AArch64::STRBui: 2789 case AArch64::STRBBui: 2790 Scale = TypeSize::Fixed(1); 2791 Width = 1; 2792 MinOffset = 0; 2793 MaxOffset = 4095; 2794 break; 2795 case AArch64::STPXpre: 2796 case AArch64::LDPXpost: 2797 case AArch64::STPDpre: 2798 case AArch64::LDPDpost: 2799 Scale = TypeSize::Fixed(8); 2800 Width = 8; 2801 MinOffset = -512; 2802 MaxOffset = 504; 2803 break; 2804 case AArch64::STPQpre: 2805 case AArch64::LDPQpost: 2806 Scale = TypeSize::Fixed(16); 2807 Width = 16; 2808 MinOffset = -1024; 2809 MaxOffset = 1008; 2810 break; 2811 case AArch64::STRXpre: 2812 case AArch64::STRDpre: 2813 case AArch64::LDRXpost: 2814 case AArch64::LDRDpost: 2815 Scale = TypeSize::Fixed(1); 2816 Width = 8; 2817 MinOffset = -256; 2818 MaxOffset = 255; 2819 break; 2820 case AArch64::STRQpre: 2821 case AArch64::LDRQpost: 2822 Scale = TypeSize::Fixed(1); 2823 Width = 16; 2824 MinOffset = -256; 2825 MaxOffset = 255; 2826 break; 2827 case AArch64::ADDG: 2828 Scale = TypeSize::Fixed(16); 2829 Width = 0; 2830 MinOffset = 0; 2831 MaxOffset = 63; 2832 break; 2833 case AArch64::TAGPstack: 2834 Scale = TypeSize::Fixed(16); 2835 Width = 0; 2836 // TAGP with a negative offset turns into SUBP, which has a maximum offset 2837 // of 63 (not 64!). 
2838 MinOffset = -63; 2839 MaxOffset = 63; 2840 break; 2841 case AArch64::LDG: 2842 case AArch64::STGOffset: 2843 case AArch64::STZGOffset: 2844 Scale = TypeSize::Fixed(16); 2845 Width = 16; 2846 MinOffset = -256; 2847 MaxOffset = 255; 2848 break; 2849 case AArch64::STR_ZZZZXI: 2850 case AArch64::LDR_ZZZZXI: 2851 Scale = TypeSize::Scalable(16); 2852 Width = SVEMaxBytesPerVector * 4; 2853 MinOffset = -256; 2854 MaxOffset = 252; 2855 break; 2856 case AArch64::STR_ZZZXI: 2857 case AArch64::LDR_ZZZXI: 2858 Scale = TypeSize::Scalable(16); 2859 Width = SVEMaxBytesPerVector * 3; 2860 MinOffset = -256; 2861 MaxOffset = 253; 2862 break; 2863 case AArch64::STR_ZZXI: 2864 case AArch64::LDR_ZZXI: 2865 Scale = TypeSize::Scalable(16); 2866 Width = SVEMaxBytesPerVector * 2; 2867 MinOffset = -256; 2868 MaxOffset = 254; 2869 break; 2870 case AArch64::LDR_PXI: 2871 case AArch64::STR_PXI: 2872 Scale = TypeSize::Scalable(2); 2873 Width = SVEMaxBytesPerVector / 8; 2874 MinOffset = -256; 2875 MaxOffset = 255; 2876 break; 2877 case AArch64::LDR_ZXI: 2878 case AArch64::STR_ZXI: 2879 Scale = TypeSize::Scalable(16); 2880 Width = SVEMaxBytesPerVector; 2881 MinOffset = -256; 2882 MaxOffset = 255; 2883 break; 2884 case AArch64::LD1B_IMM: 2885 case AArch64::LD1H_IMM: 2886 case AArch64::LD1W_IMM: 2887 case AArch64::LD1D_IMM: 2888 case AArch64::LDNT1B_ZRI: 2889 case AArch64::LDNT1H_ZRI: 2890 case AArch64::LDNT1W_ZRI: 2891 case AArch64::LDNT1D_ZRI: 2892 case AArch64::ST1B_IMM: 2893 case AArch64::ST1H_IMM: 2894 case AArch64::ST1W_IMM: 2895 case AArch64::ST1D_IMM: 2896 case AArch64::STNT1B_ZRI: 2897 case AArch64::STNT1H_ZRI: 2898 case AArch64::STNT1W_ZRI: 2899 case AArch64::STNT1D_ZRI: 2900 case AArch64::LDNF1B_IMM: 2901 case AArch64::LDNF1H_IMM: 2902 case AArch64::LDNF1W_IMM: 2903 case AArch64::LDNF1D_IMM: 2904 // A full vectors worth of data 2905 // Width = mbytes * elements 2906 Scale = TypeSize::Scalable(16); 2907 Width = SVEMaxBytesPerVector; 2908 MinOffset = -8; 2909 MaxOffset = 7; 2910 break; 2911 case AArch64::LD1B_H_IMM: 2912 case AArch64::LD1SB_H_IMM: 2913 case AArch64::LD1H_S_IMM: 2914 case AArch64::LD1SH_S_IMM: 2915 case AArch64::LD1W_D_IMM: 2916 case AArch64::LD1SW_D_IMM: 2917 case AArch64::ST1B_H_IMM: 2918 case AArch64::ST1H_S_IMM: 2919 case AArch64::ST1W_D_IMM: 2920 case AArch64::LDNF1B_H_IMM: 2921 case AArch64::LDNF1SB_H_IMM: 2922 case AArch64::LDNF1H_S_IMM: 2923 case AArch64::LDNF1SH_S_IMM: 2924 case AArch64::LDNF1W_D_IMM: 2925 case AArch64::LDNF1SW_D_IMM: 2926 // A half vector worth of data 2927 // Width = mbytes * elements 2928 Scale = TypeSize::Scalable(8); 2929 Width = SVEMaxBytesPerVector / 2; 2930 MinOffset = -8; 2931 MaxOffset = 7; 2932 break; 2933 case AArch64::LD1B_S_IMM: 2934 case AArch64::LD1SB_S_IMM: 2935 case AArch64::LD1H_D_IMM: 2936 case AArch64::LD1SH_D_IMM: 2937 case AArch64::ST1B_S_IMM: 2938 case AArch64::ST1H_D_IMM: 2939 case AArch64::LDNF1B_S_IMM: 2940 case AArch64::LDNF1SB_S_IMM: 2941 case AArch64::LDNF1H_D_IMM: 2942 case AArch64::LDNF1SH_D_IMM: 2943 // A quarter vector worth of data 2944 // Width = mbytes * elements 2945 Scale = TypeSize::Scalable(4); 2946 Width = SVEMaxBytesPerVector / 4; 2947 MinOffset = -8; 2948 MaxOffset = 7; 2949 break; 2950 case AArch64::LD1B_D_IMM: 2951 case AArch64::LD1SB_D_IMM: 2952 case AArch64::ST1B_D_IMM: 2953 case AArch64::LDNF1B_D_IMM: 2954 case AArch64::LDNF1SB_D_IMM: 2955 // A eighth vector worth of data 2956 // Width = mbytes * elements 2957 Scale = TypeSize::Scalable(2); 2958 Width = SVEMaxBytesPerVector / 8; 2959 MinOffset = -8; 2960 MaxOffset = 7; 
2961 break; 2962 case AArch64::ST2GOffset: 2963 case AArch64::STZ2GOffset: 2964 Scale = TypeSize::Fixed(16); 2965 Width = 32; 2966 MinOffset = -256; 2967 MaxOffset = 255; 2968 break; 2969 case AArch64::STGPi: 2970 Scale = TypeSize::Fixed(16); 2971 Width = 16; 2972 MinOffset = -64; 2973 MaxOffset = 63; 2974 break; 2975 case AArch64::LD1RB_IMM: 2976 case AArch64::LD1RB_H_IMM: 2977 case AArch64::LD1RB_S_IMM: 2978 case AArch64::LD1RB_D_IMM: 2979 case AArch64::LD1RSB_H_IMM: 2980 case AArch64::LD1RSB_S_IMM: 2981 case AArch64::LD1RSB_D_IMM: 2982 Scale = TypeSize::Fixed(1); 2983 Width = 1; 2984 MinOffset = 0; 2985 MaxOffset = 63; 2986 break; 2987 case AArch64::LD1RH_IMM: 2988 case AArch64::LD1RH_S_IMM: 2989 case AArch64::LD1RH_D_IMM: 2990 case AArch64::LD1RSH_S_IMM: 2991 case AArch64::LD1RSH_D_IMM: 2992 Scale = TypeSize::Fixed(2); 2993 Width = 2; 2994 MinOffset = 0; 2995 MaxOffset = 63; 2996 break; 2997 case AArch64::LD1RW_IMM: 2998 case AArch64::LD1RW_D_IMM: 2999 case AArch64::LD1RSW_IMM: 3000 Scale = TypeSize::Fixed(4); 3001 Width = 4; 3002 MinOffset = 0; 3003 MaxOffset = 63; 3004 break; 3005 case AArch64::LD1RD_IMM: 3006 Scale = TypeSize::Fixed(8); 3007 Width = 8; 3008 MinOffset = 0; 3009 MaxOffset = 63; 3010 break; 3011 } 3012 3013 return true; 3014 } 3015 3016 // Scaling factor for unscaled load or store. 3017 int AArch64InstrInfo::getMemScale(unsigned Opc) { 3018 switch (Opc) { 3019 default: 3020 llvm_unreachable("Opcode has unknown scale!"); 3021 case AArch64::LDRBBui: 3022 case AArch64::LDURBBi: 3023 case AArch64::LDRSBWui: 3024 case AArch64::LDURSBWi: 3025 case AArch64::STRBBui: 3026 case AArch64::STURBBi: 3027 return 1; 3028 case AArch64::LDRHHui: 3029 case AArch64::LDURHHi: 3030 case AArch64::LDRSHWui: 3031 case AArch64::LDURSHWi: 3032 case AArch64::STRHHui: 3033 case AArch64::STURHHi: 3034 return 2; 3035 case AArch64::LDRSui: 3036 case AArch64::LDURSi: 3037 case AArch64::LDRSpre: 3038 case AArch64::LDRSWui: 3039 case AArch64::LDURSWi: 3040 case AArch64::LDRWpre: 3041 case AArch64::LDRWui: 3042 case AArch64::LDURWi: 3043 case AArch64::STRSui: 3044 case AArch64::STURSi: 3045 case AArch64::STRSpre: 3046 case AArch64::STRWui: 3047 case AArch64::STURWi: 3048 case AArch64::STRWpre: 3049 case AArch64::LDPSi: 3050 case AArch64::LDPSWi: 3051 case AArch64::LDPWi: 3052 case AArch64::STPSi: 3053 case AArch64::STPWi: 3054 return 4; 3055 case AArch64::LDRDui: 3056 case AArch64::LDURDi: 3057 case AArch64::LDRDpre: 3058 case AArch64::LDRXui: 3059 case AArch64::LDURXi: 3060 case AArch64::LDRXpre: 3061 case AArch64::STRDui: 3062 case AArch64::STURDi: 3063 case AArch64::STRDpre: 3064 case AArch64::STRXui: 3065 case AArch64::STURXi: 3066 case AArch64::STRXpre: 3067 case AArch64::LDPDi: 3068 case AArch64::LDPXi: 3069 case AArch64::STPDi: 3070 case AArch64::STPXi: 3071 return 8; 3072 case AArch64::LDRQui: 3073 case AArch64::LDURQi: 3074 case AArch64::STRQui: 3075 case AArch64::STURQi: 3076 case AArch64::STRQpre: 3077 case AArch64::LDPQi: 3078 case AArch64::LDRQpre: 3079 case AArch64::STPQi: 3080 case AArch64::STGOffset: 3081 case AArch64::STZGOffset: 3082 case AArch64::ST2GOffset: 3083 case AArch64::STZ2GOffset: 3084 case AArch64::STGPi: 3085 return 16; 3086 } 3087 } 3088 3089 bool AArch64InstrInfo::isPreLd(const MachineInstr &MI) { 3090 switch (MI.getOpcode()) { 3091 default: 3092 return false; 3093 case AArch64::LDRWpre: 3094 case AArch64::LDRXpre: 3095 case AArch64::LDRSpre: 3096 case AArch64::LDRDpre: 3097 case AArch64::LDRQpre: 3098 return true; 3099 } 3100 } 3101 3102 bool 
AArch64InstrInfo::isPreSt(const MachineInstr &MI) { 3103 switch (MI.getOpcode()) { 3104 default: 3105 return false; 3106 case AArch64::STRWpre: 3107 case AArch64::STRXpre: 3108 case AArch64::STRSpre: 3109 case AArch64::STRDpre: 3110 case AArch64::STRQpre: 3111 return true; 3112 } 3113 } 3114 3115 bool AArch64InstrInfo::isPreLdSt(const MachineInstr &MI) { 3116 return isPreLd(MI) || isPreSt(MI); 3117 } 3118 3119 // Scale the unscaled offsets. Returns false if the unscaled offset can't be 3120 // scaled. 3121 static bool scaleOffset(unsigned Opc, int64_t &Offset) { 3122 int Scale = AArch64InstrInfo::getMemScale(Opc); 3123 3124 // If the byte-offset isn't a multiple of the stride, we can't scale this 3125 // offset. 3126 if (Offset % Scale != 0) 3127 return false; 3128 3129 // Convert the byte-offset used by unscaled into an "element" offset used 3130 // by the scaled pair load/store instructions. 3131 Offset /= Scale; 3132 return true; 3133 } 3134 3135 static bool canPairLdStOpc(unsigned FirstOpc, unsigned SecondOpc) { 3136 if (FirstOpc == SecondOpc) 3137 return true; 3138 // We can also pair sign-ext and zero-ext instructions. 3139 switch (FirstOpc) { 3140 default: 3141 return false; 3142 case AArch64::LDRWui: 3143 case AArch64::LDURWi: 3144 return SecondOpc == AArch64::LDRSWui || SecondOpc == AArch64::LDURSWi; 3145 case AArch64::LDRSWui: 3146 case AArch64::LDURSWi: 3147 return SecondOpc == AArch64::LDRWui || SecondOpc == AArch64::LDURWi; 3148 } 3149 // These instructions can't be paired based on their opcodes. 3150 return false; 3151 } 3152 3153 static bool shouldClusterFI(const MachineFrameInfo &MFI, int FI1, 3154 int64_t Offset1, unsigned Opcode1, int FI2, 3155 int64_t Offset2, unsigned Opcode2) { 3156 // Accesses through fixed stack object frame indices may access a different 3157 // fixed stack slot. Check that the object offsets + offsets match. 3158 if (MFI.isFixedObjectIndex(FI1) && MFI.isFixedObjectIndex(FI2)) { 3159 int64_t ObjectOffset1 = MFI.getObjectOffset(FI1); 3160 int64_t ObjectOffset2 = MFI.getObjectOffset(FI2); 3161 assert(ObjectOffset1 <= ObjectOffset2 && "Object offsets are not ordered."); 3162 // Convert to scaled object offsets. 3163 int Scale1 = AArch64InstrInfo::getMemScale(Opcode1); 3164 if (ObjectOffset1 % Scale1 != 0) 3165 return false; 3166 ObjectOffset1 /= Scale1; 3167 int Scale2 = AArch64InstrInfo::getMemScale(Opcode2); 3168 if (ObjectOffset2 % Scale2 != 0) 3169 return false; 3170 ObjectOffset2 /= Scale2; 3171 ObjectOffset1 += Offset1; 3172 ObjectOffset2 += Offset2; 3173 return ObjectOffset1 + 1 == ObjectOffset2; 3174 } 3175 3176 return FI1 == FI2; 3177 } 3178 3179 /// Detect opportunities for ldp/stp formation. 3180 /// 3181 /// Only called for LdSt for which getMemOperandWithOffset returns true. 3182 bool AArch64InstrInfo::shouldClusterMemOps( 3183 ArrayRef<const MachineOperand *> BaseOps1, 3184 ArrayRef<const MachineOperand *> BaseOps2, unsigned NumLoads, 3185 unsigned NumBytes) const { 3186 assert(BaseOps1.size() == 1 && BaseOps2.size() == 1); 3187 const MachineOperand &BaseOp1 = *BaseOps1.front(); 3188 const MachineOperand &BaseOp2 = *BaseOps2.front(); 3189 const MachineInstr &FirstLdSt = *BaseOp1.getParent(); 3190 const MachineInstr &SecondLdSt = *BaseOp2.getParent(); 3191 if (BaseOp1.getType() != BaseOp2.getType()) 3192 return false; 3193 3194 assert((BaseOp1.isReg() || BaseOp1.isFI()) && 3195 "Only base registers and frame indices are supported."); 3196 3197 // Check for both base regs and base FI. 
3198 if (BaseOp1.isReg() && BaseOp1.getReg() != BaseOp2.getReg()) 3199 return false; 3200 3201 // Only cluster up to a single pair. 3202 if (NumLoads > 2) 3203 return false; 3204 3205 if (!isPairableLdStInst(FirstLdSt) || !isPairableLdStInst(SecondLdSt)) 3206 return false; 3207 3208 // Can we pair these instructions based on their opcodes? 3209 unsigned FirstOpc = FirstLdSt.getOpcode(); 3210 unsigned SecondOpc = SecondLdSt.getOpcode(); 3211 if (!canPairLdStOpc(FirstOpc, SecondOpc)) 3212 return false; 3213 3214 // Can't merge volatiles or load/stores that have a hint to avoid pair 3215 // formation, for example. 3216 if (!isCandidateToMergeOrPair(FirstLdSt) || 3217 !isCandidateToMergeOrPair(SecondLdSt)) 3218 return false; 3219 3220 // isCandidateToMergeOrPair guarantees that operand 2 is an immediate. 3221 int64_t Offset1 = FirstLdSt.getOperand(2).getImm(); 3222 if (hasUnscaledLdStOffset(FirstOpc) && !scaleOffset(FirstOpc, Offset1)) 3223 return false; 3224 3225 int64_t Offset2 = SecondLdSt.getOperand(2).getImm(); 3226 if (hasUnscaledLdStOffset(SecondOpc) && !scaleOffset(SecondOpc, Offset2)) 3227 return false; 3228 3229 // Pairwise instructions have a 7-bit signed offset field. 3230 if (Offset1 > 63 || Offset1 < -64) 3231 return false; 3232 3233 // The caller should already have ordered First/SecondLdSt by offset. 3234 // Note: except for non-equal frame index bases 3235 if (BaseOp1.isFI()) { 3236 assert((!BaseOp1.isIdenticalTo(BaseOp2) || Offset1 <= Offset2) && 3237 "Caller should have ordered offsets."); 3238 3239 const MachineFrameInfo &MFI = 3240 FirstLdSt.getParent()->getParent()->getFrameInfo(); 3241 return shouldClusterFI(MFI, BaseOp1.getIndex(), Offset1, FirstOpc, 3242 BaseOp2.getIndex(), Offset2, SecondOpc); 3243 } 3244 3245 assert(Offset1 <= Offset2 && "Caller should have ordered offsets."); 3246 3247 return Offset1 + 1 == Offset2; 3248 } 3249 3250 static const MachineInstrBuilder &AddSubReg(const MachineInstrBuilder &MIB, 3251 unsigned Reg, unsigned SubIdx, 3252 unsigned State, 3253 const TargetRegisterInfo *TRI) { 3254 if (!SubIdx) 3255 return MIB.addReg(Reg, State); 3256 3257 if (Register::isPhysicalRegister(Reg)) 3258 return MIB.addReg(TRI->getSubReg(Reg, SubIdx), State); 3259 return MIB.addReg(Reg, State, SubIdx); 3260 } 3261 3262 static bool forwardCopyWillClobberTuple(unsigned DestReg, unsigned SrcReg, 3263 unsigned NumRegs) { 3264 // We really want the positive remainder mod 32 here, that happens to be 3265 // easily obtainable with a mask. 
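  // For example (illustrative): copying the 3-register tuple D1_D2_D3 to
  // D2_D3_D4 gives (DestReg - SrcReg) & 0x1f == 1, which is < 3, so a
  // low-to-high sub-register copy would clobber D2/D3 before reading them;
  // the caller copies the tuple high-to-low instead.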
3266 return ((DestReg - SrcReg) & 0x1f) < NumRegs; 3267 } 3268 3269 void AArch64InstrInfo::copyPhysRegTuple(MachineBasicBlock &MBB, 3270 MachineBasicBlock::iterator I, 3271 const DebugLoc &DL, MCRegister DestReg, 3272 MCRegister SrcReg, bool KillSrc, 3273 unsigned Opcode, 3274 ArrayRef<unsigned> Indices) const { 3275 assert(Subtarget.hasNEON() && "Unexpected register copy without NEON"); 3276 const TargetRegisterInfo *TRI = &getRegisterInfo(); 3277 uint16_t DestEncoding = TRI->getEncodingValue(DestReg); 3278 uint16_t SrcEncoding = TRI->getEncodingValue(SrcReg); 3279 unsigned NumRegs = Indices.size(); 3280 3281 int SubReg = 0, End = NumRegs, Incr = 1; 3282 if (forwardCopyWillClobberTuple(DestEncoding, SrcEncoding, NumRegs)) { 3283 SubReg = NumRegs - 1; 3284 End = -1; 3285 Incr = -1; 3286 } 3287 3288 for (; SubReg != End; SubReg += Incr) { 3289 const MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(Opcode)); 3290 AddSubReg(MIB, DestReg, Indices[SubReg], RegState::Define, TRI); 3291 AddSubReg(MIB, SrcReg, Indices[SubReg], 0, TRI); 3292 AddSubReg(MIB, SrcReg, Indices[SubReg], getKillRegState(KillSrc), TRI); 3293 } 3294 } 3295 3296 void AArch64InstrInfo::copyGPRRegTuple(MachineBasicBlock &MBB, 3297 MachineBasicBlock::iterator I, 3298 DebugLoc DL, unsigned DestReg, 3299 unsigned SrcReg, bool KillSrc, 3300 unsigned Opcode, unsigned ZeroReg, 3301 llvm::ArrayRef<unsigned> Indices) const { 3302 const TargetRegisterInfo *TRI = &getRegisterInfo(); 3303 unsigned NumRegs = Indices.size(); 3304 3305 #ifndef NDEBUG 3306 uint16_t DestEncoding = TRI->getEncodingValue(DestReg); 3307 uint16_t SrcEncoding = TRI->getEncodingValue(SrcReg); 3308 assert(DestEncoding % NumRegs == 0 && SrcEncoding % NumRegs == 0 && 3309 "GPR reg sequences should not be able to overlap"); 3310 #endif 3311 3312 for (unsigned SubReg = 0; SubReg != NumRegs; ++SubReg) { 3313 const MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(Opcode)); 3314 AddSubReg(MIB, DestReg, Indices[SubReg], RegState::Define, TRI); 3315 MIB.addReg(ZeroReg); 3316 AddSubReg(MIB, SrcReg, Indices[SubReg], getKillRegState(KillSrc), TRI); 3317 MIB.addImm(0); 3318 } 3319 } 3320 3321 void AArch64InstrInfo::copyPhysReg(MachineBasicBlock &MBB, 3322 MachineBasicBlock::iterator I, 3323 const DebugLoc &DL, MCRegister DestReg, 3324 MCRegister SrcReg, bool KillSrc) const { 3325 if (AArch64::GPR32spRegClass.contains(DestReg) && 3326 (AArch64::GPR32spRegClass.contains(SrcReg) || SrcReg == AArch64::WZR)) { 3327 const TargetRegisterInfo *TRI = &getRegisterInfo(); 3328 3329 if (DestReg == AArch64::WSP || SrcReg == AArch64::WSP) { 3330 // If either operand is WSP, expand to ADD #0. 3331 if (Subtarget.hasZeroCycleRegMove()) { 3332 // Cyclone recognizes "ADD Xd, Xn, #0" as a zero-cycle register move. 3333 MCRegister DestRegX = TRI->getMatchingSuperReg( 3334 DestReg, AArch64::sub_32, &AArch64::GPR64spRegClass); 3335 MCRegister SrcRegX = TRI->getMatchingSuperReg( 3336 SrcReg, AArch64::sub_32, &AArch64::GPR64spRegClass); 3337 // This instruction is reading and writing X registers. This may upset 3338 // the register scavenger and machine verifier, so we need to indicate 3339 // that we are reading an undefined value from SrcRegX, but a proper 3340 // value from SrcReg. 
3341 BuildMI(MBB, I, DL, get(AArch64::ADDXri), DestRegX) 3342 .addReg(SrcRegX, RegState::Undef) 3343 .addImm(0) 3344 .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0)) 3345 .addReg(SrcReg, RegState::Implicit | getKillRegState(KillSrc)); 3346 } else { 3347 BuildMI(MBB, I, DL, get(AArch64::ADDWri), DestReg) 3348 .addReg(SrcReg, getKillRegState(KillSrc)) 3349 .addImm(0) 3350 .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0)); 3351 } 3352 } else if (SrcReg == AArch64::WZR && Subtarget.hasZeroCycleZeroingGP()) { 3353 BuildMI(MBB, I, DL, get(AArch64::MOVZWi), DestReg) 3354 .addImm(0) 3355 .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0)); 3356 } else { 3357 if (Subtarget.hasZeroCycleRegMove()) { 3358 // Cyclone recognizes "ORR Xd, XZR, Xm" as a zero-cycle register move. 3359 MCRegister DestRegX = TRI->getMatchingSuperReg( 3360 DestReg, AArch64::sub_32, &AArch64::GPR64spRegClass); 3361 MCRegister SrcRegX = TRI->getMatchingSuperReg( 3362 SrcReg, AArch64::sub_32, &AArch64::GPR64spRegClass); 3363 // This instruction is reading and writing X registers. This may upset 3364 // the register scavenger and machine verifier, so we need to indicate 3365 // that we are reading an undefined value from SrcRegX, but a proper 3366 // value from SrcReg. 3367 BuildMI(MBB, I, DL, get(AArch64::ORRXrr), DestRegX) 3368 .addReg(AArch64::XZR) 3369 .addReg(SrcRegX, RegState::Undef) 3370 .addReg(SrcReg, RegState::Implicit | getKillRegState(KillSrc)); 3371 } else { 3372 // Otherwise, expand to ORR WZR. 3373 BuildMI(MBB, I, DL, get(AArch64::ORRWrr), DestReg) 3374 .addReg(AArch64::WZR) 3375 .addReg(SrcReg, getKillRegState(KillSrc)); 3376 } 3377 } 3378 return; 3379 } 3380 3381 // Copy a Predicate register by ORRing with itself. 3382 if (AArch64::PPRRegClass.contains(DestReg) && 3383 AArch64::PPRRegClass.contains(SrcReg)) { 3384 assert(Subtarget.hasSVE() && "Unexpected SVE register."); 3385 BuildMI(MBB, I, DL, get(AArch64::ORR_PPzPP), DestReg) 3386 .addReg(SrcReg) // Pg 3387 .addReg(SrcReg) 3388 .addReg(SrcReg, getKillRegState(KillSrc)); 3389 return; 3390 } 3391 3392 // Copy a Z register by ORRing with itself. 3393 if (AArch64::ZPRRegClass.contains(DestReg) && 3394 AArch64::ZPRRegClass.contains(SrcReg)) { 3395 assert(Subtarget.hasSVE() && "Unexpected SVE register."); 3396 BuildMI(MBB, I, DL, get(AArch64::ORR_ZZZ), DestReg) 3397 .addReg(SrcReg) 3398 .addReg(SrcReg, getKillRegState(KillSrc)); 3399 return; 3400 } 3401 3402 // Copy a Z register pair by copying the individual sub-registers. 3403 if (AArch64::ZPR2RegClass.contains(DestReg) && 3404 AArch64::ZPR2RegClass.contains(SrcReg)) { 3405 static const unsigned Indices[] = {AArch64::zsub0, AArch64::zsub1}; 3406 copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORR_ZZZ, 3407 Indices); 3408 return; 3409 } 3410 3411 // Copy a Z register triple by copying the individual sub-registers. 3412 if (AArch64::ZPR3RegClass.contains(DestReg) && 3413 AArch64::ZPR3RegClass.contains(SrcReg)) { 3414 static const unsigned Indices[] = {AArch64::zsub0, AArch64::zsub1, 3415 AArch64::zsub2}; 3416 copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORR_ZZZ, 3417 Indices); 3418 return; 3419 } 3420 3421 // Copy a Z register quad by copying the individual sub-registers. 
3422 if (AArch64::ZPR4RegClass.contains(DestReg) && 3423 AArch64::ZPR4RegClass.contains(SrcReg)) { 3424 static const unsigned Indices[] = {AArch64::zsub0, AArch64::zsub1, 3425 AArch64::zsub2, AArch64::zsub3}; 3426 copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORR_ZZZ, 3427 Indices); 3428 return; 3429 } 3430 3431 if (AArch64::GPR64spRegClass.contains(DestReg) && 3432 (AArch64::GPR64spRegClass.contains(SrcReg) || SrcReg == AArch64::XZR)) { 3433 if (DestReg == AArch64::SP || SrcReg == AArch64::SP) { 3434 // If either operand is SP, expand to ADD #0. 3435 BuildMI(MBB, I, DL, get(AArch64::ADDXri), DestReg) 3436 .addReg(SrcReg, getKillRegState(KillSrc)) 3437 .addImm(0) 3438 .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0)); 3439 } else if (SrcReg == AArch64::XZR && Subtarget.hasZeroCycleZeroingGP()) { 3440 BuildMI(MBB, I, DL, get(AArch64::MOVZXi), DestReg) 3441 .addImm(0) 3442 .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0)); 3443 } else { 3444 // Otherwise, expand to ORR XZR. 3445 BuildMI(MBB, I, DL, get(AArch64::ORRXrr), DestReg) 3446 .addReg(AArch64::XZR) 3447 .addReg(SrcReg, getKillRegState(KillSrc)); 3448 } 3449 return; 3450 } 3451 3452 // Copy a DDDD register quad by copying the individual sub-registers. 3453 if (AArch64::DDDDRegClass.contains(DestReg) && 3454 AArch64::DDDDRegClass.contains(SrcReg)) { 3455 static const unsigned Indices[] = {AArch64::dsub0, AArch64::dsub1, 3456 AArch64::dsub2, AArch64::dsub3}; 3457 copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv8i8, 3458 Indices); 3459 return; 3460 } 3461 3462 // Copy a DDD register triple by copying the individual sub-registers. 3463 if (AArch64::DDDRegClass.contains(DestReg) && 3464 AArch64::DDDRegClass.contains(SrcReg)) { 3465 static const unsigned Indices[] = {AArch64::dsub0, AArch64::dsub1, 3466 AArch64::dsub2}; 3467 copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv8i8, 3468 Indices); 3469 return; 3470 } 3471 3472 // Copy a DD register pair by copying the individual sub-registers. 3473 if (AArch64::DDRegClass.contains(DestReg) && 3474 AArch64::DDRegClass.contains(SrcReg)) { 3475 static const unsigned Indices[] = {AArch64::dsub0, AArch64::dsub1}; 3476 copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv8i8, 3477 Indices); 3478 return; 3479 } 3480 3481 // Copy a QQQQ register quad by copying the individual sub-registers. 3482 if (AArch64::QQQQRegClass.contains(DestReg) && 3483 AArch64::QQQQRegClass.contains(SrcReg)) { 3484 static const unsigned Indices[] = {AArch64::qsub0, AArch64::qsub1, 3485 AArch64::qsub2, AArch64::qsub3}; 3486 copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv16i8, 3487 Indices); 3488 return; 3489 } 3490 3491 // Copy a QQQ register triple by copying the individual sub-registers. 3492 if (AArch64::QQQRegClass.contains(DestReg) && 3493 AArch64::QQQRegClass.contains(SrcReg)) { 3494 static const unsigned Indices[] = {AArch64::qsub0, AArch64::qsub1, 3495 AArch64::qsub2}; 3496 copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv16i8, 3497 Indices); 3498 return; 3499 } 3500 3501 // Copy a QQ register pair by copying the individual sub-registers. 
3502 if (AArch64::QQRegClass.contains(DestReg) && 3503 AArch64::QQRegClass.contains(SrcReg)) { 3504 static const unsigned Indices[] = {AArch64::qsub0, AArch64::qsub1}; 3505 copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv16i8, 3506 Indices); 3507 return; 3508 } 3509 3510 if (AArch64::XSeqPairsClassRegClass.contains(DestReg) && 3511 AArch64::XSeqPairsClassRegClass.contains(SrcReg)) { 3512 static const unsigned Indices[] = {AArch64::sube64, AArch64::subo64}; 3513 copyGPRRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRXrs, 3514 AArch64::XZR, Indices); 3515 return; 3516 } 3517 3518 if (AArch64::WSeqPairsClassRegClass.contains(DestReg) && 3519 AArch64::WSeqPairsClassRegClass.contains(SrcReg)) { 3520 static const unsigned Indices[] = {AArch64::sube32, AArch64::subo32}; 3521 copyGPRRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRWrs, 3522 AArch64::WZR, Indices); 3523 return; 3524 } 3525 3526 if (AArch64::FPR128RegClass.contains(DestReg) && 3527 AArch64::FPR128RegClass.contains(SrcReg)) { 3528 if (Subtarget.hasNEON()) { 3529 BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestReg) 3530 .addReg(SrcReg) 3531 .addReg(SrcReg, getKillRegState(KillSrc)); 3532 } else { 3533 BuildMI(MBB, I, DL, get(AArch64::STRQpre)) 3534 .addReg(AArch64::SP, RegState::Define) 3535 .addReg(SrcReg, getKillRegState(KillSrc)) 3536 .addReg(AArch64::SP) 3537 .addImm(-16); 3538 BuildMI(MBB, I, DL, get(AArch64::LDRQpre)) 3539 .addReg(AArch64::SP, RegState::Define) 3540 .addReg(DestReg, RegState::Define) 3541 .addReg(AArch64::SP) 3542 .addImm(16); 3543 } 3544 return; 3545 } 3546 3547 if (AArch64::FPR64RegClass.contains(DestReg) && 3548 AArch64::FPR64RegClass.contains(SrcReg)) { 3549 BuildMI(MBB, I, DL, get(AArch64::FMOVDr), DestReg) 3550 .addReg(SrcReg, getKillRegState(KillSrc)); 3551 return; 3552 } 3553 3554 if (AArch64::FPR32RegClass.contains(DestReg) && 3555 AArch64::FPR32RegClass.contains(SrcReg)) { 3556 BuildMI(MBB, I, DL, get(AArch64::FMOVSr), DestReg) 3557 .addReg(SrcReg, getKillRegState(KillSrc)); 3558 return; 3559 } 3560 3561 if (AArch64::FPR16RegClass.contains(DestReg) && 3562 AArch64::FPR16RegClass.contains(SrcReg)) { 3563 DestReg = 3564 RI.getMatchingSuperReg(DestReg, AArch64::hsub, &AArch64::FPR32RegClass); 3565 SrcReg = 3566 RI.getMatchingSuperReg(SrcReg, AArch64::hsub, &AArch64::FPR32RegClass); 3567 BuildMI(MBB, I, DL, get(AArch64::FMOVSr), DestReg) 3568 .addReg(SrcReg, getKillRegState(KillSrc)); 3569 return; 3570 } 3571 3572 if (AArch64::FPR8RegClass.contains(DestReg) && 3573 AArch64::FPR8RegClass.contains(SrcReg)) { 3574 DestReg = 3575 RI.getMatchingSuperReg(DestReg, AArch64::bsub, &AArch64::FPR32RegClass); 3576 SrcReg = 3577 RI.getMatchingSuperReg(SrcReg, AArch64::bsub, &AArch64::FPR32RegClass); 3578 BuildMI(MBB, I, DL, get(AArch64::FMOVSr), DestReg) 3579 .addReg(SrcReg, getKillRegState(KillSrc)); 3580 return; 3581 } 3582 3583 // Copies between GPR64 and FPR64. 3584 if (AArch64::FPR64RegClass.contains(DestReg) && 3585 AArch64::GPR64RegClass.contains(SrcReg)) { 3586 BuildMI(MBB, I, DL, get(AArch64::FMOVXDr), DestReg) 3587 .addReg(SrcReg, getKillRegState(KillSrc)); 3588 return; 3589 } 3590 if (AArch64::GPR64RegClass.contains(DestReg) && 3591 AArch64::FPR64RegClass.contains(SrcReg)) { 3592 BuildMI(MBB, I, DL, get(AArch64::FMOVDXr), DestReg) 3593 .addReg(SrcReg, getKillRegState(KillSrc)); 3594 return; 3595 } 3596 // Copies between GPR32 and FPR32. 
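  // e.g. (illustrative) $s0 = COPY $w1 becomes "fmov s0, w1", and
  // $w1 = COPY $s0 becomes "fmov w1, s0".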
3597 if (AArch64::FPR32RegClass.contains(DestReg) && 3598 AArch64::GPR32RegClass.contains(SrcReg)) { 3599 BuildMI(MBB, I, DL, get(AArch64::FMOVWSr), DestReg) 3600 .addReg(SrcReg, getKillRegState(KillSrc)); 3601 return; 3602 } 3603 if (AArch64::GPR32RegClass.contains(DestReg) && 3604 AArch64::FPR32RegClass.contains(SrcReg)) { 3605 BuildMI(MBB, I, DL, get(AArch64::FMOVSWr), DestReg) 3606 .addReg(SrcReg, getKillRegState(KillSrc)); 3607 return; 3608 } 3609 3610 if (DestReg == AArch64::NZCV) { 3611 assert(AArch64::GPR64RegClass.contains(SrcReg) && "Invalid NZCV copy"); 3612 BuildMI(MBB, I, DL, get(AArch64::MSR)) 3613 .addImm(AArch64SysReg::NZCV) 3614 .addReg(SrcReg, getKillRegState(KillSrc)) 3615 .addReg(AArch64::NZCV, RegState::Implicit | RegState::Define); 3616 return; 3617 } 3618 3619 if (SrcReg == AArch64::NZCV) { 3620 assert(AArch64::GPR64RegClass.contains(DestReg) && "Invalid NZCV copy"); 3621 BuildMI(MBB, I, DL, get(AArch64::MRS), DestReg) 3622 .addImm(AArch64SysReg::NZCV) 3623 .addReg(AArch64::NZCV, RegState::Implicit | getKillRegState(KillSrc)); 3624 return; 3625 } 3626 3627 #ifndef NDEBUG 3628 const TargetRegisterInfo &TRI = getRegisterInfo(); 3629 errs() << TRI.getRegAsmName(DestReg) << " = COPY " 3630 << TRI.getRegAsmName(SrcReg) << "\n"; 3631 #endif 3632 llvm_unreachable("unimplemented reg-to-reg copy"); 3633 } 3634 3635 static void storeRegPairToStackSlot(const TargetRegisterInfo &TRI, 3636 MachineBasicBlock &MBB, 3637 MachineBasicBlock::iterator InsertBefore, 3638 const MCInstrDesc &MCID, 3639 Register SrcReg, bool IsKill, 3640 unsigned SubIdx0, unsigned SubIdx1, int FI, 3641 MachineMemOperand *MMO) { 3642 Register SrcReg0 = SrcReg; 3643 Register SrcReg1 = SrcReg; 3644 if (Register::isPhysicalRegister(SrcReg)) { 3645 SrcReg0 = TRI.getSubReg(SrcReg, SubIdx0); 3646 SubIdx0 = 0; 3647 SrcReg1 = TRI.getSubReg(SrcReg, SubIdx1); 3648 SubIdx1 = 0; 3649 } 3650 BuildMI(MBB, InsertBefore, DebugLoc(), MCID) 3651 .addReg(SrcReg0, getKillRegState(IsKill), SubIdx0) 3652 .addReg(SrcReg1, getKillRegState(IsKill), SubIdx1) 3653 .addFrameIndex(FI) 3654 .addImm(0) 3655 .addMemOperand(MMO); 3656 } 3657 3658 void AArch64InstrInfo::storeRegToStackSlot( 3659 MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, Register SrcReg, 3660 bool isKill, int FI, const TargetRegisterClass *RC, 3661 const TargetRegisterInfo *TRI) const { 3662 MachineFunction &MF = *MBB.getParent(); 3663 MachineFrameInfo &MFI = MF.getFrameInfo(); 3664 3665 MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(MF, FI); 3666 MachineMemOperand *MMO = 3667 MF.getMachineMemOperand(PtrInfo, MachineMemOperand::MOStore, 3668 MFI.getObjectSize(FI), MFI.getObjectAlign(FI)); 3669 unsigned Opc = 0; 3670 bool Offset = true; 3671 unsigned StackID = TargetStackID::Default; 3672 switch (TRI->getSpillSize(*RC)) { 3673 case 1: 3674 if (AArch64::FPR8RegClass.hasSubClassEq(RC)) 3675 Opc = AArch64::STRBui; 3676 break; 3677 case 2: 3678 if (AArch64::FPR16RegClass.hasSubClassEq(RC)) 3679 Opc = AArch64::STRHui; 3680 else if (AArch64::PPRRegClass.hasSubClassEq(RC)) { 3681 assert(Subtarget.hasSVE() && "Unexpected register store without SVE"); 3682 Opc = AArch64::STR_PXI; 3683 StackID = TargetStackID::ScalableVector; 3684 } 3685 break; 3686 case 4: 3687 if (AArch64::GPR32allRegClass.hasSubClassEq(RC)) { 3688 Opc = AArch64::STRWui; 3689 if (Register::isVirtualRegister(SrcReg)) 3690 MF.getRegInfo().constrainRegClass(SrcReg, &AArch64::GPR32RegClass); 3691 else 3692 assert(SrcReg != AArch64::WSP); 3693 } else if 
(AArch64::FPR32RegClass.hasSubClassEq(RC)) 3694 Opc = AArch64::STRSui; 3695 break; 3696 case 8: 3697 if (AArch64::GPR64allRegClass.hasSubClassEq(RC)) { 3698 Opc = AArch64::STRXui; 3699 if (Register::isVirtualRegister(SrcReg)) 3700 MF.getRegInfo().constrainRegClass(SrcReg, &AArch64::GPR64RegClass); 3701 else 3702 assert(SrcReg != AArch64::SP); 3703 } else if (AArch64::FPR64RegClass.hasSubClassEq(RC)) { 3704 Opc = AArch64::STRDui; 3705 } else if (AArch64::WSeqPairsClassRegClass.hasSubClassEq(RC)) { 3706 storeRegPairToStackSlot(getRegisterInfo(), MBB, MBBI, 3707 get(AArch64::STPWi), SrcReg, isKill, 3708 AArch64::sube32, AArch64::subo32, FI, MMO); 3709 return; 3710 } 3711 break; 3712 case 16: 3713 if (AArch64::FPR128RegClass.hasSubClassEq(RC)) 3714 Opc = AArch64::STRQui; 3715 else if (AArch64::DDRegClass.hasSubClassEq(RC)) { 3716 assert(Subtarget.hasNEON() && "Unexpected register store without NEON"); 3717 Opc = AArch64::ST1Twov1d; 3718 Offset = false; 3719 } else if (AArch64::XSeqPairsClassRegClass.hasSubClassEq(RC)) { 3720 storeRegPairToStackSlot(getRegisterInfo(), MBB, MBBI, 3721 get(AArch64::STPXi), SrcReg, isKill, 3722 AArch64::sube64, AArch64::subo64, FI, MMO); 3723 return; 3724 } else if (AArch64::ZPRRegClass.hasSubClassEq(RC)) { 3725 assert(Subtarget.hasSVE() && "Unexpected register store without SVE"); 3726 Opc = AArch64::STR_ZXI; 3727 StackID = TargetStackID::ScalableVector; 3728 } 3729 break; 3730 case 24: 3731 if (AArch64::DDDRegClass.hasSubClassEq(RC)) { 3732 assert(Subtarget.hasNEON() && "Unexpected register store without NEON"); 3733 Opc = AArch64::ST1Threev1d; 3734 Offset = false; 3735 } 3736 break; 3737 case 32: 3738 if (AArch64::DDDDRegClass.hasSubClassEq(RC)) { 3739 assert(Subtarget.hasNEON() && "Unexpected register store without NEON"); 3740 Opc = AArch64::ST1Fourv1d; 3741 Offset = false; 3742 } else if (AArch64::QQRegClass.hasSubClassEq(RC)) { 3743 assert(Subtarget.hasNEON() && "Unexpected register store without NEON"); 3744 Opc = AArch64::ST1Twov2d; 3745 Offset = false; 3746 } else if (AArch64::ZPR2RegClass.hasSubClassEq(RC)) { 3747 assert(Subtarget.hasSVE() && "Unexpected register store without SVE"); 3748 Opc = AArch64::STR_ZZXI; 3749 StackID = TargetStackID::ScalableVector; 3750 } 3751 break; 3752 case 48: 3753 if (AArch64::QQQRegClass.hasSubClassEq(RC)) { 3754 assert(Subtarget.hasNEON() && "Unexpected register store without NEON"); 3755 Opc = AArch64::ST1Threev2d; 3756 Offset = false; 3757 } else if (AArch64::ZPR3RegClass.hasSubClassEq(RC)) { 3758 assert(Subtarget.hasSVE() && "Unexpected register store without SVE"); 3759 Opc = AArch64::STR_ZZZXI; 3760 StackID = TargetStackID::ScalableVector; 3761 } 3762 break; 3763 case 64: 3764 if (AArch64::QQQQRegClass.hasSubClassEq(RC)) { 3765 assert(Subtarget.hasNEON() && "Unexpected register store without NEON"); 3766 Opc = AArch64::ST1Fourv2d; 3767 Offset = false; 3768 } else if (AArch64::ZPR4RegClass.hasSubClassEq(RC)) { 3769 assert(Subtarget.hasSVE() && "Unexpected register store without SVE"); 3770 Opc = AArch64::STR_ZZZZXI; 3771 StackID = TargetStackID::ScalableVector; 3772 } 3773 break; 3774 } 3775 assert(Opc && "Unknown register class"); 3776 MFI.setStackID(FI, StackID); 3777 3778 const MachineInstrBuilder MI = BuildMI(MBB, MBBI, DebugLoc(), get(Opc)) 3779 .addReg(SrcReg, getKillRegState(isKill)) 3780 .addFrameIndex(FI); 3781 3782 if (Offset) 3783 MI.addImm(0); 3784 MI.addMemOperand(MMO); 3785 } 3786 3787 static void loadRegPairFromStackSlot(const TargetRegisterInfo &TRI, 3788 MachineBasicBlock &MBB, 3789 
MachineBasicBlock::iterator InsertBefore, 3790 const MCInstrDesc &MCID, 3791 Register DestReg, unsigned SubIdx0, 3792 unsigned SubIdx1, int FI, 3793 MachineMemOperand *MMO) { 3794 Register DestReg0 = DestReg; 3795 Register DestReg1 = DestReg; 3796 bool IsUndef = true; 3797 if (Register::isPhysicalRegister(DestReg)) { 3798 DestReg0 = TRI.getSubReg(DestReg, SubIdx0); 3799 SubIdx0 = 0; 3800 DestReg1 = TRI.getSubReg(DestReg, SubIdx1); 3801 SubIdx1 = 0; 3802 IsUndef = false; 3803 } 3804 BuildMI(MBB, InsertBefore, DebugLoc(), MCID) 3805 .addReg(DestReg0, RegState::Define | getUndefRegState(IsUndef), SubIdx0) 3806 .addReg(DestReg1, RegState::Define | getUndefRegState(IsUndef), SubIdx1) 3807 .addFrameIndex(FI) 3808 .addImm(0) 3809 .addMemOperand(MMO); 3810 } 3811 3812 void AArch64InstrInfo::loadRegFromStackSlot( 3813 MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, Register DestReg, 3814 int FI, const TargetRegisterClass *RC, 3815 const TargetRegisterInfo *TRI) const { 3816 MachineFunction &MF = *MBB.getParent(); 3817 MachineFrameInfo &MFI = MF.getFrameInfo(); 3818 MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(MF, FI); 3819 MachineMemOperand *MMO = 3820 MF.getMachineMemOperand(PtrInfo, MachineMemOperand::MOLoad, 3821 MFI.getObjectSize(FI), MFI.getObjectAlign(FI)); 3822 3823 unsigned Opc = 0; 3824 bool Offset = true; 3825 unsigned StackID = TargetStackID::Default; 3826 switch (TRI->getSpillSize(*RC)) { 3827 case 1: 3828 if (AArch64::FPR8RegClass.hasSubClassEq(RC)) 3829 Opc = AArch64::LDRBui; 3830 break; 3831 case 2: 3832 if (AArch64::FPR16RegClass.hasSubClassEq(RC)) 3833 Opc = AArch64::LDRHui; 3834 else if (AArch64::PPRRegClass.hasSubClassEq(RC)) { 3835 assert(Subtarget.hasSVE() && "Unexpected register load without SVE"); 3836 Opc = AArch64::LDR_PXI; 3837 StackID = TargetStackID::ScalableVector; 3838 } 3839 break; 3840 case 4: 3841 if (AArch64::GPR32allRegClass.hasSubClassEq(RC)) { 3842 Opc = AArch64::LDRWui; 3843 if (Register::isVirtualRegister(DestReg)) 3844 MF.getRegInfo().constrainRegClass(DestReg, &AArch64::GPR32RegClass); 3845 else 3846 assert(DestReg != AArch64::WSP); 3847 } else if (AArch64::FPR32RegClass.hasSubClassEq(RC)) 3848 Opc = AArch64::LDRSui; 3849 break; 3850 case 8: 3851 if (AArch64::GPR64allRegClass.hasSubClassEq(RC)) { 3852 Opc = AArch64::LDRXui; 3853 if (Register::isVirtualRegister(DestReg)) 3854 MF.getRegInfo().constrainRegClass(DestReg, &AArch64::GPR64RegClass); 3855 else 3856 assert(DestReg != AArch64::SP); 3857 } else if (AArch64::FPR64RegClass.hasSubClassEq(RC)) { 3858 Opc = AArch64::LDRDui; 3859 } else if (AArch64::WSeqPairsClassRegClass.hasSubClassEq(RC)) { 3860 loadRegPairFromStackSlot(getRegisterInfo(), MBB, MBBI, 3861 get(AArch64::LDPWi), DestReg, AArch64::sube32, 3862 AArch64::subo32, FI, MMO); 3863 return; 3864 } 3865 break; 3866 case 16: 3867 if (AArch64::FPR128RegClass.hasSubClassEq(RC)) 3868 Opc = AArch64::LDRQui; 3869 else if (AArch64::DDRegClass.hasSubClassEq(RC)) { 3870 assert(Subtarget.hasNEON() && "Unexpected register load without NEON"); 3871 Opc = AArch64::LD1Twov1d; 3872 Offset = false; 3873 } else if (AArch64::XSeqPairsClassRegClass.hasSubClassEq(RC)) { 3874 loadRegPairFromStackSlot(getRegisterInfo(), MBB, MBBI, 3875 get(AArch64::LDPXi), DestReg, AArch64::sube64, 3876 AArch64::subo64, FI, MMO); 3877 return; 3878 } else if (AArch64::ZPRRegClass.hasSubClassEq(RC)) { 3879 assert(Subtarget.hasSVE() && "Unexpected register load without SVE"); 3880 Opc = AArch64::LDR_ZXI; 3881 StackID = TargetStackID::ScalableVector; 3882 } 3883 
    break;
  case 24:
    if (AArch64::DDDRegClass.hasSubClassEq(RC)) {
      assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
      Opc = AArch64::LD1Threev1d;
      Offset = false;
    }
    break;
  case 32:
    if (AArch64::DDDDRegClass.hasSubClassEq(RC)) {
      assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
      Opc = AArch64::LD1Fourv1d;
      Offset = false;
    } else if (AArch64::QQRegClass.hasSubClassEq(RC)) {
      assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
      Opc = AArch64::LD1Twov2d;
      Offset = false;
    } else if (AArch64::ZPR2RegClass.hasSubClassEq(RC)) {
      assert(Subtarget.hasSVE() && "Unexpected register load without SVE");
      Opc = AArch64::LDR_ZZXI;
      StackID = TargetStackID::ScalableVector;
    }
    break;
  case 48:
    if (AArch64::QQQRegClass.hasSubClassEq(RC)) {
      assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
      Opc = AArch64::LD1Threev2d;
      Offset = false;
    } else if (AArch64::ZPR3RegClass.hasSubClassEq(RC)) {
      assert(Subtarget.hasSVE() && "Unexpected register load without SVE");
      Opc = AArch64::LDR_ZZZXI;
      StackID = TargetStackID::ScalableVector;
    }
    break;
  case 64:
    if (AArch64::QQQQRegClass.hasSubClassEq(RC)) {
      assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
      Opc = AArch64::LD1Fourv2d;
      Offset = false;
    } else if (AArch64::ZPR4RegClass.hasSubClassEq(RC)) {
      assert(Subtarget.hasSVE() && "Unexpected register load without SVE");
      Opc = AArch64::LDR_ZZZZXI;
      StackID = TargetStackID::ScalableVector;
    }
    break;
  }

  assert(Opc && "Unknown register class");
  MFI.setStackID(FI, StackID);

  const MachineInstrBuilder MI = BuildMI(MBB, MBBI, DebugLoc(), get(Opc))
                                     .addReg(DestReg, getDefRegState(true))
                                     .addFrameIndex(FI);
  if (Offset)
    MI.addImm(0);
  MI.addMemOperand(MMO);
}

bool llvm::isNZCVTouchedInInstructionRange(const MachineInstr &DefMI,
                                           const MachineInstr &UseMI,
                                           const TargetRegisterInfo *TRI) {
  return any_of(instructionsWithoutDebug(std::next(DefMI.getIterator()),
                                         UseMI.getIterator()),
                [TRI](const MachineInstr &I) {
                  return I.modifiesRegister(AArch64::NZCV, TRI) ||
                         I.readsRegister(AArch64::NZCV, TRI);
                });
}

void AArch64InstrInfo::decomposeStackOffsetForDwarfOffsets(
    const StackOffset &Offset, int64_t &ByteSized, int64_t &VGSized) {
  // The smallest scalable element supported by scaled SVE addressing modes is
  // a predicate, which is 2 scalable bytes in size. So the scalable byte
  // offset must always be a multiple of 2.
  assert(Offset.getScalable() % 2 == 0 && "Invalid frame offset");

  // VGSized offsets are divided by '2', because the VG register is the number
  // of 64-bit granules as opposed to 128-bit vector chunks, which is how the
  // 'n' in e.g. MVT::nxv1i8 is modelled.
  // So, for a stack offset of 16 MVT::nxv1i8's, the size is n x 16 bytes.
  // VG = n * 2 and the DWARF offset must be VG * 8 bytes.
  ByteSized = Offset.getFixed();
  VGSized = Offset.getScalable() / 2;
}

/// Returns the offset in parts to which this frame offset can be
/// decomposed for the purpose of describing a frame offset.
/// For non-scalable offsets this is simply its byte size.
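/// As an illustrative example of the decomposition below: a StackOffset of
/// 16 fixed bytes plus 32 scalable bytes yields NumBytes = 16,
/// NumPredicateVectors = 0 and NumDataVectors = 2, since 32 scalable bytes
/// are 16 predicate-sized (2-byte) units, which fold evenly into two
/// vector-sized (16-byte) units.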
void AArch64InstrInfo::decomposeStackOffsetForFrameOffsets(
    const StackOffset &Offset, int64_t &NumBytes, int64_t &NumPredicateVectors,
    int64_t &NumDataVectors) {
  // The smallest scalable element supported by scaled SVE addressing modes is
  // a predicate, which is 2 scalable bytes in size. So the scalable byte
  // offset must always be a multiple of 2.
  assert(Offset.getScalable() % 2 == 0 && "Invalid frame offset");

  NumBytes = Offset.getFixed();
  NumDataVectors = 0;
  NumPredicateVectors = Offset.getScalable() / 2;
  // This method is used to get the offsets to adjust the frame offset.
  // If the function requires ADDPL to be used and needs more than two ADDPL
  // instructions, part of the offset is folded into NumDataVectors so that it
  // uses ADDVL for part of it, reducing the number of ADDPL instructions.
  if (NumPredicateVectors % 8 == 0 || NumPredicateVectors < -64 ||
      NumPredicateVectors > 62) {
    NumDataVectors = NumPredicateVectors / 8;
    NumPredicateVectors -= NumDataVectors * 8;
  }
}

// Helper function to emit a frame offset adjustment from a given pointer
// (SrcReg), writing the result to DestReg. Unlike emitFrameOffset below, this
// helper requires the caller to supply the exact opcode to use.
static void emitFrameOffsetAdj(MachineBasicBlock &MBB,
                               MachineBasicBlock::iterator MBBI,
                               const DebugLoc &DL, unsigned DestReg,
                               unsigned SrcReg, int64_t Offset, unsigned Opc,
                               const TargetInstrInfo *TII,
                               MachineInstr::MIFlag Flag, bool NeedsWinCFI,
                               bool *HasWinCFI) {
  int Sign = 1;
  unsigned MaxEncoding, ShiftSize;
  switch (Opc) {
  case AArch64::ADDXri:
  case AArch64::ADDSXri:
  case AArch64::SUBXri:
  case AArch64::SUBSXri:
    MaxEncoding = 0xfff;
    ShiftSize = 12;
    break;
  case AArch64::ADDVL_XXI:
  case AArch64::ADDPL_XXI:
    MaxEncoding = 31;
    ShiftSize = 0;
    if (Offset < 0) {
      MaxEncoding = 32;
      Sign = -1;
      Offset = -Offset;
    }
    break;
  default:
    llvm_unreachable("Unsupported opcode");
  }

  // FIXME: If the offset won't fit in 24 bits, compute the offset into a
  // scratch register. If DestReg is a virtual register, use it as the
  // scratch register; otherwise, create a new virtual register (to be
  // replaced by the scavenger at the end of PEI). That case can be optimized
  // slightly if DestReg is SP, which is always 16-byte aligned, so the
  // scratch register can be loaded with offset%8 and the add/sub can use an
  // extending instruction with LSL#3.
  // Currently the function handles any offset but generates a poor sequence
  // of code.
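  // For example (illustrative), an ADDXri adjustment by 0x12345 is split by
  // the loop below into "add xD, xS, #0x12, lsl #12" followed by
  // "add xD, xD, #0x345", because each immediate is limited to 12 bits with
  // an optional LSL #12.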
4036 // assert(Offset < (1 << 24) && "unimplemented reg plus immediate"); 4037 4038 const unsigned MaxEncodableValue = MaxEncoding << ShiftSize; 4039 Register TmpReg = DestReg; 4040 if (TmpReg == AArch64::XZR) 4041 TmpReg = MBB.getParent()->getRegInfo().createVirtualRegister( 4042 &AArch64::GPR64RegClass); 4043 do { 4044 uint64_t ThisVal = std::min<uint64_t>(Offset, MaxEncodableValue); 4045 unsigned LocalShiftSize = 0; 4046 if (ThisVal > MaxEncoding) { 4047 ThisVal = ThisVal >> ShiftSize; 4048 LocalShiftSize = ShiftSize; 4049 } 4050 assert((ThisVal >> ShiftSize) <= MaxEncoding && 4051 "Encoding cannot handle value that big"); 4052 4053 Offset -= ThisVal << LocalShiftSize; 4054 if (Offset == 0) 4055 TmpReg = DestReg; 4056 auto MBI = BuildMI(MBB, MBBI, DL, TII->get(Opc), TmpReg) 4057 .addReg(SrcReg) 4058 .addImm(Sign * (int)ThisVal); 4059 if (ShiftSize) 4060 MBI = MBI.addImm( 4061 AArch64_AM::getShifterImm(AArch64_AM::LSL, LocalShiftSize)); 4062 MBI = MBI.setMIFlag(Flag); 4063 4064 if (NeedsWinCFI) { 4065 assert(Sign == 1 && "SEH directives should always have a positive sign"); 4066 int Imm = (int)(ThisVal << LocalShiftSize); 4067 if ((DestReg == AArch64::FP && SrcReg == AArch64::SP) || 4068 (SrcReg == AArch64::FP && DestReg == AArch64::SP)) { 4069 if (HasWinCFI) 4070 *HasWinCFI = true; 4071 if (Imm == 0) 4072 BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_SetFP)).setMIFlag(Flag); 4073 else 4074 BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_AddFP)) 4075 .addImm(Imm) 4076 .setMIFlag(Flag); 4077 assert(Offset == 0 && "Expected remaining offset to be zero to " 4078 "emit a single SEH directive"); 4079 } else if (DestReg == AArch64::SP) { 4080 if (HasWinCFI) 4081 *HasWinCFI = true; 4082 assert(SrcReg == AArch64::SP && "Unexpected SrcReg for SEH_StackAlloc"); 4083 BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_StackAlloc)) 4084 .addImm(Imm) 4085 .setMIFlag(Flag); 4086 } 4087 if (HasWinCFI) 4088 *HasWinCFI = true; 4089 } 4090 4091 SrcReg = TmpReg; 4092 } while (Offset); 4093 } 4094 4095 void llvm::emitFrameOffset(MachineBasicBlock &MBB, 4096 MachineBasicBlock::iterator MBBI, const DebugLoc &DL, 4097 unsigned DestReg, unsigned SrcReg, 4098 StackOffset Offset, const TargetInstrInfo *TII, 4099 MachineInstr::MIFlag Flag, bool SetNZCV, 4100 bool NeedsWinCFI, bool *HasWinCFI) { 4101 int64_t Bytes, NumPredicateVectors, NumDataVectors; 4102 AArch64InstrInfo::decomposeStackOffsetForFrameOffsets( 4103 Offset, Bytes, NumPredicateVectors, NumDataVectors); 4104 4105 // First emit non-scalable frame offsets, or a simple 'mov'. 4106 if (Bytes || (!Offset && SrcReg != DestReg)) { 4107 assert((DestReg != AArch64::SP || Bytes % 8 == 0) && 4108 "SP increment/decrement not 8-byte aligned"); 4109 unsigned Opc = SetNZCV ? AArch64::ADDSXri : AArch64::ADDXri; 4110 if (Bytes < 0) { 4111 Bytes = -Bytes; 4112 Opc = SetNZCV ? 
AArch64::SUBSXri : AArch64::SUBXri; 4113 } 4114 emitFrameOffsetAdj(MBB, MBBI, DL, DestReg, SrcReg, Bytes, Opc, TII, Flag, 4115 NeedsWinCFI, HasWinCFI); 4116 SrcReg = DestReg; 4117 } 4118 4119 assert(!(SetNZCV && (NumPredicateVectors || NumDataVectors)) && 4120 "SetNZCV not supported with SVE vectors"); 4121 assert(!(NeedsWinCFI && (NumPredicateVectors || NumDataVectors)) && 4122 "WinCFI not supported with SVE vectors"); 4123 4124 if (NumDataVectors) { 4125 emitFrameOffsetAdj(MBB, MBBI, DL, DestReg, SrcReg, NumDataVectors, 4126 AArch64::ADDVL_XXI, TII, Flag, NeedsWinCFI, nullptr); 4127 SrcReg = DestReg; 4128 } 4129 4130 if (NumPredicateVectors) { 4131 assert(DestReg != AArch64::SP && "Unaligned access to SP"); 4132 emitFrameOffsetAdj(MBB, MBBI, DL, DestReg, SrcReg, NumPredicateVectors, 4133 AArch64::ADDPL_XXI, TII, Flag, NeedsWinCFI, nullptr); 4134 } 4135 } 4136 4137 MachineInstr *AArch64InstrInfo::foldMemoryOperandImpl( 4138 MachineFunction &MF, MachineInstr &MI, ArrayRef<unsigned> Ops, 4139 MachineBasicBlock::iterator InsertPt, int FrameIndex, 4140 LiveIntervals *LIS, VirtRegMap *VRM) const { 4141 // This is a bit of a hack. Consider this instruction: 4142 // 4143 // %0 = COPY %sp; GPR64all:%0 4144 // 4145 // We explicitly chose GPR64all for the virtual register so such a copy might 4146 // be eliminated by RegisterCoalescer. However, that may not be possible, and 4147 // %0 may even spill. We can't spill %sp, and since it is in the GPR64all 4148 // register class, TargetInstrInfo::foldMemoryOperand() is going to try. 4149 // 4150 // To prevent that, we are going to constrain the %0 register class here. 4151 // 4152 // <rdar://problem/11522048> 4153 // 4154 if (MI.isFullCopy()) { 4155 Register DstReg = MI.getOperand(0).getReg(); 4156 Register SrcReg = MI.getOperand(1).getReg(); 4157 if (SrcReg == AArch64::SP && Register::isVirtualRegister(DstReg)) { 4158 MF.getRegInfo().constrainRegClass(DstReg, &AArch64::GPR64RegClass); 4159 return nullptr; 4160 } 4161 if (DstReg == AArch64::SP && Register::isVirtualRegister(SrcReg)) { 4162 MF.getRegInfo().constrainRegClass(SrcReg, &AArch64::GPR64RegClass); 4163 return nullptr; 4164 } 4165 } 4166 4167 // Handle the case where a copy is being spilled or filled but the source 4168 // and destination register class don't match. For example: 4169 // 4170 // %0 = COPY %xzr; GPR64common:%0 4171 // 4172 // In this case we can still safely fold away the COPY and generate the 4173 // following spill code: 4174 // 4175 // STRXui %xzr, %stack.0 4176 // 4177 // This also eliminates spilled cross register class COPYs (e.g. between x and 4178 // d regs) of the same size. For example: 4179 // 4180 // %0 = COPY %1; GPR64:%0, FPR64:%1 4181 // 4182 // will be filled as 4183 // 4184 // LDRDui %0, fi<#0> 4185 // 4186 // instead of 4187 // 4188 // LDRXui %Temp, fi<#0> 4189 // %0 = FMOV %Temp 4190 // 4191 if (MI.isCopy() && Ops.size() == 1 && 4192 // Make sure we're only folding the explicit COPY defs/uses. 4193 (Ops[0] == 0 || Ops[0] == 1)) { 4194 bool IsSpill = Ops[0] == 0; 4195 bool IsFill = !IsSpill; 4196 const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo(); 4197 const MachineRegisterInfo &MRI = MF.getRegInfo(); 4198 MachineBasicBlock &MBB = *MI.getParent(); 4199 const MachineOperand &DstMO = MI.getOperand(0); 4200 const MachineOperand &SrcMO = MI.getOperand(1); 4201 Register DstReg = DstMO.getReg(); 4202 Register SrcReg = SrcMO.getReg(); 4203 // This is slightly expensive to compute for physical regs since 4204 // getMinimalPhysRegClass is slow. 
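    // (The class lookup is wrapped in a lambda so it is only computed on the
    // folding paths below that actually need it.)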
4205 auto getRegClass = [&](unsigned Reg) { 4206 return Register::isVirtualRegister(Reg) ? MRI.getRegClass(Reg) 4207 : TRI.getMinimalPhysRegClass(Reg); 4208 }; 4209 4210 if (DstMO.getSubReg() == 0 && SrcMO.getSubReg() == 0) { 4211 assert(TRI.getRegSizeInBits(*getRegClass(DstReg)) == 4212 TRI.getRegSizeInBits(*getRegClass(SrcReg)) && 4213 "Mismatched register size in non subreg COPY"); 4214 if (IsSpill) 4215 storeRegToStackSlot(MBB, InsertPt, SrcReg, SrcMO.isKill(), FrameIndex, 4216 getRegClass(SrcReg), &TRI); 4217 else 4218 loadRegFromStackSlot(MBB, InsertPt, DstReg, FrameIndex, 4219 getRegClass(DstReg), &TRI); 4220 return &*--InsertPt; 4221 } 4222 4223 // Handle cases like spilling def of: 4224 // 4225 // %0:sub_32<def,read-undef> = COPY %wzr; GPR64common:%0 4226 // 4227 // where the physical register source can be widened and stored to the full 4228 // virtual reg destination stack slot, in this case producing: 4229 // 4230 // STRXui %xzr, %stack.0 4231 // 4232 if (IsSpill && DstMO.isUndef() && Register::isPhysicalRegister(SrcReg)) { 4233 assert(SrcMO.getSubReg() == 0 && 4234 "Unexpected subreg on physical register"); 4235 const TargetRegisterClass *SpillRC; 4236 unsigned SpillSubreg; 4237 switch (DstMO.getSubReg()) { 4238 default: 4239 SpillRC = nullptr; 4240 break; 4241 case AArch64::sub_32: 4242 case AArch64::ssub: 4243 if (AArch64::GPR32RegClass.contains(SrcReg)) { 4244 SpillRC = &AArch64::GPR64RegClass; 4245 SpillSubreg = AArch64::sub_32; 4246 } else if (AArch64::FPR32RegClass.contains(SrcReg)) { 4247 SpillRC = &AArch64::FPR64RegClass; 4248 SpillSubreg = AArch64::ssub; 4249 } else 4250 SpillRC = nullptr; 4251 break; 4252 case AArch64::dsub: 4253 if (AArch64::FPR64RegClass.contains(SrcReg)) { 4254 SpillRC = &AArch64::FPR128RegClass; 4255 SpillSubreg = AArch64::dsub; 4256 } else 4257 SpillRC = nullptr; 4258 break; 4259 } 4260 4261 if (SpillRC) 4262 if (unsigned WidenedSrcReg = 4263 TRI.getMatchingSuperReg(SrcReg, SpillSubreg, SpillRC)) { 4264 storeRegToStackSlot(MBB, InsertPt, WidenedSrcReg, SrcMO.isKill(), 4265 FrameIndex, SpillRC, &TRI); 4266 return &*--InsertPt; 4267 } 4268 } 4269 4270 // Handle cases like filling use of: 4271 // 4272 // %0:sub_32<def,read-undef> = COPY %1; GPR64:%0, GPR32:%1 4273 // 4274 // where we can load the full virtual reg source stack slot, into the subreg 4275 // destination, in this case producing: 4276 // 4277 // LDRWui %0:sub_32<def,read-undef>, %stack.0 4278 // 4279 if (IsFill && SrcMO.getSubReg() == 0 && DstMO.isUndef()) { 4280 const TargetRegisterClass *FillRC; 4281 switch (DstMO.getSubReg()) { 4282 default: 4283 FillRC = nullptr; 4284 break; 4285 case AArch64::sub_32: 4286 FillRC = &AArch64::GPR32RegClass; 4287 break; 4288 case AArch64::ssub: 4289 FillRC = &AArch64::FPR32RegClass; 4290 break; 4291 case AArch64::dsub: 4292 FillRC = &AArch64::FPR64RegClass; 4293 break; 4294 } 4295 4296 if (FillRC) { 4297 assert(TRI.getRegSizeInBits(*getRegClass(SrcReg)) == 4298 TRI.getRegSizeInBits(*FillRC) && 4299 "Mismatched regclass size on folded subreg COPY"); 4300 loadRegFromStackSlot(MBB, InsertPt, DstReg, FrameIndex, FillRC, &TRI); 4301 MachineInstr &LoadMI = *--InsertPt; 4302 MachineOperand &LoadDst = LoadMI.getOperand(0); 4303 assert(LoadDst.getSubReg() == 0 && "unexpected subreg on fill load"); 4304 LoadDst.setSubReg(DstMO.getSubReg()); 4305 LoadDst.setIsUndef(); 4306 return &LoadMI; 4307 } 4308 } 4309 } 4310 4311 // Cannot fold. 
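  // (A nullptr return means no folded load/store could be formed; the caller
  // will fall back to inserting a separate spill or reload instead.)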
4312 return nullptr; 4313 } 4314 4315 int llvm::isAArch64FrameOffsetLegal(const MachineInstr &MI, 4316 StackOffset &SOffset, 4317 bool *OutUseUnscaledOp, 4318 unsigned *OutUnscaledOp, 4319 int64_t *EmittableOffset) { 4320 // Set output values in case of early exit. 4321 if (EmittableOffset) 4322 *EmittableOffset = 0; 4323 if (OutUseUnscaledOp) 4324 *OutUseUnscaledOp = false; 4325 if (OutUnscaledOp) 4326 *OutUnscaledOp = 0; 4327 4328 // Exit early for structured vector spills/fills as they can't take an 4329 // immediate offset. 4330 switch (MI.getOpcode()) { 4331 default: 4332 break; 4333 case AArch64::LD1Twov2d: 4334 case AArch64::LD1Threev2d: 4335 case AArch64::LD1Fourv2d: 4336 case AArch64::LD1Twov1d: 4337 case AArch64::LD1Threev1d: 4338 case AArch64::LD1Fourv1d: 4339 case AArch64::ST1Twov2d: 4340 case AArch64::ST1Threev2d: 4341 case AArch64::ST1Fourv2d: 4342 case AArch64::ST1Twov1d: 4343 case AArch64::ST1Threev1d: 4344 case AArch64::ST1Fourv1d: 4345 case AArch64::ST1i8: 4346 case AArch64::ST1i16: 4347 case AArch64::ST1i32: 4348 case AArch64::ST1i64: 4349 case AArch64::IRG: 4350 case AArch64::IRGstack: 4351 case AArch64::STGloop: 4352 case AArch64::STZGloop: 4353 return AArch64FrameOffsetCannotUpdate; 4354 } 4355 4356 // Get the min/max offset and the scale. 4357 TypeSize ScaleValue(0U, false); 4358 unsigned Width; 4359 int64_t MinOff, MaxOff; 4360 if (!AArch64InstrInfo::getMemOpInfo(MI.getOpcode(), ScaleValue, Width, MinOff, 4361 MaxOff)) 4362 llvm_unreachable("unhandled opcode in isAArch64FrameOffsetLegal"); 4363 4364 // Construct the complete offset. 4365 bool IsMulVL = ScaleValue.isScalable(); 4366 unsigned Scale = ScaleValue.getKnownMinSize(); 4367 int64_t Offset = IsMulVL ? SOffset.getScalable() : SOffset.getFixed(); 4368 4369 const MachineOperand &ImmOpnd = 4370 MI.getOperand(AArch64InstrInfo::getLoadStoreImmIdx(MI.getOpcode())); 4371 Offset += ImmOpnd.getImm() * Scale; 4372 4373 // If the offset doesn't match the scale, we rewrite the instruction to 4374 // use the unscaled instruction instead. Likewise, if we have a negative 4375 // offset and there is an unscaled op to use. 4376 Optional<unsigned> UnscaledOp = 4377 AArch64InstrInfo::getUnscaledLdSt(MI.getOpcode()); 4378 bool useUnscaledOp = UnscaledOp && (Offset % Scale || Offset < 0); 4379 if (useUnscaledOp && 4380 !AArch64InstrInfo::getMemOpInfo(*UnscaledOp, ScaleValue, Width, MinOff, 4381 MaxOff)) 4382 llvm_unreachable("unhandled opcode in isAArch64FrameOffsetLegal"); 4383 4384 Scale = ScaleValue.getKnownMinSize(); 4385 assert(IsMulVL == ScaleValue.isScalable() && 4386 "Unscaled opcode has different value for scalable"); 4387 4388 int64_t Remainder = Offset % Scale; 4389 assert(!(Remainder && useUnscaledOp) && 4390 "Cannot have remainder when using unscaled op"); 4391 4392 assert(MinOff < MaxOff && "Unexpected Min/Max offsets"); 4393 int64_t NewOffset = Offset / Scale; 4394 if (MinOff <= NewOffset && NewOffset <= MaxOff) 4395 Offset = Remainder; 4396 else { 4397 NewOffset = NewOffset < 0 ? MinOff : MaxOff; 4398 Offset = Offset - NewOffset * Scale + Remainder; 4399 } 4400 4401 if (EmittableOffset) 4402 *EmittableOffset = NewOffset; 4403 if (OutUseUnscaledOp) 4404 *OutUseUnscaledOp = useUnscaledOp; 4405 if (OutUnscaledOp && UnscaledOp) 4406 *OutUnscaledOp = *UnscaledOp; 4407 4408 if (IsMulVL) 4409 SOffset = StackOffset::get(SOffset.getFixed(), Offset); 4410 else 4411 SOffset = StackOffset::get(Offset, SOffset.getScalable()); 4412 return AArch64FrameOffsetCanUpdate | 4413 (SOffset ? 
0 : AArch64FrameOffsetIsLegal); 4414 } 4415 4416 bool llvm::rewriteAArch64FrameIndex(MachineInstr &MI, unsigned FrameRegIdx, 4417 unsigned FrameReg, StackOffset &Offset, 4418 const AArch64InstrInfo *TII) { 4419 unsigned Opcode = MI.getOpcode(); 4420 unsigned ImmIdx = FrameRegIdx + 1; 4421 4422 if (Opcode == AArch64::ADDSXri || Opcode == AArch64::ADDXri) { 4423 Offset += StackOffset::getFixed(MI.getOperand(ImmIdx).getImm()); 4424 emitFrameOffset(*MI.getParent(), MI, MI.getDebugLoc(), 4425 MI.getOperand(0).getReg(), FrameReg, Offset, TII, 4426 MachineInstr::NoFlags, (Opcode == AArch64::ADDSXri)); 4427 MI.eraseFromParent(); 4428 Offset = StackOffset(); 4429 return true; 4430 } 4431 4432 int64_t NewOffset; 4433 unsigned UnscaledOp; 4434 bool UseUnscaledOp; 4435 int Status = isAArch64FrameOffsetLegal(MI, Offset, &UseUnscaledOp, 4436 &UnscaledOp, &NewOffset); 4437 if (Status & AArch64FrameOffsetCanUpdate) { 4438 if (Status & AArch64FrameOffsetIsLegal) 4439 // Replace the FrameIndex with FrameReg. 4440 MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false); 4441 if (UseUnscaledOp) 4442 MI.setDesc(TII->get(UnscaledOp)); 4443 4444 MI.getOperand(ImmIdx).ChangeToImmediate(NewOffset); 4445 return !Offset; 4446 } 4447 4448 return false; 4449 } 4450 4451 MCInst AArch64InstrInfo::getNop() const { 4452 return MCInstBuilder(AArch64::HINT).addImm(0); 4453 } 4454 4455 // AArch64 supports MachineCombiner. 4456 bool AArch64InstrInfo::useMachineCombiner() const { return true; } 4457 4458 // True when Opc sets flag 4459 static bool isCombineInstrSettingFlag(unsigned Opc) { 4460 switch (Opc) { 4461 case AArch64::ADDSWrr: 4462 case AArch64::ADDSWri: 4463 case AArch64::ADDSXrr: 4464 case AArch64::ADDSXri: 4465 case AArch64::SUBSWrr: 4466 case AArch64::SUBSXrr: 4467 // Note: MSUB Wd,Wn,Wm,Wi -> Wd = Wi - WnxWm, not Wd=WnxWm - Wi. 4468 case AArch64::SUBSWri: 4469 case AArch64::SUBSXri: 4470 return true; 4471 default: 4472 break; 4473 } 4474 return false; 4475 } 4476 4477 // 32b Opcodes that can be combined with a MUL 4478 static bool isCombineInstrCandidate32(unsigned Opc) { 4479 switch (Opc) { 4480 case AArch64::ADDWrr: 4481 case AArch64::ADDWri: 4482 case AArch64::SUBWrr: 4483 case AArch64::ADDSWrr: 4484 case AArch64::ADDSWri: 4485 case AArch64::SUBSWrr: 4486 // Note: MSUB Wd,Wn,Wm,Wi -> Wd = Wi - WnxWm, not Wd=WnxWm - Wi. 4487 case AArch64::SUBWri: 4488 case AArch64::SUBSWri: 4489 return true; 4490 default: 4491 break; 4492 } 4493 return false; 4494 } 4495 4496 // 64b Opcodes that can be combined with a MUL 4497 static bool isCombineInstrCandidate64(unsigned Opc) { 4498 switch (Opc) { 4499 case AArch64::ADDXrr: 4500 case AArch64::ADDXri: 4501 case AArch64::SUBXrr: 4502 case AArch64::ADDSXrr: 4503 case AArch64::ADDSXri: 4504 case AArch64::SUBSXrr: 4505 // Note: MSUB Wd,Wn,Wm,Wi -> Wd = Wi - WnxWm, not Wd=WnxWm - Wi. 4506 case AArch64::SUBXri: 4507 case AArch64::SUBSXri: 4508 case AArch64::ADDv8i8: 4509 case AArch64::ADDv16i8: 4510 case AArch64::ADDv4i16: 4511 case AArch64::ADDv8i16: 4512 case AArch64::ADDv2i32: 4513 case AArch64::ADDv4i32: 4514 case AArch64::SUBv8i8: 4515 case AArch64::SUBv16i8: 4516 case AArch64::SUBv4i16: 4517 case AArch64::SUBv8i16: 4518 case AArch64::SUBv2i32: 4519 case AArch64::SUBv4i32: 4520 return true; 4521 default: 4522 break; 4523 } 4524 return false; 4525 } 4526 4527 // FP Opcodes that can be combined with a FMUL. 
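// For example (illustrative), "fmul s1, s2, s3" followed by "fadd s0, s1, s4"
// can later be rewritten by the machine combiner as "fmadd s0, s2, s3, s4"
// when the checks below permit contraction.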
static bool isCombineInstrCandidateFP(const MachineInstr &Inst) {
  switch (Inst.getOpcode()) {
  default:
    break;
  case AArch64::FADDHrr:
  case AArch64::FADDSrr:
  case AArch64::FADDDrr:
  case AArch64::FADDv4f16:
  case AArch64::FADDv8f16:
  case AArch64::FADDv2f32:
  case AArch64::FADDv2f64:
  case AArch64::FADDv4f32:
  case AArch64::FSUBHrr:
  case AArch64::FSUBSrr:
  case AArch64::FSUBDrr:
  case AArch64::FSUBv4f16:
  case AArch64::FSUBv8f16:
  case AArch64::FSUBv2f32:
  case AArch64::FSUBv2f64:
  case AArch64::FSUBv4f32:
    TargetOptions Options = Inst.getParent()->getParent()->getTarget().Options;
    // We can fuse FADD/FSUB with FMUL if fusion is either allowed globally by
    // the target options or if FADD/FSUB has the contract fast-math flag.
    return Options.UnsafeFPMath ||
           Options.AllowFPOpFusion == FPOpFusion::Fast ||
           Inst.getFlag(MachineInstr::FmContract);
  }
  return false;
}

// Opcodes that can be combined with a MUL
static bool isCombineInstrCandidate(unsigned Opc) {
  return (isCombineInstrCandidate32(Opc) || isCombineInstrCandidate64(Opc));
}

//
// Utility routine that checks if \param MO is defined by an
// \param CombineOpc instruction in the basic block \param MBB
static bool canCombine(MachineBasicBlock &MBB, MachineOperand &MO,
                       unsigned CombineOpc, unsigned ZeroReg = 0,
                       bool CheckZeroReg = false) {
  MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
  MachineInstr *MI = nullptr;

  if (MO.isReg() && Register::isVirtualRegister(MO.getReg()))
    MI = MRI.getUniqueVRegDef(MO.getReg());
  // And it needs to be in the trace (otherwise, it won't have a depth).
  if (!MI || MI->getParent() != &MBB || (unsigned)MI->getOpcode() != CombineOpc)
    return false;
  // It must only be used by the instruction we are combining with.
  if (!MRI.hasOneNonDBGUse(MI->getOperand(0).getReg()))
    return false;

  if (CheckZeroReg) {
    assert(MI->getNumOperands() >= 4 && MI->getOperand(0).isReg() &&
           MI->getOperand(1).isReg() && MI->getOperand(2).isReg() &&
           MI->getOperand(3).isReg() && "MAdd/MSub must have at least 4 regs");
    // The third input reg must be zero.
    if (MI->getOperand(3).getReg() != ZeroReg)
      return false;
  }

  return true;
}

//
// Is \param MO defined by an integer multiply and can it be combined?
static bool canCombineWithMUL(MachineBasicBlock &MBB, MachineOperand &MO,
                              unsigned MulOpc, unsigned ZeroReg) {
  return canCombine(MBB, MO, MulOpc, ZeroReg, true);
}

//
// Is \param MO defined by a floating-point multiply and can it be combined?
static bool canCombineWithFMUL(MachineBasicBlock &MBB, MachineOperand &MO,
                               unsigned MulOpc) {
  return canCombine(MBB, MO, MulOpc);
}

// TODO: There are many more machine instruction opcodes to match:
//       1. Other data types (integer, vectors)
//       2. Other math / logic operations (xor, or)
//       3.
Other forms of the same operation (intrinsics and other variants) 4612 bool AArch64InstrInfo::isAssociativeAndCommutative( 4613 const MachineInstr &Inst) const { 4614 switch (Inst.getOpcode()) { 4615 case AArch64::FADDDrr: 4616 case AArch64::FADDSrr: 4617 case AArch64::FADDv2f32: 4618 case AArch64::FADDv2f64: 4619 case AArch64::FADDv4f32: 4620 case AArch64::FMULDrr: 4621 case AArch64::FMULSrr: 4622 case AArch64::FMULX32: 4623 case AArch64::FMULX64: 4624 case AArch64::FMULXv2f32: 4625 case AArch64::FMULXv2f64: 4626 case AArch64::FMULXv4f32: 4627 case AArch64::FMULv2f32: 4628 case AArch64::FMULv2f64: 4629 case AArch64::FMULv4f32: 4630 return Inst.getParent()->getParent()->getTarget().Options.UnsafeFPMath; 4631 default: 4632 return false; 4633 } 4634 } 4635 4636 /// Find instructions that can be turned into madd. 4637 static bool getMaddPatterns(MachineInstr &Root, 4638 SmallVectorImpl<MachineCombinerPattern> &Patterns) { 4639 unsigned Opc = Root.getOpcode(); 4640 MachineBasicBlock &MBB = *Root.getParent(); 4641 bool Found = false; 4642 4643 if (!isCombineInstrCandidate(Opc)) 4644 return false; 4645 if (isCombineInstrSettingFlag(Opc)) { 4646 int Cmp_NZCV = Root.findRegisterDefOperandIdx(AArch64::NZCV, true); 4647 // When NZCV is live bail out. 4648 if (Cmp_NZCV == -1) 4649 return false; 4650 unsigned NewOpc = convertToNonFlagSettingOpc(Root); 4651 // When opcode can't change bail out. 4652 // CHECKME: do we miss any cases for opcode conversion? 4653 if (NewOpc == Opc) 4654 return false; 4655 Opc = NewOpc; 4656 } 4657 4658 auto setFound = [&](int Opcode, int Operand, unsigned ZeroReg, 4659 MachineCombinerPattern Pattern) { 4660 if (canCombineWithMUL(MBB, Root.getOperand(Operand), Opcode, ZeroReg)) { 4661 Patterns.push_back(Pattern); 4662 Found = true; 4663 } 4664 }; 4665 4666 auto setVFound = [&](int Opcode, int Operand, MachineCombinerPattern Pattern) { 4667 if (canCombine(MBB, Root.getOperand(Operand), Opcode)) { 4668 Patterns.push_back(Pattern); 4669 Found = true; 4670 } 4671 }; 4672 4673 typedef MachineCombinerPattern MCP; 4674 4675 switch (Opc) { 4676 default: 4677 break; 4678 case AArch64::ADDWrr: 4679 assert(Root.getOperand(1).isReg() && Root.getOperand(2).isReg() && 4680 "ADDWrr does not have register operands"); 4681 setFound(AArch64::MADDWrrr, 1, AArch64::WZR, MCP::MULADDW_OP1); 4682 setFound(AArch64::MADDWrrr, 2, AArch64::WZR, MCP::MULADDW_OP2); 4683 break; 4684 case AArch64::ADDXrr: 4685 setFound(AArch64::MADDXrrr, 1, AArch64::XZR, MCP::MULADDX_OP1); 4686 setFound(AArch64::MADDXrrr, 2, AArch64::XZR, MCP::MULADDX_OP2); 4687 break; 4688 case AArch64::SUBWrr: 4689 setFound(AArch64::MADDWrrr, 1, AArch64::WZR, MCP::MULSUBW_OP1); 4690 setFound(AArch64::MADDWrrr, 2, AArch64::WZR, MCP::MULSUBW_OP2); 4691 break; 4692 case AArch64::SUBXrr: 4693 setFound(AArch64::MADDXrrr, 1, AArch64::XZR, MCP::MULSUBX_OP1); 4694 setFound(AArch64::MADDXrrr, 2, AArch64::XZR, MCP::MULSUBX_OP2); 4695 break; 4696 case AArch64::ADDWri: 4697 setFound(AArch64::MADDWrrr, 1, AArch64::WZR, MCP::MULADDWI_OP1); 4698 break; 4699 case AArch64::ADDXri: 4700 setFound(AArch64::MADDXrrr, 1, AArch64::XZR, MCP::MULADDXI_OP1); 4701 break; 4702 case AArch64::SUBWri: 4703 setFound(AArch64::MADDWrrr, 1, AArch64::WZR, MCP::MULSUBWI_OP1); 4704 break; 4705 case AArch64::SUBXri: 4706 setFound(AArch64::MADDXrrr, 1, AArch64::XZR, MCP::MULSUBXI_OP1); 4707 break; 4708 case AArch64::ADDv8i8: 4709 setVFound(AArch64::MULv8i8, 1, MCP::MULADDv8i8_OP1); 4710 setVFound(AArch64::MULv8i8, 2, MCP::MULADDv8i8_OP2); 4711 break; 4712 case 
AArch64::ADDv16i8: 4713 setVFound(AArch64::MULv16i8, 1, MCP::MULADDv16i8_OP1); 4714 setVFound(AArch64::MULv16i8, 2, MCP::MULADDv16i8_OP2); 4715 break; 4716 case AArch64::ADDv4i16: 4717 setVFound(AArch64::MULv4i16, 1, MCP::MULADDv4i16_OP1); 4718 setVFound(AArch64::MULv4i16, 2, MCP::MULADDv4i16_OP2); 4719 setVFound(AArch64::MULv4i16_indexed, 1, MCP::MULADDv4i16_indexed_OP1); 4720 setVFound(AArch64::MULv4i16_indexed, 2, MCP::MULADDv4i16_indexed_OP2); 4721 break; 4722 case AArch64::ADDv8i16: 4723 setVFound(AArch64::MULv8i16, 1, MCP::MULADDv8i16_OP1); 4724 setVFound(AArch64::MULv8i16, 2, MCP::MULADDv8i16_OP2); 4725 setVFound(AArch64::MULv8i16_indexed, 1, MCP::MULADDv8i16_indexed_OP1); 4726 setVFound(AArch64::MULv8i16_indexed, 2, MCP::MULADDv8i16_indexed_OP2); 4727 break; 4728 case AArch64::ADDv2i32: 4729 setVFound(AArch64::MULv2i32, 1, MCP::MULADDv2i32_OP1); 4730 setVFound(AArch64::MULv2i32, 2, MCP::MULADDv2i32_OP2); 4731 setVFound(AArch64::MULv2i32_indexed, 1, MCP::MULADDv2i32_indexed_OP1); 4732 setVFound(AArch64::MULv2i32_indexed, 2, MCP::MULADDv2i32_indexed_OP2); 4733 break; 4734 case AArch64::ADDv4i32: 4735 setVFound(AArch64::MULv4i32, 1, MCP::MULADDv4i32_OP1); 4736 setVFound(AArch64::MULv4i32, 2, MCP::MULADDv4i32_OP2); 4737 setVFound(AArch64::MULv4i32_indexed, 1, MCP::MULADDv4i32_indexed_OP1); 4738 setVFound(AArch64::MULv4i32_indexed, 2, MCP::MULADDv4i32_indexed_OP2); 4739 break; 4740 case AArch64::SUBv8i8: 4741 setVFound(AArch64::MULv8i8, 1, MCP::MULSUBv8i8_OP1); 4742 setVFound(AArch64::MULv8i8, 2, MCP::MULSUBv8i8_OP2); 4743 break; 4744 case AArch64::SUBv16i8: 4745 setVFound(AArch64::MULv16i8, 1, MCP::MULSUBv16i8_OP1); 4746 setVFound(AArch64::MULv16i8, 2, MCP::MULSUBv16i8_OP2); 4747 break; 4748 case AArch64::SUBv4i16: 4749 setVFound(AArch64::MULv4i16, 1, MCP::MULSUBv4i16_OP1); 4750 setVFound(AArch64::MULv4i16, 2, MCP::MULSUBv4i16_OP2); 4751 setVFound(AArch64::MULv4i16_indexed, 1, MCP::MULSUBv4i16_indexed_OP1); 4752 setVFound(AArch64::MULv4i16_indexed, 2, MCP::MULSUBv4i16_indexed_OP2); 4753 break; 4754 case AArch64::SUBv8i16: 4755 setVFound(AArch64::MULv8i16, 1, MCP::MULSUBv8i16_OP1); 4756 setVFound(AArch64::MULv8i16, 2, MCP::MULSUBv8i16_OP2); 4757 setVFound(AArch64::MULv8i16_indexed, 1, MCP::MULSUBv8i16_indexed_OP1); 4758 setVFound(AArch64::MULv8i16_indexed, 2, MCP::MULSUBv8i16_indexed_OP2); 4759 break; 4760 case AArch64::SUBv2i32: 4761 setVFound(AArch64::MULv2i32, 1, MCP::MULSUBv2i32_OP1); 4762 setVFound(AArch64::MULv2i32, 2, MCP::MULSUBv2i32_OP2); 4763 setVFound(AArch64::MULv2i32_indexed, 1, MCP::MULSUBv2i32_indexed_OP1); 4764 setVFound(AArch64::MULv2i32_indexed, 2, MCP::MULSUBv2i32_indexed_OP2); 4765 break; 4766 case AArch64::SUBv4i32: 4767 setVFound(AArch64::MULv4i32, 1, MCP::MULSUBv4i32_OP1); 4768 setVFound(AArch64::MULv4i32, 2, MCP::MULSUBv4i32_OP2); 4769 setVFound(AArch64::MULv4i32_indexed, 1, MCP::MULSUBv4i32_indexed_OP1); 4770 setVFound(AArch64::MULv4i32_indexed, 2, MCP::MULSUBv4i32_indexed_OP2); 4771 break; 4772 } 4773 return Found; 4774 } 4775 /// Floating-Point Support 4776 4777 /// Find instructions that can be turned into madd. 
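/// An illustrative subtract case: "fmul s1, s2, s3" followed by
/// "fsub s0, s4, s1" matches the FMULSUBS_OP2 pattern and can later be
/// rewritten as "fmsub s0, s2, s3, s4" (i.e. s4 - s2*s3).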
4778 static bool getFMAPatterns(MachineInstr &Root, 4779 SmallVectorImpl<MachineCombinerPattern> &Patterns) { 4780 4781 if (!isCombineInstrCandidateFP(Root)) 4782 return false; 4783 4784 MachineBasicBlock &MBB = *Root.getParent(); 4785 bool Found = false; 4786 4787 auto Match = [&](int Opcode, int Operand, 4788 MachineCombinerPattern Pattern) -> bool { 4789 if (canCombineWithFMUL(MBB, Root.getOperand(Operand), Opcode)) { 4790 Patterns.push_back(Pattern); 4791 return true; 4792 } 4793 return false; 4794 }; 4795 4796 typedef MachineCombinerPattern MCP; 4797 4798 switch (Root.getOpcode()) { 4799 default: 4800 assert(false && "Unsupported FP instruction in combiner\n"); 4801 break; 4802 case AArch64::FADDHrr: 4803 assert(Root.getOperand(1).isReg() && Root.getOperand(2).isReg() && 4804 "FADDHrr does not have register operands"); 4805 4806 Found = Match(AArch64::FMULHrr, 1, MCP::FMULADDH_OP1); 4807 Found |= Match(AArch64::FMULHrr, 2, MCP::FMULADDH_OP2); 4808 break; 4809 case AArch64::FADDSrr: 4810 assert(Root.getOperand(1).isReg() && Root.getOperand(2).isReg() && 4811 "FADDSrr does not have register operands"); 4812 4813 Found |= Match(AArch64::FMULSrr, 1, MCP::FMULADDS_OP1) || 4814 Match(AArch64::FMULv1i32_indexed, 1, MCP::FMLAv1i32_indexed_OP1); 4815 4816 Found |= Match(AArch64::FMULSrr, 2, MCP::FMULADDS_OP2) || 4817 Match(AArch64::FMULv1i32_indexed, 2, MCP::FMLAv1i32_indexed_OP2); 4818 break; 4819 case AArch64::FADDDrr: 4820 Found |= Match(AArch64::FMULDrr, 1, MCP::FMULADDD_OP1) || 4821 Match(AArch64::FMULv1i64_indexed, 1, MCP::FMLAv1i64_indexed_OP1); 4822 4823 Found |= Match(AArch64::FMULDrr, 2, MCP::FMULADDD_OP2) || 4824 Match(AArch64::FMULv1i64_indexed, 2, MCP::FMLAv1i64_indexed_OP2); 4825 break; 4826 case AArch64::FADDv4f16: 4827 Found |= Match(AArch64::FMULv4i16_indexed, 1, MCP::FMLAv4i16_indexed_OP1) || 4828 Match(AArch64::FMULv4f16, 1, MCP::FMLAv4f16_OP1); 4829 4830 Found |= Match(AArch64::FMULv4i16_indexed, 2, MCP::FMLAv4i16_indexed_OP2) || 4831 Match(AArch64::FMULv4f16, 2, MCP::FMLAv4f16_OP2); 4832 break; 4833 case AArch64::FADDv8f16: 4834 Found |= Match(AArch64::FMULv8i16_indexed, 1, MCP::FMLAv8i16_indexed_OP1) || 4835 Match(AArch64::FMULv8f16, 1, MCP::FMLAv8f16_OP1); 4836 4837 Found |= Match(AArch64::FMULv8i16_indexed, 2, MCP::FMLAv8i16_indexed_OP2) || 4838 Match(AArch64::FMULv8f16, 2, MCP::FMLAv8f16_OP2); 4839 break; 4840 case AArch64::FADDv2f32: 4841 Found |= Match(AArch64::FMULv2i32_indexed, 1, MCP::FMLAv2i32_indexed_OP1) || 4842 Match(AArch64::FMULv2f32, 1, MCP::FMLAv2f32_OP1); 4843 4844 Found |= Match(AArch64::FMULv2i32_indexed, 2, MCP::FMLAv2i32_indexed_OP2) || 4845 Match(AArch64::FMULv2f32, 2, MCP::FMLAv2f32_OP2); 4846 break; 4847 case AArch64::FADDv2f64: 4848 Found |= Match(AArch64::FMULv2i64_indexed, 1, MCP::FMLAv2i64_indexed_OP1) || 4849 Match(AArch64::FMULv2f64, 1, MCP::FMLAv2f64_OP1); 4850 4851 Found |= Match(AArch64::FMULv2i64_indexed, 2, MCP::FMLAv2i64_indexed_OP2) || 4852 Match(AArch64::FMULv2f64, 2, MCP::FMLAv2f64_OP2); 4853 break; 4854 case AArch64::FADDv4f32: 4855 Found |= Match(AArch64::FMULv4i32_indexed, 1, MCP::FMLAv4i32_indexed_OP1) || 4856 Match(AArch64::FMULv4f32, 1, MCP::FMLAv4f32_OP1); 4857 4858 Found |= Match(AArch64::FMULv4i32_indexed, 2, MCP::FMLAv4i32_indexed_OP2) || 4859 Match(AArch64::FMULv4f32, 2, MCP::FMLAv4f32_OP2); 4860 break; 4861 case AArch64::FSUBHrr: 4862 Found = Match(AArch64::FMULHrr, 1, MCP::FMULSUBH_OP1); 4863 Found |= Match(AArch64::FMULHrr, 2, MCP::FMULSUBH_OP2); 4864 Found |= Match(AArch64::FNMULHrr, 1, MCP::FNMULSUBH_OP1); 4865 break; 
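  // (As above, the _OP1/_OP2 suffix records which Root operand is defined by
  // the multiply; the other operand becomes the accumulator of the fused
  // instruction built later.)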
4866 case AArch64::FSUBSrr: 4867 Found = Match(AArch64::FMULSrr, 1, MCP::FMULSUBS_OP1); 4868 4869 Found |= Match(AArch64::FMULSrr, 2, MCP::FMULSUBS_OP2) || 4870 Match(AArch64::FMULv1i32_indexed, 2, MCP::FMLSv1i32_indexed_OP2); 4871 4872 Found |= Match(AArch64::FNMULSrr, 1, MCP::FNMULSUBS_OP1); 4873 break; 4874 case AArch64::FSUBDrr: 4875 Found = Match(AArch64::FMULDrr, 1, MCP::FMULSUBD_OP1); 4876 4877 Found |= Match(AArch64::FMULDrr, 2, MCP::FMULSUBD_OP2) || 4878 Match(AArch64::FMULv1i64_indexed, 2, MCP::FMLSv1i64_indexed_OP2); 4879 4880 Found |= Match(AArch64::FNMULDrr, 1, MCP::FNMULSUBD_OP1); 4881 break; 4882 case AArch64::FSUBv4f16: 4883 Found |= Match(AArch64::FMULv4i16_indexed, 2, MCP::FMLSv4i16_indexed_OP2) || 4884 Match(AArch64::FMULv4f16, 2, MCP::FMLSv4f16_OP2); 4885 4886 Found |= Match(AArch64::FMULv4i16_indexed, 1, MCP::FMLSv4i16_indexed_OP1) || 4887 Match(AArch64::FMULv4f16, 1, MCP::FMLSv4f16_OP1); 4888 break; 4889 case AArch64::FSUBv8f16: 4890 Found |= Match(AArch64::FMULv8i16_indexed, 2, MCP::FMLSv8i16_indexed_OP2) || 4891 Match(AArch64::FMULv8f16, 2, MCP::FMLSv8f16_OP2); 4892 4893 Found |= Match(AArch64::FMULv8i16_indexed, 1, MCP::FMLSv8i16_indexed_OP1) || 4894 Match(AArch64::FMULv8f16, 1, MCP::FMLSv8f16_OP1); 4895 break; 4896 case AArch64::FSUBv2f32: 4897 Found |= Match(AArch64::FMULv2i32_indexed, 2, MCP::FMLSv2i32_indexed_OP2) || 4898 Match(AArch64::FMULv2f32, 2, MCP::FMLSv2f32_OP2); 4899 4900 Found |= Match(AArch64::FMULv2i32_indexed, 1, MCP::FMLSv2i32_indexed_OP1) || 4901 Match(AArch64::FMULv2f32, 1, MCP::FMLSv2f32_OP1); 4902 break; 4903 case AArch64::FSUBv2f64: 4904 Found |= Match(AArch64::FMULv2i64_indexed, 2, MCP::FMLSv2i64_indexed_OP2) || 4905 Match(AArch64::FMULv2f64, 2, MCP::FMLSv2f64_OP2); 4906 4907 Found |= Match(AArch64::FMULv2i64_indexed, 1, MCP::FMLSv2i64_indexed_OP1) || 4908 Match(AArch64::FMULv2f64, 1, MCP::FMLSv2f64_OP1); 4909 break; 4910 case AArch64::FSUBv4f32: 4911 Found |= Match(AArch64::FMULv4i32_indexed, 2, MCP::FMLSv4i32_indexed_OP2) || 4912 Match(AArch64::FMULv4f32, 2, MCP::FMLSv4f32_OP2); 4913 4914 Found |= Match(AArch64::FMULv4i32_indexed, 1, MCP::FMLSv4i32_indexed_OP1) || 4915 Match(AArch64::FMULv4f32, 1, MCP::FMLSv4f32_OP1); 4916 break; 4917 } 4918 return Found; 4919 } 4920 4921 static bool getFMULPatterns(MachineInstr &Root, 4922 SmallVectorImpl<MachineCombinerPattern> &Patterns) { 4923 MachineBasicBlock &MBB = *Root.getParent(); 4924 bool Found = false; 4925 4926 auto Match = [&](unsigned Opcode, int Operand, 4927 MachineCombinerPattern Pattern) -> bool { 4928 MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo(); 4929 MachineOperand &MO = Root.getOperand(Operand); 4930 MachineInstr *MI = nullptr; 4931 if (MO.isReg() && Register::isVirtualRegister(MO.getReg())) 4932 MI = MRI.getUniqueVRegDef(MO.getReg()); 4933 if (MI && MI->getOpcode() == Opcode) { 4934 Patterns.push_back(Pattern); 4935 return true; 4936 } 4937 return false; 4938 }; 4939 4940 typedef MachineCombinerPattern MCP; 4941 4942 switch (Root.getOpcode()) { 4943 default: 4944 return false; 4945 case AArch64::FMULv2f32: 4946 Found = Match(AArch64::DUPv2i32lane, 1, MCP::FMULv2i32_indexed_OP1); 4947 Found |= Match(AArch64::DUPv2i32lane, 2, MCP::FMULv2i32_indexed_OP2); 4948 break; 4949 case AArch64::FMULv2f64: 4950 Found = Match(AArch64::DUPv2i64lane, 1, MCP::FMULv2i64_indexed_OP1); 4951 Found |= Match(AArch64::DUPv2i64lane, 2, MCP::FMULv2i64_indexed_OP2); 4952 break; 4953 case AArch64::FMULv4f16: 4954 Found = Match(AArch64::DUPv4i16lane, 1, MCP::FMULv4i16_indexed_OP1); 4955 Found |= 
Match(AArch64::DUPv4i16lane, 2, MCP::FMULv4i16_indexed_OP2); 4956 break; 4957 case AArch64::FMULv4f32: 4958 Found = Match(AArch64::DUPv4i32lane, 1, MCP::FMULv4i32_indexed_OP1); 4959 Found |= Match(AArch64::DUPv4i32lane, 2, MCP::FMULv4i32_indexed_OP2); 4960 break; 4961 case AArch64::FMULv8f16: 4962 Found = Match(AArch64::DUPv8i16lane, 1, MCP::FMULv8i16_indexed_OP1); 4963 Found |= Match(AArch64::DUPv8i16lane, 2, MCP::FMULv8i16_indexed_OP2); 4964 break; 4965 } 4966 4967 return Found; 4968 } 4969 4970 /// Return true when a code sequence can improve throughput. It 4971 /// should be called only for instructions in loops. 4972 /// \param Pattern - combiner pattern 4973 bool AArch64InstrInfo::isThroughputPattern( 4974 MachineCombinerPattern Pattern) const { 4975 switch (Pattern) { 4976 default: 4977 break; 4978 case MachineCombinerPattern::FMULADDH_OP1: 4979 case MachineCombinerPattern::FMULADDH_OP2: 4980 case MachineCombinerPattern::FMULSUBH_OP1: 4981 case MachineCombinerPattern::FMULSUBH_OP2: 4982 case MachineCombinerPattern::FMULADDS_OP1: 4983 case MachineCombinerPattern::FMULADDS_OP2: 4984 case MachineCombinerPattern::FMULSUBS_OP1: 4985 case MachineCombinerPattern::FMULSUBS_OP2: 4986 case MachineCombinerPattern::FMULADDD_OP1: 4987 case MachineCombinerPattern::FMULADDD_OP2: 4988 case MachineCombinerPattern::FMULSUBD_OP1: 4989 case MachineCombinerPattern::FMULSUBD_OP2: 4990 case MachineCombinerPattern::FNMULSUBH_OP1: 4991 case MachineCombinerPattern::FNMULSUBS_OP1: 4992 case MachineCombinerPattern::FNMULSUBD_OP1: 4993 case MachineCombinerPattern::FMLAv4i16_indexed_OP1: 4994 case MachineCombinerPattern::FMLAv4i16_indexed_OP2: 4995 case MachineCombinerPattern::FMLAv8i16_indexed_OP1: 4996 case MachineCombinerPattern::FMLAv8i16_indexed_OP2: 4997 case MachineCombinerPattern::FMLAv1i32_indexed_OP1: 4998 case MachineCombinerPattern::FMLAv1i32_indexed_OP2: 4999 case MachineCombinerPattern::FMLAv1i64_indexed_OP1: 5000 case MachineCombinerPattern::FMLAv1i64_indexed_OP2: 5001 case MachineCombinerPattern::FMLAv4f16_OP2: 5002 case MachineCombinerPattern::FMLAv4f16_OP1: 5003 case MachineCombinerPattern::FMLAv8f16_OP1: 5004 case MachineCombinerPattern::FMLAv8f16_OP2: 5005 case MachineCombinerPattern::FMLAv2f32_OP2: 5006 case MachineCombinerPattern::FMLAv2f32_OP1: 5007 case MachineCombinerPattern::FMLAv2f64_OP1: 5008 case MachineCombinerPattern::FMLAv2f64_OP2: 5009 case MachineCombinerPattern::FMLAv2i32_indexed_OP1: 5010 case MachineCombinerPattern::FMLAv2i32_indexed_OP2: 5011 case MachineCombinerPattern::FMLAv2i64_indexed_OP1: 5012 case MachineCombinerPattern::FMLAv2i64_indexed_OP2: 5013 case MachineCombinerPattern::FMLAv4f32_OP1: 5014 case MachineCombinerPattern::FMLAv4f32_OP2: 5015 case MachineCombinerPattern::FMLAv4i32_indexed_OP1: 5016 case MachineCombinerPattern::FMLAv4i32_indexed_OP2: 5017 case MachineCombinerPattern::FMLSv4i16_indexed_OP1: 5018 case MachineCombinerPattern::FMLSv4i16_indexed_OP2: 5019 case MachineCombinerPattern::FMLSv8i16_indexed_OP1: 5020 case MachineCombinerPattern::FMLSv8i16_indexed_OP2: 5021 case MachineCombinerPattern::FMLSv1i32_indexed_OP2: 5022 case MachineCombinerPattern::FMLSv1i64_indexed_OP2: 5023 case MachineCombinerPattern::FMLSv2i32_indexed_OP2: 5024 case MachineCombinerPattern::FMLSv2i64_indexed_OP2: 5025 case MachineCombinerPattern::FMLSv4f16_OP1: 5026 case MachineCombinerPattern::FMLSv4f16_OP2: 5027 case MachineCombinerPattern::FMLSv8f16_OP1: 5028 case MachineCombinerPattern::FMLSv8f16_OP2: 5029 case MachineCombinerPattern::FMLSv2f32_OP2: 5030 case 
MachineCombinerPattern::FMLSv2f64_OP2: 5031 case MachineCombinerPattern::FMLSv4i32_indexed_OP2: 5032 case MachineCombinerPattern::FMLSv4f32_OP2: 5033 case MachineCombinerPattern::FMULv2i32_indexed_OP1: 5034 case MachineCombinerPattern::FMULv2i32_indexed_OP2: 5035 case MachineCombinerPattern::FMULv2i64_indexed_OP1: 5036 case MachineCombinerPattern::FMULv2i64_indexed_OP2: 5037 case MachineCombinerPattern::FMULv4i16_indexed_OP1: 5038 case MachineCombinerPattern::FMULv4i16_indexed_OP2: 5039 case MachineCombinerPattern::FMULv4i32_indexed_OP1: 5040 case MachineCombinerPattern::FMULv4i32_indexed_OP2: 5041 case MachineCombinerPattern::FMULv8i16_indexed_OP1: 5042 case MachineCombinerPattern::FMULv8i16_indexed_OP2: 5043 case MachineCombinerPattern::MULADDv8i8_OP1: 5044 case MachineCombinerPattern::MULADDv8i8_OP2: 5045 case MachineCombinerPattern::MULADDv16i8_OP1: 5046 case MachineCombinerPattern::MULADDv16i8_OP2: 5047 case MachineCombinerPattern::MULADDv4i16_OP1: 5048 case MachineCombinerPattern::MULADDv4i16_OP2: 5049 case MachineCombinerPattern::MULADDv8i16_OP1: 5050 case MachineCombinerPattern::MULADDv8i16_OP2: 5051 case MachineCombinerPattern::MULADDv2i32_OP1: 5052 case MachineCombinerPattern::MULADDv2i32_OP2: 5053 case MachineCombinerPattern::MULADDv4i32_OP1: 5054 case MachineCombinerPattern::MULADDv4i32_OP2: 5055 case MachineCombinerPattern::MULSUBv8i8_OP1: 5056 case MachineCombinerPattern::MULSUBv8i8_OP2: 5057 case MachineCombinerPattern::MULSUBv16i8_OP1: 5058 case MachineCombinerPattern::MULSUBv16i8_OP2: 5059 case MachineCombinerPattern::MULSUBv4i16_OP1: 5060 case MachineCombinerPattern::MULSUBv4i16_OP2: 5061 case MachineCombinerPattern::MULSUBv8i16_OP1: 5062 case MachineCombinerPattern::MULSUBv8i16_OP2: 5063 case MachineCombinerPattern::MULSUBv2i32_OP1: 5064 case MachineCombinerPattern::MULSUBv2i32_OP2: 5065 case MachineCombinerPattern::MULSUBv4i32_OP1: 5066 case MachineCombinerPattern::MULSUBv4i32_OP2: 5067 case MachineCombinerPattern::MULADDv4i16_indexed_OP1: 5068 case MachineCombinerPattern::MULADDv4i16_indexed_OP2: 5069 case MachineCombinerPattern::MULADDv8i16_indexed_OP1: 5070 case MachineCombinerPattern::MULADDv8i16_indexed_OP2: 5071 case MachineCombinerPattern::MULADDv2i32_indexed_OP1: 5072 case MachineCombinerPattern::MULADDv2i32_indexed_OP2: 5073 case MachineCombinerPattern::MULADDv4i32_indexed_OP1: 5074 case MachineCombinerPattern::MULADDv4i32_indexed_OP2: 5075 case MachineCombinerPattern::MULSUBv4i16_indexed_OP1: 5076 case MachineCombinerPattern::MULSUBv4i16_indexed_OP2: 5077 case MachineCombinerPattern::MULSUBv8i16_indexed_OP1: 5078 case MachineCombinerPattern::MULSUBv8i16_indexed_OP2: 5079 case MachineCombinerPattern::MULSUBv2i32_indexed_OP1: 5080 case MachineCombinerPattern::MULSUBv2i32_indexed_OP2: 5081 case MachineCombinerPattern::MULSUBv4i32_indexed_OP1: 5082 case MachineCombinerPattern::MULSUBv4i32_indexed_OP2: 5083 return true; 5084 } // end switch (Pattern) 5085 return false; 5086 } 5087 /// Return true when there is potentially a faster code sequence for an 5088 /// instruction chain ending in \p Root. All potential patterns are listed in 5089 /// the \p Pattern vector. Pattern should be sorted in priority order since the 5090 /// pattern evaluator stops checking as soon as it finds a faster sequence. 
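/// Integer multiply-add patterns are tried first, then the vector FMUL-by-DUP
/// and FMA patterns, and finally the generic patterns handled by
/// TargetInstrInfo::getMachineCombinerPatterns().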

bool AArch64InstrInfo::getMachineCombinerPatterns(
    MachineInstr &Root, SmallVectorImpl<MachineCombinerPattern> &Patterns,
    bool DoRegPressureReduce) const {
  // Integer patterns
  if (getMaddPatterns(Root, Patterns))
    return true;
  // Floating point patterns
  if (getFMULPatterns(Root, Patterns))
    return true;
  if (getFMAPatterns(Root, Patterns))
    return true;

  return TargetInstrInfo::getMachineCombinerPatterns(Root, Patterns,
                                                     DoRegPressureReduce);
}

enum class FMAInstKind { Default, Indexed, Accumulator };
/// genFusedMultiply - Generate fused multiply instructions.
/// This function supports both integer and floating point instructions.
/// A typical example:
///  F|MUL I=A,B,0
///  F|ADD R,I,C
///  ==> F|MADD R,A,B,C
/// \param MF Containing MachineFunction
/// \param MRI Register information
/// \param TII Target information
/// \param Root is the F|ADD instruction
/// \param [out] InsInstrs is a vector of machine instructions and will
/// contain the generated madd instruction
/// \param IdxMulOpd is index of operand in Root that is the result of
/// the F|MUL. In the example above IdxMulOpd is 1.
/// \param MaddOpc the opcode of the f|madd instruction
/// \param RC Register class of operands
/// \param kind Kind of fma instruction (addressing mode) to be generated
/// \param ReplacedAddend is the result register from the instruction
/// replacing the non-combined operand, if any.
static MachineInstr *
genFusedMultiply(MachineFunction &MF, MachineRegisterInfo &MRI,
                 const TargetInstrInfo *TII, MachineInstr &Root,
                 SmallVectorImpl<MachineInstr *> &InsInstrs, unsigned IdxMulOpd,
                 unsigned MaddOpc, const TargetRegisterClass *RC,
                 FMAInstKind kind = FMAInstKind::Default,
                 const Register *ReplacedAddend = nullptr) {
  assert(IdxMulOpd == 1 || IdxMulOpd == 2);

  unsigned IdxOtherOpd = IdxMulOpd == 1 ? 2 : 1;
  MachineInstr *MUL = MRI.getUniqueVRegDef(Root.getOperand(IdxMulOpd).getReg());
  Register ResultReg = Root.getOperand(0).getReg();
  Register SrcReg0 = MUL->getOperand(1).getReg();
  bool Src0IsKill = MUL->getOperand(1).isKill();
  Register SrcReg1 = MUL->getOperand(2).getReg();
  bool Src1IsKill = MUL->getOperand(2).isKill();

  unsigned SrcReg2;
  bool Src2IsKill;
  if (ReplacedAddend) {
    // If we just generated a new addend, we must be its only use.
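    // (In this file the replaced addend is always a freshly built negation of
    // the original addend, created by genNeg() or by the FNEG cases in
    // genAlternativeCodeSequence(), so this really is its only use.)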
5149 SrcReg2 = *ReplacedAddend; 5150 Src2IsKill = true; 5151 } else { 5152 SrcReg2 = Root.getOperand(IdxOtherOpd).getReg(); 5153 Src2IsKill = Root.getOperand(IdxOtherOpd).isKill(); 5154 } 5155 5156 if (Register::isVirtualRegister(ResultReg)) 5157 MRI.constrainRegClass(ResultReg, RC); 5158 if (Register::isVirtualRegister(SrcReg0)) 5159 MRI.constrainRegClass(SrcReg0, RC); 5160 if (Register::isVirtualRegister(SrcReg1)) 5161 MRI.constrainRegClass(SrcReg1, RC); 5162 if (Register::isVirtualRegister(SrcReg2)) 5163 MRI.constrainRegClass(SrcReg2, RC); 5164 5165 MachineInstrBuilder MIB; 5166 if (kind == FMAInstKind::Default) 5167 MIB = BuildMI(MF, Root.getDebugLoc(), TII->get(MaddOpc), ResultReg) 5168 .addReg(SrcReg0, getKillRegState(Src0IsKill)) 5169 .addReg(SrcReg1, getKillRegState(Src1IsKill)) 5170 .addReg(SrcReg2, getKillRegState(Src2IsKill)); 5171 else if (kind == FMAInstKind::Indexed) 5172 MIB = BuildMI(MF, Root.getDebugLoc(), TII->get(MaddOpc), ResultReg) 5173 .addReg(SrcReg2, getKillRegState(Src2IsKill)) 5174 .addReg(SrcReg0, getKillRegState(Src0IsKill)) 5175 .addReg(SrcReg1, getKillRegState(Src1IsKill)) 5176 .addImm(MUL->getOperand(3).getImm()); 5177 else if (kind == FMAInstKind::Accumulator) 5178 MIB = BuildMI(MF, Root.getDebugLoc(), TII->get(MaddOpc), ResultReg) 5179 .addReg(SrcReg2, getKillRegState(Src2IsKill)) 5180 .addReg(SrcReg0, getKillRegState(Src0IsKill)) 5181 .addReg(SrcReg1, getKillRegState(Src1IsKill)); 5182 else 5183 assert(false && "Invalid FMA instruction kind \n"); 5184 // Insert the MADD (MADD, FMA, FMS, FMLA, FMSL) 5185 InsInstrs.push_back(MIB); 5186 return MUL; 5187 } 5188 5189 /// Fold (FMUL x (DUP y lane)) into (FMUL_indexed x y lane) 5190 static MachineInstr * 5191 genIndexedMultiply(MachineInstr &Root, 5192 SmallVectorImpl<MachineInstr *> &InsInstrs, 5193 unsigned IdxDupOp, unsigned MulOpc, 5194 const TargetRegisterClass *RC, MachineRegisterInfo &MRI) { 5195 assert(((IdxDupOp == 1) || (IdxDupOp == 2)) && 5196 "Invalid index of FMUL operand"); 5197 5198 MachineFunction &MF = *Root.getMF(); 5199 const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo(); 5200 5201 MachineInstr *Dup = 5202 MF.getRegInfo().getUniqueVRegDef(Root.getOperand(IdxDupOp).getReg()); 5203 5204 Register DupSrcReg = Dup->getOperand(1).getReg(); 5205 MRI.clearKillFlags(DupSrcReg); 5206 MRI.constrainRegClass(DupSrcReg, RC); 5207 5208 unsigned DupSrcLane = Dup->getOperand(2).getImm(); 5209 5210 unsigned IdxMulOp = IdxDupOp == 1 ? 2 : 1; 5211 MachineOperand &MulOp = Root.getOperand(IdxMulOp); 5212 5213 Register ResultReg = Root.getOperand(0).getReg(); 5214 5215 MachineInstrBuilder MIB; 5216 MIB = BuildMI(MF, Root.getDebugLoc(), TII->get(MulOpc), ResultReg) 5217 .add(MulOp) 5218 .addReg(DupSrcReg) 5219 .addImm(DupSrcLane); 5220 5221 InsInstrs.push_back(MIB); 5222 return &Root; 5223 } 5224 5225 /// genFusedMultiplyAcc - Helper to generate fused multiply accumulate 5226 /// instructions. 
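/// The accumulator form places the addend operand first and has no lane
/// immediate.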
5227 /// 5228 /// \see genFusedMultiply 5229 static MachineInstr *genFusedMultiplyAcc( 5230 MachineFunction &MF, MachineRegisterInfo &MRI, const TargetInstrInfo *TII, 5231 MachineInstr &Root, SmallVectorImpl<MachineInstr *> &InsInstrs, 5232 unsigned IdxMulOpd, unsigned MaddOpc, const TargetRegisterClass *RC) { 5233 return genFusedMultiply(MF, MRI, TII, Root, InsInstrs, IdxMulOpd, MaddOpc, RC, 5234 FMAInstKind::Accumulator); 5235 } 5236 5237 /// genNeg - Helper to generate an intermediate negation of the second operand 5238 /// of Root 5239 static Register genNeg(MachineFunction &MF, MachineRegisterInfo &MRI, 5240 const TargetInstrInfo *TII, MachineInstr &Root, 5241 SmallVectorImpl<MachineInstr *> &InsInstrs, 5242 DenseMap<unsigned, unsigned> &InstrIdxForVirtReg, 5243 unsigned MnegOpc, const TargetRegisterClass *RC) { 5244 Register NewVR = MRI.createVirtualRegister(RC); 5245 MachineInstrBuilder MIB = 5246 BuildMI(MF, Root.getDebugLoc(), TII->get(MnegOpc), NewVR) 5247 .add(Root.getOperand(2)); 5248 InsInstrs.push_back(MIB); 5249 5250 assert(InstrIdxForVirtReg.empty()); 5251 InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0)); 5252 5253 return NewVR; 5254 } 5255 5256 /// genFusedMultiplyAccNeg - Helper to generate fused multiply accumulate 5257 /// instructions with an additional negation of the accumulator 5258 static MachineInstr *genFusedMultiplyAccNeg( 5259 MachineFunction &MF, MachineRegisterInfo &MRI, const TargetInstrInfo *TII, 5260 MachineInstr &Root, SmallVectorImpl<MachineInstr *> &InsInstrs, 5261 DenseMap<unsigned, unsigned> &InstrIdxForVirtReg, unsigned IdxMulOpd, 5262 unsigned MaddOpc, unsigned MnegOpc, const TargetRegisterClass *RC) { 5263 assert(IdxMulOpd == 1); 5264 5265 Register NewVR = 5266 genNeg(MF, MRI, TII, Root, InsInstrs, InstrIdxForVirtReg, MnegOpc, RC); 5267 return genFusedMultiply(MF, MRI, TII, Root, InsInstrs, IdxMulOpd, MaddOpc, RC, 5268 FMAInstKind::Accumulator, &NewVR); 5269 } 5270 5271 /// genFusedMultiplyIdx - Helper to generate fused multiply accumulate 5272 /// instructions. 
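/// The indexed form additionally copies the lane immediate from the original
/// multiply (FMAInstKind::Indexed in genFusedMultiply).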
///
/// \see genFusedMultiply
static MachineInstr *genFusedMultiplyIdx(
    MachineFunction &MF, MachineRegisterInfo &MRI, const TargetInstrInfo *TII,
    MachineInstr &Root, SmallVectorImpl<MachineInstr *> &InsInstrs,
    unsigned IdxMulOpd, unsigned MaddOpc, const TargetRegisterClass *RC) {
  return genFusedMultiply(MF, MRI, TII, Root, InsInstrs, IdxMulOpd, MaddOpc, RC,
                          FMAInstKind::Indexed);
}

/// genFusedMultiplyIdxNeg - Helper to generate fused multiply accumulate
/// instructions with an additional negation of the accumulator
static MachineInstr *genFusedMultiplyIdxNeg(
    MachineFunction &MF, MachineRegisterInfo &MRI, const TargetInstrInfo *TII,
    MachineInstr &Root, SmallVectorImpl<MachineInstr *> &InsInstrs,
    DenseMap<unsigned, unsigned> &InstrIdxForVirtReg, unsigned IdxMulOpd,
    unsigned MaddOpc, unsigned MnegOpc, const TargetRegisterClass *RC) {
  assert(IdxMulOpd == 1);

  Register NewVR =
      genNeg(MF, MRI, TII, Root, InsInstrs, InstrIdxForVirtReg, MnegOpc, RC);

  return genFusedMultiply(MF, MRI, TII, Root, InsInstrs, IdxMulOpd, MaddOpc, RC,
                          FMAInstKind::Indexed, &NewVR);
}

/// genMaddR - Generate madd instruction and combine mul and add using
/// an extra virtual register
/// Example - an ADD intermediate needs to be stored in a register:
///   MUL I=A,B,0
///   ADD R,I,Imm
///   ==> ORR  V, ZR, Imm
///   ==> MADD R,A,B,V
/// \param MF Containing MachineFunction
/// \param MRI Register information
/// \param TII Target information
/// \param Root is the ADD instruction
/// \param [out] InsInstrs is a vector of machine instructions and will
/// contain the generated madd instruction
/// \param IdxMulOpd is index of operand in Root that is the result of
/// the MUL. In the example above IdxMulOpd is 1.
/// \param MaddOpc the opcode of the madd instruction
/// \param VR is a virtual register that holds the value of an ADD operand
/// (V in the example above).
5317 /// \param RC Register class of operands 5318 static MachineInstr *genMaddR(MachineFunction &MF, MachineRegisterInfo &MRI, 5319 const TargetInstrInfo *TII, MachineInstr &Root, 5320 SmallVectorImpl<MachineInstr *> &InsInstrs, 5321 unsigned IdxMulOpd, unsigned MaddOpc, unsigned VR, 5322 const TargetRegisterClass *RC) { 5323 assert(IdxMulOpd == 1 || IdxMulOpd == 2); 5324 5325 MachineInstr *MUL = MRI.getUniqueVRegDef(Root.getOperand(IdxMulOpd).getReg()); 5326 Register ResultReg = Root.getOperand(0).getReg(); 5327 Register SrcReg0 = MUL->getOperand(1).getReg(); 5328 bool Src0IsKill = MUL->getOperand(1).isKill(); 5329 Register SrcReg1 = MUL->getOperand(2).getReg(); 5330 bool Src1IsKill = MUL->getOperand(2).isKill(); 5331 5332 if (Register::isVirtualRegister(ResultReg)) 5333 MRI.constrainRegClass(ResultReg, RC); 5334 if (Register::isVirtualRegister(SrcReg0)) 5335 MRI.constrainRegClass(SrcReg0, RC); 5336 if (Register::isVirtualRegister(SrcReg1)) 5337 MRI.constrainRegClass(SrcReg1, RC); 5338 if (Register::isVirtualRegister(VR)) 5339 MRI.constrainRegClass(VR, RC); 5340 5341 MachineInstrBuilder MIB = 5342 BuildMI(MF, Root.getDebugLoc(), TII->get(MaddOpc), ResultReg) 5343 .addReg(SrcReg0, getKillRegState(Src0IsKill)) 5344 .addReg(SrcReg1, getKillRegState(Src1IsKill)) 5345 .addReg(VR); 5346 // Insert the MADD 5347 InsInstrs.push_back(MIB); 5348 return MUL; 5349 } 5350 5351 /// When getMachineCombinerPatterns() finds potential patterns, 5352 /// this function generates the instructions that could replace the 5353 /// original code sequence 5354 void AArch64InstrInfo::genAlternativeCodeSequence( 5355 MachineInstr &Root, MachineCombinerPattern Pattern, 5356 SmallVectorImpl<MachineInstr *> &InsInstrs, 5357 SmallVectorImpl<MachineInstr *> &DelInstrs, 5358 DenseMap<unsigned, unsigned> &InstrIdxForVirtReg) const { 5359 MachineBasicBlock &MBB = *Root.getParent(); 5360 MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo(); 5361 MachineFunction &MF = *MBB.getParent(); 5362 const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo(); 5363 5364 MachineInstr *MUL = nullptr; 5365 const TargetRegisterClass *RC; 5366 unsigned Opc; 5367 switch (Pattern) { 5368 default: 5369 // Reassociate instructions. 
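    // Patterns not matched above (e.g. the generic reassociation patterns)
    // are expanded by the target-independent implementation below.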
5370 TargetInstrInfo::genAlternativeCodeSequence(Root, Pattern, InsInstrs, 5371 DelInstrs, InstrIdxForVirtReg); 5372 return; 5373 case MachineCombinerPattern::MULADDW_OP1: 5374 case MachineCombinerPattern::MULADDX_OP1: 5375 // MUL I=A,B,0 5376 // ADD R,I,C 5377 // ==> MADD R,A,B,C 5378 // --- Create(MADD); 5379 if (Pattern == MachineCombinerPattern::MULADDW_OP1) { 5380 Opc = AArch64::MADDWrrr; 5381 RC = &AArch64::GPR32RegClass; 5382 } else { 5383 Opc = AArch64::MADDXrrr; 5384 RC = &AArch64::GPR64RegClass; 5385 } 5386 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC); 5387 break; 5388 case MachineCombinerPattern::MULADDW_OP2: 5389 case MachineCombinerPattern::MULADDX_OP2: 5390 // MUL I=A,B,0 5391 // ADD R,C,I 5392 // ==> MADD R,A,B,C 5393 // --- Create(MADD); 5394 if (Pattern == MachineCombinerPattern::MULADDW_OP2) { 5395 Opc = AArch64::MADDWrrr; 5396 RC = &AArch64::GPR32RegClass; 5397 } else { 5398 Opc = AArch64::MADDXrrr; 5399 RC = &AArch64::GPR64RegClass; 5400 } 5401 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC); 5402 break; 5403 case MachineCombinerPattern::MULADDWI_OP1: 5404 case MachineCombinerPattern::MULADDXI_OP1: { 5405 // MUL I=A,B,0 5406 // ADD R,I,Imm 5407 // ==> ORR V, ZR, Imm 5408 // ==> MADD R,A,B,V 5409 // --- Create(MADD); 5410 const TargetRegisterClass *OrrRC; 5411 unsigned BitSize, OrrOpc, ZeroReg; 5412 if (Pattern == MachineCombinerPattern::MULADDWI_OP1) { 5413 OrrOpc = AArch64::ORRWri; 5414 OrrRC = &AArch64::GPR32spRegClass; 5415 BitSize = 32; 5416 ZeroReg = AArch64::WZR; 5417 Opc = AArch64::MADDWrrr; 5418 RC = &AArch64::GPR32RegClass; 5419 } else { 5420 OrrOpc = AArch64::ORRXri; 5421 OrrRC = &AArch64::GPR64spRegClass; 5422 BitSize = 64; 5423 ZeroReg = AArch64::XZR; 5424 Opc = AArch64::MADDXrrr; 5425 RC = &AArch64::GPR64RegClass; 5426 } 5427 Register NewVR = MRI.createVirtualRegister(OrrRC); 5428 uint64_t Imm = Root.getOperand(2).getImm(); 5429 5430 if (Root.getOperand(3).isImm()) { 5431 unsigned Val = Root.getOperand(3).getImm(); 5432 Imm = Imm << Val; 5433 } 5434 uint64_t UImm = SignExtend64(Imm, BitSize); 5435 uint64_t Encoding; 5436 if (!AArch64_AM::processLogicalImmediate(UImm, BitSize, Encoding)) 5437 return; 5438 MachineInstrBuilder MIB1 = 5439 BuildMI(MF, Root.getDebugLoc(), TII->get(OrrOpc), NewVR) 5440 .addReg(ZeroReg) 5441 .addImm(Encoding); 5442 InsInstrs.push_back(MIB1); 5443 InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0)); 5444 MUL = genMaddR(MF, MRI, TII, Root, InsInstrs, 1, Opc, NewVR, RC); 5445 break; 5446 } 5447 case MachineCombinerPattern::MULSUBW_OP1: 5448 case MachineCombinerPattern::MULSUBX_OP1: { 5449 // MUL I=A,B,0 5450 // SUB R,I, C 5451 // ==> SUB V, 0, C 5452 // ==> MADD R,A,B,V // = -C + A*B 5453 // --- Create(MADD); 5454 const TargetRegisterClass *SubRC; 5455 unsigned SubOpc, ZeroReg; 5456 if (Pattern == MachineCombinerPattern::MULSUBW_OP1) { 5457 SubOpc = AArch64::SUBWrr; 5458 SubRC = &AArch64::GPR32spRegClass; 5459 ZeroReg = AArch64::WZR; 5460 Opc = AArch64::MADDWrrr; 5461 RC = &AArch64::GPR32RegClass; 5462 } else { 5463 SubOpc = AArch64::SUBXrr; 5464 SubRC = &AArch64::GPR64spRegClass; 5465 ZeroReg = AArch64::XZR; 5466 Opc = AArch64::MADDXrrr; 5467 RC = &AArch64::GPR64RegClass; 5468 } 5469 Register NewVR = MRI.createVirtualRegister(SubRC); 5470 // SUB NewVR, 0, C 5471 MachineInstrBuilder MIB1 = 5472 BuildMI(MF, Root.getDebugLoc(), TII->get(SubOpc), NewVR) 5473 .addReg(ZeroReg) 5474 .add(Root.getOperand(2)); 5475 InsInstrs.push_back(MIB1); 5476 InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0)); 5477 
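    // NewVR is defined by the first instruction added to InsInstrs (the SUB
    // built above); recording index 0 lets the combiner find that definition.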
MUL = genMaddR(MF, MRI, TII, Root, InsInstrs, 1, Opc, NewVR, RC); 5478 break; 5479 } 5480 case MachineCombinerPattern::MULSUBW_OP2: 5481 case MachineCombinerPattern::MULSUBX_OP2: 5482 // MUL I=A,B,0 5483 // SUB R,C,I 5484 // ==> MSUB R,A,B,C (computes C - A*B) 5485 // --- Create(MSUB); 5486 if (Pattern == MachineCombinerPattern::MULSUBW_OP2) { 5487 Opc = AArch64::MSUBWrrr; 5488 RC = &AArch64::GPR32RegClass; 5489 } else { 5490 Opc = AArch64::MSUBXrrr; 5491 RC = &AArch64::GPR64RegClass; 5492 } 5493 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC); 5494 break; 5495 case MachineCombinerPattern::MULSUBWI_OP1: 5496 case MachineCombinerPattern::MULSUBXI_OP1: { 5497 // MUL I=A,B,0 5498 // SUB R,I, Imm 5499 // ==> ORR V, ZR, -Imm 5500 // ==> MADD R,A,B,V // = -Imm + A*B 5501 // --- Create(MADD); 5502 const TargetRegisterClass *OrrRC; 5503 unsigned BitSize, OrrOpc, ZeroReg; 5504 if (Pattern == MachineCombinerPattern::MULSUBWI_OP1) { 5505 OrrOpc = AArch64::ORRWri; 5506 OrrRC = &AArch64::GPR32spRegClass; 5507 BitSize = 32; 5508 ZeroReg = AArch64::WZR; 5509 Opc = AArch64::MADDWrrr; 5510 RC = &AArch64::GPR32RegClass; 5511 } else { 5512 OrrOpc = AArch64::ORRXri; 5513 OrrRC = &AArch64::GPR64spRegClass; 5514 BitSize = 64; 5515 ZeroReg = AArch64::XZR; 5516 Opc = AArch64::MADDXrrr; 5517 RC = &AArch64::GPR64RegClass; 5518 } 5519 Register NewVR = MRI.createVirtualRegister(OrrRC); 5520 uint64_t Imm = Root.getOperand(2).getImm(); 5521 if (Root.getOperand(3).isImm()) { 5522 unsigned Val = Root.getOperand(3).getImm(); 5523 Imm = Imm << Val; 5524 } 5525 uint64_t UImm = SignExtend64(-Imm, BitSize); 5526 uint64_t Encoding; 5527 if (!AArch64_AM::processLogicalImmediate(UImm, BitSize, Encoding)) 5528 return; 5529 MachineInstrBuilder MIB1 = 5530 BuildMI(MF, Root.getDebugLoc(), TII->get(OrrOpc), NewVR) 5531 .addReg(ZeroReg) 5532 .addImm(Encoding); 5533 InsInstrs.push_back(MIB1); 5534 InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0)); 5535 MUL = genMaddR(MF, MRI, TII, Root, InsInstrs, 1, Opc, NewVR, RC); 5536 break; 5537 } 5538 5539 case MachineCombinerPattern::MULADDv8i8_OP1: 5540 Opc = AArch64::MLAv8i8; 5541 RC = &AArch64::FPR64RegClass; 5542 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC); 5543 break; 5544 case MachineCombinerPattern::MULADDv8i8_OP2: 5545 Opc = AArch64::MLAv8i8; 5546 RC = &AArch64::FPR64RegClass; 5547 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC); 5548 break; 5549 case MachineCombinerPattern::MULADDv16i8_OP1: 5550 Opc = AArch64::MLAv16i8; 5551 RC = &AArch64::FPR128RegClass; 5552 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC); 5553 break; 5554 case MachineCombinerPattern::MULADDv16i8_OP2: 5555 Opc = AArch64::MLAv16i8; 5556 RC = &AArch64::FPR128RegClass; 5557 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC); 5558 break; 5559 case MachineCombinerPattern::MULADDv4i16_OP1: 5560 Opc = AArch64::MLAv4i16; 5561 RC = &AArch64::FPR64RegClass; 5562 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC); 5563 break; 5564 case MachineCombinerPattern::MULADDv4i16_OP2: 5565 Opc = AArch64::MLAv4i16; 5566 RC = &AArch64::FPR64RegClass; 5567 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC); 5568 break; 5569 case MachineCombinerPattern::MULADDv8i16_OP1: 5570 Opc = AArch64::MLAv8i16; 5571 RC = &AArch64::FPR128RegClass; 5572 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC); 5573 break; 5574 case MachineCombinerPattern::MULADDv8i16_OP2: 5575 Opc = AArch64::MLAv8i16; 
5576 RC = &AArch64::FPR128RegClass; 5577 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC); 5578 break; 5579 case MachineCombinerPattern::MULADDv2i32_OP1: 5580 Opc = AArch64::MLAv2i32; 5581 RC = &AArch64::FPR64RegClass; 5582 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC); 5583 break; 5584 case MachineCombinerPattern::MULADDv2i32_OP2: 5585 Opc = AArch64::MLAv2i32; 5586 RC = &AArch64::FPR64RegClass; 5587 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC); 5588 break; 5589 case MachineCombinerPattern::MULADDv4i32_OP1: 5590 Opc = AArch64::MLAv4i32; 5591 RC = &AArch64::FPR128RegClass; 5592 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC); 5593 break; 5594 case MachineCombinerPattern::MULADDv4i32_OP2: 5595 Opc = AArch64::MLAv4i32; 5596 RC = &AArch64::FPR128RegClass; 5597 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC); 5598 break; 5599 5600 case MachineCombinerPattern::MULSUBv8i8_OP1: 5601 Opc = AArch64::MLAv8i8; 5602 RC = &AArch64::FPR64RegClass; 5603 MUL = genFusedMultiplyAccNeg(MF, MRI, TII, Root, InsInstrs, 5604 InstrIdxForVirtReg, 1, Opc, AArch64::NEGv8i8, 5605 RC); 5606 break; 5607 case MachineCombinerPattern::MULSUBv8i8_OP2: 5608 Opc = AArch64::MLSv8i8; 5609 RC = &AArch64::FPR64RegClass; 5610 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC); 5611 break; 5612 case MachineCombinerPattern::MULSUBv16i8_OP1: 5613 Opc = AArch64::MLAv16i8; 5614 RC = &AArch64::FPR128RegClass; 5615 MUL = genFusedMultiplyAccNeg(MF, MRI, TII, Root, InsInstrs, 5616 InstrIdxForVirtReg, 1, Opc, AArch64::NEGv16i8, 5617 RC); 5618 break; 5619 case MachineCombinerPattern::MULSUBv16i8_OP2: 5620 Opc = AArch64::MLSv16i8; 5621 RC = &AArch64::FPR128RegClass; 5622 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC); 5623 break; 5624 case MachineCombinerPattern::MULSUBv4i16_OP1: 5625 Opc = AArch64::MLAv4i16; 5626 RC = &AArch64::FPR64RegClass; 5627 MUL = genFusedMultiplyAccNeg(MF, MRI, TII, Root, InsInstrs, 5628 InstrIdxForVirtReg, 1, Opc, AArch64::NEGv4i16, 5629 RC); 5630 break; 5631 case MachineCombinerPattern::MULSUBv4i16_OP2: 5632 Opc = AArch64::MLSv4i16; 5633 RC = &AArch64::FPR64RegClass; 5634 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC); 5635 break; 5636 case MachineCombinerPattern::MULSUBv8i16_OP1: 5637 Opc = AArch64::MLAv8i16; 5638 RC = &AArch64::FPR128RegClass; 5639 MUL = genFusedMultiplyAccNeg(MF, MRI, TII, Root, InsInstrs, 5640 InstrIdxForVirtReg, 1, Opc, AArch64::NEGv8i16, 5641 RC); 5642 break; 5643 case MachineCombinerPattern::MULSUBv8i16_OP2: 5644 Opc = AArch64::MLSv8i16; 5645 RC = &AArch64::FPR128RegClass; 5646 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC); 5647 break; 5648 case MachineCombinerPattern::MULSUBv2i32_OP1: 5649 Opc = AArch64::MLAv2i32; 5650 RC = &AArch64::FPR64RegClass; 5651 MUL = genFusedMultiplyAccNeg(MF, MRI, TII, Root, InsInstrs, 5652 InstrIdxForVirtReg, 1, Opc, AArch64::NEGv2i32, 5653 RC); 5654 break; 5655 case MachineCombinerPattern::MULSUBv2i32_OP2: 5656 Opc = AArch64::MLSv2i32; 5657 RC = &AArch64::FPR64RegClass; 5658 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC); 5659 break; 5660 case MachineCombinerPattern::MULSUBv4i32_OP1: 5661 Opc = AArch64::MLAv4i32; 5662 RC = &AArch64::FPR128RegClass; 5663 MUL = genFusedMultiplyAccNeg(MF, MRI, TII, Root, InsInstrs, 5664 InstrIdxForVirtReg, 1, Opc, AArch64::NEGv4i32, 5665 RC); 5666 break; 5667 case MachineCombinerPattern::MULSUBv4i32_OP2: 5668 Opc = 
AArch64::MLSv4i32; 5669 RC = &AArch64::FPR128RegClass; 5670 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC); 5671 break; 5672 5673 case MachineCombinerPattern::MULADDv4i16_indexed_OP1: 5674 Opc = AArch64::MLAv4i16_indexed; 5675 RC = &AArch64::FPR64RegClass; 5676 MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC); 5677 break; 5678 case MachineCombinerPattern::MULADDv4i16_indexed_OP2: 5679 Opc = AArch64::MLAv4i16_indexed; 5680 RC = &AArch64::FPR64RegClass; 5681 MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC); 5682 break; 5683 case MachineCombinerPattern::MULADDv8i16_indexed_OP1: 5684 Opc = AArch64::MLAv8i16_indexed; 5685 RC = &AArch64::FPR128RegClass; 5686 MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC); 5687 break; 5688 case MachineCombinerPattern::MULADDv8i16_indexed_OP2: 5689 Opc = AArch64::MLAv8i16_indexed; 5690 RC = &AArch64::FPR128RegClass; 5691 MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC); 5692 break; 5693 case MachineCombinerPattern::MULADDv2i32_indexed_OP1: 5694 Opc = AArch64::MLAv2i32_indexed; 5695 RC = &AArch64::FPR64RegClass; 5696 MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC); 5697 break; 5698 case MachineCombinerPattern::MULADDv2i32_indexed_OP2: 5699 Opc = AArch64::MLAv2i32_indexed; 5700 RC = &AArch64::FPR64RegClass; 5701 MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC); 5702 break; 5703 case MachineCombinerPattern::MULADDv4i32_indexed_OP1: 5704 Opc = AArch64::MLAv4i32_indexed; 5705 RC = &AArch64::FPR128RegClass; 5706 MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC); 5707 break; 5708 case MachineCombinerPattern::MULADDv4i32_indexed_OP2: 5709 Opc = AArch64::MLAv4i32_indexed; 5710 RC = &AArch64::FPR128RegClass; 5711 MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC); 5712 break; 5713 5714 case MachineCombinerPattern::MULSUBv4i16_indexed_OP1: 5715 Opc = AArch64::MLAv4i16_indexed; 5716 RC = &AArch64::FPR64RegClass; 5717 MUL = genFusedMultiplyIdxNeg(MF, MRI, TII, Root, InsInstrs, 5718 InstrIdxForVirtReg, 1, Opc, AArch64::NEGv4i16, 5719 RC); 5720 break; 5721 case MachineCombinerPattern::MULSUBv4i16_indexed_OP2: 5722 Opc = AArch64::MLSv4i16_indexed; 5723 RC = &AArch64::FPR64RegClass; 5724 MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC); 5725 break; 5726 case MachineCombinerPattern::MULSUBv8i16_indexed_OP1: 5727 Opc = AArch64::MLAv8i16_indexed; 5728 RC = &AArch64::FPR128RegClass; 5729 MUL = genFusedMultiplyIdxNeg(MF, MRI, TII, Root, InsInstrs, 5730 InstrIdxForVirtReg, 1, Opc, AArch64::NEGv8i16, 5731 RC); 5732 break; 5733 case MachineCombinerPattern::MULSUBv8i16_indexed_OP2: 5734 Opc = AArch64::MLSv8i16_indexed; 5735 RC = &AArch64::FPR128RegClass; 5736 MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC); 5737 break; 5738 case MachineCombinerPattern::MULSUBv2i32_indexed_OP1: 5739 Opc = AArch64::MLAv2i32_indexed; 5740 RC = &AArch64::FPR64RegClass; 5741 MUL = genFusedMultiplyIdxNeg(MF, MRI, TII, Root, InsInstrs, 5742 InstrIdxForVirtReg, 1, Opc, AArch64::NEGv2i32, 5743 RC); 5744 break; 5745 case MachineCombinerPattern::MULSUBv2i32_indexed_OP2: 5746 Opc = AArch64::MLSv2i32_indexed; 5747 RC = &AArch64::FPR64RegClass; 5748 MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC); 5749 break; 5750 case MachineCombinerPattern::MULSUBv4i32_indexed_OP1: 5751 Opc = AArch64::MLAv4i32_indexed; 5752 RC = &AArch64::FPR128RegClass; 5753 MUL = genFusedMultiplyIdxNeg(MF, 
MRI, TII, Root, InsInstrs, 5754 InstrIdxForVirtReg, 1, Opc, AArch64::NEGv4i32, 5755 RC); 5756 break; 5757 case MachineCombinerPattern::MULSUBv4i32_indexed_OP2: 5758 Opc = AArch64::MLSv4i32_indexed; 5759 RC = &AArch64::FPR128RegClass; 5760 MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC); 5761 break; 5762 5763 // Floating Point Support 5764 case MachineCombinerPattern::FMULADDH_OP1: 5765 Opc = AArch64::FMADDHrrr; 5766 RC = &AArch64::FPR16RegClass; 5767 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC); 5768 break; 5769 case MachineCombinerPattern::FMULADDS_OP1: 5770 Opc = AArch64::FMADDSrrr; 5771 RC = &AArch64::FPR32RegClass; 5772 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC); 5773 break; 5774 case MachineCombinerPattern::FMULADDD_OP1: 5775 Opc = AArch64::FMADDDrrr; 5776 RC = &AArch64::FPR64RegClass; 5777 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC); 5778 break; 5779 5780 case MachineCombinerPattern::FMULADDH_OP2: 5781 Opc = AArch64::FMADDHrrr; 5782 RC = &AArch64::FPR16RegClass; 5783 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC); 5784 break; 5785 case MachineCombinerPattern::FMULADDS_OP2: 5786 Opc = AArch64::FMADDSrrr; 5787 RC = &AArch64::FPR32RegClass; 5788 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC); 5789 break; 5790 case MachineCombinerPattern::FMULADDD_OP2: 5791 Opc = AArch64::FMADDDrrr; 5792 RC = &AArch64::FPR64RegClass; 5793 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC); 5794 break; 5795 5796 case MachineCombinerPattern::FMLAv1i32_indexed_OP1: 5797 Opc = AArch64::FMLAv1i32_indexed; 5798 RC = &AArch64::FPR32RegClass; 5799 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC, 5800 FMAInstKind::Indexed); 5801 break; 5802 case MachineCombinerPattern::FMLAv1i32_indexed_OP2: 5803 Opc = AArch64::FMLAv1i32_indexed; 5804 RC = &AArch64::FPR32RegClass; 5805 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC, 5806 FMAInstKind::Indexed); 5807 break; 5808 5809 case MachineCombinerPattern::FMLAv1i64_indexed_OP1: 5810 Opc = AArch64::FMLAv1i64_indexed; 5811 RC = &AArch64::FPR64RegClass; 5812 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC, 5813 FMAInstKind::Indexed); 5814 break; 5815 case MachineCombinerPattern::FMLAv1i64_indexed_OP2: 5816 Opc = AArch64::FMLAv1i64_indexed; 5817 RC = &AArch64::FPR64RegClass; 5818 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC, 5819 FMAInstKind::Indexed); 5820 break; 5821 5822 case MachineCombinerPattern::FMLAv4i16_indexed_OP1: 5823 RC = &AArch64::FPR64RegClass; 5824 Opc = AArch64::FMLAv4i16_indexed; 5825 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC, 5826 FMAInstKind::Indexed); 5827 break; 5828 case MachineCombinerPattern::FMLAv4f16_OP1: 5829 RC = &AArch64::FPR64RegClass; 5830 Opc = AArch64::FMLAv4f16; 5831 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC, 5832 FMAInstKind::Accumulator); 5833 break; 5834 case MachineCombinerPattern::FMLAv4i16_indexed_OP2: 5835 RC = &AArch64::FPR64RegClass; 5836 Opc = AArch64::FMLAv4i16_indexed; 5837 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC, 5838 FMAInstKind::Indexed); 5839 break; 5840 case MachineCombinerPattern::FMLAv4f16_OP2: 5841 RC = &AArch64::FPR64RegClass; 5842 Opc = AArch64::FMLAv4f16; 5843 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC, 5844 FMAInstKind::Accumulator); 5845 break; 5846 5847 case MachineCombinerPattern::FMLAv2i32_indexed_OP1: 5848 case 
MachineCombinerPattern::FMLAv2f32_OP1: 5849 RC = &AArch64::FPR64RegClass; 5850 if (Pattern == MachineCombinerPattern::FMLAv2i32_indexed_OP1) { 5851 Opc = AArch64::FMLAv2i32_indexed; 5852 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC, 5853 FMAInstKind::Indexed); 5854 } else { 5855 Opc = AArch64::FMLAv2f32; 5856 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC, 5857 FMAInstKind::Accumulator); 5858 } 5859 break; 5860 case MachineCombinerPattern::FMLAv2i32_indexed_OP2: 5861 case MachineCombinerPattern::FMLAv2f32_OP2: 5862 RC = &AArch64::FPR64RegClass; 5863 if (Pattern == MachineCombinerPattern::FMLAv2i32_indexed_OP2) { 5864 Opc = AArch64::FMLAv2i32_indexed; 5865 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC, 5866 FMAInstKind::Indexed); 5867 } else { 5868 Opc = AArch64::FMLAv2f32; 5869 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC, 5870 FMAInstKind::Accumulator); 5871 } 5872 break; 5873 5874 case MachineCombinerPattern::FMLAv8i16_indexed_OP1: 5875 RC = &AArch64::FPR128RegClass; 5876 Opc = AArch64::FMLAv8i16_indexed; 5877 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC, 5878 FMAInstKind::Indexed); 5879 break; 5880 case MachineCombinerPattern::FMLAv8f16_OP1: 5881 RC = &AArch64::FPR128RegClass; 5882 Opc = AArch64::FMLAv8f16; 5883 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC, 5884 FMAInstKind::Accumulator); 5885 break; 5886 case MachineCombinerPattern::FMLAv8i16_indexed_OP2: 5887 RC = &AArch64::FPR128RegClass; 5888 Opc = AArch64::FMLAv8i16_indexed; 5889 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC, 5890 FMAInstKind::Indexed); 5891 break; 5892 case MachineCombinerPattern::FMLAv8f16_OP2: 5893 RC = &AArch64::FPR128RegClass; 5894 Opc = AArch64::FMLAv8f16; 5895 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC, 5896 FMAInstKind::Accumulator); 5897 break; 5898 5899 case MachineCombinerPattern::FMLAv2i64_indexed_OP1: 5900 case MachineCombinerPattern::FMLAv2f64_OP1: 5901 RC = &AArch64::FPR128RegClass; 5902 if (Pattern == MachineCombinerPattern::FMLAv2i64_indexed_OP1) { 5903 Opc = AArch64::FMLAv2i64_indexed; 5904 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC, 5905 FMAInstKind::Indexed); 5906 } else { 5907 Opc = AArch64::FMLAv2f64; 5908 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC, 5909 FMAInstKind::Accumulator); 5910 } 5911 break; 5912 case MachineCombinerPattern::FMLAv2i64_indexed_OP2: 5913 case MachineCombinerPattern::FMLAv2f64_OP2: 5914 RC = &AArch64::FPR128RegClass; 5915 if (Pattern == MachineCombinerPattern::FMLAv2i64_indexed_OP2) { 5916 Opc = AArch64::FMLAv2i64_indexed; 5917 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC, 5918 FMAInstKind::Indexed); 5919 } else { 5920 Opc = AArch64::FMLAv2f64; 5921 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC, 5922 FMAInstKind::Accumulator); 5923 } 5924 break; 5925 5926 case MachineCombinerPattern::FMLAv4i32_indexed_OP1: 5927 case MachineCombinerPattern::FMLAv4f32_OP1: 5928 RC = &AArch64::FPR128RegClass; 5929 if (Pattern == MachineCombinerPattern::FMLAv4i32_indexed_OP1) { 5930 Opc = AArch64::FMLAv4i32_indexed; 5931 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC, 5932 FMAInstKind::Indexed); 5933 } else { 5934 Opc = AArch64::FMLAv4f32; 5935 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC, 5936 FMAInstKind::Accumulator); 5937 } 5938 break; 5939 5940 case MachineCombinerPattern::FMLAv4i32_indexed_OP2: 5941 case 
MachineCombinerPattern::FMLAv4f32_OP2: 5942 RC = &AArch64::FPR128RegClass; 5943 if (Pattern == MachineCombinerPattern::FMLAv4i32_indexed_OP2) { 5944 Opc = AArch64::FMLAv4i32_indexed; 5945 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC, 5946 FMAInstKind::Indexed); 5947 } else { 5948 Opc = AArch64::FMLAv4f32; 5949 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC, 5950 FMAInstKind::Accumulator); 5951 } 5952 break; 5953 5954 case MachineCombinerPattern::FMULSUBH_OP1: 5955 Opc = AArch64::FNMSUBHrrr; 5956 RC = &AArch64::FPR16RegClass; 5957 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC); 5958 break; 5959 case MachineCombinerPattern::FMULSUBS_OP1: 5960 Opc = AArch64::FNMSUBSrrr; 5961 RC = &AArch64::FPR32RegClass; 5962 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC); 5963 break; 5964 case MachineCombinerPattern::FMULSUBD_OP1: 5965 Opc = AArch64::FNMSUBDrrr; 5966 RC = &AArch64::FPR64RegClass; 5967 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC); 5968 break; 5969 5970 case MachineCombinerPattern::FNMULSUBH_OP1: 5971 Opc = AArch64::FNMADDHrrr; 5972 RC = &AArch64::FPR16RegClass; 5973 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC); 5974 break; 5975 case MachineCombinerPattern::FNMULSUBS_OP1: 5976 Opc = AArch64::FNMADDSrrr; 5977 RC = &AArch64::FPR32RegClass; 5978 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC); 5979 break; 5980 case MachineCombinerPattern::FNMULSUBD_OP1: 5981 Opc = AArch64::FNMADDDrrr; 5982 RC = &AArch64::FPR64RegClass; 5983 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC); 5984 break; 5985 5986 case MachineCombinerPattern::FMULSUBH_OP2: 5987 Opc = AArch64::FMSUBHrrr; 5988 RC = &AArch64::FPR16RegClass; 5989 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC); 5990 break; 5991 case MachineCombinerPattern::FMULSUBS_OP2: 5992 Opc = AArch64::FMSUBSrrr; 5993 RC = &AArch64::FPR32RegClass; 5994 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC); 5995 break; 5996 case MachineCombinerPattern::FMULSUBD_OP2: 5997 Opc = AArch64::FMSUBDrrr; 5998 RC = &AArch64::FPR64RegClass; 5999 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC); 6000 break; 6001 6002 case MachineCombinerPattern::FMLSv1i32_indexed_OP2: 6003 Opc = AArch64::FMLSv1i32_indexed; 6004 RC = &AArch64::FPR32RegClass; 6005 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC, 6006 FMAInstKind::Indexed); 6007 break; 6008 6009 case MachineCombinerPattern::FMLSv1i64_indexed_OP2: 6010 Opc = AArch64::FMLSv1i64_indexed; 6011 RC = &AArch64::FPR64RegClass; 6012 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC, 6013 FMAInstKind::Indexed); 6014 break; 6015 6016 case MachineCombinerPattern::FMLSv4f16_OP1: 6017 case MachineCombinerPattern::FMLSv4i16_indexed_OP1: { 6018 RC = &AArch64::FPR64RegClass; 6019 Register NewVR = MRI.createVirtualRegister(RC); 6020 MachineInstrBuilder MIB1 = 6021 BuildMI(MF, Root.getDebugLoc(), TII->get(AArch64::FNEGv4f16), NewVR) 6022 .add(Root.getOperand(2)); 6023 InsInstrs.push_back(MIB1); 6024 InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0)); 6025 if (Pattern == MachineCombinerPattern::FMLSv4f16_OP1) { 6026 Opc = AArch64::FMLAv4f16; 6027 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC, 6028 FMAInstKind::Accumulator, &NewVR); 6029 } else { 6030 Opc = AArch64::FMLAv4i16_indexed; 6031 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC, 6032 FMAInstKind::Indexed, &NewVR); 6033 } 6034 
break; 6035 } 6036 case MachineCombinerPattern::FMLSv4f16_OP2: 6037 RC = &AArch64::FPR64RegClass; 6038 Opc = AArch64::FMLSv4f16; 6039 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC, 6040 FMAInstKind::Accumulator); 6041 break; 6042 case MachineCombinerPattern::FMLSv4i16_indexed_OP2: 6043 RC = &AArch64::FPR64RegClass; 6044 Opc = AArch64::FMLSv4i16_indexed; 6045 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC, 6046 FMAInstKind::Indexed); 6047 break; 6048 6049 case MachineCombinerPattern::FMLSv2f32_OP2: 6050 case MachineCombinerPattern::FMLSv2i32_indexed_OP2: 6051 RC = &AArch64::FPR64RegClass; 6052 if (Pattern == MachineCombinerPattern::FMLSv2i32_indexed_OP2) { 6053 Opc = AArch64::FMLSv2i32_indexed; 6054 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC, 6055 FMAInstKind::Indexed); 6056 } else { 6057 Opc = AArch64::FMLSv2f32; 6058 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC, 6059 FMAInstKind::Accumulator); 6060 } 6061 break; 6062 6063 case MachineCombinerPattern::FMLSv8f16_OP1: 6064 case MachineCombinerPattern::FMLSv8i16_indexed_OP1: { 6065 RC = &AArch64::FPR128RegClass; 6066 Register NewVR = MRI.createVirtualRegister(RC); 6067 MachineInstrBuilder MIB1 = 6068 BuildMI(MF, Root.getDebugLoc(), TII->get(AArch64::FNEGv8f16), NewVR) 6069 .add(Root.getOperand(2)); 6070 InsInstrs.push_back(MIB1); 6071 InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0)); 6072 if (Pattern == MachineCombinerPattern::FMLSv8f16_OP1) { 6073 Opc = AArch64::FMLAv8f16; 6074 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC, 6075 FMAInstKind::Accumulator, &NewVR); 6076 } else { 6077 Opc = AArch64::FMLAv8i16_indexed; 6078 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC, 6079 FMAInstKind::Indexed, &NewVR); 6080 } 6081 break; 6082 } 6083 case MachineCombinerPattern::FMLSv8f16_OP2: 6084 RC = &AArch64::FPR128RegClass; 6085 Opc = AArch64::FMLSv8f16; 6086 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC, 6087 FMAInstKind::Accumulator); 6088 break; 6089 case MachineCombinerPattern::FMLSv8i16_indexed_OP2: 6090 RC = &AArch64::FPR128RegClass; 6091 Opc = AArch64::FMLSv8i16_indexed; 6092 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC, 6093 FMAInstKind::Indexed); 6094 break; 6095 6096 case MachineCombinerPattern::FMLSv2f64_OP2: 6097 case MachineCombinerPattern::FMLSv2i64_indexed_OP2: 6098 RC = &AArch64::FPR128RegClass; 6099 if (Pattern == MachineCombinerPattern::FMLSv2i64_indexed_OP2) { 6100 Opc = AArch64::FMLSv2i64_indexed; 6101 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC, 6102 FMAInstKind::Indexed); 6103 } else { 6104 Opc = AArch64::FMLSv2f64; 6105 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC, 6106 FMAInstKind::Accumulator); 6107 } 6108 break; 6109 6110 case MachineCombinerPattern::FMLSv4f32_OP2: 6111 case MachineCombinerPattern::FMLSv4i32_indexed_OP2: 6112 RC = &AArch64::FPR128RegClass; 6113 if (Pattern == MachineCombinerPattern::FMLSv4i32_indexed_OP2) { 6114 Opc = AArch64::FMLSv4i32_indexed; 6115 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC, 6116 FMAInstKind::Indexed); 6117 } else { 6118 Opc = AArch64::FMLSv4f32; 6119 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC, 6120 FMAInstKind::Accumulator); 6121 } 6122 break; 6123 case MachineCombinerPattern::FMLSv2f32_OP1: 6124 case MachineCombinerPattern::FMLSv2i32_indexed_OP1: { 6125 RC = &AArch64::FPR64RegClass; 6126 Register NewVR = MRI.createVirtualRegister(RC); 6127 
MachineInstrBuilder MIB1 = 6128 BuildMI(MF, Root.getDebugLoc(), TII->get(AArch64::FNEGv2f32), NewVR) 6129 .add(Root.getOperand(2)); 6130 InsInstrs.push_back(MIB1); 6131 InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0)); 6132 if (Pattern == MachineCombinerPattern::FMLSv2i32_indexed_OP1) { 6133 Opc = AArch64::FMLAv2i32_indexed; 6134 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC, 6135 FMAInstKind::Indexed, &NewVR); 6136 } else { 6137 Opc = AArch64::FMLAv2f32; 6138 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC, 6139 FMAInstKind::Accumulator, &NewVR); 6140 } 6141 break; 6142 } 6143 case MachineCombinerPattern::FMLSv4f32_OP1: 6144 case MachineCombinerPattern::FMLSv4i32_indexed_OP1: { 6145 RC = &AArch64::FPR128RegClass; 6146 Register NewVR = MRI.createVirtualRegister(RC); 6147 MachineInstrBuilder MIB1 = 6148 BuildMI(MF, Root.getDebugLoc(), TII->get(AArch64::FNEGv4f32), NewVR) 6149 .add(Root.getOperand(2)); 6150 InsInstrs.push_back(MIB1); 6151 InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0)); 6152 if (Pattern == MachineCombinerPattern::FMLSv4i32_indexed_OP1) { 6153 Opc = AArch64::FMLAv4i32_indexed; 6154 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC, 6155 FMAInstKind::Indexed, &NewVR); 6156 } else { 6157 Opc = AArch64::FMLAv4f32; 6158 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC, 6159 FMAInstKind::Accumulator, &NewVR); 6160 } 6161 break; 6162 } 6163 case MachineCombinerPattern::FMLSv2f64_OP1: 6164 case MachineCombinerPattern::FMLSv2i64_indexed_OP1: { 6165 RC = &AArch64::FPR128RegClass; 6166 Register NewVR = MRI.createVirtualRegister(RC); 6167 MachineInstrBuilder MIB1 = 6168 BuildMI(MF, Root.getDebugLoc(), TII->get(AArch64::FNEGv2f64), NewVR) 6169 .add(Root.getOperand(2)); 6170 InsInstrs.push_back(MIB1); 6171 InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0)); 6172 if (Pattern == MachineCombinerPattern::FMLSv2i64_indexed_OP1) { 6173 Opc = AArch64::FMLAv2i64_indexed; 6174 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC, 6175 FMAInstKind::Indexed, &NewVR); 6176 } else { 6177 Opc = AArch64::FMLAv2f64; 6178 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC, 6179 FMAInstKind::Accumulator, &NewVR); 6180 } 6181 break; 6182 } 6183 case MachineCombinerPattern::FMULv2i32_indexed_OP1: 6184 case MachineCombinerPattern::FMULv2i32_indexed_OP2: { 6185 unsigned IdxDupOp = 6186 (Pattern == MachineCombinerPattern::FMULv2i32_indexed_OP1) ? 1 : 2; 6187 genIndexedMultiply(Root, InsInstrs, IdxDupOp, AArch64::FMULv2i32_indexed, 6188 &AArch64::FPR128RegClass, MRI); 6189 break; 6190 } 6191 case MachineCombinerPattern::FMULv2i64_indexed_OP1: 6192 case MachineCombinerPattern::FMULv2i64_indexed_OP2: { 6193 unsigned IdxDupOp = 6194 (Pattern == MachineCombinerPattern::FMULv2i64_indexed_OP1) ? 1 : 2; 6195 genIndexedMultiply(Root, InsInstrs, IdxDupOp, AArch64::FMULv2i64_indexed, 6196 &AArch64::FPR128RegClass, MRI); 6197 break; 6198 } 6199 case MachineCombinerPattern::FMULv4i16_indexed_OP1: 6200 case MachineCombinerPattern::FMULv4i16_indexed_OP2: { 6201 unsigned IdxDupOp = 6202 (Pattern == MachineCombinerPattern::FMULv4i16_indexed_OP1) ? 1 : 2; 6203 genIndexedMultiply(Root, InsInstrs, IdxDupOp, AArch64::FMULv4i16_indexed, 6204 &AArch64::FPR128_loRegClass, MRI); 6205 break; 6206 } 6207 case MachineCombinerPattern::FMULv4i32_indexed_OP1: 6208 case MachineCombinerPattern::FMULv4i32_indexed_OP2: { 6209 unsigned IdxDupOp = 6210 (Pattern == MachineCombinerPattern::FMULv4i32_indexed_OP1) ? 
        1 : 2;
    genIndexedMultiply(Root, InsInstrs, IdxDupOp, AArch64::FMULv4i32_indexed,
                       &AArch64::FPR128RegClass, MRI);
    break;
  }
  case MachineCombinerPattern::FMULv8i16_indexed_OP1:
  case MachineCombinerPattern::FMULv8i16_indexed_OP2: {
    unsigned IdxDupOp =
        (Pattern == MachineCombinerPattern::FMULv8i16_indexed_OP1) ? 1 : 2;
    genIndexedMultiply(Root, InsInstrs, IdxDupOp, AArch64::FMULv8i16_indexed,
                       &AArch64::FPR128_loRegClass, MRI);
    break;
  }
  } // end switch (Pattern)
  // Record MUL and ADD/SUB for deletion
  if (MUL)
    DelInstrs.push_back(MUL);
  DelInstrs.push_back(&Root);
}

/// Replace csinc-branch sequence by simple conditional branch
///
/// Examples:
/// 1. \code
///   csinc w9, wzr, wzr, <condition code>
///   tbnz w9, #0, 0x44
/// \endcode
/// to
/// \code
///   b.<inverted condition code>
/// \endcode
///
/// 2. \code
///   csinc w9, wzr, wzr, <condition code>
///   tbz w9, #0, 0x44
/// \endcode
/// to
/// \code
///   b.<condition code>
/// \endcode
///
/// Replace compare and branch sequence by TBZ/TBNZ instruction when the
/// compare's constant operand is a power of 2.
///
/// Examples:
/// \code
///   and  w8, w8, #0x400
///   cbnz w8, L1
/// \endcode
/// to
/// \code
///   tbnz w8, #10, L1
/// \endcode
///
/// \param  MI Conditional Branch
/// \return True when the simple conditional branch is generated
///
bool AArch64InstrInfo::optimizeCondBranch(MachineInstr &MI) const {
  bool IsNegativeBranch = false;
  bool IsTestAndBranch = false;
  unsigned TargetBBInMI = 0;
  switch (MI.getOpcode()) {
  default:
    llvm_unreachable("Unknown branch instruction?");
  case AArch64::Bcc:
    return false;
  case AArch64::CBZW:
  case AArch64::CBZX:
    TargetBBInMI = 1;
    break;
  case AArch64::CBNZW:
  case AArch64::CBNZX:
    TargetBBInMI = 1;
    IsNegativeBranch = true;
    break;
  case AArch64::TBZW:
  case AArch64::TBZX:
    TargetBBInMI = 2;
    IsTestAndBranch = true;
    break;
  case AArch64::TBNZW:
  case AArch64::TBNZX:
    TargetBBInMI = 2;
    IsNegativeBranch = true;
    IsTestAndBranch = true;
    break;
  }
  // So we increment a zero register and test for bits other
  // than bit 0? Conservatively bail out in case the verifier
  // missed this case.
  if (IsTestAndBranch && MI.getOperand(1).getImm())
    return false;

  // Find Definition.
  assert(MI.getParent() && "Incomplete machine instruction\n");
  MachineBasicBlock *MBB = MI.getParent();
  MachineFunction *MF = MBB->getParent();
  MachineRegisterInfo *MRI = &MF->getRegInfo();
  Register VReg = MI.getOperand(0).getReg();
  if (!Register::isVirtualRegister(VReg))
    return false;

  MachineInstr *DefMI = MRI->getVRegDef(VReg);

  // Look through COPY instructions to find definition.
  while (DefMI->isCopy()) {
    Register CopyVReg = DefMI->getOperand(1).getReg();
    if (!MRI->hasOneNonDBGUse(CopyVReg))
      return false;
    if (!MRI->hasOneDef(CopyVReg))
      return false;
    DefMI = MRI->getVRegDef(CopyVReg);
  }

  switch (DefMI->getOpcode()) {
  default:
    return false;
  // Fold AND into a TBZ/TBNZ if constant operand is power of 2.
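  // For example, "and w8, w8, #0x400" + "cbnz w8, L1" becomes
  // "tbnz w8, #10, L1" (Log2_64(0x400) == 10), as in the function comment.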
  case AArch64::ANDWri:
  case AArch64::ANDXri: {
    if (IsTestAndBranch)
      return false;
    if (DefMI->getParent() != MBB)
      return false;
    if (!MRI->hasOneNonDBGUse(VReg))
      return false;

    bool Is32Bit = (DefMI->getOpcode() == AArch64::ANDWri);
    uint64_t Mask = AArch64_AM::decodeLogicalImmediate(
        DefMI->getOperand(2).getImm(), Is32Bit ? 32 : 64);
    if (!isPowerOf2_64(Mask))
      return false;

    MachineOperand &MO = DefMI->getOperand(1);
    Register NewReg = MO.getReg();
    if (!Register::isVirtualRegister(NewReg))
      return false;

    assert(!MRI->def_empty(NewReg) && "Register must be defined.");

    MachineBasicBlock &RefToMBB = *MBB;
    MachineBasicBlock *TBB = MI.getOperand(1).getMBB();
    DebugLoc DL = MI.getDebugLoc();
    unsigned Imm = Log2_64(Mask);
    unsigned Opc = (Imm < 32)
                       ? (IsNegativeBranch ? AArch64::TBNZW : AArch64::TBZW)
                       : (IsNegativeBranch ? AArch64::TBNZX : AArch64::TBZX);
    MachineInstr *NewMI = BuildMI(RefToMBB, MI, DL, get(Opc))
                              .addReg(NewReg)
                              .addImm(Imm)
                              .addMBB(TBB);
    // The register now lives on to the new test-and-branch instruction.
    MO.setIsKill(false);

    // For immediates smaller than 32, we need to use the 32-bit
    // variant (W) in all cases: the 64-bit variant cannot encode them.
    // Therefore, if the input register is 64-bit, we need to take the
    // 32-bit sub-part.
    if (!Is32Bit && Imm < 32)
      NewMI->getOperand(0).setSubReg(AArch64::sub_32);
    MI.eraseFromParent();
    return true;
  }
  // Look for CSINC
  case AArch64::CSINCWr:
  case AArch64::CSINCXr: {
    if (!(DefMI->getOperand(1).getReg() == AArch64::WZR &&
          DefMI->getOperand(2).getReg() == AArch64::WZR) &&
        !(DefMI->getOperand(1).getReg() == AArch64::XZR &&
          DefMI->getOperand(2).getReg() == AArch64::XZR))
      return false;

    if (DefMI->findRegisterDefOperandIdx(AArch64::NZCV, true) != -1)
      return false;

    AArch64CC::CondCode CC = (AArch64CC::CondCode)DefMI->getOperand(3).getImm();
    // Convert only when the condition code is not modified between
    // the CSINC and the branch. The CC may be used by other
    // instructions in between.
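    // Only writes to NZCV matter here (AK_Write below); instructions that
    // merely read the flags do not block the transformation.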
6390 if (areCFlagsAccessedBetweenInstrs(DefMI, MI, &getRegisterInfo(), AK_Write)) 6391 return false; 6392 MachineBasicBlock &RefToMBB = *MBB; 6393 MachineBasicBlock *TBB = MI.getOperand(TargetBBInMI).getMBB(); 6394 DebugLoc DL = MI.getDebugLoc(); 6395 if (IsNegativeBranch) 6396 CC = AArch64CC::getInvertedCondCode(CC); 6397 BuildMI(RefToMBB, MI, DL, get(AArch64::Bcc)).addImm(CC).addMBB(TBB); 6398 MI.eraseFromParent(); 6399 return true; 6400 } 6401 } 6402 } 6403 6404 std::pair<unsigned, unsigned> 6405 AArch64InstrInfo::decomposeMachineOperandsTargetFlags(unsigned TF) const { 6406 const unsigned Mask = AArch64II::MO_FRAGMENT; 6407 return std::make_pair(TF & Mask, TF & ~Mask); 6408 } 6409 6410 ArrayRef<std::pair<unsigned, const char *>> 6411 AArch64InstrInfo::getSerializableDirectMachineOperandTargetFlags() const { 6412 using namespace AArch64II; 6413 6414 static const std::pair<unsigned, const char *> TargetFlags[] = { 6415 {MO_PAGE, "aarch64-page"}, {MO_PAGEOFF, "aarch64-pageoff"}, 6416 {MO_G3, "aarch64-g3"}, {MO_G2, "aarch64-g2"}, 6417 {MO_G1, "aarch64-g1"}, {MO_G0, "aarch64-g0"}, 6418 {MO_HI12, "aarch64-hi12"}}; 6419 return makeArrayRef(TargetFlags); 6420 } 6421 6422 ArrayRef<std::pair<unsigned, const char *>> 6423 AArch64InstrInfo::getSerializableBitmaskMachineOperandTargetFlags() const { 6424 using namespace AArch64II; 6425 6426 static const std::pair<unsigned, const char *> TargetFlags[] = { 6427 {MO_COFFSTUB, "aarch64-coffstub"}, 6428 {MO_GOT, "aarch64-got"}, 6429 {MO_NC, "aarch64-nc"}, 6430 {MO_S, "aarch64-s"}, 6431 {MO_TLS, "aarch64-tls"}, 6432 {MO_DLLIMPORT, "aarch64-dllimport"}, 6433 {MO_PREL, "aarch64-prel"}, 6434 {MO_TAGGED, "aarch64-tagged"}}; 6435 return makeArrayRef(TargetFlags); 6436 } 6437 6438 ArrayRef<std::pair<MachineMemOperand::Flags, const char *>> 6439 AArch64InstrInfo::getSerializableMachineMemOperandTargetFlags() const { 6440 static const std::pair<MachineMemOperand::Flags, const char *> TargetFlags[] = 6441 {{MOSuppressPair, "aarch64-suppress-pair"}, 6442 {MOStridedAccess, "aarch64-strided-access"}}; 6443 return makeArrayRef(TargetFlags); 6444 } 6445 6446 /// Constants defining how certain sequences should be outlined. 6447 /// This encompasses how an outlined function should be called, and what kind of 6448 /// frame should be emitted for that outlined function. 6449 /// 6450 /// \p MachineOutlinerDefault implies that the function should be called with 6451 /// a save and restore of LR to the stack. 6452 /// 6453 /// That is, 6454 /// 6455 /// I1 Save LR OUTLINED_FUNCTION: 6456 /// I2 --> BL OUTLINED_FUNCTION I1 6457 /// I3 Restore LR I2 6458 /// I3 6459 /// RET 6460 /// 6461 /// * Call construction overhead: 3 (save + BL + restore) 6462 /// * Frame construction overhead: 1 (ret) 6463 /// * Requires stack fixups? Yes 6464 /// 6465 /// \p MachineOutlinerTailCall implies that the function is being created from 6466 /// a sequence of instructions ending in a return. 6467 /// 6468 /// That is, 6469 /// 6470 /// I1 OUTLINED_FUNCTION: 6471 /// I2 --> B OUTLINED_FUNCTION I1 6472 /// RET I2 6473 /// RET 6474 /// 6475 /// * Call construction overhead: 1 (B) 6476 /// * Frame construction overhead: 0 (Return included in sequence) 6477 /// * Requires stack fixups? No 6478 /// 6479 /// \p MachineOutlinerNoLRSave implies that the function should be called using 6480 /// a BL instruction, but doesn't require LR to be saved and restored. This 6481 /// happens when LR is known to be dead. 
6482 /// 6483 /// That is, 6484 /// 6485 /// I1 OUTLINED_FUNCTION: 6486 /// I2 --> BL OUTLINED_FUNCTION I1 6487 /// I3 I2 6488 /// I3 6489 /// RET 6490 /// 6491 /// * Call construction overhead: 1 (BL) 6492 /// * Frame construction overhead: 1 (RET) 6493 /// * Requires stack fixups? No 6494 /// 6495 /// \p MachineOutlinerThunk implies that the function is being created from 6496 /// a sequence of instructions ending in a call. The outlined function is 6497 /// called with a BL instruction, and the outlined function tail-calls the 6498 /// original call destination. 6499 /// 6500 /// That is, 6501 /// 6502 /// I1 OUTLINED_FUNCTION: 6503 /// I2 --> BL OUTLINED_FUNCTION I1 6504 /// BL f I2 6505 /// B f 6506 /// * Call construction overhead: 1 (BL) 6507 /// * Frame construction overhead: 0 6508 /// * Requires stack fixups? No 6509 /// 6510 /// \p MachineOutlinerRegSave implies that the function should be called with a 6511 /// save and restore of LR to an available register. This allows us to avoid 6512 /// stack fixups. Note that this outlining variant is compatible with the 6513 /// NoLRSave case. 6514 /// 6515 /// That is, 6516 /// 6517 /// I1 Save LR OUTLINED_FUNCTION: 6518 /// I2 --> BL OUTLINED_FUNCTION I1 6519 /// I3 Restore LR I2 6520 /// I3 6521 /// RET 6522 /// 6523 /// * Call construction overhead: 3 (save + BL + restore) 6524 /// * Frame construction overhead: 1 (ret) 6525 /// * Requires stack fixups? No 6526 enum MachineOutlinerClass { 6527 MachineOutlinerDefault, /// Emit a save, restore, call, and return. 6528 MachineOutlinerTailCall, /// Only emit a branch. 6529 MachineOutlinerNoLRSave, /// Emit a call and return. 6530 MachineOutlinerThunk, /// Emit a call and tail-call. 6531 MachineOutlinerRegSave /// Same as default, but save to a register. 6532 }; 6533 6534 enum MachineOutlinerMBBFlags { 6535 LRUnavailableSomewhere = 0x2, 6536 HasCalls = 0x4, 6537 UnsafeRegsDead = 0x8 6538 }; 6539 6540 unsigned 6541 AArch64InstrInfo::findRegisterToSaveLRTo(const outliner::Candidate &C) const { 6542 assert(C.LRUWasSet && "LRU wasn't set?"); 6543 MachineFunction *MF = C.getMF(); 6544 const AArch64RegisterInfo *ARI = static_cast<const AArch64RegisterInfo *>( 6545 MF->getSubtarget().getRegisterInfo()); 6546 6547 // Check if there is an available register across the sequence that we can 6548 // use. 6549 for (unsigned Reg : AArch64::GPR64RegClass) { 6550 if (!ARI->isReservedReg(*MF, Reg) && 6551 Reg != AArch64::LR && // LR is not reserved, but don't use it. 6552 Reg != AArch64::X16 && // X16 is not guaranteed to be preserved. 6553 Reg != AArch64::X17 && // Ditto for X17. 6554 C.LRU.available(Reg) && C.UsedInSequence.available(Reg)) 6555 return Reg; 6556 } 6557 6558 // No suitable register. Return 0. 
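  // A return value of 0 means no register is available; callers are expected
  // to fall back to saving LR on the stack in that case.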
6559   return 0u;
6560 }
6561 
6562 static bool
6563 outliningCandidatesSigningScopeConsensus(const outliner::Candidate &a,
6564                                          const outliner::Candidate &b) {
6565   const auto &MFIa = a.getMF()->getInfo<AArch64FunctionInfo>();
6566   const auto &MFIb = b.getMF()->getInfo<AArch64FunctionInfo>();
6567 
6568   return MFIa->shouldSignReturnAddress(false) == MFIb->shouldSignReturnAddress(false) &&
6569          MFIa->shouldSignReturnAddress(true) == MFIb->shouldSignReturnAddress(true);
6570 }
6571 
6572 static bool
6573 outliningCandidatesSigningKeyConsensus(const outliner::Candidate &a,
6574                                        const outliner::Candidate &b) {
6575   const auto &MFIa = a.getMF()->getInfo<AArch64FunctionInfo>();
6576   const auto &MFIb = b.getMF()->getInfo<AArch64FunctionInfo>();
6577 
6578   return MFIa->shouldSignWithBKey() == MFIb->shouldSignWithBKey();
6579 }
6580 
6581 static bool outliningCandidatesV8_3OpsConsensus(const outliner::Candidate &a,
6582                                                 const outliner::Candidate &b) {
6583   const AArch64Subtarget &SubtargetA =
6584       a.getMF()->getSubtarget<AArch64Subtarget>();
6585   const AArch64Subtarget &SubtargetB =
6586       b.getMF()->getSubtarget<AArch64Subtarget>();
6587   return SubtargetA.hasV8_3aOps() == SubtargetB.hasV8_3aOps();
6588 }
6589 
6590 outliner::OutlinedFunction AArch64InstrInfo::getOutliningCandidateInfo(
6591     std::vector<outliner::Candidate> &RepeatedSequenceLocs) const {
6592   outliner::Candidate &FirstCand = RepeatedSequenceLocs[0];
6593   unsigned SequenceSize =
6594       std::accumulate(FirstCand.front(), std::next(FirstCand.back()), 0,
6595                       [this](unsigned Sum, const MachineInstr &MI) {
6596                         return Sum + getInstSizeInBytes(MI);
6597                       });
6598   unsigned NumBytesToCreateFrame = 0;
6599 
6600   // We only allow outlining for functions having exactly matching return
6601   // address signing attributes, i.e., all share the same value for the
6602   // attribute "sign-return-address" and all share the same type of key they
6603   // are signed with.
6604   // Additionally, we require all functions to either simultaneously support
6605   // v8.3a features or not. Otherwise an outlined function could get signed
6606   // using dedicated v8.3 instructions, and a call from a function that doesn't
6607   // support v8.3 instructions would therefore be invalid.
6608   if (std::adjacent_find(
6609           RepeatedSequenceLocs.begin(), RepeatedSequenceLocs.end(),
6610           [](const outliner::Candidate &a, const outliner::Candidate &b) {
6611             // Return true if a and b are non-equal w.r.t. return address
6612             // signing or support of v8.3a features.
6613             if (outliningCandidatesSigningScopeConsensus(a, b) &&
6614                 outliningCandidatesSigningKeyConsensus(a, b) &&
6615                 outliningCandidatesV8_3OpsConsensus(a, b)) {
6616               return false;
6617             }
6618             return true;
6619           }) != RepeatedSequenceLocs.end()) {
6620     return outliner::OutlinedFunction();
6621   }
6622 
6623   // Since at this point all candidates agree on their return address signing,
6624   // picking just one is fine. If the candidate functions potentially sign their
6625   // return addresses, the outlined function should do the same. Note that in
6626   // the case of "sign-return-address"="non-leaf" this is an assumption: it is
6627   // not certain that the outlined function will have to sign its return
6628   // address, but that decision is made later, when the decision to outline
6629   // has already been made.
6630   // The same holds for the number of additional instructions we need: on
6631   // v8.3a RET can be replaced by RETAA/RETAB and no AUT instruction is
6632   // necessary. However, at this point we don't know if the outlined function
6633   // will have a RET instruction, so we assume the worst.
6634   const TargetRegisterInfo &TRI = getRegisterInfo();
6635   if (FirstCand.getMF()
6636           ->getInfo<AArch64FunctionInfo>()
6637           ->shouldSignReturnAddress(true)) {
6638     // One PAC and one AUT instruction.
6639     NumBytesToCreateFrame += 8;
6640 
6641     // We have to check if sp-modifying instructions would get outlined.
6642     // If so, we only allow outlining if sp is unchanged overall, so matching
6643     // sub and add instructions are okay to outline; all other sp modifications
6644     // are not.
6645     auto hasIllegalSPModification = [&TRI](outliner::Candidate &C) {
6646       int SPValue = 0;
6647       MachineBasicBlock::iterator MBBI = C.front();
6648       for (;;) {
6649         if (MBBI->modifiesRegister(AArch64::SP, &TRI)) {
6650           switch (MBBI->getOpcode()) {
6651           case AArch64::ADDXri:
6652           case AArch64::ADDWri:
6653             assert(MBBI->getNumOperands() == 4 && "Wrong number of operands");
6654             assert(MBBI->getOperand(2).isImm() &&
6655                    "Expected operand to be immediate");
6656             assert(MBBI->getOperand(1).isReg() &&
6657                    "Expected operand to be a register");
6658             // Check if the add just increments sp. If so, we search for
6659             // matching sub instructions that decrement sp. If not, the
6660             // modification is illegal.
6661             if (MBBI->getOperand(1).getReg() == AArch64::SP)
6662               SPValue += MBBI->getOperand(2).getImm();
6663             else
6664               return true;
6665             break;
6666           case AArch64::SUBXri:
6667           case AArch64::SUBWri:
6668             assert(MBBI->getNumOperands() == 4 && "Wrong number of operands");
6669             assert(MBBI->getOperand(2).isImm() &&
6670                    "Expected operand to be immediate");
6671             assert(MBBI->getOperand(1).isReg() &&
6672                    "Expected operand to be a register");
6673             // Check if the sub just decrements sp. If so, we search for
6674             // matching add instructions that increment sp. If not, the
6675             // modification is illegal.
6676             if (MBBI->getOperand(1).getReg() == AArch64::SP)
6677               SPValue -= MBBI->getOperand(2).getImm();
6678             else
6679               return true;
6680             break;
6681           default:
6682             return true;
6683           }
6684         }
6685         if (MBBI == C.back())
6686           break;
6687         ++MBBI;
6688       }
6689       if (SPValue)
6690         return true;
6691       return false;
6692     };
6693     // Remove candidates with illegal stack modifying instructions.
6694     llvm::erase_if(RepeatedSequenceLocs, hasIllegalSPModification);
6695 
6696     // If the sequence doesn't have enough candidates left, then we're done.
6697     if (RepeatedSequenceLocs.size() < 2)
6698       return outliner::OutlinedFunction();
6699   }
6700 
6701   // Properties about candidate MBBs that hold for all of them.
6702   unsigned FlagsSetInAll = 0xF;
6703 
6704   // Compute liveness information for each candidate, and set FlagsSetInAll.
6705   std::for_each(RepeatedSequenceLocs.begin(), RepeatedSequenceLocs.end(),
6706                 [&FlagsSetInAll](outliner::Candidate &C) {
6707                   FlagsSetInAll &= C.Flags;
6708                 });
6709 
6710   // According to the AArch64 Procedure Call Standard, the following are
6711   // undefined on entry/exit from a function call:
6712   //
6713   // * Registers x16, x17 (and thus w16, w17)
6714   // * Condition codes (and thus the NZCV register)
6715   //
6716   // Because of this, we can't outline any sequence of instructions where one
6717   // of these registers is live into/across it. Thus, we need to delete those
6718   // candidates.
6721   auto CantGuaranteeValueAcrossCall = [&TRI](outliner::Candidate &C) {
6722     // If the unsafe registers in this block are all dead, then we don't need
6723     // to compute liveness here.
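    // (UnsafeRegsDead is set by isMBBSafeToOutlineFrom when W16, W17 and NZCV
    // are dead throughout the candidate's block, so the per-candidate liveness
    // query below can be skipped.)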
6724     if (C.Flags & UnsafeRegsDead)
6725       return false;
6726     C.initLRU(TRI);
6727     LiveRegUnits LRU = C.LRU;
6728     return (!LRU.available(AArch64::W16) || !LRU.available(AArch64::W17) ||
6729             !LRU.available(AArch64::NZCV));
6730   };
6731 
6732   // Are there any candidates where those registers are live?
6733   if (!(FlagsSetInAll & UnsafeRegsDead)) {
6734     // Erase every candidate that violates the restrictions above. (It could be
6735     // true that we have viable candidates, so it's not worth bailing out in
6736     // the case that, say, 1 out of 20 candidates violates the restrictions.)
6737     llvm::erase_if(RepeatedSequenceLocs, CantGuaranteeValueAcrossCall);
6738 
6739     // If the sequence doesn't have enough candidates left, then we're done.
6740     if (RepeatedSequenceLocs.size() < 2)
6741       return outliner::OutlinedFunction();
6742   }
6743 
6744   // At this point, we have only "safe" candidates to outline. Figure out
6745   // frame + call instruction information.
6746 
6747   unsigned LastInstrOpcode = RepeatedSequenceLocs[0].back()->getOpcode();
6748 
6749   // Helper lambda which sets call information for every candidate.
6750   auto SetCandidateCallInfo =
6751       [&RepeatedSequenceLocs](unsigned CallID, unsigned NumBytesForCall) {
6752         for (outliner::Candidate &C : RepeatedSequenceLocs)
6753           C.setCallInfo(CallID, NumBytesForCall);
6754       };
6755 
6756   unsigned FrameID = MachineOutlinerDefault;
6757   NumBytesToCreateFrame += 4;
6758 
6759   bool HasBTI = any_of(RepeatedSequenceLocs, [](outliner::Candidate &C) {
6760     return C.getMF()->getInfo<AArch64FunctionInfo>()->branchTargetEnforcement();
6761   });
6762 
6763   // We check to see if CFI instructions are present, and if they are
6764   // we find the number of CFI instructions in the candidates.
6765   unsigned CFICount = 0;
6766   MachineBasicBlock::iterator MBBI = RepeatedSequenceLocs[0].front();
6767   for (unsigned Loc = RepeatedSequenceLocs[0].getStartIdx();
6768        Loc < RepeatedSequenceLocs[0].getEndIdx() + 1; Loc++) {
6769     if (MBBI->isCFIInstruction())
6770       CFICount++;
6771     MBBI++;
6772   }
6773 
6774   // We compare the number of found CFI instructions to the number of CFI
6775   // instructions in the parent function for each candidate. We must check this
6776   // since if we outline one of the CFI instructions in a function, we have to
6777   // outline them all for correctness. If we do not, the address offsets will be
6778   // incorrect between the two sections of the program.
6779   for (outliner::Candidate &C : RepeatedSequenceLocs) {
6780     std::vector<MCCFIInstruction> CFIInstructions =
6781         C.getMF()->getFrameInstructions();
6782 
6783     if (CFICount > 0 && CFICount != CFIInstructions.size())
6784       return outliner::OutlinedFunction();
6785   }
6786 
6787   // Returns true if an instruction is safe to fix up, false otherwise.
6788   auto IsSafeToFixup = [this, &TRI](MachineInstr &MI) {
6789     if (MI.isCall())
6790       return true;
6791 
6792     if (!MI.modifiesRegister(AArch64::SP, &TRI) &&
6793         !MI.readsRegister(AArch64::SP, &TRI))
6794       return true;
6795 
6796     // Any modification of SP will break our code to save/restore LR.
6797     // FIXME: We could handle some instructions which add a constant
6798     // offset to SP, with a bit more work.
6799     if (MI.modifiesRegister(AArch64::SP, &TRI))
6800       return false;
6801 
6802     // At this point, we have a stack instruction that we might need to
6803     // fix up. We'll handle it if it's a load or store.
6804     if (MI.mayLoadOrStore()) {
6805       const MachineOperand *Base; // Filled with the base operand of MI.
6806       int64_t Offset;             // Filled with the offset of MI.
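      // Filled with true if the offset is scalable (e.g. scales with the SVE
      // vector length) rather than a plain byte count.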
6807       bool OffsetIsScalable;
6808 
6809       // Does it allow us to offset the base operand and is the base the
6810       // register SP?
6811       if (!getMemOperandWithOffset(MI, Base, Offset, OffsetIsScalable, &TRI) ||
6812           !Base->isReg() || Base->getReg() != AArch64::SP)
6813         return false;
6814 
6815       // Fix-up code below assumes bytes.
6816       if (OffsetIsScalable)
6817         return false;
6818 
6819       // Find the minimum/maximum offset for this instruction and check
6820       // if fixing it up would be in range.
6821       int64_t MinOffset,
6822           MaxOffset; // Unscaled offsets for the instruction.
6823       TypeSize Scale(0U, false); // The scale to multiply the offsets by.
6824       unsigned DummyWidth;
6825       getMemOpInfo(MI.getOpcode(), Scale, DummyWidth, MinOffset, MaxOffset);
6826 
6827       Offset += 16; // Update the offset to what it would be if we outlined.
6828       if (Offset < MinOffset * (int64_t)Scale.getFixedSize() ||
6829           Offset > MaxOffset * (int64_t)Scale.getFixedSize())
6830         return false;
6831 
6832       // It's in range, so we can outline it.
6833       return true;
6834     }
6835 
6836     // FIXME: Add handling for instructions like "add x0, sp, #8".
6837 
6838     // We can't fix it up, so don't outline it.
6839     return false;
6840   };
6841 
6842   // True if it's possible to fix up each stack instruction in this sequence.
6843   // Important for frames/call variants that modify the stack.
6844   bool AllStackInstrsSafe = std::all_of(
6845       FirstCand.front(), std::next(FirstCand.back()), IsSafeToFixup);
6846 
6847   // If the last instruction in any candidate is a terminator, then we should
6848   // tail call all of the candidates.
6849   if (RepeatedSequenceLocs[0].back()->isTerminator()) {
6850     FrameID = MachineOutlinerTailCall;
6851     NumBytesToCreateFrame = 0;
6852     SetCandidateCallInfo(MachineOutlinerTailCall, 4);
6853   }
6854 
6855   else if (LastInstrOpcode == AArch64::BL ||
6856            ((LastInstrOpcode == AArch64::BLR ||
6857              LastInstrOpcode == AArch64::BLRNoIP) &&
6858             !HasBTI)) {
6859     // FIXME: Do we need to check if the code after this uses the value of LR?
6860     FrameID = MachineOutlinerThunk;
6861     NumBytesToCreateFrame = 0;
6862     SetCandidateCallInfo(MachineOutlinerThunk, 4);
6863   }
6864 
6865   else {
6866     // We need to decide how to emit calls + frames. We can always emit the same
6867     // frame if we don't need to save to the stack. If we have to save to the
6868     // stack, then we need a different frame.
6869     unsigned NumBytesNoStackCalls = 0;
6870     std::vector<outliner::Candidate> CandidatesWithoutStackFixups;
6871 
6872     // Check if we have to save LR.
6873     for (outliner::Candidate &C : RepeatedSequenceLocs) {
6874       C.initLRU(TRI);
6875 
6876       // If we have a noreturn caller, then we're going to be conservative and
6877       // say that we have to save LR. If we don't have a ret at the end of the
6878       // block, then we can't reason about liveness accurately.
6879       //
6880       // FIXME: We can probably do better than always disabling this in
6881       // noreturn functions by fixing up the liveness info.
6882       bool IsNoReturn =
6883           C.getMF()->getFunction().hasFnAttribute(Attribute::NoReturn);
6884 
6885       // Is LR available? If so, we don't need a save.
6886       if (C.LRU.available(AArch64::LR) && !IsNoReturn) {
6887         NumBytesNoStackCalls += 4;
6888         C.setCallInfo(MachineOutlinerNoLRSave, 4);
6889         CandidatesWithoutStackFixups.push_back(C);
6890       }
6891 
6892       // Is an unused register available? If so, we won't modify the stack, so
6893       // we can outline with the same frame type as those that don't save LR.
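      // (The 12 bytes below account for the three instructions this variant
      // needs at the call site: save LR to a register, BL, restore LR.)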
6894       else if (findRegisterToSaveLRTo(C)) {
6895         NumBytesNoStackCalls += 12;
6896         C.setCallInfo(MachineOutlinerRegSave, 12);
6897         CandidatesWithoutStackFixups.push_back(C);
6898       }
6899 
6900       // Is SP used in the sequence at all? If not, we don't have to modify
6901       // the stack, so we are guaranteed to get the same frame.
6902       else if (C.UsedInSequence.available(AArch64::SP)) {
6903         NumBytesNoStackCalls += 12;
6904         C.setCallInfo(MachineOutlinerDefault, 12);
6905         CandidatesWithoutStackFixups.push_back(C);
6906       }
6907 
6908       // If we outline this, we need to modify the stack. Pretend we don't
6909       // outline this by saving all of its bytes.
6910       else {
6911         NumBytesNoStackCalls += SequenceSize;
6912       }
6913     }
6914 
6915     // If there are no places where we have to save LR, then note that we
6916     // don't have to update the stack. Otherwise, give every candidate the
6917     // default call type, as long as it's safe to do so.
6918     if (!AllStackInstrsSafe ||
6919         NumBytesNoStackCalls <= RepeatedSequenceLocs.size() * 12) {
6920       RepeatedSequenceLocs = CandidatesWithoutStackFixups;
6921       FrameID = MachineOutlinerNoLRSave;
6922     } else {
6923       SetCandidateCallInfo(MachineOutlinerDefault, 12);
6924 
6925       // Bugzilla ID: 46767
6926       // TODO: Check if fixing up the stack more than once is safe so we can
6927       // outline these.
6928       //
6929       // An outline resulting in a caller that requires stack fixups at the
6930       // callsite to a callee that also requires stack fixups can happen when
6931       // there are no available registers at the candidate callsite for a
6932       // candidate that itself also has calls.
6933       //
6934       // In other words, if function_containing_sequence in the following pseudo
6935       // assembly requires that we save LR at the point of the call, but there
6936       // are no available registers, then we save using SP, and as a result the
6937       // SP offsets require stack fixups by multiples of 16.
6938       //
6939       // function_containing_sequence:
6940       //   ...
6941       //   save LR to SP <- Requires stack instr fixups in OUTLINED_FUNCTION_N
6942       //   call OUTLINED_FUNCTION_N
6943       //   restore LR from SP
6944       //   ...
6945       //
6946       // OUTLINED_FUNCTION_N:
6947       //   save LR to SP <- Requires stack instr fixups in OUTLINED_FUNCTION_N
6948       //   ...
6949       //   bl foo
6950       //   restore LR from SP
6951       //   ret
6952       //
6953       // Because the code to handle more than one stack fixup does not
6954       // currently have the proper checks for legality, these cases will assert
6955       // in the AArch64 MachineOutliner. This is because the code to do this
6956       // needs more hardening, testing, better checks that generated code is
6957       // legal, etc., and because it is only verified to handle a single pass of
6958       // stack fixup.
6959       //
6960       // The assert happens in AArch64InstrInfo::buildOutlinedFrame to catch
6961       // these cases until they are known to be handled. Bugzilla 46767 is
6962       // referenced in comments at the assert site.
6963       //
6964       // To avoid asserting (or generating non-legal code on no-assert builds)
6965       // we remove all candidates which would need more than one stack fixup by
6966       // pruning the cases where the candidate has calls while also having no
6967       // available LR and having no available general purpose registers to copy
6968       // LR to (i.e. one extra stack save/restore).
6969 // 6970 if (FlagsSetInAll & MachineOutlinerMBBFlags::HasCalls) { 6971 erase_if(RepeatedSequenceLocs, [this](outliner::Candidate &C) { 6972 return (std::any_of( 6973 C.front(), std::next(C.back()), 6974 [](const MachineInstr &MI) { return MI.isCall(); })) && 6975 (!C.LRU.available(AArch64::LR) || !findRegisterToSaveLRTo(C)); 6976 }); 6977 } 6978 } 6979 6980 // If we dropped all of the candidates, bail out here. 6981 if (RepeatedSequenceLocs.size() < 2) { 6982 RepeatedSequenceLocs.clear(); 6983 return outliner::OutlinedFunction(); 6984 } 6985 } 6986 6987 // Does every candidate's MBB contain a call? If so, then we might have a call 6988 // in the range. 6989 if (FlagsSetInAll & MachineOutlinerMBBFlags::HasCalls) { 6990 // Check if the range contains a call. These require a save + restore of the 6991 // link register. 6992 bool ModStackToSaveLR = false; 6993 if (std::any_of(FirstCand.front(), FirstCand.back(), 6994 [](const MachineInstr &MI) { return MI.isCall(); })) 6995 ModStackToSaveLR = true; 6996 6997 // Handle the last instruction separately. If this is a tail call, then the 6998 // last instruction is a call. We don't want to save + restore in this case. 6999 // However, it could be possible that the last instruction is a call without 7000 // it being valid to tail call this sequence. We should consider this as 7001 // well. 7002 else if (FrameID != MachineOutlinerThunk && 7003 FrameID != MachineOutlinerTailCall && FirstCand.back()->isCall()) 7004 ModStackToSaveLR = true; 7005 7006 if (ModStackToSaveLR) { 7007 // We can't fix up the stack. Bail out. 7008 if (!AllStackInstrsSafe) { 7009 RepeatedSequenceLocs.clear(); 7010 return outliner::OutlinedFunction(); 7011 } 7012 7013 // Save + restore LR. 7014 NumBytesToCreateFrame += 8; 7015 } 7016 } 7017 7018 // If we have CFI instructions, we can only outline if the outlined section 7019 // can be a tail call 7020 if (FrameID != MachineOutlinerTailCall && CFICount > 0) 7021 return outliner::OutlinedFunction(); 7022 7023 return outliner::OutlinedFunction(RepeatedSequenceLocs, SequenceSize, 7024 NumBytesToCreateFrame, FrameID); 7025 } 7026 7027 bool AArch64InstrInfo::isFunctionSafeToOutlineFrom( 7028 MachineFunction &MF, bool OutlineFromLinkOnceODRs) const { 7029 const Function &F = MF.getFunction(); 7030 7031 // Can F be deduplicated by the linker? If it can, don't outline from it. 7032 if (!OutlineFromLinkOnceODRs && F.hasLinkOnceODRLinkage()) 7033 return false; 7034 7035 // Don't outline from functions with section markings; the program could 7036 // expect that all the code is in the named section. 7037 // FIXME: Allow outlining from multiple functions with the same section 7038 // marking. 7039 if (F.hasSection()) 7040 return false; 7041 7042 // Outlining from functions with redzones is unsafe since the outliner may 7043 // modify the stack. Check if hasRedZone is true or unknown; if yes, don't 7044 // outline from it. 7045 AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>(); 7046 if (!AFI || AFI->hasRedZone().getValueOr(true)) 7047 return false; 7048 7049 // FIXME: Teach the outliner to generate/handle Windows unwind info. 7050 if (MF.getTarget().getMCAsmInfo()->usesWindowsCFI()) 7051 return false; 7052 7053 // It's safe to outline from MF. 7054 return true; 7055 } 7056 7057 bool AArch64InstrInfo::isMBBSafeToOutlineFrom(MachineBasicBlock &MBB, 7058 unsigned &Flags) const { 7059 if (!TargetInstrInfo::isMBBSafeToOutlineFrom(MBB, Flags)) 7060 return false; 7061 // Check if LR is available through all of the MBB. 
If it's not, then set 7062 // a flag. 7063 assert(MBB.getParent()->getRegInfo().tracksLiveness() && 7064 "Suitable Machine Function for outlining must track liveness"); 7065 LiveRegUnits LRU(getRegisterInfo()); 7066 7067 std::for_each(MBB.rbegin(), MBB.rend(), 7068 [&LRU](MachineInstr &MI) { LRU.accumulate(MI); }); 7069 7070 // Check if each of the unsafe registers are available... 7071 bool W16AvailableInBlock = LRU.available(AArch64::W16); 7072 bool W17AvailableInBlock = LRU.available(AArch64::W17); 7073 bool NZCVAvailableInBlock = LRU.available(AArch64::NZCV); 7074 7075 // If all of these are dead (and not live out), we know we don't have to check 7076 // them later. 7077 if (W16AvailableInBlock && W17AvailableInBlock && NZCVAvailableInBlock) 7078 Flags |= MachineOutlinerMBBFlags::UnsafeRegsDead; 7079 7080 // Now, add the live outs to the set. 7081 LRU.addLiveOuts(MBB); 7082 7083 // If any of these registers is available in the MBB, but also a live out of 7084 // the block, then we know outlining is unsafe. 7085 if (W16AvailableInBlock && !LRU.available(AArch64::W16)) 7086 return false; 7087 if (W17AvailableInBlock && !LRU.available(AArch64::W17)) 7088 return false; 7089 if (NZCVAvailableInBlock && !LRU.available(AArch64::NZCV)) 7090 return false; 7091 7092 // Check if there's a call inside this MachineBasicBlock. If there is, then 7093 // set a flag. 7094 if (any_of(MBB, [](MachineInstr &MI) { return MI.isCall(); })) 7095 Flags |= MachineOutlinerMBBFlags::HasCalls; 7096 7097 MachineFunction *MF = MBB.getParent(); 7098 7099 // In the event that we outline, we may have to save LR. If there is an 7100 // available register in the MBB, then we'll always save LR there. Check if 7101 // this is true. 7102 bool CanSaveLR = false; 7103 const AArch64RegisterInfo *ARI = static_cast<const AArch64RegisterInfo *>( 7104 MF->getSubtarget().getRegisterInfo()); 7105 7106 // Check if there is an available register across the sequence that we can 7107 // use. 7108 for (unsigned Reg : AArch64::GPR64RegClass) { 7109 if (!ARI->isReservedReg(*MF, Reg) && Reg != AArch64::LR && 7110 Reg != AArch64::X16 && Reg != AArch64::X17 && LRU.available(Reg)) { 7111 CanSaveLR = true; 7112 break; 7113 } 7114 } 7115 7116 // Check if we have a register we can save LR to, and if LR was used 7117 // somewhere. If both of those things are true, then we need to evaluate the 7118 // safety of outlining stack instructions later. 7119 if (!CanSaveLR && !LRU.available(AArch64::LR)) 7120 Flags |= MachineOutlinerMBBFlags::LRUnavailableSomewhere; 7121 7122 return true; 7123 } 7124 7125 outliner::InstrType 7126 AArch64InstrInfo::getOutliningType(MachineBasicBlock::iterator &MIT, 7127 unsigned Flags) const { 7128 MachineInstr &MI = *MIT; 7129 MachineBasicBlock *MBB = MI.getParent(); 7130 MachineFunction *MF = MBB->getParent(); 7131 AArch64FunctionInfo *FuncInfo = MF->getInfo<AArch64FunctionInfo>(); 7132 7133 // Don't outline anything used for return address signing. The outlined 7134 // function will get signed later if needed 7135 switch (MI.getOpcode()) { 7136 case AArch64::PACIASP: 7137 case AArch64::PACIBSP: 7138 case AArch64::AUTIASP: 7139 case AArch64::AUTIBSP: 7140 case AArch64::RETAA: 7141 case AArch64::RETAB: 7142 case AArch64::EMITBKEY: 7143 return outliner::InstrType::Illegal; 7144 } 7145 7146 // Don't outline LOHs. 7147 if (FuncInfo->getLOHRelated().count(&MI)) 7148 return outliner::InstrType::Illegal; 7149 7150 // We can only outline these if we will tail call the outlined function, or 7151 // fix up the CFI offsets. 
Currently, CFI instructions are outlined only if 7152 // in a tail call. 7153 // 7154 // FIXME: If the proper fixups for the offset are implemented, this should be 7155 // possible. 7156 if (MI.isCFIInstruction()) 7157 return outliner::InstrType::Legal; 7158 7159 // Don't allow debug values to impact outlining type. 7160 if (MI.isDebugInstr() || MI.isIndirectDebugValue()) 7161 return outliner::InstrType::Invisible; 7162 7163 // At this point, KILL instructions don't really tell us much so we can go 7164 // ahead and skip over them. 7165 if (MI.isKill()) 7166 return outliner::InstrType::Invisible; 7167 7168 // Is this a terminator for a basic block? 7169 if (MI.isTerminator()) { 7170 7171 // Is this the end of a function? 7172 if (MI.getParent()->succ_empty()) 7173 return outliner::InstrType::Legal; 7174 7175 // It's not, so don't outline it. 7176 return outliner::InstrType::Illegal; 7177 } 7178 7179 // Make sure none of the operands are un-outlinable. 7180 for (const MachineOperand &MOP : MI.operands()) { 7181 if (MOP.isCPI() || MOP.isJTI() || MOP.isCFIIndex() || MOP.isFI() || 7182 MOP.isTargetIndex()) 7183 return outliner::InstrType::Illegal; 7184 7185 // If it uses LR or W30 explicitly, then don't touch it. 7186 if (MOP.isReg() && !MOP.isImplicit() && 7187 (MOP.getReg() == AArch64::LR || MOP.getReg() == AArch64::W30)) 7188 return outliner::InstrType::Illegal; 7189 } 7190 7191 // Special cases for instructions that can always be outlined, but will fail 7192 // the later tests. e.g, ADRPs, which are PC-relative use LR, but can always 7193 // be outlined because they don't require a *specific* value to be in LR. 7194 if (MI.getOpcode() == AArch64::ADRP) 7195 return outliner::InstrType::Legal; 7196 7197 // If MI is a call we might be able to outline it. We don't want to outline 7198 // any calls that rely on the position of items on the stack. When we outline 7199 // something containing a call, we have to emit a save and restore of LR in 7200 // the outlined function. Currently, this always happens by saving LR to the 7201 // stack. Thus, if we outline, say, half the parameters for a function call 7202 // plus the call, then we'll break the callee's expectations for the layout 7203 // of the stack. 7204 // 7205 // FIXME: Allow calls to functions which construct a stack frame, as long 7206 // as they don't access arguments on the stack. 7207 // FIXME: Figure out some way to analyze functions defined in other modules. 7208 // We should be able to compute the memory usage based on the IR calling 7209 // convention, even if we can't see the definition. 7210 if (MI.isCall()) { 7211 // Get the function associated with the call. Look at each operand and find 7212 // the one that represents the callee and get its name. 7213 const Function *Callee = nullptr; 7214 for (const MachineOperand &MOP : MI.operands()) { 7215 if (MOP.isGlobal()) { 7216 Callee = dyn_cast<Function>(MOP.getGlobal()); 7217 break; 7218 } 7219 } 7220 7221 // Never outline calls to mcount. There isn't any rule that would require 7222 // this, but the Linux kernel's "ftrace" feature depends on it. 7223 if (Callee && Callee->getName() == "\01_mcount") 7224 return outliner::InstrType::Illegal; 7225 7226 // If we don't know anything about the callee, assume it depends on the 7227 // stack layout of the caller. In that case, it's only legal to outline 7228 // as a tail-call. Explicitly list the call instructions we know about so we 7229 // don't get unexpected results with call pseudo-instructions. 
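    // (LegalTerminator marks the call as outlinable but ends the candidate
    // sequence at it, which in effect only permits outlining it as the final,
    // tail-called instruction of a sequence.)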
7230     auto UnknownCallOutlineType = outliner::InstrType::Illegal;
7231     if (MI.getOpcode() == AArch64::BLR ||
7232         MI.getOpcode() == AArch64::BLRNoIP || MI.getOpcode() == AArch64::BL)
7233       UnknownCallOutlineType = outliner::InstrType::LegalTerminator;
7234 
7235     if (!Callee)
7236       return UnknownCallOutlineType;
7237 
7238     // We have a function we have information about. Check whether it's
7239     // something we can safely outline.
7240     MachineFunction *CalleeMF = MF->getMMI().getMachineFunction(*Callee);
7241 
7242     // We don't know what's going on with the callee at all. Don't touch it.
7243     if (!CalleeMF)
7244       return UnknownCallOutlineType;
7245 
7246     // Check if we know anything about the callee saves on the function. If we
7247     // don't, then don't touch it, since that implies that we haven't
7248     // computed anything about its stack frame yet.
7249     MachineFrameInfo &MFI = CalleeMF->getFrameInfo();
7250     if (!MFI.isCalleeSavedInfoValid() || MFI.getStackSize() > 0 ||
7251         MFI.getNumObjects() > 0)
7252       return UnknownCallOutlineType;
7253 
7254     // At this point, we can say that CalleeMF ought not to pass anything on
7255     // the stack. Therefore, we can outline it.
7256     return outliner::InstrType::Legal;
7257   }
7258 
7259   // Don't outline positions.
7260   if (MI.isPosition())
7261     return outliner::InstrType::Illegal;
7262 
7263   // Don't touch the link register or W30.
7264   if (MI.readsRegister(AArch64::W30, &getRegisterInfo()) ||
7265       MI.modifiesRegister(AArch64::W30, &getRegisterInfo()))
7266     return outliner::InstrType::Illegal;
7267 
7268   // Don't outline BTI instructions, because that will prevent the outlining
7269   // site from being indirectly callable.
7270   if (MI.getOpcode() == AArch64::HINT) {
7271     int64_t Imm = MI.getOperand(0).getImm();
7272     if (Imm == 32 || Imm == 34 || Imm == 36 || Imm == 38)
7273       return outliner::InstrType::Illegal;
7274   }
7275 
7276   return outliner::InstrType::Legal;
7277 }
7278 
7279 void AArch64InstrInfo::fixupPostOutline(MachineBasicBlock &MBB) const {
7280   for (MachineInstr &MI : MBB) {
7281     const MachineOperand *Base;
7282     unsigned Width;
7283     int64_t Offset;
7284     bool OffsetIsScalable;
7285 
7286     // Is this a load or store with an immediate offset and SP as the base?
7287     if (!MI.mayLoadOrStore() ||
7288         !getMemOperandWithOffsetWidth(MI, Base, Offset, OffsetIsScalable, Width,
7289                                       &RI) ||
7290         (Base->isReg() && Base->getReg() != AArch64::SP))
7291       continue;
7292 
7293     // It is, so we have to fix it up.
7294     TypeSize Scale(0U, false);
7295     int64_t Dummy1, Dummy2;
7296 
7297     MachineOperand &StackOffsetOperand = getMemOpBaseRegImmOfsOffsetOperand(MI);
7298     assert(StackOffsetOperand.isImm() && "Stack offset wasn't immediate!");
7299     getMemOpInfo(MI.getOpcode(), Scale, Width, Dummy1, Dummy2);
7300     assert(Scale != 0 && "Unexpected opcode!");
7301     assert(!OffsetIsScalable && "Expected offset to be a byte offset");
7302 
7303     // We've pushed the return address to the stack, so add 16 to the offset.
7304     // This is safe, since we already checked if it would overflow when we
7305     // checked if this instruction was legal to outline.
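    // For example, an "ldr x0, [sp, #8]" (Offset == 8, Scale == 8) becomes
    // "ldr x0, [sp, #24]", i.e. NewImm == (8 + 16) / 8 == 3.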
7306 int64_t NewImm = (Offset + 16) / (int64_t)Scale.getFixedSize(); 7307 StackOffsetOperand.setImm(NewImm); 7308 } 7309 } 7310 7311 static void signOutlinedFunction(MachineFunction &MF, MachineBasicBlock &MBB, 7312 bool ShouldSignReturnAddr, 7313 bool ShouldSignReturnAddrWithAKey) { 7314 if (ShouldSignReturnAddr) { 7315 MachineBasicBlock::iterator MBBPAC = MBB.begin(); 7316 MachineBasicBlock::iterator MBBAUT = MBB.getFirstTerminator(); 7317 const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>(); 7318 const TargetInstrInfo *TII = Subtarget.getInstrInfo(); 7319 DebugLoc DL; 7320 7321 if (MBBAUT != MBB.end()) 7322 DL = MBBAUT->getDebugLoc(); 7323 7324 // At the very beginning of the basic block we insert the following 7325 // depending on the key type 7326 // 7327 // a_key: b_key: 7328 // PACIASP EMITBKEY 7329 // CFI_INSTRUCTION PACIBSP 7330 // CFI_INSTRUCTION 7331 unsigned PACI; 7332 if (ShouldSignReturnAddrWithAKey) { 7333 PACI = Subtarget.hasPAuth() ? AArch64::PACIA : AArch64::PACIASP; 7334 } else { 7335 BuildMI(MBB, MBBPAC, DebugLoc(), TII->get(AArch64::EMITBKEY)) 7336 .setMIFlag(MachineInstr::FrameSetup); 7337 PACI = Subtarget.hasPAuth() ? AArch64::PACIB : AArch64::PACIBSP; 7338 } 7339 7340 auto MI = BuildMI(MBB, MBBPAC, DebugLoc(), TII->get(PACI)); 7341 if (Subtarget.hasPAuth()) 7342 MI.addReg(AArch64::LR, RegState::Define) 7343 .addReg(AArch64::LR) 7344 .addReg(AArch64::SP, RegState::InternalRead); 7345 MI.setMIFlag(MachineInstr::FrameSetup); 7346 7347 unsigned CFIIndex = 7348 MF.addFrameInst(MCCFIInstruction::createNegateRAState(nullptr)); 7349 BuildMI(MBB, MBBPAC, DebugLoc(), TII->get(AArch64::CFI_INSTRUCTION)) 7350 .addCFIIndex(CFIIndex) 7351 .setMIFlags(MachineInstr::FrameSetup); 7352 7353 // If v8.3a features are available we can replace a RET instruction by 7354 // RETAA or RETAB and omit the AUT instructions 7355 if (Subtarget.hasPAuth() && MBBAUT != MBB.end() && 7356 MBBAUT->getOpcode() == AArch64::RET) { 7357 BuildMI(MBB, MBBAUT, DL, 7358 TII->get(ShouldSignReturnAddrWithAKey ? AArch64::RETAA 7359 : AArch64::RETAB)) 7360 .copyImplicitOps(*MBBAUT); 7361 MBB.erase(MBBAUT); 7362 } else { 7363 BuildMI(MBB, MBBAUT, DL, 7364 TII->get(ShouldSignReturnAddrWithAKey ? AArch64::AUTIASP 7365 : AArch64::AUTIBSP)) 7366 .setMIFlag(MachineInstr::FrameDestroy); 7367 } 7368 } 7369 } 7370 7371 void AArch64InstrInfo::buildOutlinedFrame( 7372 MachineBasicBlock &MBB, MachineFunction &MF, 7373 const outliner::OutlinedFunction &OF) const { 7374 7375 AArch64FunctionInfo *FI = MF.getInfo<AArch64FunctionInfo>(); 7376 7377 if (OF.FrameConstructionID == MachineOutlinerTailCall) 7378 FI->setOutliningStyle("Tail Call"); 7379 else if (OF.FrameConstructionID == MachineOutlinerThunk) { 7380 // For thunk outlining, rewrite the last instruction from a call to a 7381 // tail-call. 7382 MachineInstr *Call = &*--MBB.instr_end(); 7383 unsigned TailOpcode; 7384 if (Call->getOpcode() == AArch64::BL) { 7385 TailOpcode = AArch64::TCRETURNdi; 7386 } else { 7387 assert(Call->getOpcode() == AArch64::BLR || 7388 Call->getOpcode() == AArch64::BLRNoIP); 7389 TailOpcode = AArch64::TCRETURNriALL; 7390 } 7391 MachineInstr *TC = BuildMI(MF, DebugLoc(), get(TailOpcode)) 7392 .add(Call->getOperand(0)) 7393 .addImm(0); 7394 MBB.insert(MBB.end(), TC); 7395 Call->eraseFromParent(); 7396 7397 FI->setOutliningStyle("Thunk"); 7398 } 7399 7400 bool IsLeafFunction = true; 7401 7402 // Is there a call in the outlined range? 
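  // (Tail calls are marked as both calls and returns, so isCall() && !isReturn()
  // selects exactly the calls that need LR saved and restored around them.)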
7403 auto IsNonTailCall = [](const MachineInstr &MI) { 7404 return MI.isCall() && !MI.isReturn(); 7405 }; 7406 7407 if (llvm::any_of(MBB.instrs(), IsNonTailCall)) { 7408 // Fix up the instructions in the range, since we're going to modify the 7409 // stack. 7410 7411 // Bugzilla ID: 46767 7412 // TODO: Check if fixing up twice is safe so we can outline these. 7413 assert(OF.FrameConstructionID != MachineOutlinerDefault && 7414 "Can only fix up stack references once"); 7415 fixupPostOutline(MBB); 7416 7417 IsLeafFunction = false; 7418 7419 // LR has to be a live in so that we can save it. 7420 if (!MBB.isLiveIn(AArch64::LR)) 7421 MBB.addLiveIn(AArch64::LR); 7422 7423 MachineBasicBlock::iterator It = MBB.begin(); 7424 MachineBasicBlock::iterator Et = MBB.end(); 7425 7426 if (OF.FrameConstructionID == MachineOutlinerTailCall || 7427 OF.FrameConstructionID == MachineOutlinerThunk) 7428 Et = std::prev(MBB.end()); 7429 7430 // Insert a save before the outlined region 7431 MachineInstr *STRXpre = BuildMI(MF, DebugLoc(), get(AArch64::STRXpre)) 7432 .addReg(AArch64::SP, RegState::Define) 7433 .addReg(AArch64::LR) 7434 .addReg(AArch64::SP) 7435 .addImm(-16); 7436 It = MBB.insert(It, STRXpre); 7437 7438 const TargetSubtargetInfo &STI = MF.getSubtarget(); 7439 const MCRegisterInfo *MRI = STI.getRegisterInfo(); 7440 unsigned DwarfReg = MRI->getDwarfRegNum(AArch64::LR, true); 7441 7442 // Add a CFI saying the stack was moved 16 B down. 7443 int64_t StackPosEntry = 7444 MF.addFrameInst(MCCFIInstruction::cfiDefCfaOffset(nullptr, 16)); 7445 BuildMI(MBB, It, DebugLoc(), get(AArch64::CFI_INSTRUCTION)) 7446 .addCFIIndex(StackPosEntry) 7447 .setMIFlags(MachineInstr::FrameSetup); 7448 7449 // Add a CFI saying that the LR that we want to find is now 16 B higher than 7450 // before. 7451 int64_t LRPosEntry = 7452 MF.addFrameInst(MCCFIInstruction::createOffset(nullptr, DwarfReg, -16)); 7453 BuildMI(MBB, It, DebugLoc(), get(AArch64::CFI_INSTRUCTION)) 7454 .addCFIIndex(LRPosEntry) 7455 .setMIFlags(MachineInstr::FrameSetup); 7456 7457 // Insert a restore before the terminator for the function. 7458 MachineInstr *LDRXpost = BuildMI(MF, DebugLoc(), get(AArch64::LDRXpost)) 7459 .addReg(AArch64::SP, RegState::Define) 7460 .addReg(AArch64::LR, RegState::Define) 7461 .addReg(AArch64::SP) 7462 .addImm(16); 7463 Et = MBB.insert(Et, LDRXpost); 7464 } 7465 7466 // If a bunch of candidates reach this point they must agree on their return 7467 // address signing. It is therefore enough to just consider the signing 7468 // behaviour of one of them 7469 const auto &MFI = *OF.Candidates.front().getMF()->getInfo<AArch64FunctionInfo>(); 7470 bool ShouldSignReturnAddr = MFI.shouldSignReturnAddress(!IsLeafFunction); 7471 7472 // a_key is the default 7473 bool ShouldSignReturnAddrWithAKey = !MFI.shouldSignWithBKey(); 7474 7475 // If this is a tail call outlined function, then there's already a return. 7476 if (OF.FrameConstructionID == MachineOutlinerTailCall || 7477 OF.FrameConstructionID == MachineOutlinerThunk) { 7478 signOutlinedFunction(MF, MBB, ShouldSignReturnAddr, 7479 ShouldSignReturnAddrWithAKey); 7480 return; 7481 } 7482 7483 // It's not a tail call, so we have to insert the return ourselves. 7484 7485 // LR has to be a live in so that we can return to it. 
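  // (The plain RET built below may later be rewritten to RETAA/RETAB by
  // signOutlinedFunction when return address signing is enabled and PAuth is
  // available.)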
7486 if (!MBB.isLiveIn(AArch64::LR)) 7487 MBB.addLiveIn(AArch64::LR); 7488 7489 MachineInstr *ret = BuildMI(MF, DebugLoc(), get(AArch64::RET)) 7490 .addReg(AArch64::LR); 7491 MBB.insert(MBB.end(), ret); 7492 7493 signOutlinedFunction(MF, MBB, ShouldSignReturnAddr, 7494 ShouldSignReturnAddrWithAKey); 7495 7496 FI->setOutliningStyle("Function"); 7497 7498 // Did we have to modify the stack by saving the link register? 7499 if (OF.FrameConstructionID != MachineOutlinerDefault) 7500 return; 7501 7502 // We modified the stack. 7503 // Walk over the basic block and fix up all the stack accesses. 7504 fixupPostOutline(MBB); 7505 } 7506 7507 MachineBasicBlock::iterator AArch64InstrInfo::insertOutlinedCall( 7508 Module &M, MachineBasicBlock &MBB, MachineBasicBlock::iterator &It, 7509 MachineFunction &MF, const outliner::Candidate &C) const { 7510 7511 // Are we tail calling? 7512 if (C.CallConstructionID == MachineOutlinerTailCall) { 7513 // If yes, then we can just branch to the label. 7514 It = MBB.insert(It, BuildMI(MF, DebugLoc(), get(AArch64::TCRETURNdi)) 7515 .addGlobalAddress(M.getNamedValue(MF.getName())) 7516 .addImm(0)); 7517 return It; 7518 } 7519 7520 // Are we saving the link register? 7521 if (C.CallConstructionID == MachineOutlinerNoLRSave || 7522 C.CallConstructionID == MachineOutlinerThunk) { 7523 // No, so just insert the call. 7524 It = MBB.insert(It, BuildMI(MF, DebugLoc(), get(AArch64::BL)) 7525 .addGlobalAddress(M.getNamedValue(MF.getName()))); 7526 return It; 7527 } 7528 7529 // We want to return the spot where we inserted the call. 7530 MachineBasicBlock::iterator CallPt; 7531 7532 // Instructions for saving and restoring LR around the call instruction we're 7533 // going to insert. 7534 MachineInstr *Save; 7535 MachineInstr *Restore; 7536 // Can we save to a register? 7537 if (C.CallConstructionID == MachineOutlinerRegSave) { 7538 // FIXME: This logic should be sunk into a target-specific interface so that 7539 // we don't have to recompute the register. 7540 unsigned Reg = findRegisterToSaveLRTo(C); 7541 assert(Reg != 0 && "No callee-saved register available?"); 7542 7543 // LR has to be a live in so that we can save it. 7544 if (!MBB.isLiveIn(AArch64::LR)) 7545 MBB.addLiveIn(AArch64::LR); 7546 7547 // Save and restore LR from Reg. 7548 Save = BuildMI(MF, DebugLoc(), get(AArch64::ORRXrs), Reg) 7549 .addReg(AArch64::XZR) 7550 .addReg(AArch64::LR) 7551 .addImm(0); 7552 Restore = BuildMI(MF, DebugLoc(), get(AArch64::ORRXrs), AArch64::LR) 7553 .addReg(AArch64::XZR) 7554 .addReg(Reg) 7555 .addImm(0); 7556 } else { 7557 // We have the default case. Save and restore from SP. 7558 Save = BuildMI(MF, DebugLoc(), get(AArch64::STRXpre)) 7559 .addReg(AArch64::SP, RegState::Define) 7560 .addReg(AArch64::LR) 7561 .addReg(AArch64::SP) 7562 .addImm(-16); 7563 Restore = BuildMI(MF, DebugLoc(), get(AArch64::LDRXpost)) 7564 .addReg(AArch64::SP, RegState::Define) 7565 .addReg(AArch64::LR, RegState::Define) 7566 .addReg(AArch64::SP) 7567 .addImm(16); 7568 } 7569 7570 It = MBB.insert(It, Save); 7571 It++; 7572 7573 // Insert the call. 
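  // Together with Save/Restore above, the call site ends up looking roughly
  // like one of the following (xN stands for whichever free register
  // findRegisterToSaveLRTo picked):
  //   mov xN, lr                         str lr, [sp, #-16]!
  //   bl  OUTLINED_FUNCTION      or      bl  OUTLINED_FUNCTION
  //   mov lr, xN                         ldr lr, [sp], #16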
7574 It = MBB.insert(It, BuildMI(MF, DebugLoc(), get(AArch64::BL)) 7575 .addGlobalAddress(M.getNamedValue(MF.getName()))); 7576 CallPt = It; 7577 It++; 7578 7579 It = MBB.insert(It, Restore); 7580 return CallPt; 7581 } 7582 7583 bool AArch64InstrInfo::shouldOutlineFromFunctionByDefault( 7584 MachineFunction &MF) const { 7585 return MF.getFunction().hasMinSize(); 7586 } 7587 7588 Optional<DestSourcePair> 7589 AArch64InstrInfo::isCopyInstrImpl(const MachineInstr &MI) const { 7590 7591 // AArch64::ORRWrs and AArch64::ORRXrs with WZR/XZR reg 7592 // and zero immediate operands used as an alias for mov instruction. 7593 if (MI.getOpcode() == AArch64::ORRWrs && 7594 MI.getOperand(1).getReg() == AArch64::WZR && 7595 MI.getOperand(3).getImm() == 0x0) { 7596 return DestSourcePair{MI.getOperand(0), MI.getOperand(2)}; 7597 } 7598 7599 if (MI.getOpcode() == AArch64::ORRXrs && 7600 MI.getOperand(1).getReg() == AArch64::XZR && 7601 MI.getOperand(3).getImm() == 0x0) { 7602 return DestSourcePair{MI.getOperand(0), MI.getOperand(2)}; 7603 } 7604 7605 return None; 7606 } 7607 7608 Optional<RegImmPair> AArch64InstrInfo::isAddImmediate(const MachineInstr &MI, 7609 Register Reg) const { 7610 int Sign = 1; 7611 int64_t Offset = 0; 7612 7613 // TODO: Handle cases where Reg is a super- or sub-register of the 7614 // destination register. 7615 const MachineOperand &Op0 = MI.getOperand(0); 7616 if (!Op0.isReg() || Reg != Op0.getReg()) 7617 return None; 7618 7619 switch (MI.getOpcode()) { 7620 default: 7621 return None; 7622 case AArch64::SUBWri: 7623 case AArch64::SUBXri: 7624 case AArch64::SUBSWri: 7625 case AArch64::SUBSXri: 7626 Sign *= -1; 7627 LLVM_FALLTHROUGH; 7628 case AArch64::ADDSWri: 7629 case AArch64::ADDSXri: 7630 case AArch64::ADDWri: 7631 case AArch64::ADDXri: { 7632 // TODO: Third operand can be global address (usually some string). 7633 if (!MI.getOperand(0).isReg() || !MI.getOperand(1).isReg() || 7634 !MI.getOperand(2).isImm()) 7635 return None; 7636 int Shift = MI.getOperand(3).getImm(); 7637 assert((Shift == 0 || Shift == 12) && "Shift can be either 0 or 12"); 7638 Offset = Sign * (MI.getOperand(2).getImm() << Shift); 7639 } 7640 } 7641 return RegImmPair{MI.getOperand(1).getReg(), Offset}; 7642 } 7643 7644 /// If the given ORR instruction is a copy, and \p DescribedReg overlaps with 7645 /// the destination register then, if possible, describe the value in terms of 7646 /// the source register. 7647 static Optional<ParamLoadedValue> 7648 describeORRLoadedValue(const MachineInstr &MI, Register DescribedReg, 7649 const TargetInstrInfo *TII, 7650 const TargetRegisterInfo *TRI) { 7651 auto DestSrc = TII->isCopyInstr(MI); 7652 if (!DestSrc) 7653 return None; 7654 7655 Register DestReg = DestSrc->Destination->getReg(); 7656 Register SrcReg = DestSrc->Source->getReg(); 7657 7658 auto Expr = DIExpression::get(MI.getMF()->getFunction().getContext(), {}); 7659 7660 // If the described register is the destination, just return the source. 7661 if (DestReg == DescribedReg) 7662 return ParamLoadedValue(MachineOperand::CreateReg(SrcReg, false), Expr); 7663 7664 // ORRWrs zero-extends to 64-bits, so we need to consider such cases. 7665 if (MI.getOpcode() == AArch64::ORRWrs && 7666 TRI->isSuperRegister(DestReg, DescribedReg)) 7667 return ParamLoadedValue(MachineOperand::CreateReg(SrcReg, false), Expr); 7668 7669 // We may need to describe the lower part of a ORRXrs move. 
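  // For example, for "mov x1, x0" (ORRXrs), a query about w1 is answered in
  // terms of w0.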
7670 if (MI.getOpcode() == AArch64::ORRXrs && 7671 TRI->isSubRegister(DestReg, DescribedReg)) { 7672 Register SrcSubReg = TRI->getSubReg(SrcReg, AArch64::sub_32); 7673 return ParamLoadedValue(MachineOperand::CreateReg(SrcSubReg, false), Expr); 7674 } 7675 7676 assert(!TRI->isSuperOrSubRegisterEq(DestReg, DescribedReg) && 7677 "Unhandled ORR[XW]rs copy case"); 7678 7679 return None; 7680 } 7681 7682 Optional<ParamLoadedValue> 7683 AArch64InstrInfo::describeLoadedValue(const MachineInstr &MI, 7684 Register Reg) const { 7685 const MachineFunction *MF = MI.getMF(); 7686 const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo(); 7687 switch (MI.getOpcode()) { 7688 case AArch64::MOVZWi: 7689 case AArch64::MOVZXi: { 7690 // MOVZWi may be used for producing zero-extended 32-bit immediates in 7691 // 64-bit parameters, so we need to consider super-registers. 7692 if (!TRI->isSuperRegisterEq(MI.getOperand(0).getReg(), Reg)) 7693 return None; 7694 7695 if (!MI.getOperand(1).isImm()) 7696 return None; 7697 int64_t Immediate = MI.getOperand(1).getImm(); 7698 int Shift = MI.getOperand(2).getImm(); 7699 return ParamLoadedValue(MachineOperand::CreateImm(Immediate << Shift), 7700 nullptr); 7701 } 7702 case AArch64::ORRWrs: 7703 case AArch64::ORRXrs: 7704 return describeORRLoadedValue(MI, Reg, this, TRI); 7705 } 7706 7707 return TargetInstrInfo::describeLoadedValue(MI, Reg); 7708 } 7709 7710 bool AArch64InstrInfo::isExtendLikelyToBeFolded( 7711 MachineInstr &ExtMI, MachineRegisterInfo &MRI) const { 7712 assert(ExtMI.getOpcode() == TargetOpcode::G_SEXT || 7713 ExtMI.getOpcode() == TargetOpcode::G_ZEXT || 7714 ExtMI.getOpcode() == TargetOpcode::G_ANYEXT); 7715 7716 // Anyexts are nops. 7717 if (ExtMI.getOpcode() == TargetOpcode::G_ANYEXT) 7718 return true; 7719 7720 Register DefReg = ExtMI.getOperand(0).getReg(); 7721 if (!MRI.hasOneNonDBGUse(DefReg)) 7722 return false; 7723 7724 // It's likely that a sext/zext as a G_PTR_ADD offset will be folded into an 7725 // addressing mode. 7726 auto *UserMI = &*MRI.use_instr_nodbg_begin(DefReg); 7727 return UserMI->getOpcode() == TargetOpcode::G_PTR_ADD; 7728 } 7729 7730 uint64_t AArch64InstrInfo::getElementSizeForOpcode(unsigned Opc) const { 7731 return get(Opc).TSFlags & AArch64::ElementSizeMask; 7732 } 7733 7734 bool AArch64InstrInfo::isPTestLikeOpcode(unsigned Opc) const { 7735 return get(Opc).TSFlags & AArch64::InstrFlagIsPTestLike; 7736 } 7737 7738 bool AArch64InstrInfo::isWhileOpcode(unsigned Opc) const { 7739 return get(Opc).TSFlags & AArch64::InstrFlagIsWhile; 7740 } 7741 7742 unsigned int 7743 AArch64InstrInfo::getTailDuplicateSize(CodeGenOpt::Level OptLevel) const { 7744 return OptLevel >= CodeGenOpt::Aggressive ? 6 : 2; 7745 } 7746 7747 unsigned llvm::getBLRCallOpcode(const MachineFunction &MF) { 7748 if (MF.getSubtarget<AArch64Subtarget>().hardenSlsBlr()) 7749 return AArch64::BLRNoIP; 7750 else 7751 return AArch64::BLR; 7752 } 7753 7754 #define GET_INSTRINFO_HELPERS 7755 #define GET_INSTRMAP_INFO 7756 #include "AArch64GenInstrInfo.inc" 7757