1 //===- AArch64InstrInfo.cpp - AArch64 Instruction Information -------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file contains the AArch64 implementation of the TargetInstrInfo class. 10 // 11 //===----------------------------------------------------------------------===// 12 13 #include "AArch64InstrInfo.h" 14 #include "AArch64MachineFunctionInfo.h" 15 #include "AArch64Subtarget.h" 16 #include "MCTargetDesc/AArch64AddressingModes.h" 17 #include "Utils/AArch64BaseInfo.h" 18 #include "llvm/ADT/ArrayRef.h" 19 #include "llvm/ADT/STLExtras.h" 20 #include "llvm/ADT/SmallVector.h" 21 #include "llvm/CodeGen/MachineBasicBlock.h" 22 #include "llvm/CodeGen/MachineFrameInfo.h" 23 #include "llvm/CodeGen/MachineFunction.h" 24 #include "llvm/CodeGen/MachineInstr.h" 25 #include "llvm/CodeGen/MachineInstrBuilder.h" 26 #include "llvm/CodeGen/MachineMemOperand.h" 27 #include "llvm/CodeGen/MachineModuleInfo.h" 28 #include "llvm/CodeGen/MachineOperand.h" 29 #include "llvm/CodeGen/MachineRegisterInfo.h" 30 #include "llvm/CodeGen/StackMaps.h" 31 #include "llvm/CodeGen/TargetRegisterInfo.h" 32 #include "llvm/CodeGen/TargetSubtargetInfo.h" 33 #include "llvm/IR/DebugInfoMetadata.h" 34 #include "llvm/IR/DebugLoc.h" 35 #include "llvm/IR/GlobalValue.h" 36 #include "llvm/MC/MCAsmInfo.h" 37 #include "llvm/MC/MCInst.h" 38 #include "llvm/MC/MCInstrDesc.h" 39 #include "llvm/Support/Casting.h" 40 #include "llvm/Support/CodeGen.h" 41 #include "llvm/Support/CommandLine.h" 42 #include "llvm/Support/Compiler.h" 43 #include "llvm/Support/ErrorHandling.h" 44 #include "llvm/Support/MathExtras.h" 45 #include "llvm/Target/TargetMachine.h" 46 #include "llvm/Target/TargetOptions.h" 47 #include <cassert> 48 #include <cstdint> 49 
#include <iterator> 50 #include <utility> 51 52 using namespace llvm; 53 54 #define GET_INSTRINFO_CTOR_DTOR 55 #include "AArch64GenInstrInfo.inc" 56 57 static cl::opt<unsigned> TBZDisplacementBits( 58 "aarch64-tbz-offset-bits", cl::Hidden, cl::init(14), 59 cl::desc("Restrict range of TB[N]Z instructions (DEBUG)")); 60 61 static cl::opt<unsigned> CBZDisplacementBits( 62 "aarch64-cbz-offset-bits", cl::Hidden, cl::init(19), 63 cl::desc("Restrict range of CB[N]Z instructions (DEBUG)")); 64 65 static cl::opt<unsigned> 66 BCCDisplacementBits("aarch64-bcc-offset-bits", cl::Hidden, cl::init(19), 67 cl::desc("Restrict range of Bcc instructions (DEBUG)")); 68 69 AArch64InstrInfo::AArch64InstrInfo(const AArch64Subtarget &STI) 70 : AArch64GenInstrInfo(AArch64::ADJCALLSTACKDOWN, AArch64::ADJCALLSTACKUP, 71 AArch64::CATCHRET), 72 RI(STI.getTargetTriple()), Subtarget(STI) {} 73 74 /// GetInstSize - Return the number of bytes of code the specified 75 /// instruction may be. This returns the maximum number of bytes. 76 unsigned AArch64InstrInfo::getInstSizeInBytes(const MachineInstr &MI) const { 77 const MachineBasicBlock &MBB = *MI.getParent(); 78 const MachineFunction *MF = MBB.getParent(); 79 const MCAsmInfo *MAI = MF->getTarget().getMCAsmInfo(); 80 81 { 82 auto Op = MI.getOpcode(); 83 if (Op == AArch64::INLINEASM || Op == AArch64::INLINEASM_BR) 84 return getInlineAsmLength(MI.getOperand(0).getSymbolName(), *MAI); 85 } 86 87 // Meta-instructions emit no code. 88 if (MI.isMetaInstruction()) 89 return 0; 90 91 // FIXME: We currently only handle pseudoinstructions that don't get expanded 92 // before the assembly printer. 93 unsigned NumBytes = 0; 94 const MCInstrDesc &Desc = MI.getDesc(); 95 switch (Desc.getOpcode()) { 96 default: 97 // Anything not explicitly designated otherwise is a normal 4-byte insn. 
98 NumBytes = 4; 99 break; 100 case TargetOpcode::STACKMAP: 101 // The upper bound for a stackmap intrinsic is the full length of its shadow 102 NumBytes = StackMapOpers(&MI).getNumPatchBytes(); 103 assert(NumBytes % 4 == 0 && "Invalid number of NOP bytes requested!"); 104 break; 105 case TargetOpcode::PATCHPOINT: 106 // The size of the patchpoint intrinsic is the number of bytes requested 107 NumBytes = PatchPointOpers(&MI).getNumPatchBytes(); 108 assert(NumBytes % 4 == 0 && "Invalid number of NOP bytes requested!"); 109 break; 110 case AArch64::TLSDESC_CALLSEQ: 111 // This gets lowered to an instruction sequence which takes 16 bytes 112 NumBytes = 16; 113 break; 114 case AArch64::SpeculationBarrierISBDSBEndBB: 115 // This gets lowered to 2 4-byte instructions. 116 NumBytes = 8; 117 break; 118 case AArch64::SpeculationBarrierSBEndBB: 119 // This gets lowered to 1 4-byte instructions. 120 NumBytes = 4; 121 break; 122 case AArch64::JumpTableDest32: 123 case AArch64::JumpTableDest16: 124 case AArch64::JumpTableDest8: 125 NumBytes = 12; 126 break; 127 case AArch64::SPACE: 128 NumBytes = MI.getOperand(1).getImm(); 129 break; 130 case TargetOpcode::BUNDLE: 131 NumBytes = getInstBundleLength(MI); 132 break; 133 } 134 135 return NumBytes; 136 } 137 138 unsigned AArch64InstrInfo::getInstBundleLength(const MachineInstr &MI) const { 139 unsigned Size = 0; 140 MachineBasicBlock::const_instr_iterator I = MI.getIterator(); 141 MachineBasicBlock::const_instr_iterator E = MI.getParent()->instr_end(); 142 while (++I != E && I->isInsideBundle()) { 143 assert(!I->isBundle() && "No nested bundle!"); 144 Size += getInstSizeInBytes(*I); 145 } 146 return Size; 147 } 148 149 static void parseCondBranch(MachineInstr *LastInst, MachineBasicBlock *&Target, 150 SmallVectorImpl<MachineOperand> &Cond) { 151 // Block ends with fall-through condbranch. 
152 switch (LastInst->getOpcode()) { 153 default: 154 llvm_unreachable("Unknown branch instruction?"); 155 case AArch64::Bcc: 156 Target = LastInst->getOperand(1).getMBB(); 157 Cond.push_back(LastInst->getOperand(0)); 158 break; 159 case AArch64::CBZW: 160 case AArch64::CBZX: 161 case AArch64::CBNZW: 162 case AArch64::CBNZX: 163 Target = LastInst->getOperand(1).getMBB(); 164 Cond.push_back(MachineOperand::CreateImm(-1)); 165 Cond.push_back(MachineOperand::CreateImm(LastInst->getOpcode())); 166 Cond.push_back(LastInst->getOperand(0)); 167 break; 168 case AArch64::TBZW: 169 case AArch64::TBZX: 170 case AArch64::TBNZW: 171 case AArch64::TBNZX: 172 Target = LastInst->getOperand(2).getMBB(); 173 Cond.push_back(MachineOperand::CreateImm(-1)); 174 Cond.push_back(MachineOperand::CreateImm(LastInst->getOpcode())); 175 Cond.push_back(LastInst->getOperand(0)); 176 Cond.push_back(LastInst->getOperand(1)); 177 } 178 } 179 180 static unsigned getBranchDisplacementBits(unsigned Opc) { 181 switch (Opc) { 182 default: 183 llvm_unreachable("unexpected opcode!"); 184 case AArch64::B: 185 return 64; 186 case AArch64::TBNZW: 187 case AArch64::TBZW: 188 case AArch64::TBNZX: 189 case AArch64::TBZX: 190 return TBZDisplacementBits; 191 case AArch64::CBNZW: 192 case AArch64::CBZW: 193 case AArch64::CBNZX: 194 case AArch64::CBZX: 195 return CBZDisplacementBits; 196 case AArch64::Bcc: 197 return BCCDisplacementBits; 198 } 199 } 200 201 bool AArch64InstrInfo::isBranchOffsetInRange(unsigned BranchOp, 202 int64_t BrOffset) const { 203 unsigned Bits = getBranchDisplacementBits(BranchOp); 204 assert(Bits >= 3 && "max branch displacement must be enough to jump" 205 "over conditional branch expansion"); 206 return isIntN(Bits, BrOffset / 4); 207 } 208 209 MachineBasicBlock * 210 AArch64InstrInfo::getBranchDestBlock(const MachineInstr &MI) const { 211 switch (MI.getOpcode()) { 212 default: 213 llvm_unreachable("unexpected opcode!"); 214 case AArch64::B: 215 return MI.getOperand(0).getMBB(); 216 case 
AArch64::TBZW: 217 case AArch64::TBNZW: 218 case AArch64::TBZX: 219 case AArch64::TBNZX: 220 return MI.getOperand(2).getMBB(); 221 case AArch64::CBZW: 222 case AArch64::CBNZW: 223 case AArch64::CBZX: 224 case AArch64::CBNZX: 225 case AArch64::Bcc: 226 return MI.getOperand(1).getMBB(); 227 } 228 } 229 230 // Branch analysis. 231 bool AArch64InstrInfo::analyzeBranch(MachineBasicBlock &MBB, 232 MachineBasicBlock *&TBB, 233 MachineBasicBlock *&FBB, 234 SmallVectorImpl<MachineOperand> &Cond, 235 bool AllowModify) const { 236 // If the block has no terminators, it just falls into the block after it. 237 MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr(); 238 if (I == MBB.end()) 239 return false; 240 241 // Skip over SpeculationBarrierEndBB terminators 242 if (I->getOpcode() == AArch64::SpeculationBarrierISBDSBEndBB || 243 I->getOpcode() == AArch64::SpeculationBarrierSBEndBB) { 244 --I; 245 } 246 247 if (!isUnpredicatedTerminator(*I)) 248 return false; 249 250 // Get the last instruction in the block. 251 MachineInstr *LastInst = &*I; 252 253 // If there is only one terminator instruction, process it. 254 unsigned LastOpc = LastInst->getOpcode(); 255 if (I == MBB.begin() || !isUnpredicatedTerminator(*--I)) { 256 if (isUncondBranchOpcode(LastOpc)) { 257 TBB = LastInst->getOperand(0).getMBB(); 258 return false; 259 } 260 if (isCondBranchOpcode(LastOpc)) { 261 // Block ends with fall-through condbranch. 262 parseCondBranch(LastInst, TBB, Cond); 263 return false; 264 } 265 return true; // Can't handle indirect branch. 266 } 267 268 // Get the instruction before it if it is a terminator. 269 MachineInstr *SecondLastInst = &*I; 270 unsigned SecondLastOpc = SecondLastInst->getOpcode(); 271 272 // If AllowModify is true and the block ends with two or more unconditional 273 // branches, delete all but the first unconditional branch. 
274 if (AllowModify && isUncondBranchOpcode(LastOpc)) { 275 while (isUncondBranchOpcode(SecondLastOpc)) { 276 LastInst->eraseFromParent(); 277 LastInst = SecondLastInst; 278 LastOpc = LastInst->getOpcode(); 279 if (I == MBB.begin() || !isUnpredicatedTerminator(*--I)) { 280 // Return now the only terminator is an unconditional branch. 281 TBB = LastInst->getOperand(0).getMBB(); 282 return false; 283 } else { 284 SecondLastInst = &*I; 285 SecondLastOpc = SecondLastInst->getOpcode(); 286 } 287 } 288 } 289 290 // If there are three terminators, we don't know what sort of block this is. 291 if (SecondLastInst && I != MBB.begin() && isUnpredicatedTerminator(*--I)) 292 return true; 293 294 // If the block ends with a B and a Bcc, handle it. 295 if (isCondBranchOpcode(SecondLastOpc) && isUncondBranchOpcode(LastOpc)) { 296 parseCondBranch(SecondLastInst, TBB, Cond); 297 FBB = LastInst->getOperand(0).getMBB(); 298 return false; 299 } 300 301 // If the block ends with two unconditional branches, handle it. The second 302 // one is not executed, so remove it. 303 if (isUncondBranchOpcode(SecondLastOpc) && isUncondBranchOpcode(LastOpc)) { 304 TBB = SecondLastInst->getOperand(0).getMBB(); 305 I = LastInst; 306 if (AllowModify) 307 I->eraseFromParent(); 308 return false; 309 } 310 311 // ...likewise if it ends with an indirect branch followed by an unconditional 312 // branch. 313 if (isIndirectBranchOpcode(SecondLastOpc) && isUncondBranchOpcode(LastOpc)) { 314 I = LastInst; 315 if (AllowModify) 316 I->eraseFromParent(); 317 return true; 318 } 319 320 // Otherwise, can't handle this. 
321 return true; 322 } 323 324 bool AArch64InstrInfo::reverseBranchCondition( 325 SmallVectorImpl<MachineOperand> &Cond) const { 326 if (Cond[0].getImm() != -1) { 327 // Regular Bcc 328 AArch64CC::CondCode CC = (AArch64CC::CondCode)(int)Cond[0].getImm(); 329 Cond[0].setImm(AArch64CC::getInvertedCondCode(CC)); 330 } else { 331 // Folded compare-and-branch 332 switch (Cond[1].getImm()) { 333 default: 334 llvm_unreachable("Unknown conditional branch!"); 335 case AArch64::CBZW: 336 Cond[1].setImm(AArch64::CBNZW); 337 break; 338 case AArch64::CBNZW: 339 Cond[1].setImm(AArch64::CBZW); 340 break; 341 case AArch64::CBZX: 342 Cond[1].setImm(AArch64::CBNZX); 343 break; 344 case AArch64::CBNZX: 345 Cond[1].setImm(AArch64::CBZX); 346 break; 347 case AArch64::TBZW: 348 Cond[1].setImm(AArch64::TBNZW); 349 break; 350 case AArch64::TBNZW: 351 Cond[1].setImm(AArch64::TBZW); 352 break; 353 case AArch64::TBZX: 354 Cond[1].setImm(AArch64::TBNZX); 355 break; 356 case AArch64::TBNZX: 357 Cond[1].setImm(AArch64::TBZX); 358 break; 359 } 360 } 361 362 return false; 363 } 364 365 unsigned AArch64InstrInfo::removeBranch(MachineBasicBlock &MBB, 366 int *BytesRemoved) const { 367 MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr(); 368 if (I == MBB.end()) 369 return 0; 370 371 if (!isUncondBranchOpcode(I->getOpcode()) && 372 !isCondBranchOpcode(I->getOpcode())) 373 return 0; 374 375 // Remove the branch. 376 I->eraseFromParent(); 377 378 I = MBB.end(); 379 380 if (I == MBB.begin()) { 381 if (BytesRemoved) 382 *BytesRemoved = 4; 383 return 1; 384 } 385 --I; 386 if (!isCondBranchOpcode(I->getOpcode())) { 387 if (BytesRemoved) 388 *BytesRemoved = 4; 389 return 1; 390 } 391 392 // Remove the branch. 
393 I->eraseFromParent(); 394 if (BytesRemoved) 395 *BytesRemoved = 8; 396 397 return 2; 398 } 399 400 void AArch64InstrInfo::instantiateCondBranch( 401 MachineBasicBlock &MBB, const DebugLoc &DL, MachineBasicBlock *TBB, 402 ArrayRef<MachineOperand> Cond) const { 403 if (Cond[0].getImm() != -1) { 404 // Regular Bcc 405 BuildMI(&MBB, DL, get(AArch64::Bcc)).addImm(Cond[0].getImm()).addMBB(TBB); 406 } else { 407 // Folded compare-and-branch 408 // Note that we use addOperand instead of addReg to keep the flags. 409 const MachineInstrBuilder MIB = 410 BuildMI(&MBB, DL, get(Cond[1].getImm())).add(Cond[2]); 411 if (Cond.size() > 3) 412 MIB.addImm(Cond[3].getImm()); 413 MIB.addMBB(TBB); 414 } 415 } 416 417 unsigned AArch64InstrInfo::insertBranch( 418 MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, 419 ArrayRef<MachineOperand> Cond, const DebugLoc &DL, int *BytesAdded) const { 420 // Shouldn't be a fall through. 421 assert(TBB && "insertBranch must not be told to insert a fallthrough"); 422 423 if (!FBB) { 424 if (Cond.empty()) // Unconditional branch? 425 BuildMI(&MBB, DL, get(AArch64::B)).addMBB(TBB); 426 else 427 instantiateCondBranch(MBB, DL, TBB, Cond); 428 429 if (BytesAdded) 430 *BytesAdded = 4; 431 432 return 1; 433 } 434 435 // Two-way conditional branch. 436 instantiateCondBranch(MBB, DL, TBB, Cond); 437 BuildMI(&MBB, DL, get(AArch64::B)).addMBB(FBB); 438 439 if (BytesAdded) 440 *BytesAdded = 8; 441 442 return 2; 443 } 444 445 // Find the original register that VReg is copied from. 446 static unsigned removeCopies(const MachineRegisterInfo &MRI, unsigned VReg) { 447 while (Register::isVirtualRegister(VReg)) { 448 const MachineInstr *DefMI = MRI.getVRegDef(VReg); 449 if (!DefMI->isFullCopy()) 450 return VReg; 451 VReg = DefMI->getOperand(1).getReg(); 452 } 453 return VReg; 454 } 455 456 // Determine if VReg is defined by an instruction that can be folded into a 457 // csel instruction. 
If so, return the folded opcode, and the replacement 458 // register. 459 static unsigned canFoldIntoCSel(const MachineRegisterInfo &MRI, unsigned VReg, 460 unsigned *NewVReg = nullptr) { 461 VReg = removeCopies(MRI, VReg); 462 if (!Register::isVirtualRegister(VReg)) 463 return 0; 464 465 bool Is64Bit = AArch64::GPR64allRegClass.hasSubClassEq(MRI.getRegClass(VReg)); 466 const MachineInstr *DefMI = MRI.getVRegDef(VReg); 467 unsigned Opc = 0; 468 unsigned SrcOpNum = 0; 469 switch (DefMI->getOpcode()) { 470 case AArch64::ADDSXri: 471 case AArch64::ADDSWri: 472 // if NZCV is used, do not fold. 473 if (DefMI->findRegisterDefOperandIdx(AArch64::NZCV, true) == -1) 474 return 0; 475 // fall-through to ADDXri and ADDWri. 476 LLVM_FALLTHROUGH; 477 case AArch64::ADDXri: 478 case AArch64::ADDWri: 479 // add x, 1 -> csinc. 480 if (!DefMI->getOperand(2).isImm() || DefMI->getOperand(2).getImm() != 1 || 481 DefMI->getOperand(3).getImm() != 0) 482 return 0; 483 SrcOpNum = 1; 484 Opc = Is64Bit ? AArch64::CSINCXr : AArch64::CSINCWr; 485 break; 486 487 case AArch64::ORNXrr: 488 case AArch64::ORNWrr: { 489 // not x -> csinv, represented as orn dst, xzr, src. 490 unsigned ZReg = removeCopies(MRI, DefMI->getOperand(1).getReg()); 491 if (ZReg != AArch64::XZR && ZReg != AArch64::WZR) 492 return 0; 493 SrcOpNum = 2; 494 Opc = Is64Bit ? AArch64::CSINVXr : AArch64::CSINVWr; 495 break; 496 } 497 498 case AArch64::SUBSXrr: 499 case AArch64::SUBSWrr: 500 // if NZCV is used, do not fold. 501 if (DefMI->findRegisterDefOperandIdx(AArch64::NZCV, true) == -1) 502 return 0; 503 // fall-through to SUBXrr and SUBWrr. 504 LLVM_FALLTHROUGH; 505 case AArch64::SUBXrr: 506 case AArch64::SUBWrr: { 507 // neg x -> csneg, represented as sub dst, xzr, src. 508 unsigned ZReg = removeCopies(MRI, DefMI->getOperand(1).getReg()); 509 if (ZReg != AArch64::XZR && ZReg != AArch64::WZR) 510 return 0; 511 SrcOpNum = 2; 512 Opc = Is64Bit ? 
AArch64::CSNEGXr : AArch64::CSNEGWr; 513 break; 514 } 515 default: 516 return 0; 517 } 518 assert(Opc && SrcOpNum && "Missing parameters"); 519 520 if (NewVReg) 521 *NewVReg = DefMI->getOperand(SrcOpNum).getReg(); 522 return Opc; 523 } 524 525 bool AArch64InstrInfo::canInsertSelect(const MachineBasicBlock &MBB, 526 ArrayRef<MachineOperand> Cond, 527 Register DstReg, Register TrueReg, 528 Register FalseReg, int &CondCycles, 529 int &TrueCycles, 530 int &FalseCycles) const { 531 // Check register classes. 532 const MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo(); 533 const TargetRegisterClass *RC = 534 RI.getCommonSubClass(MRI.getRegClass(TrueReg), MRI.getRegClass(FalseReg)); 535 if (!RC) 536 return false; 537 538 // Also need to check the dest regclass, in case we're trying to optimize 539 // something like: 540 // %1(gpr) = PHI %2(fpr), bb1, %(fpr), bb2 541 if (!RI.getCommonSubClass(RC, MRI.getRegClass(DstReg))) 542 return false; 543 544 // Expanding cbz/tbz requires an extra cycle of latency on the condition. 545 unsigned ExtraCondLat = Cond.size() != 1; 546 547 // GPRs are handled by csel. 548 // FIXME: Fold in x+1, -x, and ~x when applicable. 549 if (AArch64::GPR64allRegClass.hasSubClassEq(RC) || 550 AArch64::GPR32allRegClass.hasSubClassEq(RC)) { 551 // Single-cycle csel, csinc, csinv, and csneg. 552 CondCycles = 1 + ExtraCondLat; 553 TrueCycles = FalseCycles = 1; 554 if (canFoldIntoCSel(MRI, TrueReg)) 555 TrueCycles = 0; 556 else if (canFoldIntoCSel(MRI, FalseReg)) 557 FalseCycles = 0; 558 return true; 559 } 560 561 // Scalar floating point is handled by fcsel. 562 // FIXME: Form fabs, fmin, and fmax when applicable. 563 if (AArch64::FPR64RegClass.hasSubClassEq(RC) || 564 AArch64::FPR32RegClass.hasSubClassEq(RC)) { 565 CondCycles = 5 + ExtraCondLat; 566 TrueCycles = FalseCycles = 2; 567 return true; 568 } 569 570 // Can't do vectors. 
571 return false; 572 } 573 574 void AArch64InstrInfo::insertSelect(MachineBasicBlock &MBB, 575 MachineBasicBlock::iterator I, 576 const DebugLoc &DL, Register DstReg, 577 ArrayRef<MachineOperand> Cond, 578 Register TrueReg, Register FalseReg) const { 579 MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo(); 580 581 // Parse the condition code, see parseCondBranch() above. 582 AArch64CC::CondCode CC; 583 switch (Cond.size()) { 584 default: 585 llvm_unreachable("Unknown condition opcode in Cond"); 586 case 1: // b.cc 587 CC = AArch64CC::CondCode(Cond[0].getImm()); 588 break; 589 case 3: { // cbz/cbnz 590 // We must insert a compare against 0. 591 bool Is64Bit; 592 switch (Cond[1].getImm()) { 593 default: 594 llvm_unreachable("Unknown branch opcode in Cond"); 595 case AArch64::CBZW: 596 Is64Bit = false; 597 CC = AArch64CC::EQ; 598 break; 599 case AArch64::CBZX: 600 Is64Bit = true; 601 CC = AArch64CC::EQ; 602 break; 603 case AArch64::CBNZW: 604 Is64Bit = false; 605 CC = AArch64CC::NE; 606 break; 607 case AArch64::CBNZX: 608 Is64Bit = true; 609 CC = AArch64CC::NE; 610 break; 611 } 612 Register SrcReg = Cond[2].getReg(); 613 if (Is64Bit) { 614 // cmp reg, #0 is actually subs xzr, reg, #0. 615 MRI.constrainRegClass(SrcReg, &AArch64::GPR64spRegClass); 616 BuildMI(MBB, I, DL, get(AArch64::SUBSXri), AArch64::XZR) 617 .addReg(SrcReg) 618 .addImm(0) 619 .addImm(0); 620 } else { 621 MRI.constrainRegClass(SrcReg, &AArch64::GPR32spRegClass); 622 BuildMI(MBB, I, DL, get(AArch64::SUBSWri), AArch64::WZR) 623 .addReg(SrcReg) 624 .addImm(0) 625 .addImm(0); 626 } 627 break; 628 } 629 case 4: { // tbz/tbnz 630 // We must insert a tst instruction. 631 switch (Cond[1].getImm()) { 632 default: 633 llvm_unreachable("Unknown branch opcode in Cond"); 634 case AArch64::TBZW: 635 case AArch64::TBZX: 636 CC = AArch64CC::EQ; 637 break; 638 case AArch64::TBNZW: 639 case AArch64::TBNZX: 640 CC = AArch64CC::NE; 641 break; 642 } 643 // cmp reg, #foo is actually ands xzr, reg, #1<<foo. 
644 if (Cond[1].getImm() == AArch64::TBZW || Cond[1].getImm() == AArch64::TBNZW) 645 BuildMI(MBB, I, DL, get(AArch64::ANDSWri), AArch64::WZR) 646 .addReg(Cond[2].getReg()) 647 .addImm( 648 AArch64_AM::encodeLogicalImmediate(1ull << Cond[3].getImm(), 32)); 649 else 650 BuildMI(MBB, I, DL, get(AArch64::ANDSXri), AArch64::XZR) 651 .addReg(Cond[2].getReg()) 652 .addImm( 653 AArch64_AM::encodeLogicalImmediate(1ull << Cond[3].getImm(), 64)); 654 break; 655 } 656 } 657 658 unsigned Opc = 0; 659 const TargetRegisterClass *RC = nullptr; 660 bool TryFold = false; 661 if (MRI.constrainRegClass(DstReg, &AArch64::GPR64RegClass)) { 662 RC = &AArch64::GPR64RegClass; 663 Opc = AArch64::CSELXr; 664 TryFold = true; 665 } else if (MRI.constrainRegClass(DstReg, &AArch64::GPR32RegClass)) { 666 RC = &AArch64::GPR32RegClass; 667 Opc = AArch64::CSELWr; 668 TryFold = true; 669 } else if (MRI.constrainRegClass(DstReg, &AArch64::FPR64RegClass)) { 670 RC = &AArch64::FPR64RegClass; 671 Opc = AArch64::FCSELDrrr; 672 } else if (MRI.constrainRegClass(DstReg, &AArch64::FPR32RegClass)) { 673 RC = &AArch64::FPR32RegClass; 674 Opc = AArch64::FCSELSrrr; 675 } 676 assert(RC && "Unsupported regclass"); 677 678 // Try folding simple instructions into the csel. 679 if (TryFold) { 680 unsigned NewVReg = 0; 681 unsigned FoldedOpc = canFoldIntoCSel(MRI, TrueReg, &NewVReg); 682 if (FoldedOpc) { 683 // The folded opcodes csinc, csinc and csneg apply the operation to 684 // FalseReg, so we need to invert the condition. 685 CC = AArch64CC::getInvertedCondCode(CC); 686 TrueReg = FalseReg; 687 } else 688 FoldedOpc = canFoldIntoCSel(MRI, FalseReg, &NewVReg); 689 690 // Fold the operation. Leave any dead instructions for DCE to clean up. 691 if (FoldedOpc) { 692 FalseReg = NewVReg; 693 Opc = FoldedOpc; 694 // The extends the live range of NewVReg. 695 MRI.clearKillFlags(NewVReg); 696 } 697 } 698 699 // Pull all virtual register into the appropriate class. 
700 MRI.constrainRegClass(TrueReg, RC); 701 MRI.constrainRegClass(FalseReg, RC); 702 703 // Insert the csel. 704 BuildMI(MBB, I, DL, get(Opc), DstReg) 705 .addReg(TrueReg) 706 .addReg(FalseReg) 707 .addImm(CC); 708 } 709 710 /// Returns true if a MOVi32imm or MOVi64imm can be expanded to an ORRxx. 711 static bool canBeExpandedToORR(const MachineInstr &MI, unsigned BitSize) { 712 uint64_t Imm = MI.getOperand(1).getImm(); 713 uint64_t UImm = Imm << (64 - BitSize) >> (64 - BitSize); 714 uint64_t Encoding; 715 return AArch64_AM::processLogicalImmediate(UImm, BitSize, Encoding); 716 } 717 718 // FIXME: this implementation should be micro-architecture dependent, so a 719 // micro-architecture target hook should be introduced here in future. 720 bool AArch64InstrInfo::isAsCheapAsAMove(const MachineInstr &MI) const { 721 if (!Subtarget.hasCustomCheapAsMoveHandling()) 722 return MI.isAsCheapAsAMove(); 723 724 const unsigned Opcode = MI.getOpcode(); 725 726 // Firstly, check cases gated by features. 727 728 if (Subtarget.hasZeroCycleZeroingFP()) { 729 if (Opcode == AArch64::FMOVH0 || 730 Opcode == AArch64::FMOVS0 || 731 Opcode == AArch64::FMOVD0) 732 return true; 733 } 734 735 if (Subtarget.hasZeroCycleZeroingGP()) { 736 if (Opcode == TargetOpcode::COPY && 737 (MI.getOperand(1).getReg() == AArch64::WZR || 738 MI.getOperand(1).getReg() == AArch64::XZR)) 739 return true; 740 } 741 742 // Secondly, check cases specific to sub-targets. 743 744 if (Subtarget.hasExynosCheapAsMoveHandling()) { 745 if (isExynosCheapAsMove(MI)) 746 return true; 747 748 return MI.isAsCheapAsAMove(); 749 } 750 751 // Finally, check generic cases. 
752 753 switch (Opcode) { 754 default: 755 return false; 756 757 // add/sub on register without shift 758 case AArch64::ADDWri: 759 case AArch64::ADDXri: 760 case AArch64::SUBWri: 761 case AArch64::SUBXri: 762 return (MI.getOperand(3).getImm() == 0); 763 764 // logical ops on immediate 765 case AArch64::ANDWri: 766 case AArch64::ANDXri: 767 case AArch64::EORWri: 768 case AArch64::EORXri: 769 case AArch64::ORRWri: 770 case AArch64::ORRXri: 771 return true; 772 773 // logical ops on register without shift 774 case AArch64::ANDWrr: 775 case AArch64::ANDXrr: 776 case AArch64::BICWrr: 777 case AArch64::BICXrr: 778 case AArch64::EONWrr: 779 case AArch64::EONXrr: 780 case AArch64::EORWrr: 781 case AArch64::EORXrr: 782 case AArch64::ORNWrr: 783 case AArch64::ORNXrr: 784 case AArch64::ORRWrr: 785 case AArch64::ORRXrr: 786 return true; 787 788 // If MOVi32imm or MOVi64imm can be expanded into ORRWri or 789 // ORRXri, it is as cheap as MOV 790 case AArch64::MOVi32imm: 791 return canBeExpandedToORR(MI, 32); 792 case AArch64::MOVi64imm: 793 return canBeExpandedToORR(MI, 64); 794 } 795 796 llvm_unreachable("Unknown opcode to check as cheap as a move!"); 797 } 798 799 bool AArch64InstrInfo::isFalkorShiftExtFast(const MachineInstr &MI) { 800 switch (MI.getOpcode()) { 801 default: 802 return false; 803 804 case AArch64::ADDWrs: 805 case AArch64::ADDXrs: 806 case AArch64::ADDSWrs: 807 case AArch64::ADDSXrs: { 808 unsigned Imm = MI.getOperand(3).getImm(); 809 unsigned ShiftVal = AArch64_AM::getShiftValue(Imm); 810 if (ShiftVal == 0) 811 return true; 812 return AArch64_AM::getShiftType(Imm) == AArch64_AM::LSL && ShiftVal <= 5; 813 } 814 815 case AArch64::ADDWrx: 816 case AArch64::ADDXrx: 817 case AArch64::ADDXrx64: 818 case AArch64::ADDSWrx: 819 case AArch64::ADDSXrx: 820 case AArch64::ADDSXrx64: { 821 unsigned Imm = MI.getOperand(3).getImm(); 822 switch (AArch64_AM::getArithExtendType(Imm)) { 823 default: 824 return false; 825 case AArch64_AM::UXTB: 826 case AArch64_AM::UXTH: 827 
case AArch64_AM::UXTW: 828 case AArch64_AM::UXTX: 829 return AArch64_AM::getArithShiftValue(Imm) <= 4; 830 } 831 } 832 833 case AArch64::SUBWrs: 834 case AArch64::SUBSWrs: { 835 unsigned Imm = MI.getOperand(3).getImm(); 836 unsigned ShiftVal = AArch64_AM::getShiftValue(Imm); 837 return ShiftVal == 0 || 838 (AArch64_AM::getShiftType(Imm) == AArch64_AM::ASR && ShiftVal == 31); 839 } 840 841 case AArch64::SUBXrs: 842 case AArch64::SUBSXrs: { 843 unsigned Imm = MI.getOperand(3).getImm(); 844 unsigned ShiftVal = AArch64_AM::getShiftValue(Imm); 845 return ShiftVal == 0 || 846 (AArch64_AM::getShiftType(Imm) == AArch64_AM::ASR && ShiftVal == 63); 847 } 848 849 case AArch64::SUBWrx: 850 case AArch64::SUBXrx: 851 case AArch64::SUBXrx64: 852 case AArch64::SUBSWrx: 853 case AArch64::SUBSXrx: 854 case AArch64::SUBSXrx64: { 855 unsigned Imm = MI.getOperand(3).getImm(); 856 switch (AArch64_AM::getArithExtendType(Imm)) { 857 default: 858 return false; 859 case AArch64_AM::UXTB: 860 case AArch64_AM::UXTH: 861 case AArch64_AM::UXTW: 862 case AArch64_AM::UXTX: 863 return AArch64_AM::getArithShiftValue(Imm) == 0; 864 } 865 } 866 867 case AArch64::LDRBBroW: 868 case AArch64::LDRBBroX: 869 case AArch64::LDRBroW: 870 case AArch64::LDRBroX: 871 case AArch64::LDRDroW: 872 case AArch64::LDRDroX: 873 case AArch64::LDRHHroW: 874 case AArch64::LDRHHroX: 875 case AArch64::LDRHroW: 876 case AArch64::LDRHroX: 877 case AArch64::LDRQroW: 878 case AArch64::LDRQroX: 879 case AArch64::LDRSBWroW: 880 case AArch64::LDRSBWroX: 881 case AArch64::LDRSBXroW: 882 case AArch64::LDRSBXroX: 883 case AArch64::LDRSHWroW: 884 case AArch64::LDRSHWroX: 885 case AArch64::LDRSHXroW: 886 case AArch64::LDRSHXroX: 887 case AArch64::LDRSWroW: 888 case AArch64::LDRSWroX: 889 case AArch64::LDRSroW: 890 case AArch64::LDRSroX: 891 case AArch64::LDRWroW: 892 case AArch64::LDRWroX: 893 case AArch64::LDRXroW: 894 case AArch64::LDRXroX: 895 case AArch64::PRFMroW: 896 case AArch64::PRFMroX: 897 case AArch64::STRBBroW: 898 case 
AArch64::STRBBroX: 899 case AArch64::STRBroW: 900 case AArch64::STRBroX: 901 case AArch64::STRDroW: 902 case AArch64::STRDroX: 903 case AArch64::STRHHroW: 904 case AArch64::STRHHroX: 905 case AArch64::STRHroW: 906 case AArch64::STRHroX: 907 case AArch64::STRQroW: 908 case AArch64::STRQroX: 909 case AArch64::STRSroW: 910 case AArch64::STRSroX: 911 case AArch64::STRWroW: 912 case AArch64::STRWroX: 913 case AArch64::STRXroW: 914 case AArch64::STRXroX: { 915 unsigned IsSigned = MI.getOperand(3).getImm(); 916 return !IsSigned; 917 } 918 } 919 } 920 921 bool AArch64InstrInfo::isSEHInstruction(const MachineInstr &MI) { 922 unsigned Opc = MI.getOpcode(); 923 switch (Opc) { 924 default: 925 return false; 926 case AArch64::SEH_StackAlloc: 927 case AArch64::SEH_SaveFPLR: 928 case AArch64::SEH_SaveFPLR_X: 929 case AArch64::SEH_SaveReg: 930 case AArch64::SEH_SaveReg_X: 931 case AArch64::SEH_SaveRegP: 932 case AArch64::SEH_SaveRegP_X: 933 case AArch64::SEH_SaveFReg: 934 case AArch64::SEH_SaveFReg_X: 935 case AArch64::SEH_SaveFRegP: 936 case AArch64::SEH_SaveFRegP_X: 937 case AArch64::SEH_SetFP: 938 case AArch64::SEH_AddFP: 939 case AArch64::SEH_Nop: 940 case AArch64::SEH_PrologEnd: 941 case AArch64::SEH_EpilogStart: 942 case AArch64::SEH_EpilogEnd: 943 return true; 944 } 945 } 946 947 bool AArch64InstrInfo::isCoalescableExtInstr(const MachineInstr &MI, 948 Register &SrcReg, Register &DstReg, 949 unsigned &SubIdx) const { 950 switch (MI.getOpcode()) { 951 default: 952 return false; 953 case AArch64::SBFMXri: // aka sxtw 954 case AArch64::UBFMXri: // aka uxtw 955 // Check for the 32 -> 64 bit extension case, these instructions can do 956 // much more. 957 if (MI.getOperand(2).getImm() != 0 || MI.getOperand(3).getImm() != 31) 958 return false; 959 // This is a signed or unsigned 32 -> 64 bit extension. 
960 SrcReg = MI.getOperand(1).getReg(); 961 DstReg = MI.getOperand(0).getReg(); 962 SubIdx = AArch64::sub_32; 963 return true; 964 } 965 } 966 967 bool AArch64InstrInfo::areMemAccessesTriviallyDisjoint( 968 const MachineInstr &MIa, const MachineInstr &MIb) const { 969 const TargetRegisterInfo *TRI = &getRegisterInfo(); 970 const MachineOperand *BaseOpA = nullptr, *BaseOpB = nullptr; 971 int64_t OffsetA = 0, OffsetB = 0; 972 unsigned WidthA = 0, WidthB = 0; 973 bool OffsetAIsScalable = false, OffsetBIsScalable = false; 974 975 assert(MIa.mayLoadOrStore() && "MIa must be a load or store."); 976 assert(MIb.mayLoadOrStore() && "MIb must be a load or store."); 977 978 if (MIa.hasUnmodeledSideEffects() || MIb.hasUnmodeledSideEffects() || 979 MIa.hasOrderedMemoryRef() || MIb.hasOrderedMemoryRef()) 980 return false; 981 982 // Retrieve the base, offset from the base and width. Width 983 // is the size of memory that is being loaded/stored (e.g. 1, 2, 4, 8). If 984 // base are identical, and the offset of a lower memory access + 985 // the width doesn't overlap the offset of a higher memory access, 986 // then the memory accesses are different. 987 // If OffsetAIsScalable and OffsetBIsScalable are both true, they 988 // are assumed to have the same scale (vscale). 989 if (getMemOperandWithOffsetWidth(MIa, BaseOpA, OffsetA, OffsetAIsScalable, 990 WidthA, TRI) && 991 getMemOperandWithOffsetWidth(MIb, BaseOpB, OffsetB, OffsetBIsScalable, 992 WidthB, TRI)) { 993 if (BaseOpA->isIdenticalTo(*BaseOpB) && 994 OffsetAIsScalable == OffsetBIsScalable) { 995 int LowOffset = OffsetA < OffsetB ? OffsetA : OffsetB; 996 int HighOffset = OffsetA < OffsetB ? OffsetB : OffsetA; 997 int LowWidth = (LowOffset == OffsetA) ? 
WidthA : WidthB; 998 if (LowOffset + LowWidth <= HighOffset) 999 return true; 1000 } 1001 } 1002 return false; 1003 } 1004 1005 bool AArch64InstrInfo::isSchedulingBoundary(const MachineInstr &MI, 1006 const MachineBasicBlock *MBB, 1007 const MachineFunction &MF) const { 1008 if (TargetInstrInfo::isSchedulingBoundary(MI, MBB, MF)) 1009 return true; 1010 switch (MI.getOpcode()) { 1011 case AArch64::HINT: 1012 // CSDB hints are scheduling barriers. 1013 if (MI.getOperand(0).getImm() == 0x14) 1014 return true; 1015 break; 1016 case AArch64::DSB: 1017 case AArch64::ISB: 1018 // DSB and ISB also are scheduling barriers. 1019 return true; 1020 default:; 1021 } 1022 return isSEHInstruction(MI); 1023 } 1024 1025 /// analyzeCompare - For a comparison instruction, return the source registers 1026 /// in SrcReg and SrcReg2, and the value it compares against in CmpValue. 1027 /// Return true if the comparison instruction can be analyzed. 1028 bool AArch64InstrInfo::analyzeCompare(const MachineInstr &MI, Register &SrcReg, 1029 Register &SrcReg2, int &CmpMask, 1030 int &CmpValue) const { 1031 // The first operand can be a frame index where we'd normally expect a 1032 // register. 1033 assert(MI.getNumOperands() >= 2 && "All AArch64 cmps should have 2 operands"); 1034 if (!MI.getOperand(1).isReg()) 1035 return false; 1036 1037 switch (MI.getOpcode()) { 1038 default: 1039 break; 1040 case AArch64::SUBSWrr: 1041 case AArch64::SUBSWrs: 1042 case AArch64::SUBSWrx: 1043 case AArch64::SUBSXrr: 1044 case AArch64::SUBSXrs: 1045 case AArch64::SUBSXrx: 1046 case AArch64::ADDSWrr: 1047 case AArch64::ADDSWrs: 1048 case AArch64::ADDSWrx: 1049 case AArch64::ADDSXrr: 1050 case AArch64::ADDSXrs: 1051 case AArch64::ADDSXrx: 1052 // Replace SUBSWrr with SUBWrr if NZCV is not used. 
1053 SrcReg = MI.getOperand(1).getReg(); 1054 SrcReg2 = MI.getOperand(2).getReg(); 1055 CmpMask = ~0; 1056 CmpValue = 0; 1057 return true; 1058 case AArch64::SUBSWri: 1059 case AArch64::ADDSWri: 1060 case AArch64::SUBSXri: 1061 case AArch64::ADDSXri: 1062 SrcReg = MI.getOperand(1).getReg(); 1063 SrcReg2 = 0; 1064 CmpMask = ~0; 1065 // FIXME: In order to convert CmpValue to 0 or 1 1066 CmpValue = MI.getOperand(2).getImm() != 0; 1067 return true; 1068 case AArch64::ANDSWri: 1069 case AArch64::ANDSXri: 1070 // ANDS does not use the same encoding scheme as the others xxxS 1071 // instructions. 1072 SrcReg = MI.getOperand(1).getReg(); 1073 SrcReg2 = 0; 1074 CmpMask = ~0; 1075 // FIXME:The return val type of decodeLogicalImmediate is uint64_t, 1076 // while the type of CmpValue is int. When converting uint64_t to int, 1077 // the high 32 bits of uint64_t will be lost. 1078 // In fact it causes a bug in spec2006-483.xalancbmk 1079 // CmpValue is only used to compare with zero in OptimizeCompareInstr 1080 CmpValue = AArch64_AM::decodeLogicalImmediate( 1081 MI.getOperand(2).getImm(), 1082 MI.getOpcode() == AArch64::ANDSWri ? 32 : 64) != 0; 1083 return true; 1084 } 1085 1086 return false; 1087 } 1088 1089 static bool UpdateOperandRegClass(MachineInstr &Instr) { 1090 MachineBasicBlock *MBB = Instr.getParent(); 1091 assert(MBB && "Can't get MachineBasicBlock here"); 1092 MachineFunction *MF = MBB->getParent(); 1093 assert(MF && "Can't get MachineFunction here"); 1094 const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo(); 1095 const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo(); 1096 MachineRegisterInfo *MRI = &MF->getRegInfo(); 1097 1098 for (unsigned OpIdx = 0, EndIdx = Instr.getNumOperands(); OpIdx < EndIdx; 1099 ++OpIdx) { 1100 MachineOperand &MO = Instr.getOperand(OpIdx); 1101 const TargetRegisterClass *OpRegCstraints = 1102 Instr.getRegClassConstraint(OpIdx, TII, TRI); 1103 1104 // If there's no constraint, there's nothing to do. 
1105 if (!OpRegCstraints) 1106 continue; 1107 // If the operand is a frame index, there's nothing to do here. 1108 // A frame index operand will resolve correctly during PEI. 1109 if (MO.isFI()) 1110 continue; 1111 1112 assert(MO.isReg() && 1113 "Operand has register constraints without being a register!"); 1114 1115 Register Reg = MO.getReg(); 1116 if (Register::isPhysicalRegister(Reg)) { 1117 if (!OpRegCstraints->contains(Reg)) 1118 return false; 1119 } else if (!OpRegCstraints->hasSubClassEq(MRI->getRegClass(Reg)) && 1120 !MRI->constrainRegClass(Reg, OpRegCstraints)) 1121 return false; 1122 } 1123 1124 return true; 1125 } 1126 1127 /// Return the opcode that does not set flags when possible - otherwise 1128 /// return the original opcode. The caller is responsible to do the actual 1129 /// substitution and legality checking. 1130 static unsigned convertToNonFlagSettingOpc(const MachineInstr &MI) { 1131 // Don't convert all compare instructions, because for some the zero register 1132 // encoding becomes the sp register. 1133 bool MIDefinesZeroReg = false; 1134 if (MI.definesRegister(AArch64::WZR) || MI.definesRegister(AArch64::XZR)) 1135 MIDefinesZeroReg = true; 1136 1137 switch (MI.getOpcode()) { 1138 default: 1139 return MI.getOpcode(); 1140 case AArch64::ADDSWrr: 1141 return AArch64::ADDWrr; 1142 case AArch64::ADDSWri: 1143 return MIDefinesZeroReg ? AArch64::ADDSWri : AArch64::ADDWri; 1144 case AArch64::ADDSWrs: 1145 return MIDefinesZeroReg ? AArch64::ADDSWrs : AArch64::ADDWrs; 1146 case AArch64::ADDSWrx: 1147 return AArch64::ADDWrx; 1148 case AArch64::ADDSXrr: 1149 return AArch64::ADDXrr; 1150 case AArch64::ADDSXri: 1151 return MIDefinesZeroReg ? AArch64::ADDSXri : AArch64::ADDXri; 1152 case AArch64::ADDSXrs: 1153 return MIDefinesZeroReg ? AArch64::ADDSXrs : AArch64::ADDXrs; 1154 case AArch64::ADDSXrx: 1155 return AArch64::ADDXrx; 1156 case AArch64::SUBSWrr: 1157 return AArch64::SUBWrr; 1158 case AArch64::SUBSWri: 1159 return MIDefinesZeroReg ? 
AArch64::SUBSWri : AArch64::SUBWri; 1160 case AArch64::SUBSWrs: 1161 return MIDefinesZeroReg ? AArch64::SUBSWrs : AArch64::SUBWrs; 1162 case AArch64::SUBSWrx: 1163 return AArch64::SUBWrx; 1164 case AArch64::SUBSXrr: 1165 return AArch64::SUBXrr; 1166 case AArch64::SUBSXri: 1167 return MIDefinesZeroReg ? AArch64::SUBSXri : AArch64::SUBXri; 1168 case AArch64::SUBSXrs: 1169 return MIDefinesZeroReg ? AArch64::SUBSXrs : AArch64::SUBXrs; 1170 case AArch64::SUBSXrx: 1171 return AArch64::SUBXrx; 1172 } 1173 } 1174 1175 enum AccessKind { AK_Write = 0x01, AK_Read = 0x10, AK_All = 0x11 }; 1176 1177 /// True when condition flags are accessed (either by writing or reading) 1178 /// on the instruction trace starting at From and ending at To. 1179 /// 1180 /// Note: If From and To are from different blocks it's assumed CC are accessed 1181 /// on the path. 1182 static bool areCFlagsAccessedBetweenInstrs( 1183 MachineBasicBlock::iterator From, MachineBasicBlock::iterator To, 1184 const TargetRegisterInfo *TRI, const AccessKind AccessToCheck = AK_All) { 1185 // Early exit if To is at the beginning of the BB. 1186 if (To == To->getParent()->begin()) 1187 return true; 1188 1189 // Check whether the instructions are in the same basic block 1190 // If not, assume the condition flags might get modified somewhere. 1191 if (To->getParent() != From->getParent()) 1192 return true; 1193 1194 // From must be above To. 1195 assert(std::find_if(++To.getReverse(), To->getParent()->rend(), 1196 [From](MachineInstr &MI) { 1197 return MI.getIterator() == From; 1198 }) != To->getParent()->rend()); 1199 1200 // We iterate backward starting at \p To until we hit \p From. 
1201 for (const MachineInstr &Instr : 1202 instructionsWithoutDebug(++To.getReverse(), From.getReverse())) { 1203 if (((AccessToCheck & AK_Write) && 1204 Instr.modifiesRegister(AArch64::NZCV, TRI)) || 1205 ((AccessToCheck & AK_Read) && Instr.readsRegister(AArch64::NZCV, TRI))) 1206 return true; 1207 } 1208 return false; 1209 } 1210 1211 /// Try to optimize a compare instruction. A compare instruction is an 1212 /// instruction which produces AArch64::NZCV. It can be truly compare 1213 /// instruction 1214 /// when there are no uses of its destination register. 1215 /// 1216 /// The following steps are tried in order: 1217 /// 1. Convert CmpInstr into an unconditional version. 1218 /// 2. Remove CmpInstr if above there is an instruction producing a needed 1219 /// condition code or an instruction which can be converted into such an 1220 /// instruction. 1221 /// Only comparison with zero is supported. 1222 bool AArch64InstrInfo::optimizeCompareInstr( 1223 MachineInstr &CmpInstr, Register SrcReg, Register SrcReg2, int CmpMask, 1224 int CmpValue, const MachineRegisterInfo *MRI) const { 1225 assert(CmpInstr.getParent()); 1226 assert(MRI); 1227 1228 // Replace SUBSWrr with SUBWrr if NZCV is not used. 1229 int DeadNZCVIdx = CmpInstr.findRegisterDefOperandIdx(AArch64::NZCV, true); 1230 if (DeadNZCVIdx != -1) { 1231 if (CmpInstr.definesRegister(AArch64::WZR) || 1232 CmpInstr.definesRegister(AArch64::XZR)) { 1233 CmpInstr.eraseFromParent(); 1234 return true; 1235 } 1236 unsigned Opc = CmpInstr.getOpcode(); 1237 unsigned NewOpc = convertToNonFlagSettingOpc(CmpInstr); 1238 if (NewOpc == Opc) 1239 return false; 1240 const MCInstrDesc &MCID = get(NewOpc); 1241 CmpInstr.setDesc(MCID); 1242 CmpInstr.RemoveOperand(DeadNZCVIdx); 1243 bool succeeded = UpdateOperandRegClass(CmpInstr); 1244 (void)succeeded; 1245 assert(succeeded && "Some operands reg class are incompatible!"); 1246 return true; 1247 } 1248 1249 // Continue only if we have a "ri" where immediate is zero. 
1250 // FIXME:CmpValue has already been converted to 0 or 1 in analyzeCompare 1251 // function. 1252 assert((CmpValue == 0 || CmpValue == 1) && "CmpValue must be 0 or 1!"); 1253 if (CmpValue != 0 || SrcReg2 != 0) 1254 return false; 1255 1256 // CmpInstr is a Compare instruction if destination register is not used. 1257 if (!MRI->use_nodbg_empty(CmpInstr.getOperand(0).getReg())) 1258 return false; 1259 1260 return substituteCmpToZero(CmpInstr, SrcReg, MRI); 1261 } 1262 1263 /// Get opcode of S version of Instr. 1264 /// If Instr is S version its opcode is returned. 1265 /// AArch64::INSTRUCTION_LIST_END is returned if Instr does not have S version 1266 /// or we are not interested in it. 1267 static unsigned sForm(MachineInstr &Instr) { 1268 switch (Instr.getOpcode()) { 1269 default: 1270 return AArch64::INSTRUCTION_LIST_END; 1271 1272 case AArch64::ADDSWrr: 1273 case AArch64::ADDSWri: 1274 case AArch64::ADDSXrr: 1275 case AArch64::ADDSXri: 1276 case AArch64::SUBSWrr: 1277 case AArch64::SUBSWri: 1278 case AArch64::SUBSXrr: 1279 case AArch64::SUBSXri: 1280 return Instr.getOpcode(); 1281 1282 case AArch64::ADDWrr: 1283 return AArch64::ADDSWrr; 1284 case AArch64::ADDWri: 1285 return AArch64::ADDSWri; 1286 case AArch64::ADDXrr: 1287 return AArch64::ADDSXrr; 1288 case AArch64::ADDXri: 1289 return AArch64::ADDSXri; 1290 case AArch64::ADCWr: 1291 return AArch64::ADCSWr; 1292 case AArch64::ADCXr: 1293 return AArch64::ADCSXr; 1294 case AArch64::SUBWrr: 1295 return AArch64::SUBSWrr; 1296 case AArch64::SUBWri: 1297 return AArch64::SUBSWri; 1298 case AArch64::SUBXrr: 1299 return AArch64::SUBSXrr; 1300 case AArch64::SUBXri: 1301 return AArch64::SUBSXri; 1302 case AArch64::SBCWr: 1303 return AArch64::SBCSWr; 1304 case AArch64::SBCXr: 1305 return AArch64::SBCSXr; 1306 case AArch64::ANDWri: 1307 return AArch64::ANDSWri; 1308 case AArch64::ANDXri: 1309 return AArch64::ANDSXri; 1310 } 1311 } 1312 1313 /// Check if AArch64::NZCV should be alive in successors of MBB. 
1314 static bool areCFlagsAliveInSuccessors(MachineBasicBlock *MBB) { 1315 for (auto *BB : MBB->successors()) 1316 if (BB->isLiveIn(AArch64::NZCV)) 1317 return true; 1318 return false; 1319 } 1320 1321 namespace { 1322 1323 struct UsedNZCV { 1324 bool N = false; 1325 bool Z = false; 1326 bool C = false; 1327 bool V = false; 1328 1329 UsedNZCV() = default; 1330 1331 UsedNZCV &operator|=(const UsedNZCV &UsedFlags) { 1332 this->N |= UsedFlags.N; 1333 this->Z |= UsedFlags.Z; 1334 this->C |= UsedFlags.C; 1335 this->V |= UsedFlags.V; 1336 return *this; 1337 } 1338 }; 1339 1340 } // end anonymous namespace 1341 1342 /// Find a condition code used by the instruction. 1343 /// Returns AArch64CC::Invalid if either the instruction does not use condition 1344 /// codes or we don't optimize CmpInstr in the presence of such instructions. 1345 static AArch64CC::CondCode findCondCodeUsedByInstr(const MachineInstr &Instr) { 1346 switch (Instr.getOpcode()) { 1347 default: 1348 return AArch64CC::Invalid; 1349 1350 case AArch64::Bcc: { 1351 int Idx = Instr.findRegisterUseOperandIdx(AArch64::NZCV); 1352 assert(Idx >= 2); 1353 return static_cast<AArch64CC::CondCode>(Instr.getOperand(Idx - 2).getImm()); 1354 } 1355 1356 case AArch64::CSINVWr: 1357 case AArch64::CSINVXr: 1358 case AArch64::CSINCWr: 1359 case AArch64::CSINCXr: 1360 case AArch64::CSELWr: 1361 case AArch64::CSELXr: 1362 case AArch64::CSNEGWr: 1363 case AArch64::CSNEGXr: 1364 case AArch64::FCSELSrrr: 1365 case AArch64::FCSELDrrr: { 1366 int Idx = Instr.findRegisterUseOperandIdx(AArch64::NZCV); 1367 assert(Idx >= 1); 1368 return static_cast<AArch64CC::CondCode>(Instr.getOperand(Idx - 1).getImm()); 1369 } 1370 } 1371 } 1372 1373 static UsedNZCV getUsedNZCV(AArch64CC::CondCode CC) { 1374 assert(CC != AArch64CC::Invalid); 1375 UsedNZCV UsedFlags; 1376 switch (CC) { 1377 default: 1378 break; 1379 1380 case AArch64CC::EQ: // Z set 1381 case AArch64CC::NE: // Z clear 1382 UsedFlags.Z = true; 1383 break; 1384 1385 case AArch64CC::HI: 
// Z clear and C set 1386 case AArch64CC::LS: // Z set or C clear 1387 UsedFlags.Z = true; 1388 LLVM_FALLTHROUGH; 1389 case AArch64CC::HS: // C set 1390 case AArch64CC::LO: // C clear 1391 UsedFlags.C = true; 1392 break; 1393 1394 case AArch64CC::MI: // N set 1395 case AArch64CC::PL: // N clear 1396 UsedFlags.N = true; 1397 break; 1398 1399 case AArch64CC::VS: // V set 1400 case AArch64CC::VC: // V clear 1401 UsedFlags.V = true; 1402 break; 1403 1404 case AArch64CC::GT: // Z clear, N and V the same 1405 case AArch64CC::LE: // Z set, N and V differ 1406 UsedFlags.Z = true; 1407 LLVM_FALLTHROUGH; 1408 case AArch64CC::GE: // N and V the same 1409 case AArch64CC::LT: // N and V differ 1410 UsedFlags.N = true; 1411 UsedFlags.V = true; 1412 break; 1413 } 1414 return UsedFlags; 1415 } 1416 1417 static bool isADDSRegImm(unsigned Opcode) { 1418 return Opcode == AArch64::ADDSWri || Opcode == AArch64::ADDSXri; 1419 } 1420 1421 static bool isSUBSRegImm(unsigned Opcode) { 1422 return Opcode == AArch64::SUBSWri || Opcode == AArch64::SUBSXri; 1423 } 1424 1425 /// Check if CmpInstr can be substituted by MI. 1426 /// 1427 /// CmpInstr can be substituted: 1428 /// - CmpInstr is either 'ADDS %vreg, 0' or 'SUBS %vreg, 0' 1429 /// - and, MI and CmpInstr are from the same MachineBB 1430 /// - and, condition flags are not alive in successors of the CmpInstr parent 1431 /// - and, if MI opcode is the S form there must be no defs of flags between 1432 /// MI and CmpInstr 1433 /// or if MI opcode is not the S form there must be neither defs of flags 1434 /// nor uses of flags between MI and CmpInstr. 
1435 /// - and C/V flags are not used after CmpInstr 1436 static bool canInstrSubstituteCmpInstr(MachineInstr *MI, MachineInstr *CmpInstr, 1437 const TargetRegisterInfo *TRI) { 1438 assert(MI); 1439 assert(sForm(*MI) != AArch64::INSTRUCTION_LIST_END); 1440 assert(CmpInstr); 1441 1442 const unsigned CmpOpcode = CmpInstr->getOpcode(); 1443 if (!isADDSRegImm(CmpOpcode) && !isSUBSRegImm(CmpOpcode)) 1444 return false; 1445 1446 if (MI->getParent() != CmpInstr->getParent()) 1447 return false; 1448 1449 if (areCFlagsAliveInSuccessors(CmpInstr->getParent())) 1450 return false; 1451 1452 AccessKind AccessToCheck = AK_Write; 1453 if (sForm(*MI) != MI->getOpcode()) 1454 AccessToCheck = AK_All; 1455 if (areCFlagsAccessedBetweenInstrs(MI, CmpInstr, TRI, AccessToCheck)) 1456 return false; 1457 1458 UsedNZCV NZCVUsedAfterCmp; 1459 for (const MachineInstr &Instr : 1460 instructionsWithoutDebug(std::next(CmpInstr->getIterator()), 1461 CmpInstr->getParent()->instr_end())) { 1462 if (Instr.readsRegister(AArch64::NZCV, TRI)) { 1463 AArch64CC::CondCode CC = findCondCodeUsedByInstr(Instr); 1464 if (CC == AArch64CC::Invalid) // Unsupported conditional instruction 1465 return false; 1466 NZCVUsedAfterCmp |= getUsedNZCV(CC); 1467 } 1468 1469 if (Instr.modifiesRegister(AArch64::NZCV, TRI)) 1470 break; 1471 } 1472 1473 return !NZCVUsedAfterCmp.C && !NZCVUsedAfterCmp.V; 1474 } 1475 1476 /// Substitute an instruction comparing to zero with another instruction 1477 /// which produces needed condition flags. 1478 /// 1479 /// Return true on success. 1480 bool AArch64InstrInfo::substituteCmpToZero( 1481 MachineInstr &CmpInstr, unsigned SrcReg, 1482 const MachineRegisterInfo *MRI) const { 1483 assert(MRI); 1484 // Get the unique definition of SrcReg. 
1485 MachineInstr *MI = MRI->getUniqueVRegDef(SrcReg); 1486 if (!MI) 1487 return false; 1488 1489 const TargetRegisterInfo *TRI = &getRegisterInfo(); 1490 1491 unsigned NewOpc = sForm(*MI); 1492 if (NewOpc == AArch64::INSTRUCTION_LIST_END) 1493 return false; 1494 1495 if (!canInstrSubstituteCmpInstr(MI, &CmpInstr, TRI)) 1496 return false; 1497 1498 // Update the instruction to set NZCV. 1499 MI->setDesc(get(NewOpc)); 1500 CmpInstr.eraseFromParent(); 1501 bool succeeded = UpdateOperandRegClass(*MI); 1502 (void)succeeded; 1503 assert(succeeded && "Some operands reg class are incompatible!"); 1504 MI->addRegisterDefined(AArch64::NZCV, TRI); 1505 return true; 1506 } 1507 1508 bool AArch64InstrInfo::expandPostRAPseudo(MachineInstr &MI) const { 1509 if (MI.getOpcode() != TargetOpcode::LOAD_STACK_GUARD && 1510 MI.getOpcode() != AArch64::CATCHRET) 1511 return false; 1512 1513 MachineBasicBlock &MBB = *MI.getParent(); 1514 auto &Subtarget = MBB.getParent()->getSubtarget<AArch64Subtarget>(); 1515 auto TRI = Subtarget.getRegisterInfo(); 1516 DebugLoc DL = MI.getDebugLoc(); 1517 1518 if (MI.getOpcode() == AArch64::CATCHRET) { 1519 // Skip to the first instruction before the epilog. 
1520 const TargetInstrInfo *TII = 1521 MBB.getParent()->getSubtarget().getInstrInfo(); 1522 MachineBasicBlock *TargetMBB = MI.getOperand(0).getMBB(); 1523 auto MBBI = MachineBasicBlock::iterator(MI); 1524 MachineBasicBlock::iterator FirstEpilogSEH = std::prev(MBBI); 1525 while (FirstEpilogSEH->getFlag(MachineInstr::FrameDestroy) && 1526 FirstEpilogSEH != MBB.begin()) 1527 FirstEpilogSEH = std::prev(FirstEpilogSEH); 1528 if (FirstEpilogSEH != MBB.begin()) 1529 FirstEpilogSEH = std::next(FirstEpilogSEH); 1530 BuildMI(MBB, FirstEpilogSEH, DL, TII->get(AArch64::ADRP)) 1531 .addReg(AArch64::X0, RegState::Define) 1532 .addMBB(TargetMBB); 1533 BuildMI(MBB, FirstEpilogSEH, DL, TII->get(AArch64::ADDXri)) 1534 .addReg(AArch64::X0, RegState::Define) 1535 .addReg(AArch64::X0) 1536 .addMBB(TargetMBB) 1537 .addImm(0); 1538 return true; 1539 } 1540 1541 Register Reg = MI.getOperand(0).getReg(); 1542 const GlobalValue *GV = 1543 cast<GlobalValue>((*MI.memoperands_begin())->getValue()); 1544 const TargetMachine &TM = MBB.getParent()->getTarget(); 1545 unsigned OpFlags = Subtarget.ClassifyGlobalReference(GV, TM); 1546 const unsigned char MO_NC = AArch64II::MO_NC; 1547 1548 if ((OpFlags & AArch64II::MO_GOT) != 0) { 1549 BuildMI(MBB, MI, DL, get(AArch64::LOADgot), Reg) 1550 .addGlobalAddress(GV, 0, OpFlags); 1551 if (Subtarget.isTargetILP32()) { 1552 unsigned Reg32 = TRI->getSubReg(Reg, AArch64::sub_32); 1553 BuildMI(MBB, MI, DL, get(AArch64::LDRWui)) 1554 .addDef(Reg32, RegState::Dead) 1555 .addUse(Reg, RegState::Kill) 1556 .addImm(0) 1557 .addMemOperand(*MI.memoperands_begin()) 1558 .addDef(Reg, RegState::Implicit); 1559 } else { 1560 BuildMI(MBB, MI, DL, get(AArch64::LDRXui), Reg) 1561 .addReg(Reg, RegState::Kill) 1562 .addImm(0) 1563 .addMemOperand(*MI.memoperands_begin()); 1564 } 1565 } else if (TM.getCodeModel() == CodeModel::Large) { 1566 assert(!Subtarget.isTargetILP32() && "how can large exist in ILP32?"); 1567 BuildMI(MBB, MI, DL, get(AArch64::MOVZXi), Reg) 1568 
.addGlobalAddress(GV, 0, AArch64II::MO_G0 | MO_NC) 1569 .addImm(0); 1570 BuildMI(MBB, MI, DL, get(AArch64::MOVKXi), Reg) 1571 .addReg(Reg, RegState::Kill) 1572 .addGlobalAddress(GV, 0, AArch64II::MO_G1 | MO_NC) 1573 .addImm(16); 1574 BuildMI(MBB, MI, DL, get(AArch64::MOVKXi), Reg) 1575 .addReg(Reg, RegState::Kill) 1576 .addGlobalAddress(GV, 0, AArch64II::MO_G2 | MO_NC) 1577 .addImm(32); 1578 BuildMI(MBB, MI, DL, get(AArch64::MOVKXi), Reg) 1579 .addReg(Reg, RegState::Kill) 1580 .addGlobalAddress(GV, 0, AArch64II::MO_G3) 1581 .addImm(48); 1582 BuildMI(MBB, MI, DL, get(AArch64::LDRXui), Reg) 1583 .addReg(Reg, RegState::Kill) 1584 .addImm(0) 1585 .addMemOperand(*MI.memoperands_begin()); 1586 } else if (TM.getCodeModel() == CodeModel::Tiny) { 1587 BuildMI(MBB, MI, DL, get(AArch64::ADR), Reg) 1588 .addGlobalAddress(GV, 0, OpFlags); 1589 } else { 1590 BuildMI(MBB, MI, DL, get(AArch64::ADRP), Reg) 1591 .addGlobalAddress(GV, 0, OpFlags | AArch64II::MO_PAGE); 1592 unsigned char LoFlags = OpFlags | AArch64II::MO_PAGEOFF | MO_NC; 1593 if (Subtarget.isTargetILP32()) { 1594 unsigned Reg32 = TRI->getSubReg(Reg, AArch64::sub_32); 1595 BuildMI(MBB, MI, DL, get(AArch64::LDRWui)) 1596 .addDef(Reg32, RegState::Dead) 1597 .addUse(Reg, RegState::Kill) 1598 .addGlobalAddress(GV, 0, LoFlags) 1599 .addMemOperand(*MI.memoperands_begin()) 1600 .addDef(Reg, RegState::Implicit); 1601 } else { 1602 BuildMI(MBB, MI, DL, get(AArch64::LDRXui), Reg) 1603 .addReg(Reg, RegState::Kill) 1604 .addGlobalAddress(GV, 0, LoFlags) 1605 .addMemOperand(*MI.memoperands_begin()); 1606 } 1607 } 1608 1609 MBB.erase(MI); 1610 1611 return true; 1612 } 1613 1614 // Return true if this instruction simply sets its single destination register 1615 // to zero. This is equivalent to a register rename of the zero-register. 
1616 bool AArch64InstrInfo::isGPRZero(const MachineInstr &MI) { 1617 switch (MI.getOpcode()) { 1618 default: 1619 break; 1620 case AArch64::MOVZWi: 1621 case AArch64::MOVZXi: // movz Rd, #0 (LSL #0) 1622 if (MI.getOperand(1).isImm() && MI.getOperand(1).getImm() == 0) { 1623 assert(MI.getDesc().getNumOperands() == 3 && 1624 MI.getOperand(2).getImm() == 0 && "invalid MOVZi operands"); 1625 return true; 1626 } 1627 break; 1628 case AArch64::ANDWri: // and Rd, Rzr, #imm 1629 return MI.getOperand(1).getReg() == AArch64::WZR; 1630 case AArch64::ANDXri: 1631 return MI.getOperand(1).getReg() == AArch64::XZR; 1632 case TargetOpcode::COPY: 1633 return MI.getOperand(1).getReg() == AArch64::WZR; 1634 } 1635 return false; 1636 } 1637 1638 // Return true if this instruction simply renames a general register without 1639 // modifying bits. 1640 bool AArch64InstrInfo::isGPRCopy(const MachineInstr &MI) { 1641 switch (MI.getOpcode()) { 1642 default: 1643 break; 1644 case TargetOpcode::COPY: { 1645 // GPR32 copies will by lowered to ORRXrs 1646 Register DstReg = MI.getOperand(0).getReg(); 1647 return (AArch64::GPR32RegClass.contains(DstReg) || 1648 AArch64::GPR64RegClass.contains(DstReg)); 1649 } 1650 case AArch64::ORRXrs: // orr Xd, Xzr, Xm (LSL #0) 1651 if (MI.getOperand(1).getReg() == AArch64::XZR) { 1652 assert(MI.getDesc().getNumOperands() == 4 && 1653 MI.getOperand(3).getImm() == 0 && "invalid ORRrs operands"); 1654 return true; 1655 } 1656 break; 1657 case AArch64::ADDXri: // add Xd, Xn, #0 (LSL #0) 1658 if (MI.getOperand(2).getImm() == 0) { 1659 assert(MI.getDesc().getNumOperands() == 4 && 1660 MI.getOperand(3).getImm() == 0 && "invalid ADDXri operands"); 1661 return true; 1662 } 1663 break; 1664 } 1665 return false; 1666 } 1667 1668 // Return true if this instruction simply renames a general register without 1669 // modifying bits. 
1670 bool AArch64InstrInfo::isFPRCopy(const MachineInstr &MI) { 1671 switch (MI.getOpcode()) { 1672 default: 1673 break; 1674 case TargetOpcode::COPY: { 1675 // FPR64 copies will by lowered to ORR.16b 1676 Register DstReg = MI.getOperand(0).getReg(); 1677 return (AArch64::FPR64RegClass.contains(DstReg) || 1678 AArch64::FPR128RegClass.contains(DstReg)); 1679 } 1680 case AArch64::ORRv16i8: 1681 if (MI.getOperand(1).getReg() == MI.getOperand(2).getReg()) { 1682 assert(MI.getDesc().getNumOperands() == 3 && MI.getOperand(0).isReg() && 1683 "invalid ORRv16i8 operands"); 1684 return true; 1685 } 1686 break; 1687 } 1688 return false; 1689 } 1690 1691 unsigned AArch64InstrInfo::isLoadFromStackSlot(const MachineInstr &MI, 1692 int &FrameIndex) const { 1693 switch (MI.getOpcode()) { 1694 default: 1695 break; 1696 case AArch64::LDRWui: 1697 case AArch64::LDRXui: 1698 case AArch64::LDRBui: 1699 case AArch64::LDRHui: 1700 case AArch64::LDRSui: 1701 case AArch64::LDRDui: 1702 case AArch64::LDRQui: 1703 if (MI.getOperand(0).getSubReg() == 0 && MI.getOperand(1).isFI() && 1704 MI.getOperand(2).isImm() && MI.getOperand(2).getImm() == 0) { 1705 FrameIndex = MI.getOperand(1).getIndex(); 1706 return MI.getOperand(0).getReg(); 1707 } 1708 break; 1709 } 1710 1711 return 0; 1712 } 1713 1714 unsigned AArch64InstrInfo::isStoreToStackSlot(const MachineInstr &MI, 1715 int &FrameIndex) const { 1716 switch (MI.getOpcode()) { 1717 default: 1718 break; 1719 case AArch64::STRWui: 1720 case AArch64::STRXui: 1721 case AArch64::STRBui: 1722 case AArch64::STRHui: 1723 case AArch64::STRSui: 1724 case AArch64::STRDui: 1725 case AArch64::STRQui: 1726 case AArch64::LDR_PXI: 1727 case AArch64::STR_PXI: 1728 if (MI.getOperand(0).getSubReg() == 0 && MI.getOperand(1).isFI() && 1729 MI.getOperand(2).isImm() && MI.getOperand(2).getImm() == 0) { 1730 FrameIndex = MI.getOperand(1).getIndex(); 1731 return MI.getOperand(0).getReg(); 1732 } 1733 break; 1734 } 1735 return 0; 1736 } 1737 1738 /// Check all 
MachineMemOperands for a hint to suppress pairing. 1739 bool AArch64InstrInfo::isLdStPairSuppressed(const MachineInstr &MI) { 1740 return llvm::any_of(MI.memoperands(), [](MachineMemOperand *MMO) { 1741 return MMO->getFlags() & MOSuppressPair; 1742 }); 1743 } 1744 1745 /// Set a flag on the first MachineMemOperand to suppress pairing. 1746 void AArch64InstrInfo::suppressLdStPair(MachineInstr &MI) { 1747 if (MI.memoperands_empty()) 1748 return; 1749 (*MI.memoperands_begin())->setFlags(MOSuppressPair); 1750 } 1751 1752 /// Check all MachineMemOperands for a hint that the load/store is strided. 1753 bool AArch64InstrInfo::isStridedAccess(const MachineInstr &MI) { 1754 return llvm::any_of(MI.memoperands(), [](MachineMemOperand *MMO) { 1755 return MMO->getFlags() & MOStridedAccess; 1756 }); 1757 } 1758 1759 bool AArch64InstrInfo::isUnscaledLdSt(unsigned Opc) { 1760 switch (Opc) { 1761 default: 1762 return false; 1763 case AArch64::STURSi: 1764 case AArch64::STURDi: 1765 case AArch64::STURQi: 1766 case AArch64::STURBBi: 1767 case AArch64::STURHHi: 1768 case AArch64::STURWi: 1769 case AArch64::STURXi: 1770 case AArch64::LDURSi: 1771 case AArch64::LDURDi: 1772 case AArch64::LDURQi: 1773 case AArch64::LDURWi: 1774 case AArch64::LDURXi: 1775 case AArch64::LDURSWi: 1776 case AArch64::LDURHHi: 1777 case AArch64::LDURBBi: 1778 case AArch64::LDURSBWi: 1779 case AArch64::LDURSHWi: 1780 return true; 1781 } 1782 } 1783 1784 Optional<unsigned> AArch64InstrInfo::getUnscaledLdSt(unsigned Opc) { 1785 switch (Opc) { 1786 default: return {}; 1787 case AArch64::PRFMui: return AArch64::PRFUMi; 1788 case AArch64::LDRXui: return AArch64::LDURXi; 1789 case AArch64::LDRWui: return AArch64::LDURWi; 1790 case AArch64::LDRBui: return AArch64::LDURBi; 1791 case AArch64::LDRHui: return AArch64::LDURHi; 1792 case AArch64::LDRSui: return AArch64::LDURSi; 1793 case AArch64::LDRDui: return AArch64::LDURDi; 1794 case AArch64::LDRQui: return AArch64::LDURQi; 1795 case AArch64::LDRBBui: return 
AArch64::LDURBBi; 1796 case AArch64::LDRHHui: return AArch64::LDURHHi; 1797 case AArch64::LDRSBXui: return AArch64::LDURSBXi; 1798 case AArch64::LDRSBWui: return AArch64::LDURSBWi; 1799 case AArch64::LDRSHXui: return AArch64::LDURSHXi; 1800 case AArch64::LDRSHWui: return AArch64::LDURSHWi; 1801 case AArch64::LDRSWui: return AArch64::LDURSWi; 1802 case AArch64::STRXui: return AArch64::STURXi; 1803 case AArch64::STRWui: return AArch64::STURWi; 1804 case AArch64::STRBui: return AArch64::STURBi; 1805 case AArch64::STRHui: return AArch64::STURHi; 1806 case AArch64::STRSui: return AArch64::STURSi; 1807 case AArch64::STRDui: return AArch64::STURDi; 1808 case AArch64::STRQui: return AArch64::STURQi; 1809 case AArch64::STRBBui: return AArch64::STURBBi; 1810 case AArch64::STRHHui: return AArch64::STURHHi; 1811 } 1812 } 1813 1814 unsigned AArch64InstrInfo::getLoadStoreImmIdx(unsigned Opc) { 1815 switch (Opc) { 1816 default: 1817 return 2; 1818 case AArch64::LDPXi: 1819 case AArch64::LDPDi: 1820 case AArch64::STPXi: 1821 case AArch64::STPDi: 1822 case AArch64::LDNPXi: 1823 case AArch64::LDNPDi: 1824 case AArch64::STNPXi: 1825 case AArch64::STNPDi: 1826 case AArch64::LDPQi: 1827 case AArch64::STPQi: 1828 case AArch64::LDNPQi: 1829 case AArch64::STNPQi: 1830 case AArch64::LDPWi: 1831 case AArch64::LDPSi: 1832 case AArch64::STPWi: 1833 case AArch64::STPSi: 1834 case AArch64::LDNPWi: 1835 case AArch64::LDNPSi: 1836 case AArch64::STNPWi: 1837 case AArch64::STNPSi: 1838 case AArch64::LDG: 1839 case AArch64::STGPi: 1840 case AArch64::LD1B_IMM: 1841 case AArch64::LD1H_IMM: 1842 case AArch64::LD1W_IMM: 1843 case AArch64::LD1D_IMM: 1844 case AArch64::ST1B_IMM: 1845 case AArch64::ST1H_IMM: 1846 case AArch64::ST1W_IMM: 1847 case AArch64::ST1D_IMM: 1848 case AArch64::LD1B_H_IMM: 1849 case AArch64::LD1SB_H_IMM: 1850 case AArch64::LD1H_S_IMM: 1851 case AArch64::LD1SH_S_IMM: 1852 case AArch64::LD1W_D_IMM: 1853 case AArch64::LD1SW_D_IMM: 1854 case AArch64::ST1B_H_IMM: 1855 case 
AArch64::ST1H_S_IMM: 1856 case AArch64::ST1W_D_IMM: 1857 case AArch64::LD1B_S_IMM: 1858 case AArch64::LD1SB_S_IMM: 1859 case AArch64::LD1H_D_IMM: 1860 case AArch64::LD1SH_D_IMM: 1861 case AArch64::ST1B_S_IMM: 1862 case AArch64::ST1H_D_IMM: 1863 case AArch64::LD1B_D_IMM: 1864 case AArch64::LD1SB_D_IMM: 1865 case AArch64::ST1B_D_IMM: 1866 return 3; 1867 case AArch64::ADDG: 1868 case AArch64::STGOffset: 1869 case AArch64::LDR_PXI: 1870 case AArch64::STR_PXI: 1871 return 2; 1872 } 1873 } 1874 1875 bool AArch64InstrInfo::isPairableLdStInst(const MachineInstr &MI) { 1876 switch (MI.getOpcode()) { 1877 default: 1878 return false; 1879 // Scaled instructions. 1880 case AArch64::STRSui: 1881 case AArch64::STRDui: 1882 case AArch64::STRQui: 1883 case AArch64::STRXui: 1884 case AArch64::STRWui: 1885 case AArch64::LDRSui: 1886 case AArch64::LDRDui: 1887 case AArch64::LDRQui: 1888 case AArch64::LDRXui: 1889 case AArch64::LDRWui: 1890 case AArch64::LDRSWui: 1891 // Unscaled instructions. 1892 case AArch64::STURSi: 1893 case AArch64::STURDi: 1894 case AArch64::STURQi: 1895 case AArch64::STURWi: 1896 case AArch64::STURXi: 1897 case AArch64::LDURSi: 1898 case AArch64::LDURDi: 1899 case AArch64::LDURQi: 1900 case AArch64::LDURWi: 1901 case AArch64::LDURXi: 1902 case AArch64::LDURSWi: 1903 return true; 1904 } 1905 } 1906 1907 unsigned AArch64InstrInfo::convertToFlagSettingOpc(unsigned Opc, 1908 bool &Is64Bit) { 1909 switch (Opc) { 1910 default: 1911 llvm_unreachable("Opcode has no flag setting equivalent!"); 1912 // 32-bit cases: 1913 case AArch64::ADDWri: 1914 Is64Bit = false; 1915 return AArch64::ADDSWri; 1916 case AArch64::ADDWrr: 1917 Is64Bit = false; 1918 return AArch64::ADDSWrr; 1919 case AArch64::ADDWrs: 1920 Is64Bit = false; 1921 return AArch64::ADDSWrs; 1922 case AArch64::ADDWrx: 1923 Is64Bit = false; 1924 return AArch64::ADDSWrx; 1925 case AArch64::ANDWri: 1926 Is64Bit = false; 1927 return AArch64::ANDSWri; 1928 case AArch64::ANDWrr: 1929 Is64Bit = false; 1930 return 
AArch64::ANDSWrr;
  case AArch64::ANDWrs:
    Is64Bit = false;
    return AArch64::ANDSWrs;
  case AArch64::BICWrr:
    Is64Bit = false;
    return AArch64::BICSWrr;
  case AArch64::BICWrs:
    Is64Bit = false;
    return AArch64::BICSWrs;
  case AArch64::SUBWri:
    Is64Bit = false;
    return AArch64::SUBSWri;
  case AArch64::SUBWrr:
    Is64Bit = false;
    return AArch64::SUBSWrr;
  case AArch64::SUBWrs:
    Is64Bit = false;
    return AArch64::SUBSWrs;
  case AArch64::SUBWrx:
    Is64Bit = false;
    return AArch64::SUBSWrx;
  // 64-bit cases:
  case AArch64::ADDXri:
    Is64Bit = true;
    return AArch64::ADDSXri;
  case AArch64::ADDXrr:
    Is64Bit = true;
    return AArch64::ADDSXrr;
  case AArch64::ADDXrs:
    Is64Bit = true;
    return AArch64::ADDSXrs;
  case AArch64::ADDXrx:
    Is64Bit = true;
    return AArch64::ADDSXrx;
  case AArch64::ANDXri:
    Is64Bit = true;
    return AArch64::ANDSXri;
  case AArch64::ANDXrr:
    Is64Bit = true;
    return AArch64::ANDSXrr;
  case AArch64::ANDXrs:
    Is64Bit = true;
    return AArch64::ANDSXrs;
  case AArch64::BICXrr:
    Is64Bit = true;
    return AArch64::BICSXrr;
  case AArch64::BICXrs:
    Is64Bit = true;
    return AArch64::BICSXrs;
  case AArch64::SUBXri:
    Is64Bit = true;
    return AArch64::SUBSXri;
  case AArch64::SUBXrr:
    Is64Bit = true;
    return AArch64::SUBSXrr;
  case AArch64::SUBXrs:
    Is64Bit = true;
    return AArch64::SUBSXrs;
  case AArch64::SUBXrx:
    Is64Bit = true;
    return AArch64::SUBSXrx;
  }
}

// Is this a candidate for ld/st merging or pairing?  For example, we don't
// touch volatiles or load/stores that have a hint to avoid pair formation.
//
// Returns false as soon as any one of the checks below rejects MI and true
// only when all of them pass. Operand 1 is expected to be the base (register
// or frame index) and operand 2 the offset.
bool AArch64InstrInfo::isCandidateToMergeOrPair(const MachineInstr &MI) const {
  // If this is a volatile load/store, don't mess with it.
  if (MI.hasOrderedMemoryRef())
    return false;

  // Make sure this is a reg/fi+imm (as opposed to an address reloc).
  assert((MI.getOperand(1).isReg() || MI.getOperand(1).isFI()) &&
         "Expected a reg or frame index operand.");
  if (!MI.getOperand(2).isImm())
    return false;

  // Can't merge/pair if the instruction modifies the base register.
  // e.g., ldr x0, [x0]
  // This case will never occur with an FI base.
  if (MI.getOperand(1).isReg()) {
    Register BaseReg = MI.getOperand(1).getReg();
    const TargetRegisterInfo *TRI = &getRegisterInfo();
    if (MI.modifiesRegister(BaseReg, TRI))
      return false;
  }

  // Check if this load/store has a hint to avoid pair formation.
  // MachineMemOperands hints are set by the AArch64StorePairSuppress pass.
  if (isLdStPairSuppressed(MI))
    return false;

  // Do not pair any callee-save store/reload instructions in the
  // prologue/epilogue if the CFI information encoded the operations as separate
  // instructions, as that will cause the size of the actual prologue to mismatch
  // with the prologue size recorded in the Windows CFI.
  const MCAsmInfo *MAI = MI.getMF()->getTarget().getMCAsmInfo();
  bool NeedsWinCFI = MAI->usesWindowsCFI() &&
                     MI.getMF()->getFunction().needsUnwindTableEntry();
  if (NeedsWinCFI && (MI.getFlag(MachineInstr::FrameSetup) ||
                      MI.getFlag(MachineInstr::FrameDestroy)))
    return false;

  // On some CPUs quad load/store pairs are slower than two single load/stores.
  // Only the 128-bit (Q register) single load/store opcodes are rejected here.
  if (Subtarget.isPaired128Slow()) {
    switch (MI.getOpcode()) {
    default:
      break;
    case AArch64::LDURQi:
    case AArch64::STURQi:
    case AArch64::LDRQui:
    case AArch64::STRQui:
      return false;
    }
  }

  return true;
}

// Multi-base-operand wrapper around getMemOperandWithOffsetWidth.
//
// Rejects instructions that neither load nor store. Otherwise forwards to the
// single-base-operand query and, when that succeeds, appends the one base
// operand it found to BaseOps. Offset, OffsetIsScalable and Width are filled
// in by the callee.
bool AArch64InstrInfo::getMemOperandsWithOffsetWidth(
    const MachineInstr &LdSt, SmallVectorImpl<const MachineOperand *> &BaseOps,
    int64_t &Offset, bool &OffsetIsScalable, unsigned &Width,
    const TargetRegisterInfo *TRI) const {
  if (!LdSt.mayLoadOrStore())
    return false;

  const MachineOperand *BaseOp;
  if (!getMemOperandWithOffsetWidth(LdSt, BaseOp, Offset, OffsetIsScalable,
                                    Width, TRI))
    return false;
  BaseOps.push_back(BaseOp);
  return true;
}

// Decomposes a "base + immediate" load/store into its base operand (register
// or frame index), offset and access width.
//
// Only instructions with exactly 3 explicit operands (single register form) or
// exactly 4 explicit operands (register pair form) are accepted; any other
// shape, or an opcode getMemOpInfo does not recognise, yields false. On success
// Offset is the immediate operand multiplied by the opcode's scale (the known
// minimum scale for scalable accesses) and OffsetIsScalable reports whether
// that scale is scalable.
bool AArch64InstrInfo::getMemOperandWithOffsetWidth(
    const MachineInstr &LdSt, const MachineOperand *&BaseOp, int64_t &Offset,
    bool &OffsetIsScalable, unsigned &Width,
    const TargetRegisterInfo *TRI) const {
  assert(LdSt.mayLoadOrStore() && "Expected a memory operation.");
  // Handle only loads/stores with base register followed by immediate offset.
  if (LdSt.getNumExplicitOperands() == 3) {
    // Non-paired instruction (e.g., ldr x1, [x0, #8]).
    if ((!LdSt.getOperand(1).isReg() && !LdSt.getOperand(1).isFI()) ||
        !LdSt.getOperand(2).isImm())
      return false;
  } else if (LdSt.getNumExplicitOperands() == 4) {
    // Paired instruction (e.g., ldp x1, x2, [x0, #8]).
    if (!LdSt.getOperand(1).isReg() ||
        (!LdSt.getOperand(2).isReg() && !LdSt.getOperand(2).isFI()) ||
        !LdSt.getOperand(3).isImm())
      return false;
  } else
    return false;

  // Get the scaling factor for the instruction and set the width for the
  // instruction.
  TypeSize Scale(0U, false);
  // The immediate range reported by getMemOpInfo is not needed here.
  int64_t Dummy1, Dummy2;

  // If this returns false, then it's an instruction we don't want to handle.
  if (!getMemOpInfo(LdSt.getOpcode(), Scale, Width, Dummy1, Dummy2))
    return false;

  // Compute the offset. Offset is calculated as the immediate operand
  // multiplied by the scaling factor. Unscaled instructions have scaling factor
  // set to 1.
  if (LdSt.getNumExplicitOperands() == 3) {
    BaseOp = &LdSt.getOperand(1);
    Offset = LdSt.getOperand(2).getImm() * Scale.getKnownMinSize();
  } else {
    assert(LdSt.getNumExplicitOperands() == 4 && "invalid number of operands");
    BaseOp = &LdSt.getOperand(2);
    Offset = LdSt.getOperand(3).getImm() * Scale.getKnownMinSize();
  }
  OffsetIsScalable = Scale.isScalable();

  if (!BaseOp->isReg() && !BaseOp->isFI())
    return false;

  return true;
}

// Returns the last explicit operand of LdSt, which is asserted to be the
// immediate offset of a memory operation.
MachineOperand &
AArch64InstrInfo::getMemOpBaseRegImmOfsOffsetOperand(MachineInstr &LdSt) const {
  assert(LdSt.mayLoadOrStore() && "Expected a memory operation.");
  MachineOperand &OfsOp = LdSt.getOperand(LdSt.getNumExplicitOperands() - 1);
  assert(OfsOp.isImm() && "Offset operand wasn't immediate.");
  return OfsOp;
}

// Table of addressing properties for the load/store (and tag) opcodes that
// this file knows how to reason about.
//
// For a recognised Opcode this sets
//   Scale               - the factor the immediate operand is multiplied by
//                         (a scalable size for the SVE forms),
//   Width               - the size of the access,
//   MinOffset/MaxOffset - the bounds of the immediate operand; presumably
//                         inclusive and expressed before scaling,
// and returns true. For every other opcode all four outputs are zeroed and the
// function returns false.
bool AArch64InstrInfo::getMemOpInfo(unsigned Opcode, TypeSize &Scale,
                                    unsigned &Width, int64_t &MinOffset,
                                    int64_t &MaxOffset) {
  const unsigned SVEMaxBytesPerVector = AArch64::SVEMaxBitsPerVector / 8;
  switch (Opcode) {
  // Not a memory operation or something we want to handle.
  default:
    Scale = TypeSize::Fixed(0);
    Width = 0;
    MinOffset = MaxOffset = 0;
    return false;
  case AArch64::STRWpost:
  case AArch64::LDRWpost:
    // NOTE(review): every other 32-bit access in this table reports Width = 4;
    // the 32 here looks like a bit count rather than a byte count - confirm
    // before relying on it.
    Width = 32;
    Scale = TypeSize::Fixed(4);
    MinOffset = -256;
    MaxOffset = 255;
    break;
  case AArch64::LDURQi:
  case AArch64::STURQi:
    Width = 16;
    Scale = TypeSize::Fixed(1);
    MinOffset = -256;
    MaxOffset = 255;
    break;
  case AArch64::PRFUMi:
  case AArch64::LDURXi:
  case AArch64::LDURDi:
  case AArch64::STURXi:
  case AArch64::STURDi:
    Width = 8;
    Scale = TypeSize::Fixed(1);
    MinOffset = -256;
    MaxOffset = 255;
    break;
  case AArch64::LDURWi:
  case AArch64::LDURSi:
  case AArch64::LDURSWi:
  case AArch64::STURWi:
  case AArch64::STURSi:
    Width = 4;
    Scale = TypeSize::Fixed(1);
    MinOffset = -256;
    MaxOffset = 255;
    break;
  case AArch64::LDURHi:
  case AArch64::LDURHHi:
  case AArch64::LDURSHXi:
  case AArch64::LDURSHWi:
  case AArch64::STURHi:
  case AArch64::STURHHi:
    Width = 2;
    Scale = TypeSize::Fixed(1);
    MinOffset = -256;
    MaxOffset = 255;
    break;
  case AArch64::LDURBi:
  case AArch64::LDURBBi:
  case AArch64::LDURSBXi:
  case AArch64::LDURSBWi:
  case AArch64::STURBi:
  case AArch64::STURBBi:
    Width = 1;
    Scale = TypeSize::Fixed(1);
    MinOffset = -256;
    MaxOffset = 255;
    break;
  case AArch64::LDPQi:
  case AArch64::LDNPQi:
  case AArch64::STPQi:
  case AArch64::STNPQi:
    Scale = TypeSize::Fixed(16);
    Width = 32;
    MinOffset = -64;
    MaxOffset = 63;
    break;
  case AArch64::LDRQui:
  case AArch64::STRQui:
    Scale = TypeSize::Fixed(16);
    Width = 16;
    MinOffset = 0;
    MaxOffset = 4095;
    break;
  case AArch64::LDPXi:
  case AArch64::LDPDi:
  case AArch64::LDNPXi:
  case AArch64::LDNPDi:
  case AArch64::STPXi:
  case AArch64::STPDi:
  case AArch64::STNPXi:
  case AArch64::STNPDi:
    Scale = TypeSize::Fixed(8);
    Width = 16;
    MinOffset = -64;
    MaxOffset = 63;
    break;
  case AArch64::PRFMui:
  case AArch64::LDRXui:
  case AArch64::LDRDui:
  case AArch64::STRXui:
  case AArch64::STRDui:
    Scale = TypeSize::Fixed(8);
    Width = 8;
    MinOffset = 0;
    MaxOffset = 4095;
    break;
  case AArch64::LDPWi:
  case AArch64::LDPSi:
  case AArch64::LDNPWi:
  case AArch64::LDNPSi:
  case AArch64::STPWi:
  case AArch64::STPSi:
  case AArch64::STNPWi:
  case AArch64::STNPSi:
    Scale = TypeSize::Fixed(4);
    Width = 8;
    MinOffset = -64;
    MaxOffset = 63;
    break;
  case AArch64::LDRWui:
  case AArch64::LDRSui:
  case AArch64::LDRSWui:
  case AArch64::STRWui:
  case AArch64::STRSui:
    Scale = TypeSize::Fixed(4);
    Width = 4;
    MinOffset = 0;
    MaxOffset = 4095;
    break;
  case AArch64::LDRHui:
  case AArch64::LDRHHui:
  case AArch64::LDRSHWui:
  case AArch64::LDRSHXui:
  case AArch64::STRHui:
  case AArch64::STRHHui:
    Scale = TypeSize::Fixed(2);
    Width = 2;
    MinOffset = 0;
    MaxOffset = 4095;
    break;
  case AArch64::LDRBui:
  case AArch64::LDRBBui:
  case AArch64::LDRSBWui:
  case AArch64::LDRSBXui:
  case AArch64::STRBui:
  case AArch64::STRBBui:
    Scale = TypeSize::Fixed(1);
    Width = 1;
    MinOffset = 0;
    MaxOffset = 4095;
    break;
  case AArch64::ADDG:
    Scale = TypeSize::Fixed(16);
    Width = 0;
    MinOffset = 0;
    MaxOffset = 63;
    break;
  case AArch64::TAGPstack:
    Scale = TypeSize::Fixed(16);
    Width = 0;
    // TAGP with a negative offset turns into SUBP, which has a maximum offset
    // of 63 (not 64!).
    MinOffset = -63;
    MaxOffset = 63;
    break;
  case AArch64::LDG:
  case AArch64::STGOffset:
  case AArch64::STZGOffset:
    Scale = TypeSize::Fixed(16);
    Width = 16;
    MinOffset = -256;
    MaxOffset = 255;
    break;
  // For the multi-vector SVE fills/spills below, MaxOffset shrinks by one for
  // each additional vector in the tuple (255, 254, 253, 252).
  case AArch64::STR_ZZZZXI:
  case AArch64::LDR_ZZZZXI:
    Scale = TypeSize::Scalable(16);
    Width = SVEMaxBytesPerVector * 4;
    MinOffset = -256;
    MaxOffset = 252;
    break;
  case AArch64::STR_ZZZXI:
  case AArch64::LDR_ZZZXI:
    Scale = TypeSize::Scalable(16);
    Width = SVEMaxBytesPerVector * 3;
    MinOffset = -256;
    MaxOffset = 253;
    break;
  case AArch64::STR_ZZXI:
  case AArch64::LDR_ZZXI:
    Scale = TypeSize::Scalable(16);
    Width = SVEMaxBytesPerVector * 2;
    MinOffset = -256;
    MaxOffset = 254;
    break;
  case AArch64::LDR_PXI:
  case AArch64::STR_PXI:
    Scale = TypeSize::Scalable(2);
    Width = SVEMaxBytesPerVector / 8;
    MinOffset = -256;
    MaxOffset = 255;
    break;
  case AArch64::LDR_ZXI:
  case AArch64::STR_ZXI:
    Scale = TypeSize::Scalable(16);
    Width = SVEMaxBytesPerVector;
    MinOffset = -256;
    MaxOffset = 255;
    break;
  case AArch64::LD1B_IMM:
  case AArch64::LD1H_IMM:
  case AArch64::LD1W_IMM:
  case AArch64::LD1D_IMM:
  case AArch64::ST1B_IMM:
  case AArch64::ST1H_IMM:
  case AArch64::ST1W_IMM:
  case AArch64::ST1D_IMM:
    // A full vector's worth of data
    // Width = mbytes * elements
    Scale = TypeSize::Scalable(16);
    Width = SVEMaxBytesPerVector;
    MinOffset = -8;
    MaxOffset = 7;
    break;
  case AArch64::LD1B_H_IMM:
  case AArch64::LD1SB_H_IMM:
  case AArch64::LD1H_S_IMM:
  case AArch64::LD1SH_S_IMM:
  case AArch64::LD1W_D_IMM:
  case AArch64::LD1SW_D_IMM:
  case AArch64::ST1B_H_IMM:
  case AArch64::ST1H_S_IMM:
  case AArch64::ST1W_D_IMM:
    // A half vector's worth of data
    // Width = mbytes * elements
    Scale = TypeSize::Scalable(8);
    Width = SVEMaxBytesPerVector / 2;
    MinOffset = -8;
    MaxOffset = 7;
    break;
  case AArch64::LD1B_S_IMM:
  case AArch64::LD1SB_S_IMM:
  case AArch64::LD1H_D_IMM:
  case AArch64::LD1SH_D_IMM:
  case AArch64::ST1B_S_IMM:
  case AArch64::ST1H_D_IMM:
    // A quarter vector's worth of data
    // Width = mbytes * elements
    Scale = TypeSize::Scalable(4);
    Width = SVEMaxBytesPerVector / 4;
    MinOffset = -8;
    MaxOffset = 7;
    break;
  case AArch64::LD1B_D_IMM:
  case AArch64::LD1SB_D_IMM:
  case AArch64::ST1B_D_IMM:
    // An eighth of a vector's worth of data
    // Width = mbytes * elements
    Scale = TypeSize::Scalable(2);
    Width = SVEMaxBytesPerVector / 8;
    MinOffset = -8;
    MaxOffset = 7;
    break;
  case AArch64::ST2GOffset:
  case AArch64::STZ2GOffset:
    Scale = TypeSize::Fixed(16);
    Width = 32;
    MinOffset = -256;
    MaxOffset = 255;
    break;
  case AArch64::STGPi:
    Scale = TypeSize::Fixed(16);
    Width = 16;
    MinOffset = -64;
    MaxOffset = 63;
    break;
  }

  return true;
}

// Scaling factor for unscaled load or store.
2402 int AArch64InstrInfo::getMemScale(unsigned Opc) { 2403 switch (Opc) { 2404 default: 2405 llvm_unreachable("Opcode has unknown scale!"); 2406 case AArch64::LDRBBui: 2407 case AArch64::LDURBBi: 2408 case AArch64::LDRSBWui: 2409 case AArch64::LDURSBWi: 2410 case AArch64::STRBBui: 2411 case AArch64::STURBBi: 2412 return 1; 2413 case AArch64::LDRHHui: 2414 case AArch64::LDURHHi: 2415 case AArch64::LDRSHWui: 2416 case AArch64::LDURSHWi: 2417 case AArch64::STRHHui: 2418 case AArch64::STURHHi: 2419 return 2; 2420 case AArch64::LDRSui: 2421 case AArch64::LDURSi: 2422 case AArch64::LDRSWui: 2423 case AArch64::LDURSWi: 2424 case AArch64::LDRWui: 2425 case AArch64::LDURWi: 2426 case AArch64::STRSui: 2427 case AArch64::STURSi: 2428 case AArch64::STRWui: 2429 case AArch64::STURWi: 2430 case AArch64::LDPSi: 2431 case AArch64::LDPSWi: 2432 case AArch64::LDPWi: 2433 case AArch64::STPSi: 2434 case AArch64::STPWi: 2435 return 4; 2436 case AArch64::LDRDui: 2437 case AArch64::LDURDi: 2438 case AArch64::LDRXui: 2439 case AArch64::LDURXi: 2440 case AArch64::STRDui: 2441 case AArch64::STURDi: 2442 case AArch64::STRXui: 2443 case AArch64::STURXi: 2444 case AArch64::LDPDi: 2445 case AArch64::LDPXi: 2446 case AArch64::STPDi: 2447 case AArch64::STPXi: 2448 return 8; 2449 case AArch64::LDRQui: 2450 case AArch64::LDURQi: 2451 case AArch64::STRQui: 2452 case AArch64::STURQi: 2453 case AArch64::LDPQi: 2454 case AArch64::STPQi: 2455 case AArch64::STGOffset: 2456 case AArch64::STZGOffset: 2457 case AArch64::ST2GOffset: 2458 case AArch64::STZ2GOffset: 2459 case AArch64::STGPi: 2460 return 16; 2461 } 2462 } 2463 2464 // Scale the unscaled offsets. Returns false if the unscaled offset can't be 2465 // scaled. 2466 static bool scaleOffset(unsigned Opc, int64_t &Offset) { 2467 int Scale = AArch64InstrInfo::getMemScale(Opc); 2468 2469 // If the byte-offset isn't a multiple of the stride, we can't scale this 2470 // offset. 
2471 if (Offset % Scale != 0) 2472 return false; 2473 2474 // Convert the byte-offset used by unscaled into an "element" offset used 2475 // by the scaled pair load/store instructions. 2476 Offset /= Scale; 2477 return true; 2478 } 2479 2480 static bool canPairLdStOpc(unsigned FirstOpc, unsigned SecondOpc) { 2481 if (FirstOpc == SecondOpc) 2482 return true; 2483 // We can also pair sign-ext and zero-ext instructions. 2484 switch (FirstOpc) { 2485 default: 2486 return false; 2487 case AArch64::LDRWui: 2488 case AArch64::LDURWi: 2489 return SecondOpc == AArch64::LDRSWui || SecondOpc == AArch64::LDURSWi; 2490 case AArch64::LDRSWui: 2491 case AArch64::LDURSWi: 2492 return SecondOpc == AArch64::LDRWui || SecondOpc == AArch64::LDURWi; 2493 } 2494 // These instructions can't be paired based on their opcodes. 2495 return false; 2496 } 2497 2498 static bool shouldClusterFI(const MachineFrameInfo &MFI, int FI1, 2499 int64_t Offset1, unsigned Opcode1, int FI2, 2500 int64_t Offset2, unsigned Opcode2) { 2501 // Accesses through fixed stack object frame indices may access a different 2502 // fixed stack slot. Check that the object offsets + offsets match. 2503 if (MFI.isFixedObjectIndex(FI1) && MFI.isFixedObjectIndex(FI2)) { 2504 int64_t ObjectOffset1 = MFI.getObjectOffset(FI1); 2505 int64_t ObjectOffset2 = MFI.getObjectOffset(FI2); 2506 assert(ObjectOffset1 <= ObjectOffset2 && "Object offsets are not ordered."); 2507 // Convert to scaled object offsets. 2508 int Scale1 = AArch64InstrInfo::getMemScale(Opcode1); 2509 if (ObjectOffset1 % Scale1 != 0) 2510 return false; 2511 ObjectOffset1 /= Scale1; 2512 int Scale2 = AArch64InstrInfo::getMemScale(Opcode2); 2513 if (ObjectOffset2 % Scale2 != 0) 2514 return false; 2515 ObjectOffset2 /= Scale2; 2516 ObjectOffset1 += Offset1; 2517 ObjectOffset2 += Offset2; 2518 return ObjectOffset1 + 1 == ObjectOffset2; 2519 } 2520 2521 return FI1 == FI2; 2522 } 2523 2524 /// Detect opportunities for ldp/stp formation. 
2525 /// 2526 /// Only called for LdSt for which getMemOperandWithOffset returns true. 2527 bool AArch64InstrInfo::shouldClusterMemOps( 2528 ArrayRef<const MachineOperand *> BaseOps1, 2529 ArrayRef<const MachineOperand *> BaseOps2, unsigned NumLoads, 2530 unsigned NumBytes) const { 2531 assert(BaseOps1.size() == 1 && BaseOps2.size() == 1); 2532 const MachineOperand &BaseOp1 = *BaseOps1.front(); 2533 const MachineOperand &BaseOp2 = *BaseOps2.front(); 2534 const MachineInstr &FirstLdSt = *BaseOp1.getParent(); 2535 const MachineInstr &SecondLdSt = *BaseOp2.getParent(); 2536 if (BaseOp1.getType() != BaseOp2.getType()) 2537 return false; 2538 2539 assert((BaseOp1.isReg() || BaseOp1.isFI()) && 2540 "Only base registers and frame indices are supported."); 2541 2542 // Check for both base regs and base FI. 2543 if (BaseOp1.isReg() && BaseOp1.getReg() != BaseOp2.getReg()) 2544 return false; 2545 2546 // Only cluster up to a single pair. 2547 if (NumLoads > 2) 2548 return false; 2549 2550 if (!isPairableLdStInst(FirstLdSt) || !isPairableLdStInst(SecondLdSt)) 2551 return false; 2552 2553 // Can we pair these instructions based on their opcodes? 2554 unsigned FirstOpc = FirstLdSt.getOpcode(); 2555 unsigned SecondOpc = SecondLdSt.getOpcode(); 2556 if (!canPairLdStOpc(FirstOpc, SecondOpc)) 2557 return false; 2558 2559 // Can't merge volatiles or load/stores that have a hint to avoid pair 2560 // formation, for example. 2561 if (!isCandidateToMergeOrPair(FirstLdSt) || 2562 !isCandidateToMergeOrPair(SecondLdSt)) 2563 return false; 2564 2565 // isCandidateToMergeOrPair guarantees that operand 2 is an immediate. 2566 int64_t Offset1 = FirstLdSt.getOperand(2).getImm(); 2567 if (isUnscaledLdSt(FirstOpc) && !scaleOffset(FirstOpc, Offset1)) 2568 return false; 2569 2570 int64_t Offset2 = SecondLdSt.getOperand(2).getImm(); 2571 if (isUnscaledLdSt(SecondOpc) && !scaleOffset(SecondOpc, Offset2)) 2572 return false; 2573 2574 // Pairwise instructions have a 7-bit signed offset field. 
2575 if (Offset1 > 63 || Offset1 < -64) 2576 return false; 2577 2578 // The caller should already have ordered First/SecondLdSt by offset. 2579 // Note: except for non-equal frame index bases 2580 if (BaseOp1.isFI()) { 2581 assert((!BaseOp1.isIdenticalTo(BaseOp2) || Offset1 <= Offset2) && 2582 "Caller should have ordered offsets."); 2583 2584 const MachineFrameInfo &MFI = 2585 FirstLdSt.getParent()->getParent()->getFrameInfo(); 2586 return shouldClusterFI(MFI, BaseOp1.getIndex(), Offset1, FirstOpc, 2587 BaseOp2.getIndex(), Offset2, SecondOpc); 2588 } 2589 2590 assert(Offset1 <= Offset2 && "Caller should have ordered offsets."); 2591 2592 return Offset1 + 1 == Offset2; 2593 } 2594 2595 static const MachineInstrBuilder &AddSubReg(const MachineInstrBuilder &MIB, 2596 unsigned Reg, unsigned SubIdx, 2597 unsigned State, 2598 const TargetRegisterInfo *TRI) { 2599 if (!SubIdx) 2600 return MIB.addReg(Reg, State); 2601 2602 if (Register::isPhysicalRegister(Reg)) 2603 return MIB.addReg(TRI->getSubReg(Reg, SubIdx), State); 2604 return MIB.addReg(Reg, State, SubIdx); 2605 } 2606 2607 static bool forwardCopyWillClobberTuple(unsigned DestReg, unsigned SrcReg, 2608 unsigned NumRegs) { 2609 // We really want the positive remainder mod 32 here, that happens to be 2610 // easily obtainable with a mask. 
2611 return ((DestReg - SrcReg) & 0x1f) < NumRegs; 2612 } 2613 2614 void AArch64InstrInfo::copyPhysRegTuple(MachineBasicBlock &MBB, 2615 MachineBasicBlock::iterator I, 2616 const DebugLoc &DL, MCRegister DestReg, 2617 MCRegister SrcReg, bool KillSrc, 2618 unsigned Opcode, 2619 ArrayRef<unsigned> Indices) const { 2620 assert(Subtarget.hasNEON() && "Unexpected register copy without NEON"); 2621 const TargetRegisterInfo *TRI = &getRegisterInfo(); 2622 uint16_t DestEncoding = TRI->getEncodingValue(DestReg); 2623 uint16_t SrcEncoding = TRI->getEncodingValue(SrcReg); 2624 unsigned NumRegs = Indices.size(); 2625 2626 int SubReg = 0, End = NumRegs, Incr = 1; 2627 if (forwardCopyWillClobberTuple(DestEncoding, SrcEncoding, NumRegs)) { 2628 SubReg = NumRegs - 1; 2629 End = -1; 2630 Incr = -1; 2631 } 2632 2633 for (; SubReg != End; SubReg += Incr) { 2634 const MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(Opcode)); 2635 AddSubReg(MIB, DestReg, Indices[SubReg], RegState::Define, TRI); 2636 AddSubReg(MIB, SrcReg, Indices[SubReg], 0, TRI); 2637 AddSubReg(MIB, SrcReg, Indices[SubReg], getKillRegState(KillSrc), TRI); 2638 } 2639 } 2640 2641 void AArch64InstrInfo::copyGPRRegTuple(MachineBasicBlock &MBB, 2642 MachineBasicBlock::iterator I, 2643 DebugLoc DL, unsigned DestReg, 2644 unsigned SrcReg, bool KillSrc, 2645 unsigned Opcode, unsigned ZeroReg, 2646 llvm::ArrayRef<unsigned> Indices) const { 2647 const TargetRegisterInfo *TRI = &getRegisterInfo(); 2648 unsigned NumRegs = Indices.size(); 2649 2650 #ifndef NDEBUG 2651 uint16_t DestEncoding = TRI->getEncodingValue(DestReg); 2652 uint16_t SrcEncoding = TRI->getEncodingValue(SrcReg); 2653 assert(DestEncoding % NumRegs == 0 && SrcEncoding % NumRegs == 0 && 2654 "GPR reg sequences should not be able to overlap"); 2655 #endif 2656 2657 for (unsigned SubReg = 0; SubReg != NumRegs; ++SubReg) { 2658 const MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(Opcode)); 2659 AddSubReg(MIB, DestReg, Indices[SubReg], RegState::Define, TRI); 
2660 MIB.addReg(ZeroReg); 2661 AddSubReg(MIB, SrcReg, Indices[SubReg], getKillRegState(KillSrc), TRI); 2662 MIB.addImm(0); 2663 } 2664 } 2665 2666 void AArch64InstrInfo::copyPhysReg(MachineBasicBlock &MBB, 2667 MachineBasicBlock::iterator I, 2668 const DebugLoc &DL, MCRegister DestReg, 2669 MCRegister SrcReg, bool KillSrc) const { 2670 if (AArch64::GPR32spRegClass.contains(DestReg) && 2671 (AArch64::GPR32spRegClass.contains(SrcReg) || SrcReg == AArch64::WZR)) { 2672 const TargetRegisterInfo *TRI = &getRegisterInfo(); 2673 2674 if (DestReg == AArch64::WSP || SrcReg == AArch64::WSP) { 2675 // If either operand is WSP, expand to ADD #0. 2676 if (Subtarget.hasZeroCycleRegMove()) { 2677 // Cyclone recognizes "ADD Xd, Xn, #0" as a zero-cycle register move. 2678 MCRegister DestRegX = TRI->getMatchingSuperReg( 2679 DestReg, AArch64::sub_32, &AArch64::GPR64spRegClass); 2680 MCRegister SrcRegX = TRI->getMatchingSuperReg( 2681 SrcReg, AArch64::sub_32, &AArch64::GPR64spRegClass); 2682 // This instruction is reading and writing X registers. This may upset 2683 // the register scavenger and machine verifier, so we need to indicate 2684 // that we are reading an undefined value from SrcRegX, but a proper 2685 // value from SrcReg. 
2686 BuildMI(MBB, I, DL, get(AArch64::ADDXri), DestRegX) 2687 .addReg(SrcRegX, RegState::Undef) 2688 .addImm(0) 2689 .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0)) 2690 .addReg(SrcReg, RegState::Implicit | getKillRegState(KillSrc)); 2691 } else { 2692 BuildMI(MBB, I, DL, get(AArch64::ADDWri), DestReg) 2693 .addReg(SrcReg, getKillRegState(KillSrc)) 2694 .addImm(0) 2695 .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0)); 2696 } 2697 } else if (SrcReg == AArch64::WZR && Subtarget.hasZeroCycleZeroingGP()) { 2698 BuildMI(MBB, I, DL, get(AArch64::MOVZWi), DestReg) 2699 .addImm(0) 2700 .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0)); 2701 } else { 2702 if (Subtarget.hasZeroCycleRegMove()) { 2703 // Cyclone recognizes "ORR Xd, XZR, Xm" as a zero-cycle register move. 2704 MCRegister DestRegX = TRI->getMatchingSuperReg( 2705 DestReg, AArch64::sub_32, &AArch64::GPR64spRegClass); 2706 MCRegister SrcRegX = TRI->getMatchingSuperReg( 2707 SrcReg, AArch64::sub_32, &AArch64::GPR64spRegClass); 2708 // This instruction is reading and writing X registers. This may upset 2709 // the register scavenger and machine verifier, so we need to indicate 2710 // that we are reading an undefined value from SrcRegX, but a proper 2711 // value from SrcReg. 2712 BuildMI(MBB, I, DL, get(AArch64::ORRXrr), DestRegX) 2713 .addReg(AArch64::XZR) 2714 .addReg(SrcRegX, RegState::Undef) 2715 .addReg(SrcReg, RegState::Implicit | getKillRegState(KillSrc)); 2716 } else { 2717 // Otherwise, expand to ORR WZR. 2718 BuildMI(MBB, I, DL, get(AArch64::ORRWrr), DestReg) 2719 .addReg(AArch64::WZR) 2720 .addReg(SrcReg, getKillRegState(KillSrc)); 2721 } 2722 } 2723 return; 2724 } 2725 2726 // Copy a Predicate register by ORRing with itself. 
2727 if (AArch64::PPRRegClass.contains(DestReg) && 2728 AArch64::PPRRegClass.contains(SrcReg)) { 2729 assert(Subtarget.hasSVE() && "Unexpected SVE register."); 2730 BuildMI(MBB, I, DL, get(AArch64::ORR_PPzPP), DestReg) 2731 .addReg(SrcReg) // Pg 2732 .addReg(SrcReg) 2733 .addReg(SrcReg, getKillRegState(KillSrc)); 2734 return; 2735 } 2736 2737 // Copy a Z register by ORRing with itself. 2738 if (AArch64::ZPRRegClass.contains(DestReg) && 2739 AArch64::ZPRRegClass.contains(SrcReg)) { 2740 assert(Subtarget.hasSVE() && "Unexpected SVE register."); 2741 BuildMI(MBB, I, DL, get(AArch64::ORR_ZZZ), DestReg) 2742 .addReg(SrcReg) 2743 .addReg(SrcReg, getKillRegState(KillSrc)); 2744 return; 2745 } 2746 2747 if (AArch64::GPR64spRegClass.contains(DestReg) && 2748 (AArch64::GPR64spRegClass.contains(SrcReg) || SrcReg == AArch64::XZR)) { 2749 if (DestReg == AArch64::SP || SrcReg == AArch64::SP) { 2750 // If either operand is SP, expand to ADD #0. 2751 BuildMI(MBB, I, DL, get(AArch64::ADDXri), DestReg) 2752 .addReg(SrcReg, getKillRegState(KillSrc)) 2753 .addImm(0) 2754 .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0)); 2755 } else if (SrcReg == AArch64::XZR && Subtarget.hasZeroCycleZeroingGP()) { 2756 BuildMI(MBB, I, DL, get(AArch64::MOVZXi), DestReg) 2757 .addImm(0) 2758 .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0)); 2759 } else { 2760 // Otherwise, expand to ORR XZR. 2761 BuildMI(MBB, I, DL, get(AArch64::ORRXrr), DestReg) 2762 .addReg(AArch64::XZR) 2763 .addReg(SrcReg, getKillRegState(KillSrc)); 2764 } 2765 return; 2766 } 2767 2768 // Copy a DDDD register quad by copying the individual sub-registers. 
2769 if (AArch64::DDDDRegClass.contains(DestReg) && 2770 AArch64::DDDDRegClass.contains(SrcReg)) { 2771 static const unsigned Indices[] = {AArch64::dsub0, AArch64::dsub1, 2772 AArch64::dsub2, AArch64::dsub3}; 2773 copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv8i8, 2774 Indices); 2775 return; 2776 } 2777 2778 // Copy a DDD register triple by copying the individual sub-registers. 2779 if (AArch64::DDDRegClass.contains(DestReg) && 2780 AArch64::DDDRegClass.contains(SrcReg)) { 2781 static const unsigned Indices[] = {AArch64::dsub0, AArch64::dsub1, 2782 AArch64::dsub2}; 2783 copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv8i8, 2784 Indices); 2785 return; 2786 } 2787 2788 // Copy a DD register pair by copying the individual sub-registers. 2789 if (AArch64::DDRegClass.contains(DestReg) && 2790 AArch64::DDRegClass.contains(SrcReg)) { 2791 static const unsigned Indices[] = {AArch64::dsub0, AArch64::dsub1}; 2792 copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv8i8, 2793 Indices); 2794 return; 2795 } 2796 2797 // Copy a QQQQ register quad by copying the individual sub-registers. 2798 if (AArch64::QQQQRegClass.contains(DestReg) && 2799 AArch64::QQQQRegClass.contains(SrcReg)) { 2800 static const unsigned Indices[] = {AArch64::qsub0, AArch64::qsub1, 2801 AArch64::qsub2, AArch64::qsub3}; 2802 copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv16i8, 2803 Indices); 2804 return; 2805 } 2806 2807 // Copy a QQQ register triple by copying the individual sub-registers. 2808 if (AArch64::QQQRegClass.contains(DestReg) && 2809 AArch64::QQQRegClass.contains(SrcReg)) { 2810 static const unsigned Indices[] = {AArch64::qsub0, AArch64::qsub1, 2811 AArch64::qsub2}; 2812 copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv16i8, 2813 Indices); 2814 return; 2815 } 2816 2817 // Copy a QQ register pair by copying the individual sub-registers. 
2818 if (AArch64::QQRegClass.contains(DestReg) && 2819 AArch64::QQRegClass.contains(SrcReg)) { 2820 static const unsigned Indices[] = {AArch64::qsub0, AArch64::qsub1}; 2821 copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv16i8, 2822 Indices); 2823 return; 2824 } 2825 2826 if (AArch64::XSeqPairsClassRegClass.contains(DestReg) && 2827 AArch64::XSeqPairsClassRegClass.contains(SrcReg)) { 2828 static const unsigned Indices[] = {AArch64::sube64, AArch64::subo64}; 2829 copyGPRRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRXrs, 2830 AArch64::XZR, Indices); 2831 return; 2832 } 2833 2834 if (AArch64::WSeqPairsClassRegClass.contains(DestReg) && 2835 AArch64::WSeqPairsClassRegClass.contains(SrcReg)) { 2836 static const unsigned Indices[] = {AArch64::sube32, AArch64::subo32}; 2837 copyGPRRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRWrs, 2838 AArch64::WZR, Indices); 2839 return; 2840 } 2841 2842 if (AArch64::FPR128RegClass.contains(DestReg) && 2843 AArch64::FPR128RegClass.contains(SrcReg)) { 2844 if (Subtarget.hasNEON()) { 2845 BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestReg) 2846 .addReg(SrcReg) 2847 .addReg(SrcReg, getKillRegState(KillSrc)); 2848 } else { 2849 BuildMI(MBB, I, DL, get(AArch64::STRQpre)) 2850 .addReg(AArch64::SP, RegState::Define) 2851 .addReg(SrcReg, getKillRegState(KillSrc)) 2852 .addReg(AArch64::SP) 2853 .addImm(-16); 2854 BuildMI(MBB, I, DL, get(AArch64::LDRQpre)) 2855 .addReg(AArch64::SP, RegState::Define) 2856 .addReg(DestReg, RegState::Define) 2857 .addReg(AArch64::SP) 2858 .addImm(16); 2859 } 2860 return; 2861 } 2862 2863 if (AArch64::FPR64RegClass.contains(DestReg) && 2864 AArch64::FPR64RegClass.contains(SrcReg)) { 2865 if (Subtarget.hasNEON()) { 2866 DestReg = RI.getMatchingSuperReg(DestReg, AArch64::dsub, 2867 &AArch64::FPR128RegClass); 2868 SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::dsub, 2869 &AArch64::FPR128RegClass); 2870 BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestReg) 2871 
.addReg(SrcReg) 2872 .addReg(SrcReg, getKillRegState(KillSrc)); 2873 } else { 2874 BuildMI(MBB, I, DL, get(AArch64::FMOVDr), DestReg) 2875 .addReg(SrcReg, getKillRegState(KillSrc)); 2876 } 2877 return; 2878 } 2879 2880 if (AArch64::FPR32RegClass.contains(DestReg) && 2881 AArch64::FPR32RegClass.contains(SrcReg)) { 2882 if (Subtarget.hasNEON()) { 2883 DestReg = RI.getMatchingSuperReg(DestReg, AArch64::ssub, 2884 &AArch64::FPR128RegClass); 2885 SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::ssub, 2886 &AArch64::FPR128RegClass); 2887 BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestReg) 2888 .addReg(SrcReg) 2889 .addReg(SrcReg, getKillRegState(KillSrc)); 2890 } else { 2891 BuildMI(MBB, I, DL, get(AArch64::FMOVSr), DestReg) 2892 .addReg(SrcReg, getKillRegState(KillSrc)); 2893 } 2894 return; 2895 } 2896 2897 if (AArch64::FPR16RegClass.contains(DestReg) && 2898 AArch64::FPR16RegClass.contains(SrcReg)) { 2899 if (Subtarget.hasNEON()) { 2900 DestReg = RI.getMatchingSuperReg(DestReg, AArch64::hsub, 2901 &AArch64::FPR128RegClass); 2902 SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::hsub, 2903 &AArch64::FPR128RegClass); 2904 BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestReg) 2905 .addReg(SrcReg) 2906 .addReg(SrcReg, getKillRegState(KillSrc)); 2907 } else { 2908 DestReg = RI.getMatchingSuperReg(DestReg, AArch64::hsub, 2909 &AArch64::FPR32RegClass); 2910 SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::hsub, 2911 &AArch64::FPR32RegClass); 2912 BuildMI(MBB, I, DL, get(AArch64::FMOVSr), DestReg) 2913 .addReg(SrcReg, getKillRegState(KillSrc)); 2914 } 2915 return; 2916 } 2917 2918 if (AArch64::FPR8RegClass.contains(DestReg) && 2919 AArch64::FPR8RegClass.contains(SrcReg)) { 2920 if (Subtarget.hasNEON()) { 2921 DestReg = RI.getMatchingSuperReg(DestReg, AArch64::bsub, 2922 &AArch64::FPR128RegClass); 2923 SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::bsub, 2924 &AArch64::FPR128RegClass); 2925 BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestReg) 2926 .addReg(SrcReg) 2927 
.addReg(SrcReg, getKillRegState(KillSrc)); 2928 } else { 2929 DestReg = RI.getMatchingSuperReg(DestReg, AArch64::bsub, 2930 &AArch64::FPR32RegClass); 2931 SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::bsub, 2932 &AArch64::FPR32RegClass); 2933 BuildMI(MBB, I, DL, get(AArch64::FMOVSr), DestReg) 2934 .addReg(SrcReg, getKillRegState(KillSrc)); 2935 } 2936 return; 2937 } 2938 2939 // Copies between GPR64 and FPR64. 2940 if (AArch64::FPR64RegClass.contains(DestReg) && 2941 AArch64::GPR64RegClass.contains(SrcReg)) { 2942 BuildMI(MBB, I, DL, get(AArch64::FMOVXDr), DestReg) 2943 .addReg(SrcReg, getKillRegState(KillSrc)); 2944 return; 2945 } 2946 if (AArch64::GPR64RegClass.contains(DestReg) && 2947 AArch64::FPR64RegClass.contains(SrcReg)) { 2948 BuildMI(MBB, I, DL, get(AArch64::FMOVDXr), DestReg) 2949 .addReg(SrcReg, getKillRegState(KillSrc)); 2950 return; 2951 } 2952 // Copies between GPR32 and FPR32. 2953 if (AArch64::FPR32RegClass.contains(DestReg) && 2954 AArch64::GPR32RegClass.contains(SrcReg)) { 2955 BuildMI(MBB, I, DL, get(AArch64::FMOVWSr), DestReg) 2956 .addReg(SrcReg, getKillRegState(KillSrc)); 2957 return; 2958 } 2959 if (AArch64::GPR32RegClass.contains(DestReg) && 2960 AArch64::FPR32RegClass.contains(SrcReg)) { 2961 BuildMI(MBB, I, DL, get(AArch64::FMOVSWr), DestReg) 2962 .addReg(SrcReg, getKillRegState(KillSrc)); 2963 return; 2964 } 2965 2966 if (DestReg == AArch64::NZCV) { 2967 assert(AArch64::GPR64RegClass.contains(SrcReg) && "Invalid NZCV copy"); 2968 BuildMI(MBB, I, DL, get(AArch64::MSR)) 2969 .addImm(AArch64SysReg::NZCV) 2970 .addReg(SrcReg, getKillRegState(KillSrc)) 2971 .addReg(AArch64::NZCV, RegState::Implicit | RegState::Define); 2972 return; 2973 } 2974 2975 if (SrcReg == AArch64::NZCV) { 2976 assert(AArch64::GPR64RegClass.contains(DestReg) && "Invalid NZCV copy"); 2977 BuildMI(MBB, I, DL, get(AArch64::MRS), DestReg) 2978 .addImm(AArch64SysReg::NZCV) 2979 .addReg(AArch64::NZCV, RegState::Implicit | getKillRegState(KillSrc)); 2980 return; 2981 } 2982 
2983 llvm_unreachable("unimplemented reg-to-reg copy"); 2984 } 2985 2986 static void storeRegPairToStackSlot(const TargetRegisterInfo &TRI, 2987 MachineBasicBlock &MBB, 2988 MachineBasicBlock::iterator InsertBefore, 2989 const MCInstrDesc &MCID, 2990 Register SrcReg, bool IsKill, 2991 unsigned SubIdx0, unsigned SubIdx1, int FI, 2992 MachineMemOperand *MMO) { 2993 Register SrcReg0 = SrcReg; 2994 Register SrcReg1 = SrcReg; 2995 if (Register::isPhysicalRegister(SrcReg)) { 2996 SrcReg0 = TRI.getSubReg(SrcReg, SubIdx0); 2997 SubIdx0 = 0; 2998 SrcReg1 = TRI.getSubReg(SrcReg, SubIdx1); 2999 SubIdx1 = 0; 3000 } 3001 BuildMI(MBB, InsertBefore, DebugLoc(), MCID) 3002 .addReg(SrcReg0, getKillRegState(IsKill), SubIdx0) 3003 .addReg(SrcReg1, getKillRegState(IsKill), SubIdx1) 3004 .addFrameIndex(FI) 3005 .addImm(0) 3006 .addMemOperand(MMO); 3007 } 3008 3009 void AArch64InstrInfo::storeRegToStackSlot( 3010 MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, Register SrcReg, 3011 bool isKill, int FI, const TargetRegisterClass *RC, 3012 const TargetRegisterInfo *TRI) const { 3013 MachineFunction &MF = *MBB.getParent(); 3014 MachineFrameInfo &MFI = MF.getFrameInfo(); 3015 3016 MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(MF, FI); 3017 MachineMemOperand *MMO = 3018 MF.getMachineMemOperand(PtrInfo, MachineMemOperand::MOStore, 3019 MFI.getObjectSize(FI), MFI.getObjectAlign(FI)); 3020 unsigned Opc = 0; 3021 bool Offset = true; 3022 unsigned StackID = TargetStackID::Default; 3023 switch (TRI->getSpillSize(*RC)) { 3024 case 1: 3025 if (AArch64::FPR8RegClass.hasSubClassEq(RC)) 3026 Opc = AArch64::STRBui; 3027 break; 3028 case 2: 3029 if (AArch64::FPR16RegClass.hasSubClassEq(RC)) 3030 Opc = AArch64::STRHui; 3031 else if (AArch64::PPRRegClass.hasSubClassEq(RC)) { 3032 assert(Subtarget.hasSVE() && "Unexpected register store without SVE"); 3033 Opc = AArch64::STR_PXI; 3034 StackID = TargetStackID::SVEVector; 3035 } 3036 break; 3037 case 4: 3038 if 
(AArch64::GPR32allRegClass.hasSubClassEq(RC)) { 3039 Opc = AArch64::STRWui; 3040 if (Register::isVirtualRegister(SrcReg)) 3041 MF.getRegInfo().constrainRegClass(SrcReg, &AArch64::GPR32RegClass); 3042 else 3043 assert(SrcReg != AArch64::WSP); 3044 } else if (AArch64::FPR32RegClass.hasSubClassEq(RC)) 3045 Opc = AArch64::STRSui; 3046 break; 3047 case 8: 3048 if (AArch64::GPR64allRegClass.hasSubClassEq(RC)) { 3049 Opc = AArch64::STRXui; 3050 if (Register::isVirtualRegister(SrcReg)) 3051 MF.getRegInfo().constrainRegClass(SrcReg, &AArch64::GPR64RegClass); 3052 else 3053 assert(SrcReg != AArch64::SP); 3054 } else if (AArch64::FPR64RegClass.hasSubClassEq(RC)) { 3055 Opc = AArch64::STRDui; 3056 } else if (AArch64::WSeqPairsClassRegClass.hasSubClassEq(RC)) { 3057 storeRegPairToStackSlot(getRegisterInfo(), MBB, MBBI, 3058 get(AArch64::STPWi), SrcReg, isKill, 3059 AArch64::sube32, AArch64::subo32, FI, MMO); 3060 return; 3061 } 3062 break; 3063 case 16: 3064 if (AArch64::FPR128RegClass.hasSubClassEq(RC)) 3065 Opc = AArch64::STRQui; 3066 else if (AArch64::DDRegClass.hasSubClassEq(RC)) { 3067 assert(Subtarget.hasNEON() && "Unexpected register store without NEON"); 3068 Opc = AArch64::ST1Twov1d; 3069 Offset = false; 3070 } else if (AArch64::XSeqPairsClassRegClass.hasSubClassEq(RC)) { 3071 storeRegPairToStackSlot(getRegisterInfo(), MBB, MBBI, 3072 get(AArch64::STPXi), SrcReg, isKill, 3073 AArch64::sube64, AArch64::subo64, FI, MMO); 3074 return; 3075 } else if (AArch64::ZPRRegClass.hasSubClassEq(RC)) { 3076 assert(Subtarget.hasSVE() && "Unexpected register store without SVE"); 3077 Opc = AArch64::STR_ZXI; 3078 StackID = TargetStackID::SVEVector; 3079 } 3080 break; 3081 case 24: 3082 if (AArch64::DDDRegClass.hasSubClassEq(RC)) { 3083 assert(Subtarget.hasNEON() && "Unexpected register store without NEON"); 3084 Opc = AArch64::ST1Threev1d; 3085 Offset = false; 3086 } 3087 break; 3088 case 32: 3089 if (AArch64::DDDDRegClass.hasSubClassEq(RC)) { 3090 assert(Subtarget.hasNEON() && 
"Unexpected register store without NEON"); 3091 Opc = AArch64::ST1Fourv1d; 3092 Offset = false; 3093 } else if (AArch64::QQRegClass.hasSubClassEq(RC)) { 3094 assert(Subtarget.hasNEON() && "Unexpected register store without NEON"); 3095 Opc = AArch64::ST1Twov2d; 3096 Offset = false; 3097 } else if (AArch64::ZPR2RegClass.hasSubClassEq(RC)) { 3098 assert(Subtarget.hasSVE() && "Unexpected register store without SVE"); 3099 Opc = AArch64::STR_ZZXI; 3100 StackID = TargetStackID::SVEVector; 3101 } 3102 break; 3103 case 48: 3104 if (AArch64::QQQRegClass.hasSubClassEq(RC)) { 3105 assert(Subtarget.hasNEON() && "Unexpected register store without NEON"); 3106 Opc = AArch64::ST1Threev2d; 3107 Offset = false; 3108 } else if (AArch64::ZPR3RegClass.hasSubClassEq(RC)) { 3109 assert(Subtarget.hasSVE() && "Unexpected register store without SVE"); 3110 Opc = AArch64::STR_ZZZXI; 3111 StackID = TargetStackID::SVEVector; 3112 } 3113 break; 3114 case 64: 3115 if (AArch64::QQQQRegClass.hasSubClassEq(RC)) { 3116 assert(Subtarget.hasNEON() && "Unexpected register store without NEON"); 3117 Opc = AArch64::ST1Fourv2d; 3118 Offset = false; 3119 } else if (AArch64::ZPR4RegClass.hasSubClassEq(RC)) { 3120 assert(Subtarget.hasSVE() && "Unexpected register store without SVE"); 3121 Opc = AArch64::STR_ZZZZXI; 3122 StackID = TargetStackID::SVEVector; 3123 } 3124 break; 3125 } 3126 assert(Opc && "Unknown register class"); 3127 MFI.setStackID(FI, StackID); 3128 3129 const MachineInstrBuilder MI = BuildMI(MBB, MBBI, DebugLoc(), get(Opc)) 3130 .addReg(SrcReg, getKillRegState(isKill)) 3131 .addFrameIndex(FI); 3132 3133 if (Offset) 3134 MI.addImm(0); 3135 MI.addMemOperand(MMO); 3136 } 3137 3138 static void loadRegPairFromStackSlot(const TargetRegisterInfo &TRI, 3139 MachineBasicBlock &MBB, 3140 MachineBasicBlock::iterator InsertBefore, 3141 const MCInstrDesc &MCID, 3142 Register DestReg, unsigned SubIdx0, 3143 unsigned SubIdx1, int FI, 3144 MachineMemOperand *MMO) { 3145 Register DestReg0 = DestReg; 3146 
Register DestReg1 = DestReg; 3147 bool IsUndef = true; 3148 if (Register::isPhysicalRegister(DestReg)) { 3149 DestReg0 = TRI.getSubReg(DestReg, SubIdx0); 3150 SubIdx0 = 0; 3151 DestReg1 = TRI.getSubReg(DestReg, SubIdx1); 3152 SubIdx1 = 0; 3153 IsUndef = false; 3154 } 3155 BuildMI(MBB, InsertBefore, DebugLoc(), MCID) 3156 .addReg(DestReg0, RegState::Define | getUndefRegState(IsUndef), SubIdx0) 3157 .addReg(DestReg1, RegState::Define | getUndefRegState(IsUndef), SubIdx1) 3158 .addFrameIndex(FI) 3159 .addImm(0) 3160 .addMemOperand(MMO); 3161 } 3162 3163 void AArch64InstrInfo::loadRegFromStackSlot( 3164 MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, Register DestReg, 3165 int FI, const TargetRegisterClass *RC, 3166 const TargetRegisterInfo *TRI) const { 3167 MachineFunction &MF = *MBB.getParent(); 3168 MachineFrameInfo &MFI = MF.getFrameInfo(); 3169 MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(MF, FI); 3170 MachineMemOperand *MMO = 3171 MF.getMachineMemOperand(PtrInfo, MachineMemOperand::MOLoad, 3172 MFI.getObjectSize(FI), MFI.getObjectAlign(FI)); 3173 3174 unsigned Opc = 0; 3175 bool Offset = true; 3176 unsigned StackID = TargetStackID::Default; 3177 switch (TRI->getSpillSize(*RC)) { 3178 case 1: 3179 if (AArch64::FPR8RegClass.hasSubClassEq(RC)) 3180 Opc = AArch64::LDRBui; 3181 break; 3182 case 2: 3183 if (AArch64::FPR16RegClass.hasSubClassEq(RC)) 3184 Opc = AArch64::LDRHui; 3185 else if (AArch64::PPRRegClass.hasSubClassEq(RC)) { 3186 assert(Subtarget.hasSVE() && "Unexpected register load without SVE"); 3187 Opc = AArch64::LDR_PXI; 3188 StackID = TargetStackID::SVEVector; 3189 } 3190 break; 3191 case 4: 3192 if (AArch64::GPR32allRegClass.hasSubClassEq(RC)) { 3193 Opc = AArch64::LDRWui; 3194 if (Register::isVirtualRegister(DestReg)) 3195 MF.getRegInfo().constrainRegClass(DestReg, &AArch64::GPR32RegClass); 3196 else 3197 assert(DestReg != AArch64::WSP); 3198 } else if (AArch64::FPR32RegClass.hasSubClassEq(RC)) 3199 Opc = AArch64::LDRSui; 
3200 break; 3201 case 8: 3202 if (AArch64::GPR64allRegClass.hasSubClassEq(RC)) { 3203 Opc = AArch64::LDRXui; 3204 if (Register::isVirtualRegister(DestReg)) 3205 MF.getRegInfo().constrainRegClass(DestReg, &AArch64::GPR64RegClass); 3206 else 3207 assert(DestReg != AArch64::SP); 3208 } else if (AArch64::FPR64RegClass.hasSubClassEq(RC)) { 3209 Opc = AArch64::LDRDui; 3210 } else if (AArch64::WSeqPairsClassRegClass.hasSubClassEq(RC)) { 3211 loadRegPairFromStackSlot(getRegisterInfo(), MBB, MBBI, 3212 get(AArch64::LDPWi), DestReg, AArch64::sube32, 3213 AArch64::subo32, FI, MMO); 3214 return; 3215 } 3216 break; 3217 case 16: 3218 if (AArch64::FPR128RegClass.hasSubClassEq(RC)) 3219 Opc = AArch64::LDRQui; 3220 else if (AArch64::DDRegClass.hasSubClassEq(RC)) { 3221 assert(Subtarget.hasNEON() && "Unexpected register load without NEON"); 3222 Opc = AArch64::LD1Twov1d; 3223 Offset = false; 3224 } else if (AArch64::XSeqPairsClassRegClass.hasSubClassEq(RC)) { 3225 loadRegPairFromStackSlot(getRegisterInfo(), MBB, MBBI, 3226 get(AArch64::LDPXi), DestReg, AArch64::sube64, 3227 AArch64::subo64, FI, MMO); 3228 return; 3229 } else if (AArch64::ZPRRegClass.hasSubClassEq(RC)) { 3230 assert(Subtarget.hasSVE() && "Unexpected register load without SVE"); 3231 Opc = AArch64::LDR_ZXI; 3232 StackID = TargetStackID::SVEVector; 3233 } 3234 break; 3235 case 24: 3236 if (AArch64::DDDRegClass.hasSubClassEq(RC)) { 3237 assert(Subtarget.hasNEON() && "Unexpected register load without NEON"); 3238 Opc = AArch64::LD1Threev1d; 3239 Offset = false; 3240 } 3241 break; 3242 case 32: 3243 if (AArch64::DDDDRegClass.hasSubClassEq(RC)) { 3244 assert(Subtarget.hasNEON() && "Unexpected register load without NEON"); 3245 Opc = AArch64::LD1Fourv1d; 3246 Offset = false; 3247 } else if (AArch64::QQRegClass.hasSubClassEq(RC)) { 3248 assert(Subtarget.hasNEON() && "Unexpected register load without NEON"); 3249 Opc = AArch64::LD1Twov2d; 3250 Offset = false; 3251 } else if (AArch64::ZPR2RegClass.hasSubClassEq(RC)) { 3252 
assert(Subtarget.hasSVE() && "Unexpected register load without SVE"); 3253 Opc = AArch64::LDR_ZZXI; 3254 StackID = TargetStackID::SVEVector; 3255 } 3256 break; 3257 case 48: 3258 if (AArch64::QQQRegClass.hasSubClassEq(RC)) { 3259 assert(Subtarget.hasNEON() && "Unexpected register load without NEON"); 3260 Opc = AArch64::LD1Threev2d; 3261 Offset = false; 3262 } else if (AArch64::ZPR3RegClass.hasSubClassEq(RC)) { 3263 assert(Subtarget.hasSVE() && "Unexpected register load without SVE"); 3264 Opc = AArch64::LDR_ZZZXI; 3265 StackID = TargetStackID::SVEVector; 3266 } 3267 break; 3268 case 64: 3269 if (AArch64::QQQQRegClass.hasSubClassEq(RC)) { 3270 assert(Subtarget.hasNEON() && "Unexpected register load without NEON"); 3271 Opc = AArch64::LD1Fourv2d; 3272 Offset = false; 3273 } else if (AArch64::ZPR4RegClass.hasSubClassEq(RC)) { 3274 assert(Subtarget.hasSVE() && "Unexpected register load without SVE"); 3275 Opc = AArch64::LDR_ZZZZXI; 3276 StackID = TargetStackID::SVEVector; 3277 } 3278 break; 3279 } 3280 3281 assert(Opc && "Unknown register class"); 3282 MFI.setStackID(FI, StackID); 3283 3284 const MachineInstrBuilder MI = BuildMI(MBB, MBBI, DebugLoc(), get(Opc)) 3285 .addReg(DestReg, getDefRegState(true)) 3286 .addFrameIndex(FI); 3287 if (Offset) 3288 MI.addImm(0); 3289 MI.addMemOperand(MMO); 3290 } 3291 3292 bool llvm::isNZCVTouchedInInstructionRange(const MachineInstr &DefMI, 3293 const MachineInstr &UseMI, 3294 const TargetRegisterInfo *TRI) { 3295 return any_of(instructionsWithoutDebug(std::next(DefMI.getIterator()), 3296 UseMI.getIterator()), 3297 [TRI](const MachineInstr &I) { 3298 return I.modifiesRegister(AArch64::NZCV, TRI) || 3299 I.readsRegister(AArch64::NZCV, TRI); 3300 }); 3301 } 3302 3303 // Helper function to emit a frame offset adjustment from a given 3304 // pointer (SrcReg), stored into DestReg. This function is explicit 3305 // in that it requires the opcode. 
3306 static void emitFrameOffsetAdj(MachineBasicBlock &MBB, 3307 MachineBasicBlock::iterator MBBI, 3308 const DebugLoc &DL, unsigned DestReg, 3309 unsigned SrcReg, int64_t Offset, unsigned Opc, 3310 const TargetInstrInfo *TII, 3311 MachineInstr::MIFlag Flag, bool NeedsWinCFI, 3312 bool *HasWinCFI) { 3313 int Sign = 1; 3314 unsigned MaxEncoding, ShiftSize; 3315 switch (Opc) { 3316 case AArch64::ADDXri: 3317 case AArch64::ADDSXri: 3318 case AArch64::SUBXri: 3319 case AArch64::SUBSXri: 3320 MaxEncoding = 0xfff; 3321 ShiftSize = 12; 3322 break; 3323 case AArch64::ADDVL_XXI: 3324 case AArch64::ADDPL_XXI: 3325 MaxEncoding = 31; 3326 ShiftSize = 0; 3327 if (Offset < 0) { 3328 MaxEncoding = 32; 3329 Sign = -1; 3330 Offset = -Offset; 3331 } 3332 break; 3333 default: 3334 llvm_unreachable("Unsupported opcode"); 3335 } 3336 3337 // FIXME: If the offset won't fit in 24-bits, compute the offset into a 3338 // scratch register. If DestReg is a virtual register, use it as the 3339 // scratch register; otherwise, create a new virtual register (to be 3340 // replaced by the scavenger at the end of PEI). That case can be optimized 3341 // slightly if DestReg is SP which is always 16-byte aligned, so the scratch 3342 // register can be loaded with offset%8 and the add/sub can use an extending 3343 // instruction with LSL#3. 3344 // Currently the function handles any offsets but generates a poor sequence 3345 // of code. 
3346 // assert(Offset < (1 << 24) && "unimplemented reg plus immediate"); 3347 3348 const unsigned MaxEncodableValue = MaxEncoding << ShiftSize; 3349 Register TmpReg = DestReg; 3350 if (TmpReg == AArch64::XZR) 3351 TmpReg = MBB.getParent()->getRegInfo().createVirtualRegister( 3352 &AArch64::GPR64RegClass); 3353 do { 3354 uint64_t ThisVal = std::min<uint64_t>(Offset, MaxEncodableValue); 3355 unsigned LocalShiftSize = 0; 3356 if (ThisVal > MaxEncoding) { 3357 ThisVal = ThisVal >> ShiftSize; 3358 LocalShiftSize = ShiftSize; 3359 } 3360 assert((ThisVal >> ShiftSize) <= MaxEncoding && 3361 "Encoding cannot handle value that big"); 3362 3363 Offset -= ThisVal << LocalShiftSize; 3364 if (Offset == 0) 3365 TmpReg = DestReg; 3366 auto MBI = BuildMI(MBB, MBBI, DL, TII->get(Opc), TmpReg) 3367 .addReg(SrcReg) 3368 .addImm(Sign * (int)ThisVal); 3369 if (ShiftSize) 3370 MBI = MBI.addImm( 3371 AArch64_AM::getShifterImm(AArch64_AM::LSL, LocalShiftSize)); 3372 MBI = MBI.setMIFlag(Flag); 3373 3374 if (NeedsWinCFI) { 3375 assert(Sign == 1 && "SEH directives should always have a positive sign"); 3376 int Imm = (int)(ThisVal << LocalShiftSize); 3377 if ((DestReg == AArch64::FP && SrcReg == AArch64::SP) || 3378 (SrcReg == AArch64::FP && DestReg == AArch64::SP)) { 3379 if (HasWinCFI) 3380 *HasWinCFI = true; 3381 if (Imm == 0) 3382 BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_SetFP)).setMIFlag(Flag); 3383 else 3384 BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_AddFP)) 3385 .addImm(Imm) 3386 .setMIFlag(Flag); 3387 assert(Offset == 0 && "Expected remaining offset to be zero to " 3388 "emit a single SEH directive"); 3389 } else if (DestReg == AArch64::SP) { 3390 if (HasWinCFI) 3391 *HasWinCFI = true; 3392 assert(SrcReg == AArch64::SP && "Unexpected SrcReg for SEH_StackAlloc"); 3393 BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_StackAlloc)) 3394 .addImm(Imm) 3395 .setMIFlag(Flag); 3396 } 3397 if (HasWinCFI) 3398 *HasWinCFI = true; 3399 } 3400 3401 SrcReg = TmpReg; 3402 } while (Offset); 3403 } 
3404 3405 void llvm::emitFrameOffset(MachineBasicBlock &MBB, 3406 MachineBasicBlock::iterator MBBI, const DebugLoc &DL, 3407 unsigned DestReg, unsigned SrcReg, 3408 StackOffset Offset, const TargetInstrInfo *TII, 3409 MachineInstr::MIFlag Flag, bool SetNZCV, 3410 bool NeedsWinCFI, bool *HasWinCFI) { 3411 int64_t Bytes, NumPredicateVectors, NumDataVectors; 3412 Offset.getForFrameOffset(Bytes, NumPredicateVectors, NumDataVectors); 3413 3414 // First emit non-scalable frame offsets, or a simple 'mov'. 3415 if (Bytes || (!Offset && SrcReg != DestReg)) { 3416 assert((DestReg != AArch64::SP || Bytes % 16 == 0) && 3417 "SP increment/decrement not 16-byte aligned"); 3418 unsigned Opc = SetNZCV ? AArch64::ADDSXri : AArch64::ADDXri; 3419 if (Bytes < 0) { 3420 Bytes = -Bytes; 3421 Opc = SetNZCV ? AArch64::SUBSXri : AArch64::SUBXri; 3422 } 3423 emitFrameOffsetAdj(MBB, MBBI, DL, DestReg, SrcReg, Bytes, Opc, TII, Flag, 3424 NeedsWinCFI, HasWinCFI); 3425 SrcReg = DestReg; 3426 } 3427 3428 assert(!(SetNZCV && (NumPredicateVectors || NumDataVectors)) && 3429 "SetNZCV not supported with SVE vectors"); 3430 assert(!(NeedsWinCFI && (NumPredicateVectors || NumDataVectors)) && 3431 "WinCFI not supported with SVE vectors"); 3432 3433 if (NumDataVectors) { 3434 emitFrameOffsetAdj(MBB, MBBI, DL, DestReg, SrcReg, NumDataVectors, 3435 AArch64::ADDVL_XXI, TII, Flag, NeedsWinCFI, nullptr); 3436 SrcReg = DestReg; 3437 } 3438 3439 if (NumPredicateVectors) { 3440 assert(DestReg != AArch64::SP && "Unaligned access to SP"); 3441 emitFrameOffsetAdj(MBB, MBBI, DL, DestReg, SrcReg, NumPredicateVectors, 3442 AArch64::ADDPL_XXI, TII, Flag, NeedsWinCFI, nullptr); 3443 } 3444 } 3445 3446 MachineInstr *AArch64InstrInfo::foldMemoryOperandImpl( 3447 MachineFunction &MF, MachineInstr &MI, ArrayRef<unsigned> Ops, 3448 MachineBasicBlock::iterator InsertPt, int FrameIndex, 3449 LiveIntervals *LIS, VirtRegMap *VRM) const { 3450 // This is a bit of a hack. 
Consider this instruction: 3451 // 3452 // %0 = COPY %sp; GPR64all:%0 3453 // 3454 // We explicitly chose GPR64all for the virtual register so such a copy might 3455 // be eliminated by RegisterCoalescer. However, that may not be possible, and 3456 // %0 may even spill. We can't spill %sp, and since it is in the GPR64all 3457 // register class, TargetInstrInfo::foldMemoryOperand() is going to try. 3458 // 3459 // To prevent that, we are going to constrain the %0 register class here. 3460 // 3461 // <rdar://problem/11522048> 3462 // 3463 if (MI.isFullCopy()) { 3464 Register DstReg = MI.getOperand(0).getReg(); 3465 Register SrcReg = MI.getOperand(1).getReg(); 3466 if (SrcReg == AArch64::SP && Register::isVirtualRegister(DstReg)) { 3467 MF.getRegInfo().constrainRegClass(DstReg, &AArch64::GPR64RegClass); 3468 return nullptr; 3469 } 3470 if (DstReg == AArch64::SP && Register::isVirtualRegister(SrcReg)) { 3471 MF.getRegInfo().constrainRegClass(SrcReg, &AArch64::GPR64RegClass); 3472 return nullptr; 3473 } 3474 } 3475 3476 // Handle the case where a copy is being spilled or filled but the source 3477 // and destination register class don't match. For example: 3478 // 3479 // %0 = COPY %xzr; GPR64common:%0 3480 // 3481 // In this case we can still safely fold away the COPY and generate the 3482 // following spill code: 3483 // 3484 // STRXui %xzr, %stack.0 3485 // 3486 // This also eliminates spilled cross register class COPYs (e.g. between x and 3487 // d regs) of the same size. For example: 3488 // 3489 // %0 = COPY %1; GPR64:%0, FPR64:%1 3490 // 3491 // will be filled as 3492 // 3493 // LDRDui %0, fi<#0> 3494 // 3495 // instead of 3496 // 3497 // LDRXui %Temp, fi<#0> 3498 // %0 = FMOV %Temp 3499 // 3500 if (MI.isCopy() && Ops.size() == 1 && 3501 // Make sure we're only folding the explicit COPY defs/uses. 
3502 (Ops[0] == 0 || Ops[0] == 1)) { 3503 bool IsSpill = Ops[0] == 0; 3504 bool IsFill = !IsSpill; 3505 const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo(); 3506 const MachineRegisterInfo &MRI = MF.getRegInfo(); 3507 MachineBasicBlock &MBB = *MI.getParent(); 3508 const MachineOperand &DstMO = MI.getOperand(0); 3509 const MachineOperand &SrcMO = MI.getOperand(1); 3510 Register DstReg = DstMO.getReg(); 3511 Register SrcReg = SrcMO.getReg(); 3512 // This is slightly expensive to compute for physical regs since 3513 // getMinimalPhysRegClass is slow. 3514 auto getRegClass = [&](unsigned Reg) { 3515 return Register::isVirtualRegister(Reg) ? MRI.getRegClass(Reg) 3516 : TRI.getMinimalPhysRegClass(Reg); 3517 }; 3518 3519 if (DstMO.getSubReg() == 0 && SrcMO.getSubReg() == 0) { 3520 assert(TRI.getRegSizeInBits(*getRegClass(DstReg)) == 3521 TRI.getRegSizeInBits(*getRegClass(SrcReg)) && 3522 "Mismatched register size in non subreg COPY"); 3523 if (IsSpill) 3524 storeRegToStackSlot(MBB, InsertPt, SrcReg, SrcMO.isKill(), FrameIndex, 3525 getRegClass(SrcReg), &TRI); 3526 else 3527 loadRegFromStackSlot(MBB, InsertPt, DstReg, FrameIndex, 3528 getRegClass(DstReg), &TRI); 3529 return &*--InsertPt; 3530 } 3531 3532 // Handle cases like spilling def of: 3533 // 3534 // %0:sub_32<def,read-undef> = COPY %wzr; GPR64common:%0 3535 // 3536 // where the physical register source can be widened and stored to the full 3537 // virtual reg destination stack slot, in this case producing: 3538 // 3539 // STRXui %xzr, %stack.0 3540 // 3541 if (IsSpill && DstMO.isUndef() && Register::isPhysicalRegister(SrcReg)) { 3542 assert(SrcMO.getSubReg() == 0 && 3543 "Unexpected subreg on physical register"); 3544 const TargetRegisterClass *SpillRC; 3545 unsigned SpillSubreg; 3546 switch (DstMO.getSubReg()) { 3547 default: 3548 SpillRC = nullptr; 3549 break; 3550 case AArch64::sub_32: 3551 case AArch64::ssub: 3552 if (AArch64::GPR32RegClass.contains(SrcReg)) { 3553 SpillRC = 
&AArch64::GPR64RegClass; 3554 SpillSubreg = AArch64::sub_32; 3555 } else if (AArch64::FPR32RegClass.contains(SrcReg)) { 3556 SpillRC = &AArch64::FPR64RegClass; 3557 SpillSubreg = AArch64::ssub; 3558 } else 3559 SpillRC = nullptr; 3560 break; 3561 case AArch64::dsub: 3562 if (AArch64::FPR64RegClass.contains(SrcReg)) { 3563 SpillRC = &AArch64::FPR128RegClass; 3564 SpillSubreg = AArch64::dsub; 3565 } else 3566 SpillRC = nullptr; 3567 break; 3568 } 3569 3570 if (SpillRC) 3571 if (unsigned WidenedSrcReg = 3572 TRI.getMatchingSuperReg(SrcReg, SpillSubreg, SpillRC)) { 3573 storeRegToStackSlot(MBB, InsertPt, WidenedSrcReg, SrcMO.isKill(), 3574 FrameIndex, SpillRC, &TRI); 3575 return &*--InsertPt; 3576 } 3577 } 3578 3579 // Handle cases like filling use of: 3580 // 3581 // %0:sub_32<def,read-undef> = COPY %1; GPR64:%0, GPR32:%1 3582 // 3583 // where we can load the full virtual reg source stack slot, into the subreg 3584 // destination, in this case producing: 3585 // 3586 // LDRWui %0:sub_32<def,read-undef>, %stack.0 3587 // 3588 if (IsFill && SrcMO.getSubReg() == 0 && DstMO.isUndef()) { 3589 const TargetRegisterClass *FillRC; 3590 switch (DstMO.getSubReg()) { 3591 default: 3592 FillRC = nullptr; 3593 break; 3594 case AArch64::sub_32: 3595 FillRC = &AArch64::GPR32RegClass; 3596 break; 3597 case AArch64::ssub: 3598 FillRC = &AArch64::FPR32RegClass; 3599 break; 3600 case AArch64::dsub: 3601 FillRC = &AArch64::FPR64RegClass; 3602 break; 3603 } 3604 3605 if (FillRC) { 3606 assert(TRI.getRegSizeInBits(*getRegClass(SrcReg)) == 3607 TRI.getRegSizeInBits(*FillRC) && 3608 "Mismatched regclass size on folded subreg COPY"); 3609 loadRegFromStackSlot(MBB, InsertPt, DstReg, FrameIndex, FillRC, &TRI); 3610 MachineInstr &LoadMI = *--InsertPt; 3611 MachineOperand &LoadDst = LoadMI.getOperand(0); 3612 assert(LoadDst.getSubReg() == 0 && "unexpected subreg on fill load"); 3613 LoadDst.setSubReg(DstMO.getSubReg()); 3614 LoadDst.setIsUndef(); 3615 return &LoadMI; 3616 } 3617 } 3618 } 3619 3620 
// Cannot fold. 3621 return nullptr; 3622 } 3623 3624 int llvm::isAArch64FrameOffsetLegal(const MachineInstr &MI, 3625 StackOffset &SOffset, 3626 bool *OutUseUnscaledOp, 3627 unsigned *OutUnscaledOp, 3628 int64_t *EmittableOffset) { 3629 // Set output values in case of early exit. 3630 if (EmittableOffset) 3631 *EmittableOffset = 0; 3632 if (OutUseUnscaledOp) 3633 *OutUseUnscaledOp = false; 3634 if (OutUnscaledOp) 3635 *OutUnscaledOp = 0; 3636 3637 // Exit early for structured vector spills/fills as they can't take an 3638 // immediate offset. 3639 switch (MI.getOpcode()) { 3640 default: 3641 break; 3642 case AArch64::LD1Twov2d: 3643 case AArch64::LD1Threev2d: 3644 case AArch64::LD1Fourv2d: 3645 case AArch64::LD1Twov1d: 3646 case AArch64::LD1Threev1d: 3647 case AArch64::LD1Fourv1d: 3648 case AArch64::ST1Twov2d: 3649 case AArch64::ST1Threev2d: 3650 case AArch64::ST1Fourv2d: 3651 case AArch64::ST1Twov1d: 3652 case AArch64::ST1Threev1d: 3653 case AArch64::ST1Fourv1d: 3654 case AArch64::IRG: 3655 case AArch64::IRGstack: 3656 case AArch64::STGloop: 3657 case AArch64::STZGloop: 3658 return AArch64FrameOffsetCannotUpdate; 3659 } 3660 3661 // Get the min/max offset and the scale. 3662 TypeSize ScaleValue(0U, false); 3663 unsigned Width; 3664 int64_t MinOff, MaxOff; 3665 if (!AArch64InstrInfo::getMemOpInfo(MI.getOpcode(), ScaleValue, Width, MinOff, 3666 MaxOff)) 3667 llvm_unreachable("unhandled opcode in isAArch64FrameOffsetLegal"); 3668 3669 // Construct the complete offset. 3670 bool IsMulVL = ScaleValue.isScalable(); 3671 unsigned Scale = ScaleValue.getKnownMinSize(); 3672 int64_t Offset = IsMulVL ? SOffset.getScalableBytes() : SOffset.getBytes(); 3673 3674 const MachineOperand &ImmOpnd = 3675 MI.getOperand(AArch64InstrInfo::getLoadStoreImmIdx(MI.getOpcode())); 3676 Offset += ImmOpnd.getImm() * Scale; 3677 3678 // If the offset doesn't match the scale, we rewrite the instruction to 3679 // use the unscaled instruction instead. 
Likewise, if we have a negative 3680 // offset and there is an unscaled op to use. 3681 Optional<unsigned> UnscaledOp = 3682 AArch64InstrInfo::getUnscaledLdSt(MI.getOpcode()); 3683 bool useUnscaledOp = UnscaledOp && (Offset % Scale || Offset < 0); 3684 if (useUnscaledOp && 3685 !AArch64InstrInfo::getMemOpInfo(*UnscaledOp, ScaleValue, Width, MinOff, 3686 MaxOff)) 3687 llvm_unreachable("unhandled opcode in isAArch64FrameOffsetLegal"); 3688 3689 Scale = ScaleValue.getKnownMinSize(); 3690 assert(IsMulVL == ScaleValue.isScalable() && 3691 "Unscaled opcode has different value for scalable"); 3692 3693 int64_t Remainder = Offset % Scale; 3694 assert(!(Remainder && useUnscaledOp) && 3695 "Cannot have remainder when using unscaled op"); 3696 3697 assert(MinOff < MaxOff && "Unexpected Min/Max offsets"); 3698 int64_t NewOffset = Offset / Scale; 3699 if (MinOff <= NewOffset && NewOffset <= MaxOff) 3700 Offset = Remainder; 3701 else { 3702 NewOffset = NewOffset < 0 ? MinOff : MaxOff; 3703 Offset = Offset - NewOffset * Scale + Remainder; 3704 } 3705 3706 if (EmittableOffset) 3707 *EmittableOffset = NewOffset; 3708 if (OutUseUnscaledOp) 3709 *OutUseUnscaledOp = useUnscaledOp; 3710 if (OutUnscaledOp && UnscaledOp) 3711 *OutUnscaledOp = *UnscaledOp; 3712 3713 if (IsMulVL) 3714 SOffset = StackOffset(Offset, MVT::nxv1i8) + 3715 StackOffset(SOffset.getBytes(), MVT::i8); 3716 else 3717 SOffset = StackOffset(Offset, MVT::i8) + 3718 StackOffset(SOffset.getScalableBytes(), MVT::nxv1i8); 3719 return AArch64FrameOffsetCanUpdate | 3720 (SOffset ? 
0 : AArch64FrameOffsetIsLegal); 3721 } 3722 3723 bool llvm::rewriteAArch64FrameIndex(MachineInstr &MI, unsigned FrameRegIdx, 3724 unsigned FrameReg, StackOffset &Offset, 3725 const AArch64InstrInfo *TII) { 3726 unsigned Opcode = MI.getOpcode(); 3727 unsigned ImmIdx = FrameRegIdx + 1; 3728 3729 if (Opcode == AArch64::ADDSXri || Opcode == AArch64::ADDXri) { 3730 Offset += StackOffset(MI.getOperand(ImmIdx).getImm(), MVT::i8); 3731 emitFrameOffset(*MI.getParent(), MI, MI.getDebugLoc(), 3732 MI.getOperand(0).getReg(), FrameReg, Offset, TII, 3733 MachineInstr::NoFlags, (Opcode == AArch64::ADDSXri)); 3734 MI.eraseFromParent(); 3735 Offset = StackOffset(); 3736 return true; 3737 } 3738 3739 int64_t NewOffset; 3740 unsigned UnscaledOp; 3741 bool UseUnscaledOp; 3742 int Status = isAArch64FrameOffsetLegal(MI, Offset, &UseUnscaledOp, 3743 &UnscaledOp, &NewOffset); 3744 if (Status & AArch64FrameOffsetCanUpdate) { 3745 if (Status & AArch64FrameOffsetIsLegal) 3746 // Replace the FrameIndex with FrameReg. 3747 MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false); 3748 if (UseUnscaledOp) 3749 MI.setDesc(TII->get(UnscaledOp)); 3750 3751 MI.getOperand(ImmIdx).ChangeToImmediate(NewOffset); 3752 return !Offset; 3753 } 3754 3755 return false; 3756 } 3757 3758 void AArch64InstrInfo::getNoop(MCInst &NopInst) const { 3759 NopInst.setOpcode(AArch64::HINT); 3760 NopInst.addOperand(MCOperand::createImm(0)); 3761 } 3762 3763 // AArch64 supports MachineCombiner. 3764 bool AArch64InstrInfo::useMachineCombiner() const { return true; } 3765 3766 // True when Opc sets flag 3767 static bool isCombineInstrSettingFlag(unsigned Opc) { 3768 switch (Opc) { 3769 case AArch64::ADDSWrr: 3770 case AArch64::ADDSWri: 3771 case AArch64::ADDSXrr: 3772 case AArch64::ADDSXri: 3773 case AArch64::SUBSWrr: 3774 case AArch64::SUBSXrr: 3775 // Note: MSUB Wd,Wn,Wm,Wi -> Wd = Wi - WnxWm, not Wd=WnxWm - Wi. 
3776 case AArch64::SUBSWri: 3777 case AArch64::SUBSXri: 3778 return true; 3779 default: 3780 break; 3781 } 3782 return false; 3783 } 3784 3785 // 32b Opcodes that can be combined with a MUL 3786 static bool isCombineInstrCandidate32(unsigned Opc) { 3787 switch (Opc) { 3788 case AArch64::ADDWrr: 3789 case AArch64::ADDWri: 3790 case AArch64::SUBWrr: 3791 case AArch64::ADDSWrr: 3792 case AArch64::ADDSWri: 3793 case AArch64::SUBSWrr: 3794 // Note: MSUB Wd,Wn,Wm,Wi -> Wd = Wi - WnxWm, not Wd=WnxWm - Wi. 3795 case AArch64::SUBWri: 3796 case AArch64::SUBSWri: 3797 return true; 3798 default: 3799 break; 3800 } 3801 return false; 3802 } 3803 3804 // 64b Opcodes that can be combined with a MUL 3805 static bool isCombineInstrCandidate64(unsigned Opc) { 3806 switch (Opc) { 3807 case AArch64::ADDXrr: 3808 case AArch64::ADDXri: 3809 case AArch64::SUBXrr: 3810 case AArch64::ADDSXrr: 3811 case AArch64::ADDSXri: 3812 case AArch64::SUBSXrr: 3813 // Note: MSUB Wd,Wn,Wm,Wi -> Wd = Wi - WnxWm, not Wd=WnxWm - Wi. 
3814 case AArch64::SUBXri: 3815 case AArch64::SUBSXri: 3816 case AArch64::ADDv8i8: 3817 case AArch64::ADDv16i8: 3818 case AArch64::ADDv4i16: 3819 case AArch64::ADDv8i16: 3820 case AArch64::ADDv2i32: 3821 case AArch64::ADDv4i32: 3822 case AArch64::SUBv8i8: 3823 case AArch64::SUBv16i8: 3824 case AArch64::SUBv4i16: 3825 case AArch64::SUBv8i16: 3826 case AArch64::SUBv2i32: 3827 case AArch64::SUBv4i32: 3828 return true; 3829 default: 3830 break; 3831 } 3832 return false; 3833 } 3834 3835 // FP Opcodes that can be combined with a FMUL 3836 static bool isCombineInstrCandidateFP(const MachineInstr &Inst) { 3837 switch (Inst.getOpcode()) { 3838 default: 3839 break; 3840 case AArch64::FADDHrr: 3841 case AArch64::FADDSrr: 3842 case AArch64::FADDDrr: 3843 case AArch64::FADDv4f16: 3844 case AArch64::FADDv8f16: 3845 case AArch64::FADDv2f32: 3846 case AArch64::FADDv2f64: 3847 case AArch64::FADDv4f32: 3848 case AArch64::FSUBHrr: 3849 case AArch64::FSUBSrr: 3850 case AArch64::FSUBDrr: 3851 case AArch64::FSUBv4f16: 3852 case AArch64::FSUBv8f16: 3853 case AArch64::FSUBv2f32: 3854 case AArch64::FSUBv2f64: 3855 case AArch64::FSUBv4f32: 3856 TargetOptions Options = Inst.getParent()->getParent()->getTarget().Options; 3857 return (Options.UnsafeFPMath || 3858 Options.AllowFPOpFusion == FPOpFusion::Fast); 3859 } 3860 return false; 3861 } 3862 3863 // Opcodes that can be combined with a MUL 3864 static bool isCombineInstrCandidate(unsigned Opc) { 3865 return (isCombineInstrCandidate32(Opc) || isCombineInstrCandidate64(Opc)); 3866 } 3867 3868 // 3869 // Utility routine that checks if \param MO is defined by an 3870 // \param CombineOpc instruction in the basic block \param MBB 3871 static bool canCombine(MachineBasicBlock &MBB, MachineOperand &MO, 3872 unsigned CombineOpc, unsigned ZeroReg = 0, 3873 bool CheckZeroReg = false) { 3874 MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo(); 3875 MachineInstr *MI = nullptr; 3876 3877 if (MO.isReg() && Register::isVirtualRegister(MO.getReg())) 
3878 MI = MRI.getUniqueVRegDef(MO.getReg()); 3879 // And it needs to be in the trace (otherwise, it won't have a depth). 3880 if (!MI || MI->getParent() != &MBB || (unsigned)MI->getOpcode() != CombineOpc) 3881 return false; 3882 // Must only used by the user we combine with. 3883 if (!MRI.hasOneNonDBGUse(MI->getOperand(0).getReg())) 3884 return false; 3885 3886 if (CheckZeroReg) { 3887 assert(MI->getNumOperands() >= 4 && MI->getOperand(0).isReg() && 3888 MI->getOperand(1).isReg() && MI->getOperand(2).isReg() && 3889 MI->getOperand(3).isReg() && "MAdd/MSub must have a least 4 regs"); 3890 // The third input reg must be zero. 3891 if (MI->getOperand(3).getReg() != ZeroReg) 3892 return false; 3893 } 3894 3895 return true; 3896 } 3897 3898 // 3899 // Is \param MO defined by an integer multiply and can be combined? 3900 static bool canCombineWithMUL(MachineBasicBlock &MBB, MachineOperand &MO, 3901 unsigned MulOpc, unsigned ZeroReg) { 3902 return canCombine(MBB, MO, MulOpc, ZeroReg, true); 3903 } 3904 3905 // 3906 // Is \param MO defined by a floating-point multiply and can be combined? 3907 static bool canCombineWithFMUL(MachineBasicBlock &MBB, MachineOperand &MO, 3908 unsigned MulOpc) { 3909 return canCombine(MBB, MO, MulOpc); 3910 } 3911 3912 // TODO: There are many more machine instruction opcodes to match: 3913 // 1. Other data types (integer, vectors) 3914 // 2. Other math / logic operations (xor, or) 3915 // 3. 
Other forms of the same operation (intrinsics and other variants) 3916 bool AArch64InstrInfo::isAssociativeAndCommutative( 3917 const MachineInstr &Inst) const { 3918 switch (Inst.getOpcode()) { 3919 case AArch64::FADDDrr: 3920 case AArch64::FADDSrr: 3921 case AArch64::FADDv2f32: 3922 case AArch64::FADDv2f64: 3923 case AArch64::FADDv4f32: 3924 case AArch64::FMULDrr: 3925 case AArch64::FMULSrr: 3926 case AArch64::FMULX32: 3927 case AArch64::FMULX64: 3928 case AArch64::FMULXv2f32: 3929 case AArch64::FMULXv2f64: 3930 case AArch64::FMULXv4f32: 3931 case AArch64::FMULv2f32: 3932 case AArch64::FMULv2f64: 3933 case AArch64::FMULv4f32: 3934 return Inst.getParent()->getParent()->getTarget().Options.UnsafeFPMath; 3935 default: 3936 return false; 3937 } 3938 } 3939 3940 /// Find instructions that can be turned into madd. 3941 static bool getMaddPatterns(MachineInstr &Root, 3942 SmallVectorImpl<MachineCombinerPattern> &Patterns) { 3943 unsigned Opc = Root.getOpcode(); 3944 MachineBasicBlock &MBB = *Root.getParent(); 3945 bool Found = false; 3946 3947 if (!isCombineInstrCandidate(Opc)) 3948 return false; 3949 if (isCombineInstrSettingFlag(Opc)) { 3950 int Cmp_NZCV = Root.findRegisterDefOperandIdx(AArch64::NZCV, true); 3951 // When NZCV is live bail out. 3952 if (Cmp_NZCV == -1) 3953 return false; 3954 unsigned NewOpc = convertToNonFlagSettingOpc(Root); 3955 // When opcode can't change bail out. 3956 // CHECKME: do we miss any cases for opcode conversion? 
3957 if (NewOpc == Opc) 3958 return false; 3959 Opc = NewOpc; 3960 } 3961 3962 auto setFound = [&](int Opcode, int Operand, unsigned ZeroReg, 3963 MachineCombinerPattern Pattern) { 3964 if (canCombineWithMUL(MBB, Root.getOperand(Operand), Opcode, ZeroReg)) { 3965 Patterns.push_back(Pattern); 3966 Found = true; 3967 } 3968 }; 3969 3970 auto setVFound = [&](int Opcode, int Operand, MachineCombinerPattern Pattern) { 3971 if (canCombine(MBB, Root.getOperand(Operand), Opcode)) { 3972 Patterns.push_back(Pattern); 3973 Found = true; 3974 } 3975 }; 3976 3977 typedef MachineCombinerPattern MCP; 3978 3979 switch (Opc) { 3980 default: 3981 break; 3982 case AArch64::ADDWrr: 3983 assert(Root.getOperand(1).isReg() && Root.getOperand(2).isReg() && 3984 "ADDWrr does not have register operands"); 3985 setFound(AArch64::MADDWrrr, 1, AArch64::WZR, MCP::MULADDW_OP1); 3986 setFound(AArch64::MADDWrrr, 2, AArch64::WZR, MCP::MULADDW_OP2); 3987 break; 3988 case AArch64::ADDXrr: 3989 setFound(AArch64::MADDXrrr, 1, AArch64::XZR, MCP::MULADDX_OP1); 3990 setFound(AArch64::MADDXrrr, 2, AArch64::XZR, MCP::MULADDX_OP2); 3991 break; 3992 case AArch64::SUBWrr: 3993 setFound(AArch64::MADDWrrr, 1, AArch64::WZR, MCP::MULSUBW_OP1); 3994 setFound(AArch64::MADDWrrr, 2, AArch64::WZR, MCP::MULSUBW_OP2); 3995 break; 3996 case AArch64::SUBXrr: 3997 setFound(AArch64::MADDXrrr, 1, AArch64::XZR, MCP::MULSUBX_OP1); 3998 setFound(AArch64::MADDXrrr, 2, AArch64::XZR, MCP::MULSUBX_OP2); 3999 break; 4000 case AArch64::ADDWri: 4001 setFound(AArch64::MADDWrrr, 1, AArch64::WZR, MCP::MULADDWI_OP1); 4002 break; 4003 case AArch64::ADDXri: 4004 setFound(AArch64::MADDXrrr, 1, AArch64::XZR, MCP::MULADDXI_OP1); 4005 break; 4006 case AArch64::SUBWri: 4007 setFound(AArch64::MADDWrrr, 1, AArch64::WZR, MCP::MULSUBWI_OP1); 4008 break; 4009 case AArch64::SUBXri: 4010 setFound(AArch64::MADDXrrr, 1, AArch64::XZR, MCP::MULSUBXI_OP1); 4011 break; 4012 case AArch64::ADDv8i8: 4013 setVFound(AArch64::MULv8i8, 1, MCP::MULADDv8i8_OP1); 4014 
setVFound(AArch64::MULv8i8, 2, MCP::MULADDv8i8_OP2); 4015 break; 4016 case AArch64::ADDv16i8: 4017 setVFound(AArch64::MULv16i8, 1, MCP::MULADDv16i8_OP1); 4018 setVFound(AArch64::MULv16i8, 2, MCP::MULADDv16i8_OP2); 4019 break; 4020 case AArch64::ADDv4i16: 4021 setVFound(AArch64::MULv4i16, 1, MCP::MULADDv4i16_OP1); 4022 setVFound(AArch64::MULv4i16, 2, MCP::MULADDv4i16_OP2); 4023 setVFound(AArch64::MULv4i16_indexed, 1, MCP::MULADDv4i16_indexed_OP1); 4024 setVFound(AArch64::MULv4i16_indexed, 2, MCP::MULADDv4i16_indexed_OP2); 4025 break; 4026 case AArch64::ADDv8i16: 4027 setVFound(AArch64::MULv8i16, 1, MCP::MULADDv8i16_OP1); 4028 setVFound(AArch64::MULv8i16, 2, MCP::MULADDv8i16_OP2); 4029 setVFound(AArch64::MULv8i16_indexed, 1, MCP::MULADDv8i16_indexed_OP1); 4030 setVFound(AArch64::MULv8i16_indexed, 2, MCP::MULADDv8i16_indexed_OP2); 4031 break; 4032 case AArch64::ADDv2i32: 4033 setVFound(AArch64::MULv2i32, 1, MCP::MULADDv2i32_OP1); 4034 setVFound(AArch64::MULv2i32, 2, MCP::MULADDv2i32_OP2); 4035 setVFound(AArch64::MULv2i32_indexed, 1, MCP::MULADDv2i32_indexed_OP1); 4036 setVFound(AArch64::MULv2i32_indexed, 2, MCP::MULADDv2i32_indexed_OP2); 4037 break; 4038 case AArch64::ADDv4i32: 4039 setVFound(AArch64::MULv4i32, 1, MCP::MULADDv4i32_OP1); 4040 setVFound(AArch64::MULv4i32, 2, MCP::MULADDv4i32_OP2); 4041 setVFound(AArch64::MULv4i32_indexed, 1, MCP::MULADDv4i32_indexed_OP1); 4042 setVFound(AArch64::MULv4i32_indexed, 2, MCP::MULADDv4i32_indexed_OP2); 4043 break; 4044 case AArch64::SUBv8i8: 4045 setVFound(AArch64::MULv8i8, 1, MCP::MULSUBv8i8_OP1); 4046 setVFound(AArch64::MULv8i8, 2, MCP::MULSUBv8i8_OP2); 4047 break; 4048 case AArch64::SUBv16i8: 4049 setVFound(AArch64::MULv16i8, 1, MCP::MULSUBv16i8_OP1); 4050 setVFound(AArch64::MULv16i8, 2, MCP::MULSUBv16i8_OP2); 4051 break; 4052 case AArch64::SUBv4i16: 4053 setVFound(AArch64::MULv4i16, 1, MCP::MULSUBv4i16_OP1); 4054 setVFound(AArch64::MULv4i16, 2, MCP::MULSUBv4i16_OP2); 4055 setVFound(AArch64::MULv4i16_indexed, 1, 
MCP::MULSUBv4i16_indexed_OP1); 4056 setVFound(AArch64::MULv4i16_indexed, 2, MCP::MULSUBv4i16_indexed_OP2); 4057 break; 4058 case AArch64::SUBv8i16: 4059 setVFound(AArch64::MULv8i16, 1, MCP::MULSUBv8i16_OP1); 4060 setVFound(AArch64::MULv8i16, 2, MCP::MULSUBv8i16_OP2); 4061 setVFound(AArch64::MULv8i16_indexed, 1, MCP::MULSUBv8i16_indexed_OP1); 4062 setVFound(AArch64::MULv8i16_indexed, 2, MCP::MULSUBv8i16_indexed_OP2); 4063 break; 4064 case AArch64::SUBv2i32: 4065 setVFound(AArch64::MULv2i32, 1, MCP::MULSUBv2i32_OP1); 4066 setVFound(AArch64::MULv2i32, 2, MCP::MULSUBv2i32_OP2); 4067 setVFound(AArch64::MULv2i32_indexed, 1, MCP::MULSUBv2i32_indexed_OP1); 4068 setVFound(AArch64::MULv2i32_indexed, 2, MCP::MULSUBv2i32_indexed_OP2); 4069 break; 4070 case AArch64::SUBv4i32: 4071 setVFound(AArch64::MULv4i32, 1, MCP::MULSUBv4i32_OP1); 4072 setVFound(AArch64::MULv4i32, 2, MCP::MULSUBv4i32_OP2); 4073 setVFound(AArch64::MULv4i32_indexed, 1, MCP::MULSUBv4i32_indexed_OP1); 4074 setVFound(AArch64::MULv4i32_indexed, 2, MCP::MULSUBv4i32_indexed_OP2); 4075 break; 4076 } 4077 return Found; 4078 } 4079 /// Floating-Point Support 4080 4081 /// Find instructions that can be turned into madd. 
4082 static bool getFMAPatterns(MachineInstr &Root, 4083 SmallVectorImpl<MachineCombinerPattern> &Patterns) { 4084 4085 if (!isCombineInstrCandidateFP(Root)) 4086 return false; 4087 4088 MachineBasicBlock &MBB = *Root.getParent(); 4089 bool Found = false; 4090 4091 auto Match = [&](int Opcode, int Operand, 4092 MachineCombinerPattern Pattern) -> bool { 4093 if (canCombineWithFMUL(MBB, Root.getOperand(Operand), Opcode)) { 4094 Patterns.push_back(Pattern); 4095 return true; 4096 } 4097 return false; 4098 }; 4099 4100 typedef MachineCombinerPattern MCP; 4101 4102 switch (Root.getOpcode()) { 4103 default: 4104 assert(false && "Unsupported FP instruction in combiner\n"); 4105 break; 4106 case AArch64::FADDHrr: 4107 assert(Root.getOperand(1).isReg() && Root.getOperand(2).isReg() && 4108 "FADDHrr does not have register operands"); 4109 4110 Found = Match(AArch64::FMULHrr, 1, MCP::FMULADDH_OP1); 4111 Found |= Match(AArch64::FMULHrr, 2, MCP::FMULADDH_OP2); 4112 break; 4113 case AArch64::FADDSrr: 4114 assert(Root.getOperand(1).isReg() && Root.getOperand(2).isReg() && 4115 "FADDSrr does not have register operands"); 4116 4117 Found |= Match(AArch64::FMULSrr, 1, MCP::FMULADDS_OP1) || 4118 Match(AArch64::FMULv1i32_indexed, 1, MCP::FMLAv1i32_indexed_OP1); 4119 4120 Found |= Match(AArch64::FMULSrr, 2, MCP::FMULADDS_OP2) || 4121 Match(AArch64::FMULv1i32_indexed, 2, MCP::FMLAv1i32_indexed_OP2); 4122 break; 4123 case AArch64::FADDDrr: 4124 Found |= Match(AArch64::FMULDrr, 1, MCP::FMULADDD_OP1) || 4125 Match(AArch64::FMULv1i64_indexed, 1, MCP::FMLAv1i64_indexed_OP1); 4126 4127 Found |= Match(AArch64::FMULDrr, 2, MCP::FMULADDD_OP2) || 4128 Match(AArch64::FMULv1i64_indexed, 2, MCP::FMLAv1i64_indexed_OP2); 4129 break; 4130 case AArch64::FADDv4f16: 4131 Found |= Match(AArch64::FMULv4i16_indexed, 1, MCP::FMLAv4i16_indexed_OP1) || 4132 Match(AArch64::FMULv4f16, 1, MCP::FMLAv4f16_OP1); 4133 4134 Found |= Match(AArch64::FMULv4i16_indexed, 2, MCP::FMLAv4i16_indexed_OP2) || 4135 
Match(AArch64::FMULv4f16, 2, MCP::FMLAv4f16_OP2); 4136 break; 4137 case AArch64::FADDv8f16: 4138 Found |= Match(AArch64::FMULv8i16_indexed, 1, MCP::FMLAv8i16_indexed_OP1) || 4139 Match(AArch64::FMULv8f16, 1, MCP::FMLAv8f16_OP1); 4140 4141 Found |= Match(AArch64::FMULv8i16_indexed, 2, MCP::FMLAv8i16_indexed_OP2) || 4142 Match(AArch64::FMULv8f16, 2, MCP::FMLAv8f16_OP2); 4143 break; 4144 case AArch64::FADDv2f32: 4145 Found |= Match(AArch64::FMULv2i32_indexed, 1, MCP::FMLAv2i32_indexed_OP1) || 4146 Match(AArch64::FMULv2f32, 1, MCP::FMLAv2f32_OP1); 4147 4148 Found |= Match(AArch64::FMULv2i32_indexed, 2, MCP::FMLAv2i32_indexed_OP2) || 4149 Match(AArch64::FMULv2f32, 2, MCP::FMLAv2f32_OP2); 4150 break; 4151 case AArch64::FADDv2f64: 4152 Found |= Match(AArch64::FMULv2i64_indexed, 1, MCP::FMLAv2i64_indexed_OP1) || 4153 Match(AArch64::FMULv2f64, 1, MCP::FMLAv2f64_OP1); 4154 4155 Found |= Match(AArch64::FMULv2i64_indexed, 2, MCP::FMLAv2i64_indexed_OP2) || 4156 Match(AArch64::FMULv2f64, 2, MCP::FMLAv2f64_OP2); 4157 break; 4158 case AArch64::FADDv4f32: 4159 Found |= Match(AArch64::FMULv4i32_indexed, 1, MCP::FMLAv4i32_indexed_OP1) || 4160 Match(AArch64::FMULv4f32, 1, MCP::FMLAv4f32_OP1); 4161 4162 Found |= Match(AArch64::FMULv4i32_indexed, 2, MCP::FMLAv4i32_indexed_OP2) || 4163 Match(AArch64::FMULv4f32, 2, MCP::FMLAv4f32_OP2); 4164 break; 4165 case AArch64::FSUBHrr: 4166 Found = Match(AArch64::FMULHrr, 1, MCP::FMULSUBH_OP1); 4167 Found |= Match(AArch64::FMULHrr, 2, MCP::FMULSUBH_OP2); 4168 Found |= Match(AArch64::FNMULHrr, 1, MCP::FNMULSUBH_OP1); 4169 break; 4170 case AArch64::FSUBSrr: 4171 Found = Match(AArch64::FMULSrr, 1, MCP::FMULSUBS_OP1); 4172 4173 Found |= Match(AArch64::FMULSrr, 2, MCP::FMULSUBS_OP2) || 4174 Match(AArch64::FMULv1i32_indexed, 2, MCP::FMLSv1i32_indexed_OP2); 4175 4176 Found |= Match(AArch64::FNMULSrr, 1, MCP::FNMULSUBS_OP1); 4177 break; 4178 case AArch64::FSUBDrr: 4179 Found = Match(AArch64::FMULDrr, 1, MCP::FMULSUBD_OP1); 4180 4181 Found |= 
Match(AArch64::FMULDrr, 2, MCP::FMULSUBD_OP2) || 4182 Match(AArch64::FMULv1i64_indexed, 2, MCP::FMLSv1i64_indexed_OP2); 4183 4184 Found |= Match(AArch64::FNMULDrr, 1, MCP::FNMULSUBD_OP1); 4185 break; 4186 case AArch64::FSUBv4f16: 4187 Found |= Match(AArch64::FMULv4i16_indexed, 2, MCP::FMLSv4i16_indexed_OP2) || 4188 Match(AArch64::FMULv4f16, 2, MCP::FMLSv4f16_OP2); 4189 4190 Found |= Match(AArch64::FMULv4i16_indexed, 1, MCP::FMLSv4i16_indexed_OP1) || 4191 Match(AArch64::FMULv4f16, 1, MCP::FMLSv4f16_OP1); 4192 break; 4193 case AArch64::FSUBv8f16: 4194 Found |= Match(AArch64::FMULv8i16_indexed, 2, MCP::FMLSv8i16_indexed_OP2) || 4195 Match(AArch64::FMULv8f16, 2, MCP::FMLSv8f16_OP2); 4196 4197 Found |= Match(AArch64::FMULv8i16_indexed, 1, MCP::FMLSv8i16_indexed_OP1) || 4198 Match(AArch64::FMULv8f16, 1, MCP::FMLSv8f16_OP1); 4199 break; 4200 case AArch64::FSUBv2f32: 4201 Found |= Match(AArch64::FMULv2i32_indexed, 2, MCP::FMLSv2i32_indexed_OP2) || 4202 Match(AArch64::FMULv2f32, 2, MCP::FMLSv2f32_OP2); 4203 4204 Found |= Match(AArch64::FMULv2i32_indexed, 1, MCP::FMLSv2i32_indexed_OP1) || 4205 Match(AArch64::FMULv2f32, 1, MCP::FMLSv2f32_OP1); 4206 break; 4207 case AArch64::FSUBv2f64: 4208 Found |= Match(AArch64::FMULv2i64_indexed, 2, MCP::FMLSv2i64_indexed_OP2) || 4209 Match(AArch64::FMULv2f64, 2, MCP::FMLSv2f64_OP2); 4210 4211 Found |= Match(AArch64::FMULv2i64_indexed, 1, MCP::FMLSv2i64_indexed_OP1) || 4212 Match(AArch64::FMULv2f64, 1, MCP::FMLSv2f64_OP1); 4213 break; 4214 case AArch64::FSUBv4f32: 4215 Found |= Match(AArch64::FMULv4i32_indexed, 2, MCP::FMLSv4i32_indexed_OP2) || 4216 Match(AArch64::FMULv4f32, 2, MCP::FMLSv4f32_OP2); 4217 4218 Found |= Match(AArch64::FMULv4i32_indexed, 1, MCP::FMLSv4i32_indexed_OP1) || 4219 Match(AArch64::FMULv4f32, 1, MCP::FMLSv4f32_OP1); 4220 break; 4221 } 4222 return Found; 4223 } 4224 4225 /// Return true when a code sequence can improve throughput. It 4226 /// should be called only for instructions in loops. 
4227 /// \param Pattern - combiner pattern 4228 bool AArch64InstrInfo::isThroughputPattern( 4229 MachineCombinerPattern Pattern) const { 4230 switch (Pattern) { 4231 default: 4232 break; 4233 case MachineCombinerPattern::FMULADDH_OP1: 4234 case MachineCombinerPattern::FMULADDH_OP2: 4235 case MachineCombinerPattern::FMULSUBH_OP1: 4236 case MachineCombinerPattern::FMULSUBH_OP2: 4237 case MachineCombinerPattern::FMULADDS_OP1: 4238 case MachineCombinerPattern::FMULADDS_OP2: 4239 case MachineCombinerPattern::FMULSUBS_OP1: 4240 case MachineCombinerPattern::FMULSUBS_OP2: 4241 case MachineCombinerPattern::FMULADDD_OP1: 4242 case MachineCombinerPattern::FMULADDD_OP2: 4243 case MachineCombinerPattern::FMULSUBD_OP1: 4244 case MachineCombinerPattern::FMULSUBD_OP2: 4245 case MachineCombinerPattern::FNMULSUBH_OP1: 4246 case MachineCombinerPattern::FNMULSUBS_OP1: 4247 case MachineCombinerPattern::FNMULSUBD_OP1: 4248 case MachineCombinerPattern::FMLAv4i16_indexed_OP1: 4249 case MachineCombinerPattern::FMLAv4i16_indexed_OP2: 4250 case MachineCombinerPattern::FMLAv8i16_indexed_OP1: 4251 case MachineCombinerPattern::FMLAv8i16_indexed_OP2: 4252 case MachineCombinerPattern::FMLAv1i32_indexed_OP1: 4253 case MachineCombinerPattern::FMLAv1i32_indexed_OP2: 4254 case MachineCombinerPattern::FMLAv1i64_indexed_OP1: 4255 case MachineCombinerPattern::FMLAv1i64_indexed_OP2: 4256 case MachineCombinerPattern::FMLAv4f16_OP2: 4257 case MachineCombinerPattern::FMLAv4f16_OP1: 4258 case MachineCombinerPattern::FMLAv8f16_OP1: 4259 case MachineCombinerPattern::FMLAv8f16_OP2: 4260 case MachineCombinerPattern::FMLAv2f32_OP2: 4261 case MachineCombinerPattern::FMLAv2f32_OP1: 4262 case MachineCombinerPattern::FMLAv2f64_OP1: 4263 case MachineCombinerPattern::FMLAv2f64_OP2: 4264 case MachineCombinerPattern::FMLAv2i32_indexed_OP1: 4265 case MachineCombinerPattern::FMLAv2i32_indexed_OP2: 4266 case MachineCombinerPattern::FMLAv2i64_indexed_OP1: 4267 case MachineCombinerPattern::FMLAv2i64_indexed_OP2: 4268 case 
MachineCombinerPattern::FMLAv4f32_OP1: 4269 case MachineCombinerPattern::FMLAv4f32_OP2: 4270 case MachineCombinerPattern::FMLAv4i32_indexed_OP1: 4271 case MachineCombinerPattern::FMLAv4i32_indexed_OP2: 4272 case MachineCombinerPattern::FMLSv4i16_indexed_OP1: 4273 case MachineCombinerPattern::FMLSv4i16_indexed_OP2: 4274 case MachineCombinerPattern::FMLSv8i16_indexed_OP1: 4275 case MachineCombinerPattern::FMLSv8i16_indexed_OP2: 4276 case MachineCombinerPattern::FMLSv1i32_indexed_OP2: 4277 case MachineCombinerPattern::FMLSv1i64_indexed_OP2: 4278 case MachineCombinerPattern::FMLSv2i32_indexed_OP2: 4279 case MachineCombinerPattern::FMLSv2i64_indexed_OP2: 4280 case MachineCombinerPattern::FMLSv4f16_OP1: 4281 case MachineCombinerPattern::FMLSv4f16_OP2: 4282 case MachineCombinerPattern::FMLSv8f16_OP1: 4283 case MachineCombinerPattern::FMLSv8f16_OP2: 4284 case MachineCombinerPattern::FMLSv2f32_OP2: 4285 case MachineCombinerPattern::FMLSv2f64_OP2: 4286 case MachineCombinerPattern::FMLSv4i32_indexed_OP2: 4287 case MachineCombinerPattern::FMLSv4f32_OP2: 4288 case MachineCombinerPattern::MULADDv8i8_OP1: 4289 case MachineCombinerPattern::MULADDv8i8_OP2: 4290 case MachineCombinerPattern::MULADDv16i8_OP1: 4291 case MachineCombinerPattern::MULADDv16i8_OP2: 4292 case MachineCombinerPattern::MULADDv4i16_OP1: 4293 case MachineCombinerPattern::MULADDv4i16_OP2: 4294 case MachineCombinerPattern::MULADDv8i16_OP1: 4295 case MachineCombinerPattern::MULADDv8i16_OP2: 4296 case MachineCombinerPattern::MULADDv2i32_OP1: 4297 case MachineCombinerPattern::MULADDv2i32_OP2: 4298 case MachineCombinerPattern::MULADDv4i32_OP1: 4299 case MachineCombinerPattern::MULADDv4i32_OP2: 4300 case MachineCombinerPattern::MULSUBv8i8_OP1: 4301 case MachineCombinerPattern::MULSUBv8i8_OP2: 4302 case MachineCombinerPattern::MULSUBv16i8_OP1: 4303 case MachineCombinerPattern::MULSUBv16i8_OP2: 4304 case MachineCombinerPattern::MULSUBv4i16_OP1: 4305 case MachineCombinerPattern::MULSUBv4i16_OP2: 4306 case 
MachineCombinerPattern::MULSUBv8i16_OP1: 4307 case MachineCombinerPattern::MULSUBv8i16_OP2: 4308 case MachineCombinerPattern::MULSUBv2i32_OP1: 4309 case MachineCombinerPattern::MULSUBv2i32_OP2: 4310 case MachineCombinerPattern::MULSUBv4i32_OP1: 4311 case MachineCombinerPattern::MULSUBv4i32_OP2: 4312 case MachineCombinerPattern::MULADDv4i16_indexed_OP1: 4313 case MachineCombinerPattern::MULADDv4i16_indexed_OP2: 4314 case MachineCombinerPattern::MULADDv8i16_indexed_OP1: 4315 case MachineCombinerPattern::MULADDv8i16_indexed_OP2: 4316 case MachineCombinerPattern::MULADDv2i32_indexed_OP1: 4317 case MachineCombinerPattern::MULADDv2i32_indexed_OP2: 4318 case MachineCombinerPattern::MULADDv4i32_indexed_OP1: 4319 case MachineCombinerPattern::MULADDv4i32_indexed_OP2: 4320 case MachineCombinerPattern::MULSUBv4i16_indexed_OP1: 4321 case MachineCombinerPattern::MULSUBv4i16_indexed_OP2: 4322 case MachineCombinerPattern::MULSUBv8i16_indexed_OP1: 4323 case MachineCombinerPattern::MULSUBv8i16_indexed_OP2: 4324 case MachineCombinerPattern::MULSUBv2i32_indexed_OP1: 4325 case MachineCombinerPattern::MULSUBv2i32_indexed_OP2: 4326 case MachineCombinerPattern::MULSUBv4i32_indexed_OP1: 4327 case MachineCombinerPattern::MULSUBv4i32_indexed_OP2: 4328 return true; 4329 } // end switch (Pattern) 4330 return false; 4331 } 4332 /// Return true when there is potentially a faster code sequence for an 4333 /// instruction chain ending in \p Root. All potential patterns are listed in 4334 /// the \p Pattern vector. Pattern should be sorted in priority order since the 4335 /// pattern evaluator stops checking as soon as it finds a faster sequence. 
4336 4337 bool AArch64InstrInfo::getMachineCombinerPatterns( 4338 MachineInstr &Root, 4339 SmallVectorImpl<MachineCombinerPattern> &Patterns) const { 4340 // Integer patterns 4341 if (getMaddPatterns(Root, Patterns)) 4342 return true; 4343 // Floating point patterns 4344 if (getFMAPatterns(Root, Patterns)) 4345 return true; 4346 4347 return TargetInstrInfo::getMachineCombinerPatterns(Root, Patterns); 4348 } 4349 4350 enum class FMAInstKind { Default, Indexed, Accumulator }; 4351 /// genFusedMultiply - Generate fused multiply instructions. 4352 /// This function supports both integer and floating point instructions. 4353 /// A typical example: 4354 /// F|MUL I=A,B,0 4355 /// F|ADD R,I,C 4356 /// ==> F|MADD R,A,B,C 4357 /// \param MF Containing MachineFunction 4358 /// \param MRI Register information 4359 /// \param TII Target information 4360 /// \param Root is the F|ADD instruction 4361 /// \param [out] InsInstrs is a vector of machine instructions and will 4362 /// contain the generated madd instruction 4363 /// \param IdxMulOpd is index of operand in Root that is the result of 4364 /// the F|MUL. In the example above IdxMulOpd is 1. 4365 /// \param MaddOpc the opcode fo the f|madd instruction 4366 /// \param RC Register class of operands 4367 /// \param kind of fma instruction (addressing mode) to be generated 4368 /// \param ReplacedAddend is the result register from the instruction 4369 /// replacing the non-combined operand, if any. 4370 static MachineInstr * 4371 genFusedMultiply(MachineFunction &MF, MachineRegisterInfo &MRI, 4372 const TargetInstrInfo *TII, MachineInstr &Root, 4373 SmallVectorImpl<MachineInstr *> &InsInstrs, unsigned IdxMulOpd, 4374 unsigned MaddOpc, const TargetRegisterClass *RC, 4375 FMAInstKind kind = FMAInstKind::Default, 4376 const Register *ReplacedAddend = nullptr) { 4377 assert(IdxMulOpd == 1 || IdxMulOpd == 2); 4378 4379 unsigned IdxOtherOpd = IdxMulOpd == 1 ? 
2 : 1; 4380 MachineInstr *MUL = MRI.getUniqueVRegDef(Root.getOperand(IdxMulOpd).getReg()); 4381 Register ResultReg = Root.getOperand(0).getReg(); 4382 Register SrcReg0 = MUL->getOperand(1).getReg(); 4383 bool Src0IsKill = MUL->getOperand(1).isKill(); 4384 Register SrcReg1 = MUL->getOperand(2).getReg(); 4385 bool Src1IsKill = MUL->getOperand(2).isKill(); 4386 4387 unsigned SrcReg2; 4388 bool Src2IsKill; 4389 if (ReplacedAddend) { 4390 // If we just generated a new addend, we must be it's only use. 4391 SrcReg2 = *ReplacedAddend; 4392 Src2IsKill = true; 4393 } else { 4394 SrcReg2 = Root.getOperand(IdxOtherOpd).getReg(); 4395 Src2IsKill = Root.getOperand(IdxOtherOpd).isKill(); 4396 } 4397 4398 if (Register::isVirtualRegister(ResultReg)) 4399 MRI.constrainRegClass(ResultReg, RC); 4400 if (Register::isVirtualRegister(SrcReg0)) 4401 MRI.constrainRegClass(SrcReg0, RC); 4402 if (Register::isVirtualRegister(SrcReg1)) 4403 MRI.constrainRegClass(SrcReg1, RC); 4404 if (Register::isVirtualRegister(SrcReg2)) 4405 MRI.constrainRegClass(SrcReg2, RC); 4406 4407 MachineInstrBuilder MIB; 4408 if (kind == FMAInstKind::Default) 4409 MIB = BuildMI(MF, Root.getDebugLoc(), TII->get(MaddOpc), ResultReg) 4410 .addReg(SrcReg0, getKillRegState(Src0IsKill)) 4411 .addReg(SrcReg1, getKillRegState(Src1IsKill)) 4412 .addReg(SrcReg2, getKillRegState(Src2IsKill)); 4413 else if (kind == FMAInstKind::Indexed) 4414 MIB = BuildMI(MF, Root.getDebugLoc(), TII->get(MaddOpc), ResultReg) 4415 .addReg(SrcReg2, getKillRegState(Src2IsKill)) 4416 .addReg(SrcReg0, getKillRegState(Src0IsKill)) 4417 .addReg(SrcReg1, getKillRegState(Src1IsKill)) 4418 .addImm(MUL->getOperand(3).getImm()); 4419 else if (kind == FMAInstKind::Accumulator) 4420 MIB = BuildMI(MF, Root.getDebugLoc(), TII->get(MaddOpc), ResultReg) 4421 .addReg(SrcReg2, getKillRegState(Src2IsKill)) 4422 .addReg(SrcReg0, getKillRegState(Src0IsKill)) 4423 .addReg(SrcReg1, getKillRegState(Src1IsKill)); 4424 else 4425 assert(false && "Invalid FMA instruction 
kind \n"); 4426 // Insert the MADD (MADD, FMA, FMS, FMLA, FMSL) 4427 InsInstrs.push_back(MIB); 4428 return MUL; 4429 } 4430 4431 /// genFusedMultiplyAcc - Helper to generate fused multiply accumulate 4432 /// instructions. 4433 /// 4434 /// \see genFusedMultiply 4435 static MachineInstr *genFusedMultiplyAcc( 4436 MachineFunction &MF, MachineRegisterInfo &MRI, const TargetInstrInfo *TII, 4437 MachineInstr &Root, SmallVectorImpl<MachineInstr *> &InsInstrs, 4438 unsigned IdxMulOpd, unsigned MaddOpc, const TargetRegisterClass *RC) { 4439 return genFusedMultiply(MF, MRI, TII, Root, InsInstrs, IdxMulOpd, MaddOpc, RC, 4440 FMAInstKind::Accumulator); 4441 } 4442 4443 /// genNeg - Helper to generate an intermediate negation of the second operand 4444 /// of Root 4445 static Register genNeg(MachineFunction &MF, MachineRegisterInfo &MRI, 4446 const TargetInstrInfo *TII, MachineInstr &Root, 4447 SmallVectorImpl<MachineInstr *> &InsInstrs, 4448 DenseMap<unsigned, unsigned> &InstrIdxForVirtReg, 4449 unsigned MnegOpc, const TargetRegisterClass *RC) { 4450 Register NewVR = MRI.createVirtualRegister(RC); 4451 MachineInstrBuilder MIB = 4452 BuildMI(MF, Root.getDebugLoc(), TII->get(MnegOpc), NewVR) 4453 .add(Root.getOperand(2)); 4454 InsInstrs.push_back(MIB); 4455 4456 assert(InstrIdxForVirtReg.empty()); 4457 InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0)); 4458 4459 return NewVR; 4460 } 4461 4462 /// genFusedMultiplyAccNeg - Helper to generate fused multiply accumulate 4463 /// instructions with an additional negation of the accumulator 4464 static MachineInstr *genFusedMultiplyAccNeg( 4465 MachineFunction &MF, MachineRegisterInfo &MRI, const TargetInstrInfo *TII, 4466 MachineInstr &Root, SmallVectorImpl<MachineInstr *> &InsInstrs, 4467 DenseMap<unsigned, unsigned> &InstrIdxForVirtReg, unsigned IdxMulOpd, 4468 unsigned MaddOpc, unsigned MnegOpc, const TargetRegisterClass *RC) { 4469 assert(IdxMulOpd == 1); 4470 4471 Register NewVR = 4472 genNeg(MF, MRI, TII, Root, InsInstrs, 
InstrIdxForVirtReg, MnegOpc, RC); 4473 return genFusedMultiply(MF, MRI, TII, Root, InsInstrs, IdxMulOpd, MaddOpc, RC, 4474 FMAInstKind::Accumulator, &NewVR); 4475 } 4476 4477 /// genFusedMultiplyIdx - Helper to generate fused multiply accumulate 4478 /// instructions. 4479 /// 4480 /// \see genFusedMultiply 4481 static MachineInstr *genFusedMultiplyIdx( 4482 MachineFunction &MF, MachineRegisterInfo &MRI, const TargetInstrInfo *TII, 4483 MachineInstr &Root, SmallVectorImpl<MachineInstr *> &InsInstrs, 4484 unsigned IdxMulOpd, unsigned MaddOpc, const TargetRegisterClass *RC) { 4485 return genFusedMultiply(MF, MRI, TII, Root, InsInstrs, IdxMulOpd, MaddOpc, RC, 4486 FMAInstKind::Indexed); 4487 } 4488 4489 /// genFusedMultiplyAccNeg - Helper to generate fused multiply accumulate 4490 /// instructions with an additional negation of the accumulator 4491 static MachineInstr *genFusedMultiplyIdxNeg( 4492 MachineFunction &MF, MachineRegisterInfo &MRI, const TargetInstrInfo *TII, 4493 MachineInstr &Root, SmallVectorImpl<MachineInstr *> &InsInstrs, 4494 DenseMap<unsigned, unsigned> &InstrIdxForVirtReg, unsigned IdxMulOpd, 4495 unsigned MaddOpc, unsigned MnegOpc, const TargetRegisterClass *RC) { 4496 assert(IdxMulOpd == 1); 4497 4498 Register NewVR = 4499 genNeg(MF, MRI, TII, Root, InsInstrs, InstrIdxForVirtReg, MnegOpc, RC); 4500 4501 return genFusedMultiply(MF, MRI, TII, Root, InsInstrs, IdxMulOpd, MaddOpc, RC, 4502 FMAInstKind::Indexed, &NewVR); 4503 } 4504 4505 /// genMaddR - Generate madd instruction and combine mul and add using 4506 /// an extra virtual register 4507 /// Example - an ADD intermediate needs to be stored in a register: 4508 /// MUL I=A,B,0 4509 /// ADD R,I,Imm 4510 /// ==> ORR V, ZR, Imm 4511 /// ==> MADD R,A,B,V 4512 /// \param MF Containing MachineFunction 4513 /// \param MRI Register information 4514 /// \param TII Target information 4515 /// \param Root is the ADD instruction 4516 /// \param [out] InsInstrs is a vector of machine instructions and will 
4517 /// contain the generated madd instruction 4518 /// \param IdxMulOpd is index of operand in Root that is the result of 4519 /// the MUL. In the example above IdxMulOpd is 1. 4520 /// \param MaddOpc the opcode fo the madd instruction 4521 /// \param VR is a virtual register that holds the value of an ADD operand 4522 /// (V in the example above). 4523 /// \param RC Register class of operands 4524 static MachineInstr *genMaddR(MachineFunction &MF, MachineRegisterInfo &MRI, 4525 const TargetInstrInfo *TII, MachineInstr &Root, 4526 SmallVectorImpl<MachineInstr *> &InsInstrs, 4527 unsigned IdxMulOpd, unsigned MaddOpc, unsigned VR, 4528 const TargetRegisterClass *RC) { 4529 assert(IdxMulOpd == 1 || IdxMulOpd == 2); 4530 4531 MachineInstr *MUL = MRI.getUniqueVRegDef(Root.getOperand(IdxMulOpd).getReg()); 4532 Register ResultReg = Root.getOperand(0).getReg(); 4533 Register SrcReg0 = MUL->getOperand(1).getReg(); 4534 bool Src0IsKill = MUL->getOperand(1).isKill(); 4535 Register SrcReg1 = MUL->getOperand(2).getReg(); 4536 bool Src1IsKill = MUL->getOperand(2).isKill(); 4537 4538 if (Register::isVirtualRegister(ResultReg)) 4539 MRI.constrainRegClass(ResultReg, RC); 4540 if (Register::isVirtualRegister(SrcReg0)) 4541 MRI.constrainRegClass(SrcReg0, RC); 4542 if (Register::isVirtualRegister(SrcReg1)) 4543 MRI.constrainRegClass(SrcReg1, RC); 4544 if (Register::isVirtualRegister(VR)) 4545 MRI.constrainRegClass(VR, RC); 4546 4547 MachineInstrBuilder MIB = 4548 BuildMI(MF, Root.getDebugLoc(), TII->get(MaddOpc), ResultReg) 4549 .addReg(SrcReg0, getKillRegState(Src0IsKill)) 4550 .addReg(SrcReg1, getKillRegState(Src1IsKill)) 4551 .addReg(VR); 4552 // Insert the MADD 4553 InsInstrs.push_back(MIB); 4554 return MUL; 4555 } 4556 4557 /// When getMachineCombinerPatterns() finds potential patterns, 4558 /// this function generates the instructions that could replace the 4559 /// original code sequence 4560 void AArch64InstrInfo::genAlternativeCodeSequence( 4561 MachineInstr &Root, 
MachineCombinerPattern Pattern, 4562 SmallVectorImpl<MachineInstr *> &InsInstrs, 4563 SmallVectorImpl<MachineInstr *> &DelInstrs, 4564 DenseMap<unsigned, unsigned> &InstrIdxForVirtReg) const { 4565 MachineBasicBlock &MBB = *Root.getParent(); 4566 MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo(); 4567 MachineFunction &MF = *MBB.getParent(); 4568 const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo(); 4569 4570 MachineInstr *MUL; 4571 const TargetRegisterClass *RC; 4572 unsigned Opc; 4573 switch (Pattern) { 4574 default: 4575 // Reassociate instructions. 4576 TargetInstrInfo::genAlternativeCodeSequence(Root, Pattern, InsInstrs, 4577 DelInstrs, InstrIdxForVirtReg); 4578 return; 4579 case MachineCombinerPattern::MULADDW_OP1: 4580 case MachineCombinerPattern::MULADDX_OP1: 4581 // MUL I=A,B,0 4582 // ADD R,I,C 4583 // ==> MADD R,A,B,C 4584 // --- Create(MADD); 4585 if (Pattern == MachineCombinerPattern::MULADDW_OP1) { 4586 Opc = AArch64::MADDWrrr; 4587 RC = &AArch64::GPR32RegClass; 4588 } else { 4589 Opc = AArch64::MADDXrrr; 4590 RC = &AArch64::GPR64RegClass; 4591 } 4592 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC); 4593 break; 4594 case MachineCombinerPattern::MULADDW_OP2: 4595 case MachineCombinerPattern::MULADDX_OP2: 4596 // MUL I=A,B,0 4597 // ADD R,C,I 4598 // ==> MADD R,A,B,C 4599 // --- Create(MADD); 4600 if (Pattern == MachineCombinerPattern::MULADDW_OP2) { 4601 Opc = AArch64::MADDWrrr; 4602 RC = &AArch64::GPR32RegClass; 4603 } else { 4604 Opc = AArch64::MADDXrrr; 4605 RC = &AArch64::GPR64RegClass; 4606 } 4607 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC); 4608 break; 4609 case MachineCombinerPattern::MULADDWI_OP1: 4610 case MachineCombinerPattern::MULADDXI_OP1: { 4611 // MUL I=A,B,0 4612 // ADD R,I,Imm 4613 // ==> ORR V, ZR, Imm 4614 // ==> MADD R,A,B,V 4615 // --- Create(MADD); 4616 const TargetRegisterClass *OrrRC; 4617 unsigned BitSize, OrrOpc, ZeroReg; 4618 if (Pattern == 
MachineCombinerPattern::MULADDWI_OP1) { 4619 OrrOpc = AArch64::ORRWri; 4620 OrrRC = &AArch64::GPR32spRegClass; 4621 BitSize = 32; 4622 ZeroReg = AArch64::WZR; 4623 Opc = AArch64::MADDWrrr; 4624 RC = &AArch64::GPR32RegClass; 4625 } else { 4626 OrrOpc = AArch64::ORRXri; 4627 OrrRC = &AArch64::GPR64spRegClass; 4628 BitSize = 64; 4629 ZeroReg = AArch64::XZR; 4630 Opc = AArch64::MADDXrrr; 4631 RC = &AArch64::GPR64RegClass; 4632 } 4633 Register NewVR = MRI.createVirtualRegister(OrrRC); 4634 uint64_t Imm = Root.getOperand(2).getImm(); 4635 4636 if (Root.getOperand(3).isImm()) { 4637 unsigned Val = Root.getOperand(3).getImm(); 4638 Imm = Imm << Val; 4639 } 4640 uint64_t UImm = SignExtend64(Imm, BitSize); 4641 uint64_t Encoding; 4642 if (AArch64_AM::processLogicalImmediate(UImm, BitSize, Encoding)) { 4643 MachineInstrBuilder MIB1 = 4644 BuildMI(MF, Root.getDebugLoc(), TII->get(OrrOpc), NewVR) 4645 .addReg(ZeroReg) 4646 .addImm(Encoding); 4647 InsInstrs.push_back(MIB1); 4648 InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0)); 4649 MUL = genMaddR(MF, MRI, TII, Root, InsInstrs, 1, Opc, NewVR, RC); 4650 } 4651 break; 4652 } 4653 case MachineCombinerPattern::MULSUBW_OP1: 4654 case MachineCombinerPattern::MULSUBX_OP1: { 4655 // MUL I=A,B,0 4656 // SUB R,I, C 4657 // ==> SUB V, 0, C 4658 // ==> MADD R,A,B,V // = -C + A*B 4659 // --- Create(MADD); 4660 const TargetRegisterClass *SubRC; 4661 unsigned SubOpc, ZeroReg; 4662 if (Pattern == MachineCombinerPattern::MULSUBW_OP1) { 4663 SubOpc = AArch64::SUBWrr; 4664 SubRC = &AArch64::GPR32spRegClass; 4665 ZeroReg = AArch64::WZR; 4666 Opc = AArch64::MADDWrrr; 4667 RC = &AArch64::GPR32RegClass; 4668 } else { 4669 SubOpc = AArch64::SUBXrr; 4670 SubRC = &AArch64::GPR64spRegClass; 4671 ZeroReg = AArch64::XZR; 4672 Opc = AArch64::MADDXrrr; 4673 RC = &AArch64::GPR64RegClass; 4674 } 4675 Register NewVR = MRI.createVirtualRegister(SubRC); 4676 // SUB NewVR, 0, C 4677 MachineInstrBuilder MIB1 = 4678 BuildMI(MF, Root.getDebugLoc(), 
TII->get(SubOpc), NewVR) 4679 .addReg(ZeroReg) 4680 .add(Root.getOperand(2)); 4681 InsInstrs.push_back(MIB1); 4682 InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0)); 4683 MUL = genMaddR(MF, MRI, TII, Root, InsInstrs, 1, Opc, NewVR, RC); 4684 break; 4685 } 4686 case MachineCombinerPattern::MULSUBW_OP2: 4687 case MachineCombinerPattern::MULSUBX_OP2: 4688 // MUL I=A,B,0 4689 // SUB R,C,I 4690 // ==> MSUB R,A,B,C (computes C - A*B) 4691 // --- Create(MSUB); 4692 if (Pattern == MachineCombinerPattern::MULSUBW_OP2) { 4693 Opc = AArch64::MSUBWrrr; 4694 RC = &AArch64::GPR32RegClass; 4695 } else { 4696 Opc = AArch64::MSUBXrrr; 4697 RC = &AArch64::GPR64RegClass; 4698 } 4699 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC); 4700 break; 4701 case MachineCombinerPattern::MULSUBWI_OP1: 4702 case MachineCombinerPattern::MULSUBXI_OP1: { 4703 // MUL I=A,B,0 4704 // SUB R,I, Imm 4705 // ==> ORR V, ZR, -Imm 4706 // ==> MADD R,A,B,V // = -Imm + A*B 4707 // --- Create(MADD); 4708 const TargetRegisterClass *OrrRC; 4709 unsigned BitSize, OrrOpc, ZeroReg; 4710 if (Pattern == MachineCombinerPattern::MULSUBWI_OP1) { 4711 OrrOpc = AArch64::ORRWri; 4712 OrrRC = &AArch64::GPR32spRegClass; 4713 BitSize = 32; 4714 ZeroReg = AArch64::WZR; 4715 Opc = AArch64::MADDWrrr; 4716 RC = &AArch64::GPR32RegClass; 4717 } else { 4718 OrrOpc = AArch64::ORRXri; 4719 OrrRC = &AArch64::GPR64spRegClass; 4720 BitSize = 64; 4721 ZeroReg = AArch64::XZR; 4722 Opc = AArch64::MADDXrrr; 4723 RC = &AArch64::GPR64RegClass; 4724 } 4725 Register NewVR = MRI.createVirtualRegister(OrrRC); 4726 uint64_t Imm = Root.getOperand(2).getImm(); 4727 if (Root.getOperand(3).isImm()) { 4728 unsigned Val = Root.getOperand(3).getImm(); 4729 Imm = Imm << Val; 4730 } 4731 uint64_t UImm = SignExtend64(-Imm, BitSize); 4732 uint64_t Encoding; 4733 if (AArch64_AM::processLogicalImmediate(UImm, BitSize, Encoding)) { 4734 MachineInstrBuilder MIB1 = 4735 BuildMI(MF, Root.getDebugLoc(), TII->get(OrrOpc), NewVR) 4736 
.addReg(ZeroReg) 4737 .addImm(Encoding); 4738 InsInstrs.push_back(MIB1); 4739 InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0)); 4740 MUL = genMaddR(MF, MRI, TII, Root, InsInstrs, 1, Opc, NewVR, RC); 4741 } 4742 break; 4743 } 4744 4745 case MachineCombinerPattern::MULADDv8i8_OP1: 4746 Opc = AArch64::MLAv8i8; 4747 RC = &AArch64::FPR64RegClass; 4748 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC); 4749 break; 4750 case MachineCombinerPattern::MULADDv8i8_OP2: 4751 Opc = AArch64::MLAv8i8; 4752 RC = &AArch64::FPR64RegClass; 4753 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC); 4754 break; 4755 case MachineCombinerPattern::MULADDv16i8_OP1: 4756 Opc = AArch64::MLAv16i8; 4757 RC = &AArch64::FPR128RegClass; 4758 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC); 4759 break; 4760 case MachineCombinerPattern::MULADDv16i8_OP2: 4761 Opc = AArch64::MLAv16i8; 4762 RC = &AArch64::FPR128RegClass; 4763 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC); 4764 break; 4765 case MachineCombinerPattern::MULADDv4i16_OP1: 4766 Opc = AArch64::MLAv4i16; 4767 RC = &AArch64::FPR64RegClass; 4768 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC); 4769 break; 4770 case MachineCombinerPattern::MULADDv4i16_OP2: 4771 Opc = AArch64::MLAv4i16; 4772 RC = &AArch64::FPR64RegClass; 4773 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC); 4774 break; 4775 case MachineCombinerPattern::MULADDv8i16_OP1: 4776 Opc = AArch64::MLAv8i16; 4777 RC = &AArch64::FPR128RegClass; 4778 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC); 4779 break; 4780 case MachineCombinerPattern::MULADDv8i16_OP2: 4781 Opc = AArch64::MLAv8i16; 4782 RC = &AArch64::FPR128RegClass; 4783 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC); 4784 break; 4785 case MachineCombinerPattern::MULADDv2i32_OP1: 4786 Opc = AArch64::MLAv2i32; 4787 RC = &AArch64::FPR64RegClass; 4788 MUL = 
genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC); 4789 break; 4790 case MachineCombinerPattern::MULADDv2i32_OP2: 4791 Opc = AArch64::MLAv2i32; 4792 RC = &AArch64::FPR64RegClass; 4793 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC); 4794 break; 4795 case MachineCombinerPattern::MULADDv4i32_OP1: 4796 Opc = AArch64::MLAv4i32; 4797 RC = &AArch64::FPR128RegClass; 4798 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC); 4799 break; 4800 case MachineCombinerPattern::MULADDv4i32_OP2: 4801 Opc = AArch64::MLAv4i32; 4802 RC = &AArch64::FPR128RegClass; 4803 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC); 4804 break; 4805 4806 case MachineCombinerPattern::MULSUBv8i8_OP1: 4807 Opc = AArch64::MLAv8i8; 4808 RC = &AArch64::FPR64RegClass; 4809 MUL = genFusedMultiplyAccNeg(MF, MRI, TII, Root, InsInstrs, 4810 InstrIdxForVirtReg, 1, Opc, AArch64::NEGv8i8, 4811 RC); 4812 break; 4813 case MachineCombinerPattern::MULSUBv8i8_OP2: 4814 Opc = AArch64::MLSv8i8; 4815 RC = &AArch64::FPR64RegClass; 4816 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC); 4817 break; 4818 case MachineCombinerPattern::MULSUBv16i8_OP1: 4819 Opc = AArch64::MLAv16i8; 4820 RC = &AArch64::FPR128RegClass; 4821 MUL = genFusedMultiplyAccNeg(MF, MRI, TII, Root, InsInstrs, 4822 InstrIdxForVirtReg, 1, Opc, AArch64::NEGv16i8, 4823 RC); 4824 break; 4825 case MachineCombinerPattern::MULSUBv16i8_OP2: 4826 Opc = AArch64::MLSv16i8; 4827 RC = &AArch64::FPR128RegClass; 4828 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC); 4829 break; 4830 case MachineCombinerPattern::MULSUBv4i16_OP1: 4831 Opc = AArch64::MLAv4i16; 4832 RC = &AArch64::FPR64RegClass; 4833 MUL = genFusedMultiplyAccNeg(MF, MRI, TII, Root, InsInstrs, 4834 InstrIdxForVirtReg, 1, Opc, AArch64::NEGv4i16, 4835 RC); 4836 break; 4837 case MachineCombinerPattern::MULSUBv4i16_OP2: 4838 Opc = AArch64::MLSv4i16; 4839 RC = &AArch64::FPR64RegClass; 4840 MUL = 
genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC); 4841 break; 4842 case MachineCombinerPattern::MULSUBv8i16_OP1: 4843 Opc = AArch64::MLAv8i16; 4844 RC = &AArch64::FPR128RegClass; 4845 MUL = genFusedMultiplyAccNeg(MF, MRI, TII, Root, InsInstrs, 4846 InstrIdxForVirtReg, 1, Opc, AArch64::NEGv8i16, 4847 RC); 4848 break; 4849 case MachineCombinerPattern::MULSUBv8i16_OP2: 4850 Opc = AArch64::MLSv8i16; 4851 RC = &AArch64::FPR128RegClass; 4852 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC); 4853 break; 4854 case MachineCombinerPattern::MULSUBv2i32_OP1: 4855 Opc = AArch64::MLAv2i32; 4856 RC = &AArch64::FPR64RegClass; 4857 MUL = genFusedMultiplyAccNeg(MF, MRI, TII, Root, InsInstrs, 4858 InstrIdxForVirtReg, 1, Opc, AArch64::NEGv2i32, 4859 RC); 4860 break; 4861 case MachineCombinerPattern::MULSUBv2i32_OP2: 4862 Opc = AArch64::MLSv2i32; 4863 RC = &AArch64::FPR64RegClass; 4864 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC); 4865 break; 4866 case MachineCombinerPattern::MULSUBv4i32_OP1: 4867 Opc = AArch64::MLAv4i32; 4868 RC = &AArch64::FPR128RegClass; 4869 MUL = genFusedMultiplyAccNeg(MF, MRI, TII, Root, InsInstrs, 4870 InstrIdxForVirtReg, 1, Opc, AArch64::NEGv4i32, 4871 RC); 4872 break; 4873 case MachineCombinerPattern::MULSUBv4i32_OP2: 4874 Opc = AArch64::MLSv4i32; 4875 RC = &AArch64::FPR128RegClass; 4876 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC); 4877 break; 4878 4879 case MachineCombinerPattern::MULADDv4i16_indexed_OP1: 4880 Opc = AArch64::MLAv4i16_indexed; 4881 RC = &AArch64::FPR64RegClass; 4882 MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC); 4883 break; 4884 case MachineCombinerPattern::MULADDv4i16_indexed_OP2: 4885 Opc = AArch64::MLAv4i16_indexed; 4886 RC = &AArch64::FPR64RegClass; 4887 MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC); 4888 break; 4889 case MachineCombinerPattern::MULADDv8i16_indexed_OP1: 4890 Opc = AArch64::MLAv8i16_indexed; 4891 
RC = &AArch64::FPR128RegClass; 4892 MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC); 4893 break; 4894 case MachineCombinerPattern::MULADDv8i16_indexed_OP2: 4895 Opc = AArch64::MLAv8i16_indexed; 4896 RC = &AArch64::FPR128RegClass; 4897 MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC); 4898 break; 4899 case MachineCombinerPattern::MULADDv2i32_indexed_OP1: 4900 Opc = AArch64::MLAv2i32_indexed; 4901 RC = &AArch64::FPR64RegClass; 4902 MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC); 4903 break; 4904 case MachineCombinerPattern::MULADDv2i32_indexed_OP2: 4905 Opc = AArch64::MLAv2i32_indexed; 4906 RC = &AArch64::FPR64RegClass; 4907 MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC); 4908 break; 4909 case MachineCombinerPattern::MULADDv4i32_indexed_OP1: 4910 Opc = AArch64::MLAv4i32_indexed; 4911 RC = &AArch64::FPR128RegClass; 4912 MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC); 4913 break; 4914 case MachineCombinerPattern::MULADDv4i32_indexed_OP2: 4915 Opc = AArch64::MLAv4i32_indexed; 4916 RC = &AArch64::FPR128RegClass; 4917 MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC); 4918 break; 4919 4920 case MachineCombinerPattern::MULSUBv4i16_indexed_OP1: 4921 Opc = AArch64::MLAv4i16_indexed; 4922 RC = &AArch64::FPR64RegClass; 4923 MUL = genFusedMultiplyIdxNeg(MF, MRI, TII, Root, InsInstrs, 4924 InstrIdxForVirtReg, 1, Opc, AArch64::NEGv4i16, 4925 RC); 4926 break; 4927 case MachineCombinerPattern::MULSUBv4i16_indexed_OP2: 4928 Opc = AArch64::MLSv4i16_indexed; 4929 RC = &AArch64::FPR64RegClass; 4930 MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC); 4931 break; 4932 case MachineCombinerPattern::MULSUBv8i16_indexed_OP1: 4933 Opc = AArch64::MLAv8i16_indexed; 4934 RC = &AArch64::FPR128RegClass; 4935 MUL = genFusedMultiplyIdxNeg(MF, MRI, TII, Root, InsInstrs, 4936 InstrIdxForVirtReg, 1, Opc, AArch64::NEGv8i16, 4937 RC); 4938 break; 4939 case 
MachineCombinerPattern::MULSUBv8i16_indexed_OP2: 4940 Opc = AArch64::MLSv8i16_indexed; 4941 RC = &AArch64::FPR128RegClass; 4942 MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC); 4943 break; 4944 case MachineCombinerPattern::MULSUBv2i32_indexed_OP1: 4945 Opc = AArch64::MLAv2i32_indexed; 4946 RC = &AArch64::FPR64RegClass; 4947 MUL = genFusedMultiplyIdxNeg(MF, MRI, TII, Root, InsInstrs, 4948 InstrIdxForVirtReg, 1, Opc, AArch64::NEGv2i32, 4949 RC); 4950 break; 4951 case MachineCombinerPattern::MULSUBv2i32_indexed_OP2: 4952 Opc = AArch64::MLSv2i32_indexed; 4953 RC = &AArch64::FPR64RegClass; 4954 MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC); 4955 break; 4956 case MachineCombinerPattern::MULSUBv4i32_indexed_OP1: 4957 Opc = AArch64::MLAv4i32_indexed; 4958 RC = &AArch64::FPR128RegClass; 4959 MUL = genFusedMultiplyIdxNeg(MF, MRI, TII, Root, InsInstrs, 4960 InstrIdxForVirtReg, 1, Opc, AArch64::NEGv4i32, 4961 RC); 4962 break; 4963 case MachineCombinerPattern::MULSUBv4i32_indexed_OP2: 4964 Opc = AArch64::MLSv4i32_indexed; 4965 RC = &AArch64::FPR128RegClass; 4966 MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC); 4967 break; 4968 4969 // Floating Point Support 4970 case MachineCombinerPattern::FMULADDH_OP1: 4971 Opc = AArch64::FMADDHrrr; 4972 RC = &AArch64::FPR16RegClass; 4973 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC); 4974 break; 4975 case MachineCombinerPattern::FMULADDS_OP1: 4976 Opc = AArch64::FMADDSrrr; 4977 RC = &AArch64::FPR32RegClass; 4978 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC); 4979 break; 4980 case MachineCombinerPattern::FMULADDD_OP1: 4981 Opc = AArch64::FMADDDrrr; 4982 RC = &AArch64::FPR64RegClass; 4983 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC); 4984 break; 4985 4986 case MachineCombinerPattern::FMULADDH_OP2: 4987 Opc = AArch64::FMADDHrrr; 4988 RC = &AArch64::FPR16RegClass; 4989 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 
2, Opc, RC); 4990 break; 4991 case MachineCombinerPattern::FMULADDS_OP2: 4992 Opc = AArch64::FMADDSrrr; 4993 RC = &AArch64::FPR32RegClass; 4994 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC); 4995 break; 4996 case MachineCombinerPattern::FMULADDD_OP2: 4997 Opc = AArch64::FMADDDrrr; 4998 RC = &AArch64::FPR64RegClass; 4999 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC); 5000 break; 5001 5002 case MachineCombinerPattern::FMLAv1i32_indexed_OP1: 5003 Opc = AArch64::FMLAv1i32_indexed; 5004 RC = &AArch64::FPR32RegClass; 5005 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC, 5006 FMAInstKind::Indexed); 5007 break; 5008 case MachineCombinerPattern::FMLAv1i32_indexed_OP2: 5009 Opc = AArch64::FMLAv1i32_indexed; 5010 RC = &AArch64::FPR32RegClass; 5011 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC, 5012 FMAInstKind::Indexed); 5013 break; 5014 5015 case MachineCombinerPattern::FMLAv1i64_indexed_OP1: 5016 Opc = AArch64::FMLAv1i64_indexed; 5017 RC = &AArch64::FPR64RegClass; 5018 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC, 5019 FMAInstKind::Indexed); 5020 break; 5021 case MachineCombinerPattern::FMLAv1i64_indexed_OP2: 5022 Opc = AArch64::FMLAv1i64_indexed; 5023 RC = &AArch64::FPR64RegClass; 5024 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC, 5025 FMAInstKind::Indexed); 5026 break; 5027 5028 case MachineCombinerPattern::FMLAv4i16_indexed_OP1: 5029 RC = &AArch64::FPR64RegClass; 5030 Opc = AArch64::FMLAv4i16_indexed; 5031 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC, 5032 FMAInstKind::Indexed); 5033 break; 5034 case MachineCombinerPattern::FMLAv4f16_OP1: 5035 RC = &AArch64::FPR64RegClass; 5036 Opc = AArch64::FMLAv4f16; 5037 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC, 5038 FMAInstKind::Accumulator); 5039 break; 5040 case MachineCombinerPattern::FMLAv4i16_indexed_OP2: 5041 RC = &AArch64::FPR64RegClass; 5042 Opc = 
AArch64::FMLAv4i16_indexed; 5043 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC, 5044 FMAInstKind::Indexed); 5045 break; 5046 case MachineCombinerPattern::FMLAv4f16_OP2: 5047 RC = &AArch64::FPR64RegClass; 5048 Opc = AArch64::FMLAv4f16; 5049 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC, 5050 FMAInstKind::Accumulator); 5051 break; 5052 5053 case MachineCombinerPattern::FMLAv2i32_indexed_OP1: 5054 case MachineCombinerPattern::FMLAv2f32_OP1: 5055 RC = &AArch64::FPR64RegClass; 5056 if (Pattern == MachineCombinerPattern::FMLAv2i32_indexed_OP1) { 5057 Opc = AArch64::FMLAv2i32_indexed; 5058 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC, 5059 FMAInstKind::Indexed); 5060 } else { 5061 Opc = AArch64::FMLAv2f32; 5062 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC, 5063 FMAInstKind::Accumulator); 5064 } 5065 break; 5066 case MachineCombinerPattern::FMLAv2i32_indexed_OP2: 5067 case MachineCombinerPattern::FMLAv2f32_OP2: 5068 RC = &AArch64::FPR64RegClass; 5069 if (Pattern == MachineCombinerPattern::FMLAv2i32_indexed_OP2) { 5070 Opc = AArch64::FMLAv2i32_indexed; 5071 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC, 5072 FMAInstKind::Indexed); 5073 } else { 5074 Opc = AArch64::FMLAv2f32; 5075 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC, 5076 FMAInstKind::Accumulator); 5077 } 5078 break; 5079 5080 case MachineCombinerPattern::FMLAv8i16_indexed_OP1: 5081 RC = &AArch64::FPR128RegClass; 5082 Opc = AArch64::FMLAv8i16_indexed; 5083 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC, 5084 FMAInstKind::Indexed); 5085 break; 5086 case MachineCombinerPattern::FMLAv8f16_OP1: 5087 RC = &AArch64::FPR128RegClass; 5088 Opc = AArch64::FMLAv8f16; 5089 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC, 5090 FMAInstKind::Accumulator); 5091 break; 5092 case MachineCombinerPattern::FMLAv8i16_indexed_OP2: 5093 RC = &AArch64::FPR128RegClass; 5094 Opc = 
AArch64::FMLAv8i16_indexed; 5095 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC, 5096 FMAInstKind::Indexed); 5097 break; 5098 case MachineCombinerPattern::FMLAv8f16_OP2: 5099 RC = &AArch64::FPR128RegClass; 5100 Opc = AArch64::FMLAv8f16; 5101 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC, 5102 FMAInstKind::Accumulator); 5103 break; 5104 5105 case MachineCombinerPattern::FMLAv2i64_indexed_OP1: 5106 case MachineCombinerPattern::FMLAv2f64_OP1: 5107 RC = &AArch64::FPR128RegClass; 5108 if (Pattern == MachineCombinerPattern::FMLAv2i64_indexed_OP1) { 5109 Opc = AArch64::FMLAv2i64_indexed; 5110 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC, 5111 FMAInstKind::Indexed); 5112 } else { 5113 Opc = AArch64::FMLAv2f64; 5114 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC, 5115 FMAInstKind::Accumulator); 5116 } 5117 break; 5118 case MachineCombinerPattern::FMLAv2i64_indexed_OP2: 5119 case MachineCombinerPattern::FMLAv2f64_OP2: 5120 RC = &AArch64::FPR128RegClass; 5121 if (Pattern == MachineCombinerPattern::FMLAv2i64_indexed_OP2) { 5122 Opc = AArch64::FMLAv2i64_indexed; 5123 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC, 5124 FMAInstKind::Indexed); 5125 } else { 5126 Opc = AArch64::FMLAv2f64; 5127 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC, 5128 FMAInstKind::Accumulator); 5129 } 5130 break; 5131 5132 case MachineCombinerPattern::FMLAv4i32_indexed_OP1: 5133 case MachineCombinerPattern::FMLAv4f32_OP1: 5134 RC = &AArch64::FPR128RegClass; 5135 if (Pattern == MachineCombinerPattern::FMLAv4i32_indexed_OP1) { 5136 Opc = AArch64::FMLAv4i32_indexed; 5137 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC, 5138 FMAInstKind::Indexed); 5139 } else { 5140 Opc = AArch64::FMLAv4f32; 5141 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC, 5142 FMAInstKind::Accumulator); 5143 } 5144 break; 5145 5146 case MachineCombinerPattern::FMLAv4i32_indexed_OP2: 5147 case 
MachineCombinerPattern::FMLAv4f32_OP2: 5148 RC = &AArch64::FPR128RegClass; 5149 if (Pattern == MachineCombinerPattern::FMLAv4i32_indexed_OP2) { 5150 Opc = AArch64::FMLAv4i32_indexed; 5151 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC, 5152 FMAInstKind::Indexed); 5153 } else { 5154 Opc = AArch64::FMLAv4f32; 5155 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC, 5156 FMAInstKind::Accumulator); 5157 } 5158 break; 5159 5160 case MachineCombinerPattern::FMULSUBH_OP1: 5161 Opc = AArch64::FNMSUBHrrr; 5162 RC = &AArch64::FPR16RegClass; 5163 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC); 5164 break; 5165 case MachineCombinerPattern::FMULSUBS_OP1: 5166 Opc = AArch64::FNMSUBSrrr; 5167 RC = &AArch64::FPR32RegClass; 5168 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC); 5169 break; 5170 case MachineCombinerPattern::FMULSUBD_OP1: 5171 Opc = AArch64::FNMSUBDrrr; 5172 RC = &AArch64::FPR64RegClass; 5173 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC); 5174 break; 5175 5176 case MachineCombinerPattern::FNMULSUBH_OP1: 5177 Opc = AArch64::FNMADDHrrr; 5178 RC = &AArch64::FPR16RegClass; 5179 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC); 5180 break; 5181 case MachineCombinerPattern::FNMULSUBS_OP1: 5182 Opc = AArch64::FNMADDSrrr; 5183 RC = &AArch64::FPR32RegClass; 5184 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC); 5185 break; 5186 case MachineCombinerPattern::FNMULSUBD_OP1: 5187 Opc = AArch64::FNMADDDrrr; 5188 RC = &AArch64::FPR64RegClass; 5189 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC); 5190 break; 5191 5192 case MachineCombinerPattern::FMULSUBH_OP2: 5193 Opc = AArch64::FMSUBHrrr; 5194 RC = &AArch64::FPR16RegClass; 5195 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC); 5196 break; 5197 case MachineCombinerPattern::FMULSUBS_OP2: 5198 Opc = AArch64::FMSUBSrrr; 5199 RC = &AArch64::FPR32RegClass; 5200 MUL = 
genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC); 5201 break; 5202 case MachineCombinerPattern::FMULSUBD_OP2: 5203 Opc = AArch64::FMSUBDrrr; 5204 RC = &AArch64::FPR64RegClass; 5205 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC); 5206 break; 5207 5208 case MachineCombinerPattern::FMLSv1i32_indexed_OP2: 5209 Opc = AArch64::FMLSv1i32_indexed; 5210 RC = &AArch64::FPR32RegClass; 5211 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC, 5212 FMAInstKind::Indexed); 5213 break; 5214 5215 case MachineCombinerPattern::FMLSv1i64_indexed_OP2: 5216 Opc = AArch64::FMLSv1i64_indexed; 5217 RC = &AArch64::FPR64RegClass; 5218 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC, 5219 FMAInstKind::Indexed); 5220 break; 5221 5222 case MachineCombinerPattern::FMLSv4f16_OP1: 5223 case MachineCombinerPattern::FMLSv4i16_indexed_OP1: { 5224 RC = &AArch64::FPR64RegClass; 5225 Register NewVR = MRI.createVirtualRegister(RC); 5226 MachineInstrBuilder MIB1 = 5227 BuildMI(MF, Root.getDebugLoc(), TII->get(AArch64::FNEGv4f16), NewVR) 5228 .add(Root.getOperand(2)); 5229 InsInstrs.push_back(MIB1); 5230 InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0)); 5231 if (Pattern == MachineCombinerPattern::FMLSv4f16_OP1) { 5232 Opc = AArch64::FMLAv4f16; 5233 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC, 5234 FMAInstKind::Accumulator, &NewVR); 5235 } else { 5236 Opc = AArch64::FMLAv4i16_indexed; 5237 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC, 5238 FMAInstKind::Indexed, &NewVR); 5239 } 5240 break; 5241 } 5242 case MachineCombinerPattern::FMLSv4f16_OP2: 5243 RC = &AArch64::FPR64RegClass; 5244 Opc = AArch64::FMLSv4f16; 5245 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC, 5246 FMAInstKind::Accumulator); 5247 break; 5248 case MachineCombinerPattern::FMLSv4i16_indexed_OP2: 5249 RC = &AArch64::FPR64RegClass; 5250 Opc = AArch64::FMLSv4i16_indexed; 5251 MUL = genFusedMultiply(MF, MRI, TII, Root, 
InsInstrs, 2, Opc, RC, 5252 FMAInstKind::Indexed); 5253 break; 5254 5255 case MachineCombinerPattern::FMLSv2f32_OP2: 5256 case MachineCombinerPattern::FMLSv2i32_indexed_OP2: 5257 RC = &AArch64::FPR64RegClass; 5258 if (Pattern == MachineCombinerPattern::FMLSv2i32_indexed_OP2) { 5259 Opc = AArch64::FMLSv2i32_indexed; 5260 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC, 5261 FMAInstKind::Indexed); 5262 } else { 5263 Opc = AArch64::FMLSv2f32; 5264 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC, 5265 FMAInstKind::Accumulator); 5266 } 5267 break; 5268 5269 case MachineCombinerPattern::FMLSv8f16_OP1: 5270 case MachineCombinerPattern::FMLSv8i16_indexed_OP1: { 5271 RC = &AArch64::FPR128RegClass; 5272 Register NewVR = MRI.createVirtualRegister(RC); 5273 MachineInstrBuilder MIB1 = 5274 BuildMI(MF, Root.getDebugLoc(), TII->get(AArch64::FNEGv8f16), NewVR) 5275 .add(Root.getOperand(2)); 5276 InsInstrs.push_back(MIB1); 5277 InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0)); 5278 if (Pattern == MachineCombinerPattern::FMLSv8f16_OP1) { 5279 Opc = AArch64::FMLAv8f16; 5280 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC, 5281 FMAInstKind::Accumulator, &NewVR); 5282 } else { 5283 Opc = AArch64::FMLAv8i16_indexed; 5284 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC, 5285 FMAInstKind::Indexed, &NewVR); 5286 } 5287 break; 5288 } 5289 case MachineCombinerPattern::FMLSv8f16_OP2: 5290 RC = &AArch64::FPR128RegClass; 5291 Opc = AArch64::FMLSv8f16; 5292 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC, 5293 FMAInstKind::Accumulator); 5294 break; 5295 case MachineCombinerPattern::FMLSv8i16_indexed_OP2: 5296 RC = &AArch64::FPR128RegClass; 5297 Opc = AArch64::FMLSv8i16_indexed; 5298 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC, 5299 FMAInstKind::Indexed); 5300 break; 5301 5302 case MachineCombinerPattern::FMLSv2f64_OP2: 5303 case MachineCombinerPattern::FMLSv2i64_indexed_OP2: 5304 RC = 
&AArch64::FPR128RegClass; 5305 if (Pattern == MachineCombinerPattern::FMLSv2i64_indexed_OP2) { 5306 Opc = AArch64::FMLSv2i64_indexed; 5307 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC, 5308 FMAInstKind::Indexed); 5309 } else { 5310 Opc = AArch64::FMLSv2f64; 5311 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC, 5312 FMAInstKind::Accumulator); 5313 } 5314 break; 5315 5316 case MachineCombinerPattern::FMLSv4f32_OP2: 5317 case MachineCombinerPattern::FMLSv4i32_indexed_OP2: 5318 RC = &AArch64::FPR128RegClass; 5319 if (Pattern == MachineCombinerPattern::FMLSv4i32_indexed_OP2) { 5320 Opc = AArch64::FMLSv4i32_indexed; 5321 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC, 5322 FMAInstKind::Indexed); 5323 } else { 5324 Opc = AArch64::FMLSv4f32; 5325 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC, 5326 FMAInstKind::Accumulator); 5327 } 5328 break; 5329 case MachineCombinerPattern::FMLSv2f32_OP1: 5330 case MachineCombinerPattern::FMLSv2i32_indexed_OP1: { 5331 RC = &AArch64::FPR64RegClass; 5332 Register NewVR = MRI.createVirtualRegister(RC); 5333 MachineInstrBuilder MIB1 = 5334 BuildMI(MF, Root.getDebugLoc(), TII->get(AArch64::FNEGv2f32), NewVR) 5335 .add(Root.getOperand(2)); 5336 InsInstrs.push_back(MIB1); 5337 InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0)); 5338 if (Pattern == MachineCombinerPattern::FMLSv2i32_indexed_OP1) { 5339 Opc = AArch64::FMLAv2i32_indexed; 5340 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC, 5341 FMAInstKind::Indexed, &NewVR); 5342 } else { 5343 Opc = AArch64::FMLAv2f32; 5344 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC, 5345 FMAInstKind::Accumulator, &NewVR); 5346 } 5347 break; 5348 } 5349 case MachineCombinerPattern::FMLSv4f32_OP1: 5350 case MachineCombinerPattern::FMLSv4i32_indexed_OP1: { 5351 RC = &AArch64::FPR128RegClass; 5352 Register NewVR = MRI.createVirtualRegister(RC); 5353 MachineInstrBuilder MIB1 = 5354 BuildMI(MF, 
Root.getDebugLoc(), TII->get(AArch64::FNEGv4f32), NewVR) 5355 .add(Root.getOperand(2)); 5356 InsInstrs.push_back(MIB1); 5357 InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0)); 5358 if (Pattern == MachineCombinerPattern::FMLSv4i32_indexed_OP1) { 5359 Opc = AArch64::FMLAv4i32_indexed; 5360 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC, 5361 FMAInstKind::Indexed, &NewVR); 5362 } else { 5363 Opc = AArch64::FMLAv4f32; 5364 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC, 5365 FMAInstKind::Accumulator, &NewVR); 5366 } 5367 break; 5368 } 5369 case MachineCombinerPattern::FMLSv2f64_OP1: 5370 case MachineCombinerPattern::FMLSv2i64_indexed_OP1: { 5371 RC = &AArch64::FPR128RegClass; 5372 Register NewVR = MRI.createVirtualRegister(RC); 5373 MachineInstrBuilder MIB1 = 5374 BuildMI(MF, Root.getDebugLoc(), TII->get(AArch64::FNEGv2f64), NewVR) 5375 .add(Root.getOperand(2)); 5376 InsInstrs.push_back(MIB1); 5377 InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0)); 5378 if (Pattern == MachineCombinerPattern::FMLSv2i64_indexed_OP1) { 5379 Opc = AArch64::FMLAv2i64_indexed; 5380 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC, 5381 FMAInstKind::Indexed, &NewVR); 5382 } else { 5383 Opc = AArch64::FMLAv2f64; 5384 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC, 5385 FMAInstKind::Accumulator, &NewVR); 5386 } 5387 break; 5388 } 5389 } // end switch (Pattern) 5390 // Record MUL and ADD/SUB for deletion 5391 DelInstrs.push_back(MUL); 5392 DelInstrs.push_back(&Root); 5393 } 5394 5395 /// Replace csincr-branch sequence by simple conditional branch 5396 /// 5397 /// Examples: 5398 /// 1. \code 5399 /// csinc w9, wzr, wzr, <condition code> 5400 /// tbnz w9, #0, 0x44 5401 /// \endcode 5402 /// to 5403 /// \code 5404 /// b.<inverted condition code> 5405 /// \endcode 5406 /// 5407 /// 2. 
\code 5408 /// csinc w9, wzr, wzr, <condition code> 5409 /// tbz w9, #0, 0x44 5410 /// \endcode 5411 /// to 5412 /// \code 5413 /// b.<condition code> 5414 /// \endcode 5415 /// 5416 /// Replace compare and branch sequence by TBZ/TBNZ instruction when the 5417 /// compare's constant operand is power of 2. 5418 /// 5419 /// Examples: 5420 /// \code 5421 /// and w8, w8, #0x400 5422 /// cbnz w8, L1 5423 /// \endcode 5424 /// to 5425 /// \code 5426 /// tbnz w8, #10, L1 5427 /// \endcode 5428 /// 5429 /// \param MI Conditional Branch 5430 /// \return True when the simple conditional branch is generated 5431 /// 5432 bool AArch64InstrInfo::optimizeCondBranch(MachineInstr &MI) const { 5433 bool IsNegativeBranch = false; 5434 bool IsTestAndBranch = false; 5435 unsigned TargetBBInMI = 0; 5436 switch (MI.getOpcode()) { 5437 default: 5438 llvm_unreachable("Unknown branch instruction?"); 5439 case AArch64::Bcc: 5440 return false; 5441 case AArch64::CBZW: 5442 case AArch64::CBZX: 5443 TargetBBInMI = 1; 5444 break; 5445 case AArch64::CBNZW: 5446 case AArch64::CBNZX: 5447 TargetBBInMI = 1; 5448 IsNegativeBranch = true; 5449 break; 5450 case AArch64::TBZW: 5451 case AArch64::TBZX: 5452 TargetBBInMI = 2; 5453 IsTestAndBranch = true; 5454 break; 5455 case AArch64::TBNZW: 5456 case AArch64::TBNZX: 5457 TargetBBInMI = 2; 5458 IsNegativeBranch = true; 5459 IsTestAndBranch = true; 5460 break; 5461 } 5462 // So we increment a zero register and test for bits other 5463 // than bit 0? Conservatively bail out in case the verifier 5464 // missed this case. 5465 if (IsTestAndBranch && MI.getOperand(1).getImm()) 5466 return false; 5467 5468 // Find Definition. 
5469 assert(MI.getParent() && "Incomplete machine instruciton\n"); 5470 MachineBasicBlock *MBB = MI.getParent(); 5471 MachineFunction *MF = MBB->getParent(); 5472 MachineRegisterInfo *MRI = &MF->getRegInfo(); 5473 Register VReg = MI.getOperand(0).getReg(); 5474 if (!Register::isVirtualRegister(VReg)) 5475 return false; 5476 5477 MachineInstr *DefMI = MRI->getVRegDef(VReg); 5478 5479 // Look through COPY instructions to find definition. 5480 while (DefMI->isCopy()) { 5481 Register CopyVReg = DefMI->getOperand(1).getReg(); 5482 if (!MRI->hasOneNonDBGUse(CopyVReg)) 5483 return false; 5484 if (!MRI->hasOneDef(CopyVReg)) 5485 return false; 5486 DefMI = MRI->getVRegDef(CopyVReg); 5487 } 5488 5489 switch (DefMI->getOpcode()) { 5490 default: 5491 return false; 5492 // Fold AND into a TBZ/TBNZ if constant operand is power of 2. 5493 case AArch64::ANDWri: 5494 case AArch64::ANDXri: { 5495 if (IsTestAndBranch) 5496 return false; 5497 if (DefMI->getParent() != MBB) 5498 return false; 5499 if (!MRI->hasOneNonDBGUse(VReg)) 5500 return false; 5501 5502 bool Is32Bit = (DefMI->getOpcode() == AArch64::ANDWri); 5503 uint64_t Mask = AArch64_AM::decodeLogicalImmediate( 5504 DefMI->getOperand(2).getImm(), Is32Bit ? 32 : 64); 5505 if (!isPowerOf2_64(Mask)) 5506 return false; 5507 5508 MachineOperand &MO = DefMI->getOperand(1); 5509 Register NewReg = MO.getReg(); 5510 if (!Register::isVirtualRegister(NewReg)) 5511 return false; 5512 5513 assert(!MRI->def_empty(NewReg) && "Register must be defined."); 5514 5515 MachineBasicBlock &RefToMBB = *MBB; 5516 MachineBasicBlock *TBB = MI.getOperand(1).getMBB(); 5517 DebugLoc DL = MI.getDebugLoc(); 5518 unsigned Imm = Log2_64(Mask); 5519 unsigned Opc = (Imm < 32) 5520 ? (IsNegativeBranch ? AArch64::TBNZW : AArch64::TBZW) 5521 : (IsNegativeBranch ? AArch64::TBNZX : AArch64::TBZX); 5522 MachineInstr *NewMI = BuildMI(RefToMBB, MI, DL, get(Opc)) 5523 .addReg(NewReg) 5524 .addImm(Imm) 5525 .addMBB(TBB); 5526 // Register lives on to the CBZ now. 
5527 MO.setIsKill(false); 5528 5529 // For immediate smaller than 32, we need to use the 32-bit 5530 // variant (W) in all cases. Indeed the 64-bit variant does not 5531 // allow to encode them. 5532 // Therefore, if the input register is 64-bit, we need to take the 5533 // 32-bit sub-part. 5534 if (!Is32Bit && Imm < 32) 5535 NewMI->getOperand(0).setSubReg(AArch64::sub_32); 5536 MI.eraseFromParent(); 5537 return true; 5538 } 5539 // Look for CSINC 5540 case AArch64::CSINCWr: 5541 case AArch64::CSINCXr: { 5542 if (!(DefMI->getOperand(1).getReg() == AArch64::WZR && 5543 DefMI->getOperand(2).getReg() == AArch64::WZR) && 5544 !(DefMI->getOperand(1).getReg() == AArch64::XZR && 5545 DefMI->getOperand(2).getReg() == AArch64::XZR)) 5546 return false; 5547 5548 if (DefMI->findRegisterDefOperandIdx(AArch64::NZCV, true) != -1) 5549 return false; 5550 5551 AArch64CC::CondCode CC = (AArch64CC::CondCode)DefMI->getOperand(3).getImm(); 5552 // Convert only when the condition code is not modified between 5553 // the CSINC and the branch. The CC may be used by other 5554 // instructions in between. 
5555 if (areCFlagsAccessedBetweenInstrs(DefMI, MI, &getRegisterInfo(), AK_Write)) 5556 return false; 5557 MachineBasicBlock &RefToMBB = *MBB; 5558 MachineBasicBlock *TBB = MI.getOperand(TargetBBInMI).getMBB(); 5559 DebugLoc DL = MI.getDebugLoc(); 5560 if (IsNegativeBranch) 5561 CC = AArch64CC::getInvertedCondCode(CC); 5562 BuildMI(RefToMBB, MI, DL, get(AArch64::Bcc)).addImm(CC).addMBB(TBB); 5563 MI.eraseFromParent(); 5564 return true; 5565 } 5566 } 5567 } 5568 5569 std::pair<unsigned, unsigned> 5570 AArch64InstrInfo::decomposeMachineOperandsTargetFlags(unsigned TF) const { 5571 const unsigned Mask = AArch64II::MO_FRAGMENT; 5572 return std::make_pair(TF & Mask, TF & ~Mask); 5573 } 5574 5575 ArrayRef<std::pair<unsigned, const char *>> 5576 AArch64InstrInfo::getSerializableDirectMachineOperandTargetFlags() const { 5577 using namespace AArch64II; 5578 5579 static const std::pair<unsigned, const char *> TargetFlags[] = { 5580 {MO_PAGE, "aarch64-page"}, {MO_PAGEOFF, "aarch64-pageoff"}, 5581 {MO_G3, "aarch64-g3"}, {MO_G2, "aarch64-g2"}, 5582 {MO_G1, "aarch64-g1"}, {MO_G0, "aarch64-g0"}, 5583 {MO_HI12, "aarch64-hi12"}}; 5584 return makeArrayRef(TargetFlags); 5585 } 5586 5587 ArrayRef<std::pair<unsigned, const char *>> 5588 AArch64InstrInfo::getSerializableBitmaskMachineOperandTargetFlags() const { 5589 using namespace AArch64II; 5590 5591 static const std::pair<unsigned, const char *> TargetFlags[] = { 5592 {MO_COFFSTUB, "aarch64-coffstub"}, 5593 {MO_GOT, "aarch64-got"}, 5594 {MO_NC, "aarch64-nc"}, 5595 {MO_S, "aarch64-s"}, 5596 {MO_TLS, "aarch64-tls"}, 5597 {MO_DLLIMPORT, "aarch64-dllimport"}, 5598 {MO_PREL, "aarch64-prel"}, 5599 {MO_TAGGED, "aarch64-tagged"}}; 5600 return makeArrayRef(TargetFlags); 5601 } 5602 5603 ArrayRef<std::pair<MachineMemOperand::Flags, const char *>> 5604 AArch64InstrInfo::getSerializableMachineMemOperandTargetFlags() const { 5605 static const std::pair<MachineMemOperand::Flags, const char *> TargetFlags[] = 5606 {{MOSuppressPair, 
"aarch64-suppress-pair"}, 5607 {MOStridedAccess, "aarch64-strided-access"}}; 5608 return makeArrayRef(TargetFlags); 5609 } 5610 5611 /// Constants defining how certain sequences should be outlined. 5612 /// This encompasses how an outlined function should be called, and what kind of 5613 /// frame should be emitted for that outlined function. 5614 /// 5615 /// \p MachineOutlinerDefault implies that the function should be called with 5616 /// a save and restore of LR to the stack. 5617 /// 5618 /// That is, 5619 /// 5620 /// I1 Save LR OUTLINED_FUNCTION: 5621 /// I2 --> BL OUTLINED_FUNCTION I1 5622 /// I3 Restore LR I2 5623 /// I3 5624 /// RET 5625 /// 5626 /// * Call construction overhead: 3 (save + BL + restore) 5627 /// * Frame construction overhead: 1 (ret) 5628 /// * Requires stack fixups? Yes 5629 /// 5630 /// \p MachineOutlinerTailCall implies that the function is being created from 5631 /// a sequence of instructions ending in a return. 5632 /// 5633 /// That is, 5634 /// 5635 /// I1 OUTLINED_FUNCTION: 5636 /// I2 --> B OUTLINED_FUNCTION I1 5637 /// RET I2 5638 /// RET 5639 /// 5640 /// * Call construction overhead: 1 (B) 5641 /// * Frame construction overhead: 0 (Return included in sequence) 5642 /// * Requires stack fixups? No 5643 /// 5644 /// \p MachineOutlinerNoLRSave implies that the function should be called using 5645 /// a BL instruction, but doesn't require LR to be saved and restored. This 5646 /// happens when LR is known to be dead. 5647 /// 5648 /// That is, 5649 /// 5650 /// I1 OUTLINED_FUNCTION: 5651 /// I2 --> BL OUTLINED_FUNCTION I1 5652 /// I3 I2 5653 /// I3 5654 /// RET 5655 /// 5656 /// * Call construction overhead: 1 (BL) 5657 /// * Frame construction overhead: 1 (RET) 5658 /// * Requires stack fixups? No 5659 /// 5660 /// \p MachineOutlinerThunk implies that the function is being created from 5661 /// a sequence of instructions ending in a call. 
The outlined function is 5662 /// called with a BL instruction, and the outlined function tail-calls the 5663 /// original call destination. 5664 /// 5665 /// That is, 5666 /// 5667 /// I1 OUTLINED_FUNCTION: 5668 /// I2 --> BL OUTLINED_FUNCTION I1 5669 /// BL f I2 5670 /// B f 5671 /// * Call construction overhead: 1 (BL) 5672 /// * Frame construction overhead: 0 5673 /// * Requires stack fixups? No 5674 /// 5675 /// \p MachineOutlinerRegSave implies that the function should be called with a 5676 /// save and restore of LR to an available register. This allows us to avoid 5677 /// stack fixups. Note that this outlining variant is compatible with the 5678 /// NoLRSave case. 5679 /// 5680 /// That is, 5681 /// 5682 /// I1 Save LR OUTLINED_FUNCTION: 5683 /// I2 --> BL OUTLINED_FUNCTION I1 5684 /// I3 Restore LR I2 5685 /// I3 5686 /// RET 5687 /// 5688 /// * Call construction overhead: 3 (save + BL + restore) 5689 /// * Frame construction overhead: 1 (ret) 5690 /// * Requires stack fixups? No 5691 enum MachineOutlinerClass { 5692 MachineOutlinerDefault, /// Emit a save, restore, call, and return. 5693 MachineOutlinerTailCall, /// Only emit a branch. 5694 MachineOutlinerNoLRSave, /// Emit a call and return. 5695 MachineOutlinerThunk, /// Emit a call and tail-call. 5696 MachineOutlinerRegSave /// Same as default, but save to a register. 5697 }; 5698 5699 enum MachineOutlinerMBBFlags { 5700 LRUnavailableSomewhere = 0x2, 5701 HasCalls = 0x4, 5702 UnsafeRegsDead = 0x8 5703 }; 5704 5705 unsigned 5706 AArch64InstrInfo::findRegisterToSaveLRTo(const outliner::Candidate &C) const { 5707 assert(C.LRUWasSet && "LRU wasn't set?"); 5708 MachineFunction *MF = C.getMF(); 5709 const AArch64RegisterInfo *ARI = static_cast<const AArch64RegisterInfo *>( 5710 MF->getSubtarget().getRegisterInfo()); 5711 5712 // Check if there is an available register across the sequence that we can 5713 // use. 
5714 for (unsigned Reg : AArch64::GPR64RegClass) { 5715 if (!ARI->isReservedReg(*MF, Reg) && 5716 Reg != AArch64::LR && // LR is not reserved, but don't use it. 5717 Reg != AArch64::X16 && // X16 is not guaranteed to be preserved. 5718 Reg != AArch64::X17 && // Ditto for X17. 5719 C.LRU.available(Reg) && C.UsedInSequence.available(Reg)) 5720 return Reg; 5721 } 5722 5723 // No suitable register. Return 0. 5724 return 0u; 5725 } 5726 5727 static bool 5728 outliningCandidatesSigningScopeConsensus(const outliner::Candidate &a, 5729 const outliner::Candidate &b) { 5730 const Function &Fa = a.getMF()->getFunction(); 5731 const Function &Fb = b.getMF()->getFunction(); 5732 5733 // If none of the functions have the "sign-return-address" attribute their 5734 // signing behaviour is equal 5735 if (!Fa.hasFnAttribute("sign-return-address") && 5736 !Fb.hasFnAttribute("sign-return-address")) { 5737 return true; 5738 } 5739 5740 // If both functions have the "sign-return-address" attribute their signing 5741 // behaviour is equal, if the values of the attributes are equal 5742 if (Fa.hasFnAttribute("sign-return-address") && 5743 Fb.hasFnAttribute("sign-return-address")) { 5744 StringRef ScopeA = 5745 Fa.getFnAttribute("sign-return-address").getValueAsString(); 5746 StringRef ScopeB = 5747 Fb.getFnAttribute("sign-return-address").getValueAsString(); 5748 return ScopeA.equals(ScopeB); 5749 } 5750 5751 // If function B doesn't have the "sign-return-address" attribute but A does, 5752 // the functions' signing behaviour is equal if A's value for 5753 // "sign-return-address" is "none" and vice versa. 
5754 if (Fa.hasFnAttribute("sign-return-address")) { 5755 StringRef ScopeA = 5756 Fa.getFnAttribute("sign-return-address").getValueAsString(); 5757 return ScopeA.equals("none"); 5758 } 5759 5760 if (Fb.hasFnAttribute("sign-return-address")) { 5761 StringRef ScopeB = 5762 Fb.getFnAttribute("sign-return-address").getValueAsString(); 5763 return ScopeB.equals("none"); 5764 } 5765 5766 llvm_unreachable("Unkown combination of sign-return-address attributes"); 5767 } 5768 5769 static bool 5770 outliningCandidatesSigningKeyConsensus(const outliner::Candidate &a, 5771 const outliner::Candidate &b) { 5772 const Function &Fa = a.getMF()->getFunction(); 5773 const Function &Fb = b.getMF()->getFunction(); 5774 5775 // If none of the functions have the "sign-return-address-key" attribute 5776 // their keys are equal 5777 if (!Fa.hasFnAttribute("sign-return-address-key") && 5778 !Fb.hasFnAttribute("sign-return-address-key")) { 5779 return true; 5780 } 5781 5782 // If both functions have the "sign-return-address-key" attribute their 5783 // keys are equal if the values of "sign-return-address-key" are equal 5784 if (Fa.hasFnAttribute("sign-return-address-key") && 5785 Fb.hasFnAttribute("sign-return-address-key")) { 5786 StringRef KeyA = 5787 Fa.getFnAttribute("sign-return-address-key").getValueAsString(); 5788 StringRef KeyB = 5789 Fb.getFnAttribute("sign-return-address-key").getValueAsString(); 5790 return KeyA.equals(KeyB); 5791 } 5792 5793 // If B doesn't have the "sign-return-address-key" attribute, both keys are 5794 // equal, if function a has the default key (a_key) 5795 if (Fa.hasFnAttribute("sign-return-address-key")) { 5796 StringRef KeyA = 5797 Fa.getFnAttribute("sign-return-address-key").getValueAsString(); 5798 return KeyA.equals_lower("a_key"); 5799 } 5800 5801 if (Fb.hasFnAttribute("sign-return-address-key")) { 5802 StringRef KeyB = 5803 Fb.getFnAttribute("sign-return-address-key").getValueAsString(); 5804 return KeyB.equals_lower("a_key"); 5805 } 5806 5807 
llvm_unreachable("Unkown combination of sign-return-address-key attributes"); 5808 } 5809 5810 static bool outliningCandidatesV8_3OpsConsensus(const outliner::Candidate &a, 5811 const outliner::Candidate &b) { 5812 const AArch64Subtarget &SubtargetA = 5813 a.getMF()->getSubtarget<AArch64Subtarget>(); 5814 const AArch64Subtarget &SubtargetB = 5815 b.getMF()->getSubtarget<AArch64Subtarget>(); 5816 return SubtargetA.hasV8_3aOps() == SubtargetB.hasV8_3aOps(); 5817 } 5818 5819 outliner::OutlinedFunction AArch64InstrInfo::getOutliningCandidateInfo( 5820 std::vector<outliner::Candidate> &RepeatedSequenceLocs) const { 5821 outliner::Candidate &FirstCand = RepeatedSequenceLocs[0]; 5822 unsigned SequenceSize = 5823 std::accumulate(FirstCand.front(), std::next(FirstCand.back()), 0, 5824 [this](unsigned Sum, const MachineInstr &MI) { 5825 return Sum + getInstSizeInBytes(MI); 5826 }); 5827 unsigned NumBytesToCreateFrame = 0; 5828 5829 // We only allow outlining for functions having exactly matching return 5830 // address signing attributes, i.e., all share the same value for the 5831 // attribute "sign-return-address" and all share the same type of key they 5832 // are signed with. 5833 // Additionally we require all functions to simultaniously either support 5834 // v8.3a features or not. Otherwise an outlined function could get signed 5835 // using dedicated v8.3 instructions and a call from a function that doesn't 5836 // support v8.3 instructions would therefore be invalid. 5837 if (std::adjacent_find( 5838 RepeatedSequenceLocs.begin(), RepeatedSequenceLocs.end(), 5839 [](const outliner::Candidate &a, const outliner::Candidate &b) { 5840 // Return true if a and b are non-equal w.r.t. 
return address 5841 // signing or support of v8.3a features 5842 if (outliningCandidatesSigningScopeConsensus(a, b) && 5843 outliningCandidatesSigningKeyConsensus(a, b) && 5844 outliningCandidatesV8_3OpsConsensus(a, b)) { 5845 return false; 5846 } 5847 return true; 5848 }) != RepeatedSequenceLocs.end()) { 5849 return outliner::OutlinedFunction(); 5850 } 5851 5852 // Since at this point all candidates agree on their return address signing 5853 // picking just one is fine. If the candidate functions potentially sign their 5854 // return addresses, the outlined function should do the same. Note that in 5855 // the case of "sign-return-address"="non-leaf" this is an assumption: It is 5856 // not certainly true that the outlined function will have to sign its return 5857 // address but this decision is made later, when the decision to outline 5858 // has already been made. 5859 // The same holds for the number of additional instructions we need: On 5860 // v8.3a RET can be replaced by RETAA/RETAB and no AUT instruction is 5861 // necessary. However, at this point we don't know if the outlined function 5862 // will have a RET instruction so we assume the worst. 5863 const Function &FCF = FirstCand.getMF()->getFunction(); 5864 const TargetRegisterInfo &TRI = getRegisterInfo(); 5865 if (FCF.hasFnAttribute("sign-return-address")) { 5866 // One PAC and one AUT instructions 5867 NumBytesToCreateFrame += 8; 5868 5869 // We have to check if sp modifying instructions would get outlined. 
5870 // If so we only allow outlining if sp is unchanged overall, so matching 5871 // sub and add instructions are okay to outline, all other sp modifications 5872 // are not 5873 auto hasIllegalSPModification = [&TRI](outliner::Candidate &C) { 5874 int SPValue = 0; 5875 MachineBasicBlock::iterator MBBI = C.front(); 5876 for (;;) { 5877 if (MBBI->modifiesRegister(AArch64::SP, &TRI)) { 5878 switch (MBBI->getOpcode()) { 5879 case AArch64::ADDXri: 5880 case AArch64::ADDWri: 5881 assert(MBBI->getNumOperands() == 4 && "Wrong number of operands"); 5882 assert(MBBI->getOperand(2).isImm() && 5883 "Expected operand to be immediate"); 5884 assert(MBBI->getOperand(1).isReg() && 5885 "Expected operand to be a register"); 5886 // Check if the add just increments sp. If so, we search for 5887 // matching sub instructions that decrement sp. If not, the 5888 // modification is illegal 5889 if (MBBI->getOperand(1).getReg() == AArch64::SP) 5890 SPValue += MBBI->getOperand(2).getImm(); 5891 else 5892 return true; 5893 break; 5894 case AArch64::SUBXri: 5895 case AArch64::SUBWri: 5896 assert(MBBI->getNumOperands() == 4 && "Wrong number of operands"); 5897 assert(MBBI->getOperand(2).isImm() && 5898 "Expected operand to be immediate"); 5899 assert(MBBI->getOperand(1).isReg() && 5900 "Expected operand to be a register"); 5901 // Check if the sub just decrements sp. If so, we search for 5902 // matching add instructions that increment sp. 
If not, the 5903 // modification is illegal 5904 if (MBBI->getOperand(1).getReg() == AArch64::SP) 5905 SPValue -= MBBI->getOperand(2).getImm(); 5906 else 5907 return true; 5908 break; 5909 default: 5910 return true; 5911 } 5912 } 5913 if (MBBI == C.back()) 5914 break; 5915 ++MBBI; 5916 } 5917 if (SPValue) 5918 return true; 5919 return false; 5920 }; 5921 // Remove candidates with illegal stack modifying instructions 5922 RepeatedSequenceLocs.erase(std::remove_if(RepeatedSequenceLocs.begin(), 5923 RepeatedSequenceLocs.end(), 5924 hasIllegalSPModification), 5925 RepeatedSequenceLocs.end()); 5926 5927 // If the sequence doesn't have enough candidates left, then we're done. 5928 if (RepeatedSequenceLocs.size() < 2) 5929 return outliner::OutlinedFunction(); 5930 } 5931 5932 // Properties about candidate MBBs that hold for all of them. 5933 unsigned FlagsSetInAll = 0xF; 5934 5935 // Compute liveness information for each candidate, and set FlagsSetInAll. 5936 std::for_each(RepeatedSequenceLocs.begin(), RepeatedSequenceLocs.end(), 5937 [&FlagsSetInAll](outliner::Candidate &C) { 5938 FlagsSetInAll &= C.Flags; 5939 }); 5940 5941 // According to the AArch64 Procedure Call Standard, the following are 5942 // undefined on entry/exit from a function call: 5943 // 5944 // * Registers x16, x17, (and thus w16, w17) 5945 // * Condition codes (and thus the NZCV register) 5946 // 5947 // Because if this, we can't outline any sequence of instructions where 5948 // one 5949 // of these registers is live into/across it. Thus, we need to delete 5950 // those 5951 // candidates. 5952 auto CantGuaranteeValueAcrossCall = [&TRI](outliner::Candidate &C) { 5953 // If the unsafe registers in this block are all dead, then we don't need 5954 // to compute liveness here. 
5955 if (C.Flags & UnsafeRegsDead) 5956 return false; 5957 C.initLRU(TRI); 5958 LiveRegUnits LRU = C.LRU; 5959 return (!LRU.available(AArch64::W16) || !LRU.available(AArch64::W17) || 5960 !LRU.available(AArch64::NZCV)); 5961 }; 5962 5963 // Are there any candidates where those registers are live? 5964 if (!(FlagsSetInAll & UnsafeRegsDead)) { 5965 // Erase every candidate that violates the restrictions above. (It could be 5966 // true that we have viable candidates, so it's not worth bailing out in 5967 // the case that, say, 1 out of 20 candidates violate the restructions.) 5968 RepeatedSequenceLocs.erase(std::remove_if(RepeatedSequenceLocs.begin(), 5969 RepeatedSequenceLocs.end(), 5970 CantGuaranteeValueAcrossCall), 5971 RepeatedSequenceLocs.end()); 5972 5973 // If the sequence doesn't have enough candidates left, then we're done. 5974 if (RepeatedSequenceLocs.size() < 2) 5975 return outliner::OutlinedFunction(); 5976 } 5977 5978 // At this point, we have only "safe" candidates to outline. Figure out 5979 // frame + call instruction information. 5980 5981 unsigned LastInstrOpcode = RepeatedSequenceLocs[0].back()->getOpcode(); 5982 5983 // Helper lambda which sets call information for every candidate. 5984 auto SetCandidateCallInfo = 5985 [&RepeatedSequenceLocs](unsigned CallID, unsigned NumBytesForCall) { 5986 for (outliner::Candidate &C : RepeatedSequenceLocs) 5987 C.setCallInfo(CallID, NumBytesForCall); 5988 }; 5989 5990 unsigned FrameID = MachineOutlinerDefault; 5991 NumBytesToCreateFrame += 4; 5992 5993 bool HasBTI = any_of(RepeatedSequenceLocs, [](outliner::Candidate &C) { 5994 return C.getMF()->getFunction().hasFnAttribute("branch-target-enforcement"); 5995 }); 5996 5997 // We check to see if CFI Instructions are present, and if they are 5998 // we find the number of CFI Instructions in the candidates. 
5999 unsigned CFICount = 0; 6000 MachineBasicBlock::iterator MBBI = RepeatedSequenceLocs[0].front(); 6001 for (unsigned Loc = RepeatedSequenceLocs[0].getStartIdx(); 6002 Loc < RepeatedSequenceLocs[0].getEndIdx() + 1; Loc++) { 6003 const std::vector<MCCFIInstruction> &CFIInstructions = 6004 RepeatedSequenceLocs[0].getMF()->getFrameInstructions(); 6005 if (MBBI->isCFIInstruction()) { 6006 unsigned CFIIndex = MBBI->getOperand(0).getCFIIndex(); 6007 MCCFIInstruction CFI = CFIInstructions[CFIIndex]; 6008 CFICount++; 6009 } 6010 MBBI++; 6011 } 6012 6013 // We compare the number of found CFI Instructions to the number of CFI 6014 // instructions in the parent function for each candidate. We must check this 6015 // since if we outline one of the CFI instructions in a function, we have to 6016 // outline them all for correctness. If we do not, the address offsets will be 6017 // incorrect between the two sections of the program. 6018 for (outliner::Candidate &C : RepeatedSequenceLocs) { 6019 std::vector<MCCFIInstruction> CFIInstructions = 6020 C.getMF()->getFrameInstructions(); 6021 6022 if (CFICount > 0 && CFICount != CFIInstructions.size()) 6023 return outliner::OutlinedFunction(); 6024 } 6025 6026 // Returns true if an instructions is safe to fix up, false otherwise. 6027 auto IsSafeToFixup = [this, &TRI](MachineInstr &MI) { 6028 if (MI.isCall()) 6029 return true; 6030 6031 if (!MI.modifiesRegister(AArch64::SP, &TRI) && 6032 !MI.readsRegister(AArch64::SP, &TRI)) 6033 return true; 6034 6035 // Any modification of SP will break our code to save/restore LR. 6036 // FIXME: We could handle some instructions which add a constant 6037 // offset to SP, with a bit more work. 6038 if (MI.modifiesRegister(AArch64::SP, &TRI)) 6039 return false; 6040 6041 // At this point, we have a stack instruction that we might need to 6042 // fix up. We'll handle it if it's a load or store. 6043 if (MI.mayLoadOrStore()) { 6044 const MachineOperand *Base; // Filled with the base operand of MI. 
6045 int64_t Offset; // Filled with the offset of MI. 6046 bool OffsetIsScalable; 6047 6048 // Does it allow us to offset the base operand and is the base the 6049 // register SP? 6050 if (!getMemOperandWithOffset(MI, Base, Offset, OffsetIsScalable, &TRI) || 6051 !Base->isReg() || Base->getReg() != AArch64::SP) 6052 return false; 6053 6054 // Fixe-up code below assumes bytes. 6055 if (OffsetIsScalable) 6056 return false; 6057 6058 // Find the minimum/maximum offset for this instruction and check 6059 // if fixing it up would be in range. 6060 int64_t MinOffset, 6061 MaxOffset; // Unscaled offsets for the instruction. 6062 TypeSize Scale(0U, false); // The scale to multiply the offsets by. 6063 unsigned DummyWidth; 6064 getMemOpInfo(MI.getOpcode(), Scale, DummyWidth, MinOffset, MaxOffset); 6065 6066 Offset += 16; // Update the offset to what it would be if we outlined. 6067 if (Offset < MinOffset * (int64_t)Scale.getFixedSize() || 6068 Offset > MaxOffset * (int64_t)Scale.getFixedSize()) 6069 return false; 6070 6071 // It's in range, so we can outline it. 6072 return true; 6073 } 6074 6075 // FIXME: Add handling for instructions like "add x0, sp, #8". 6076 6077 // We can't fix it up, so don't outline it. 6078 return false; 6079 }; 6080 6081 // True if it's possible to fix up each stack instruction in this sequence. 6082 // Important for frames/call variants that modify the stack. 6083 bool AllStackInstrsSafe = std::all_of( 6084 FirstCand.front(), std::next(FirstCand.back()), IsSafeToFixup); 6085 6086 // If the last instruction in any candidate is a terminator, then we should 6087 // tail call all of the candidates. 
6088 if (RepeatedSequenceLocs[0].back()->isTerminator()) { 6089 FrameID = MachineOutlinerTailCall; 6090 NumBytesToCreateFrame = 0; 6091 SetCandidateCallInfo(MachineOutlinerTailCall, 4); 6092 } 6093 6094 else if (LastInstrOpcode == AArch64::BL || 6095 ((LastInstrOpcode == AArch64::BLR || 6096 LastInstrOpcode == AArch64::BLRNoIP) && 6097 !HasBTI)) { 6098 // FIXME: Do we need to check if the code after this uses the value of LR? 6099 FrameID = MachineOutlinerThunk; 6100 NumBytesToCreateFrame = 0; 6101 SetCandidateCallInfo(MachineOutlinerThunk, 4); 6102 } 6103 6104 else { 6105 // We need to decide how to emit calls + frames. We can always emit the same 6106 // frame if we don't need to save to the stack. If we have to save to the 6107 // stack, then we need a different frame. 6108 unsigned NumBytesNoStackCalls = 0; 6109 std::vector<outliner::Candidate> CandidatesWithoutStackFixups; 6110 6111 // Check if we have to save LR. 6112 for (outliner::Candidate &C : RepeatedSequenceLocs) { 6113 C.initLRU(TRI); 6114 6115 // If we have a noreturn caller, then we're going to be conservative and 6116 // say that we have to save LR. If we don't have a ret at the end of the 6117 // block, then we can't reason about liveness accurately. 6118 // 6119 // FIXME: We can probably do better than always disabling this in 6120 // noreturn functions by fixing up the liveness info. 6121 bool IsNoReturn = 6122 C.getMF()->getFunction().hasFnAttribute(Attribute::NoReturn); 6123 6124 // Is LR available? If so, we don't need a save. 6125 if (C.LRU.available(AArch64::LR) && !IsNoReturn) { 6126 NumBytesNoStackCalls += 4; 6127 C.setCallInfo(MachineOutlinerNoLRSave, 4); 6128 CandidatesWithoutStackFixups.push_back(C); 6129 } 6130 6131 // Is an unused register available? If so, we won't modify the stack, so 6132 // we can outline with the same frame type as those that don't save LR. 
6133 else if (findRegisterToSaveLRTo(C)) { 6134 NumBytesNoStackCalls += 12; 6135 C.setCallInfo(MachineOutlinerRegSave, 12); 6136 CandidatesWithoutStackFixups.push_back(C); 6137 } 6138 6139 // Is SP used in the sequence at all? If not, we don't have to modify 6140 // the stack, so we are guaranteed to get the same frame. 6141 else if (C.UsedInSequence.available(AArch64::SP)) { 6142 NumBytesNoStackCalls += 12; 6143 C.setCallInfo(MachineOutlinerDefault, 12); 6144 CandidatesWithoutStackFixups.push_back(C); 6145 } 6146 6147 // If we outline this, we need to modify the stack. Pretend we don't 6148 // outline this by saving all of its bytes. 6149 else { 6150 NumBytesNoStackCalls += SequenceSize; 6151 } 6152 } 6153 6154 // If there are no places where we have to save LR, then note that we 6155 // don't have to update the stack. Otherwise, give every candidate the 6156 // default call type, as long as it's safe to do so. 6157 if (!AllStackInstrsSafe || 6158 NumBytesNoStackCalls <= RepeatedSequenceLocs.size() * 12) { 6159 RepeatedSequenceLocs = CandidatesWithoutStackFixups; 6160 FrameID = MachineOutlinerNoLRSave; 6161 } else { 6162 SetCandidateCallInfo(MachineOutlinerDefault, 12); 6163 } 6164 6165 // If we dropped all of the candidates, bail out here. 6166 if (RepeatedSequenceLocs.size() < 2) { 6167 RepeatedSequenceLocs.clear(); 6168 return outliner::OutlinedFunction(); 6169 } 6170 } 6171 6172 // Does every candidate's MBB contain a call? If so, then we might have a call 6173 // in the range. 6174 if (FlagsSetInAll & MachineOutlinerMBBFlags::HasCalls) { 6175 // Check if the range contains a call. These require a save + restore of the 6176 // link register. 6177 bool ModStackToSaveLR = false; 6178 if (std::any_of(FirstCand.front(), FirstCand.back(), 6179 [](const MachineInstr &MI) { return MI.isCall(); })) 6180 ModStackToSaveLR = true; 6181 6182 // Handle the last instruction separately. If this is a tail call, then the 6183 // last instruction is a call. 
We don't want to save + restore in this case. 6184 // However, it could be possible that the last instruction is a call without 6185 // it being valid to tail call this sequence. We should consider this as 6186 // well. 6187 else if (FrameID != MachineOutlinerThunk && 6188 FrameID != MachineOutlinerTailCall && FirstCand.back()->isCall()) 6189 ModStackToSaveLR = true; 6190 6191 if (ModStackToSaveLR) { 6192 // We can't fix up the stack. Bail out. 6193 if (!AllStackInstrsSafe) { 6194 RepeatedSequenceLocs.clear(); 6195 return outliner::OutlinedFunction(); 6196 } 6197 6198 // Save + restore LR. 6199 NumBytesToCreateFrame += 8; 6200 } 6201 } 6202 6203 // If we have CFI instructions, we can only outline if the outlined section 6204 // can be a tail call 6205 if (FrameID != MachineOutlinerTailCall && CFICount > 0) 6206 return outliner::OutlinedFunction(); 6207 6208 return outliner::OutlinedFunction(RepeatedSequenceLocs, SequenceSize, 6209 NumBytesToCreateFrame, FrameID); 6210 } 6211 6212 bool AArch64InstrInfo::isFunctionSafeToOutlineFrom( 6213 MachineFunction &MF, bool OutlineFromLinkOnceODRs) const { 6214 const Function &F = MF.getFunction(); 6215 6216 // Can F be deduplicated by the linker? If it can, don't outline from it. 6217 if (!OutlineFromLinkOnceODRs && F.hasLinkOnceODRLinkage()) 6218 return false; 6219 6220 // Don't outline from functions with section markings; the program could 6221 // expect that all the code is in the named section. 6222 // FIXME: Allow outlining from multiple functions with the same section 6223 // marking. 6224 if (F.hasSection()) 6225 return false; 6226 6227 // Outlining from functions with redzones is unsafe since the outliner may 6228 // modify the stack. Check if hasRedZone is true or unknown; if yes, don't 6229 // outline from it. 
6230 AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>(); 6231 if (!AFI || AFI->hasRedZone().getValueOr(true)) 6232 return false; 6233 6234 // FIXME: Teach the outliner to generate/handle Windows unwind info. 6235 if (MF.getTarget().getMCAsmInfo()->usesWindowsCFI()) 6236 return false; 6237 6238 // It's safe to outline from MF. 6239 return true; 6240 } 6241 6242 bool AArch64InstrInfo::isMBBSafeToOutlineFrom(MachineBasicBlock &MBB, 6243 unsigned &Flags) const { 6244 // Check if LR is available through all of the MBB. If it's not, then set 6245 // a flag. 6246 assert(MBB.getParent()->getRegInfo().tracksLiveness() && 6247 "Suitable Machine Function for outlining must track liveness"); 6248 LiveRegUnits LRU(getRegisterInfo()); 6249 6250 std::for_each(MBB.rbegin(), MBB.rend(), 6251 [&LRU](MachineInstr &MI) { LRU.accumulate(MI); }); 6252 6253 // Check if each of the unsafe registers are available... 6254 bool W16AvailableInBlock = LRU.available(AArch64::W16); 6255 bool W17AvailableInBlock = LRU.available(AArch64::W17); 6256 bool NZCVAvailableInBlock = LRU.available(AArch64::NZCV); 6257 6258 // If all of these are dead (and not live out), we know we don't have to check 6259 // them later. 6260 if (W16AvailableInBlock && W17AvailableInBlock && NZCVAvailableInBlock) 6261 Flags |= MachineOutlinerMBBFlags::UnsafeRegsDead; 6262 6263 // Now, add the live outs to the set. 6264 LRU.addLiveOuts(MBB); 6265 6266 // If any of these registers is available in the MBB, but also a live out of 6267 // the block, then we know outlining is unsafe. 6268 if (W16AvailableInBlock && !LRU.available(AArch64::W16)) 6269 return false; 6270 if (W17AvailableInBlock && !LRU.available(AArch64::W17)) 6271 return false; 6272 if (NZCVAvailableInBlock && !LRU.available(AArch64::NZCV)) 6273 return false; 6274 6275 // Check if there's a call inside this MachineBasicBlock. If there is, then 6276 // set a flag. 
6277 if (any_of(MBB, [](MachineInstr &MI) { return MI.isCall(); })) 6278 Flags |= MachineOutlinerMBBFlags::HasCalls; 6279 6280 MachineFunction *MF = MBB.getParent(); 6281 6282 // In the event that we outline, we may have to save LR. If there is an 6283 // available register in the MBB, then we'll always save LR there. Check if 6284 // this is true. 6285 bool CanSaveLR = false; 6286 const AArch64RegisterInfo *ARI = static_cast<const AArch64RegisterInfo *>( 6287 MF->getSubtarget().getRegisterInfo()); 6288 6289 // Check if there is an available register across the sequence that we can 6290 // use. 6291 for (unsigned Reg : AArch64::GPR64RegClass) { 6292 if (!ARI->isReservedReg(*MF, Reg) && Reg != AArch64::LR && 6293 Reg != AArch64::X16 && Reg != AArch64::X17 && LRU.available(Reg)) { 6294 CanSaveLR = true; 6295 break; 6296 } 6297 } 6298 6299 // Check if we have a register we can save LR to, and if LR was used 6300 // somewhere. If both of those things are true, then we need to evaluate the 6301 // safety of outlining stack instructions later. 6302 if (!CanSaveLR && !LRU.available(AArch64::LR)) 6303 Flags |= MachineOutlinerMBBFlags::LRUnavailableSomewhere; 6304 6305 return true; 6306 } 6307 6308 outliner::InstrType 6309 AArch64InstrInfo::getOutliningType(MachineBasicBlock::iterator &MIT, 6310 unsigned Flags) const { 6311 MachineInstr &MI = *MIT; 6312 MachineBasicBlock *MBB = MI.getParent(); 6313 MachineFunction *MF = MBB->getParent(); 6314 AArch64FunctionInfo *FuncInfo = MF->getInfo<AArch64FunctionInfo>(); 6315 6316 // Don't outline anything used for return address signing. The outlined 6317 // function will get signed later if needed 6318 switch (MI.getOpcode()) { 6319 case AArch64::PACIASP: 6320 case AArch64::PACIBSP: 6321 case AArch64::AUTIASP: 6322 case AArch64::AUTIBSP: 6323 case AArch64::RETAA: 6324 case AArch64::RETAB: 6325 case AArch64::EMITBKEY: 6326 return outliner::InstrType::Illegal; 6327 } 6328 6329 // Don't outline LOHs. 
6330 if (FuncInfo->getLOHRelated().count(&MI)) 6331 return outliner::InstrType::Illegal; 6332 6333 // We can only outline these if we will tail call the outlined function, or 6334 // fix up the CFI offsets. Currently, CFI instructions are outlined only if 6335 // in a tail call. 6336 // 6337 // FIXME: If the proper fixups for the offset are implemented, this should be 6338 // possible. 6339 if (MI.isCFIInstruction()) 6340 return outliner::InstrType::Legal; 6341 6342 // Don't allow debug values to impact outlining type. 6343 if (MI.isDebugInstr() || MI.isIndirectDebugValue()) 6344 return outliner::InstrType::Invisible; 6345 6346 // At this point, KILL instructions don't really tell us much so we can go 6347 // ahead and skip over them. 6348 if (MI.isKill()) 6349 return outliner::InstrType::Invisible; 6350 6351 // Is this a terminator for a basic block? 6352 if (MI.isTerminator()) { 6353 6354 // Is this the end of a function? 6355 if (MI.getParent()->succ_empty()) 6356 return outliner::InstrType::Legal; 6357 6358 // It's not, so don't outline it. 6359 return outliner::InstrType::Illegal; 6360 } 6361 6362 // Make sure none of the operands are un-outlinable. 6363 for (const MachineOperand &MOP : MI.operands()) { 6364 if (MOP.isCPI() || MOP.isJTI() || MOP.isCFIIndex() || MOP.isFI() || 6365 MOP.isTargetIndex()) 6366 return outliner::InstrType::Illegal; 6367 6368 // If it uses LR or W30 explicitly, then don't touch it. 6369 if (MOP.isReg() && !MOP.isImplicit() && 6370 (MOP.getReg() == AArch64::LR || MOP.getReg() == AArch64::W30)) 6371 return outliner::InstrType::Illegal; 6372 } 6373 6374 // Special cases for instructions that can always be outlined, but will fail 6375 // the later tests. e.g, ADRPs, which are PC-relative use LR, but can always 6376 // be outlined because they don't require a *specific* value to be in LR. 6377 if (MI.getOpcode() == AArch64::ADRP) 6378 return outliner::InstrType::Legal; 6379 6380 // If MI is a call we might be able to outline it. 
We don't want to outline 6381 // any calls that rely on the position of items on the stack. When we outline 6382 // something containing a call, we have to emit a save and restore of LR in 6383 // the outlined function. Currently, this always happens by saving LR to the 6384 // stack. Thus, if we outline, say, half the parameters for a function call 6385 // plus the call, then we'll break the callee's expectations for the layout 6386 // of the stack. 6387 // 6388 // FIXME: Allow calls to functions which construct a stack frame, as long 6389 // as they don't access arguments on the stack. 6390 // FIXME: Figure out some way to analyze functions defined in other modules. 6391 // We should be able to compute the memory usage based on the IR calling 6392 // convention, even if we can't see the definition. 6393 if (MI.isCall()) { 6394 // Get the function associated with the call. Look at each operand and find 6395 // the one that represents the callee and get its name. 6396 const Function *Callee = nullptr; 6397 for (const MachineOperand &MOP : MI.operands()) { 6398 if (MOP.isGlobal()) { 6399 Callee = dyn_cast<Function>(MOP.getGlobal()); 6400 break; 6401 } 6402 } 6403 6404 // Never outline calls to mcount. There isn't any rule that would require 6405 // this, but the Linux kernel's "ftrace" feature depends on it. 6406 if (Callee && Callee->getName() == "\01_mcount") 6407 return outliner::InstrType::Illegal; 6408 6409 // If we don't know anything about the callee, assume it depends on the 6410 // stack layout of the caller. In that case, it's only legal to outline 6411 // as a tail-call. Explicitly list the call instructions we know about so we 6412 // don't get unexpected results with call pseudo-instructions. 
6413 auto UnknownCallOutlineType = outliner::InstrType::Illegal; 6414 if (MI.getOpcode() == AArch64::BLR || 6415 MI.getOpcode() == AArch64::BLRNoIP || MI.getOpcode() == AArch64::BL) 6416 UnknownCallOutlineType = outliner::InstrType::LegalTerminator; 6417 6418 if (!Callee) 6419 return UnknownCallOutlineType; 6420 6421 // We have a function we have information about. Check it if it's something 6422 // can safely outline. 6423 MachineFunction *CalleeMF = MF->getMMI().getMachineFunction(*Callee); 6424 6425 // We don't know what's going on with the callee at all. Don't touch it. 6426 if (!CalleeMF) 6427 return UnknownCallOutlineType; 6428 6429 // Check if we know anything about the callee saves on the function. If we 6430 // don't, then don't touch it, since that implies that we haven't 6431 // computed anything about its stack frame yet. 6432 MachineFrameInfo &MFI = CalleeMF->getFrameInfo(); 6433 if (!MFI.isCalleeSavedInfoValid() || MFI.getStackSize() > 0 || 6434 MFI.getNumObjects() > 0) 6435 return UnknownCallOutlineType; 6436 6437 // At this point, we can say that CalleeMF ought to not pass anything on the 6438 // stack. Therefore, we can outline it. 6439 return outliner::InstrType::Legal; 6440 } 6441 6442 // Don't outline positions. 6443 if (MI.isPosition()) 6444 return outliner::InstrType::Illegal; 6445 6446 // Don't touch the link register or W30. 6447 if (MI.readsRegister(AArch64::W30, &getRegisterInfo()) || 6448 MI.modifiesRegister(AArch64::W30, &getRegisterInfo())) 6449 return outliner::InstrType::Illegal; 6450 6451 // Don't outline BTI instructions, because that will prevent the outlining 6452 // site from being indirectly callable. 
6453 if (MI.getOpcode() == AArch64::HINT) { 6454 int64_t Imm = MI.getOperand(0).getImm(); 6455 if (Imm == 32 || Imm == 34 || Imm == 36 || Imm == 38) 6456 return outliner::InstrType::Illegal; 6457 } 6458 6459 return outliner::InstrType::Legal; 6460 } 6461 6462 void AArch64InstrInfo::fixupPostOutline(MachineBasicBlock &MBB) const { 6463 for (MachineInstr &MI : MBB) { 6464 const MachineOperand *Base; 6465 unsigned Width; 6466 int64_t Offset; 6467 bool OffsetIsScalable; 6468 6469 // Is this a load or store with an immediate offset with SP as the base? 6470 if (!MI.mayLoadOrStore() || 6471 !getMemOperandWithOffsetWidth(MI, Base, Offset, OffsetIsScalable, Width, 6472 &RI) || 6473 (Base->isReg() && Base->getReg() != AArch64::SP)) 6474 continue; 6475 6476 // It is, so we have to fix it up. 6477 TypeSize Scale(0U, false); 6478 int64_t Dummy1, Dummy2; 6479 6480 MachineOperand &StackOffsetOperand = getMemOpBaseRegImmOfsOffsetOperand(MI); 6481 assert(StackOffsetOperand.isImm() && "Stack offset wasn't immediate!"); 6482 getMemOpInfo(MI.getOpcode(), Scale, Width, Dummy1, Dummy2); 6483 assert(Scale != 0 && "Unexpected opcode!"); 6484 assert(!OffsetIsScalable && "Expected offset to be a byte offset"); 6485 6486 // We've pushed the return address to the stack, so add 16 to the offset. 6487 // This is safe, since we already checked if it would overflow when we 6488 // checked if this instruction was legal to outline. 
6489 int64_t NewImm = (Offset + 16) / (int64_t)Scale.getFixedSize(); 6490 StackOffsetOperand.setImm(NewImm); 6491 } 6492 } 6493 6494 static void signOutlinedFunction(MachineFunction &MF, MachineBasicBlock &MBB, 6495 bool ShouldSignReturnAddr, 6496 bool ShouldSignReturnAddrWithAKey) { 6497 if (ShouldSignReturnAddr) { 6498 MachineBasicBlock::iterator MBBPAC = MBB.begin(); 6499 MachineBasicBlock::iterator MBBAUT = MBB.getFirstTerminator(); 6500 const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>(); 6501 const TargetInstrInfo *TII = Subtarget.getInstrInfo(); 6502 DebugLoc DL; 6503 6504 if (MBBAUT != MBB.end()) 6505 DL = MBBAUT->getDebugLoc(); 6506 6507 // At the very beginning of the basic block we insert the following 6508 // depending on the key type 6509 // 6510 // a_key: b_key: 6511 // PACIASP EMITBKEY 6512 // CFI_INSTRUCTION PACIBSP 6513 // CFI_INSTRUCTION 6514 if (ShouldSignReturnAddrWithAKey) { 6515 BuildMI(MBB, MBBPAC, DebugLoc(), TII->get(AArch64::PACIASP)) 6516 .setMIFlag(MachineInstr::FrameSetup); 6517 } else { 6518 BuildMI(MBB, MBBPAC, DebugLoc(), TII->get(AArch64::EMITBKEY)) 6519 .setMIFlag(MachineInstr::FrameSetup); 6520 BuildMI(MBB, MBBPAC, DebugLoc(), TII->get(AArch64::PACIBSP)) 6521 .setMIFlag(MachineInstr::FrameSetup); 6522 } 6523 unsigned CFIIndex = 6524 MF.addFrameInst(MCCFIInstruction::createNegateRAState(nullptr)); 6525 BuildMI(MBB, MBBPAC, DebugLoc(), TII->get(AArch64::CFI_INSTRUCTION)) 6526 .addCFIIndex(CFIIndex) 6527 .setMIFlags(MachineInstr::FrameSetup); 6528 6529 // If v8.3a features are available we can replace a RET instruction by 6530 // RETAA or RETAB and omit the AUT instructions 6531 if (Subtarget.hasV8_3aOps() && MBBAUT != MBB.end() && 6532 MBBAUT->getOpcode() == AArch64::RET) { 6533 BuildMI(MBB, MBBAUT, DL, 6534 TII->get(ShouldSignReturnAddrWithAKey ? 
AArch64::RETAA 6535 : AArch64::RETAB)) 6536 .copyImplicitOps(*MBBAUT); 6537 MBB.erase(MBBAUT); 6538 } else { 6539 BuildMI(MBB, MBBAUT, DL, 6540 TII->get(ShouldSignReturnAddrWithAKey ? AArch64::AUTIASP 6541 : AArch64::AUTIBSP)) 6542 .setMIFlag(MachineInstr::FrameDestroy); 6543 } 6544 } 6545 } 6546 6547 void AArch64InstrInfo::buildOutlinedFrame( 6548 MachineBasicBlock &MBB, MachineFunction &MF, 6549 const outliner::OutlinedFunction &OF) const { 6550 6551 AArch64FunctionInfo *FI = MF.getInfo<AArch64FunctionInfo>(); 6552 6553 if (OF.FrameConstructionID == MachineOutlinerTailCall) 6554 FI->setOutliningStyle("Tail Call"); 6555 else if (OF.FrameConstructionID == MachineOutlinerThunk) { 6556 // For thunk outlining, rewrite the last instruction from a call to a 6557 // tail-call. 6558 MachineInstr *Call = &*--MBB.instr_end(); 6559 unsigned TailOpcode; 6560 if (Call->getOpcode() == AArch64::BL) { 6561 TailOpcode = AArch64::TCRETURNdi; 6562 } else { 6563 assert(Call->getOpcode() == AArch64::BLR || 6564 Call->getOpcode() == AArch64::BLRNoIP); 6565 TailOpcode = AArch64::TCRETURNriALL; 6566 } 6567 MachineInstr *TC = BuildMI(MF, DebugLoc(), get(TailOpcode)) 6568 .add(Call->getOperand(0)) 6569 .addImm(0); 6570 MBB.insert(MBB.end(), TC); 6571 Call->eraseFromParent(); 6572 6573 FI->setOutliningStyle("Thunk"); 6574 } 6575 6576 bool IsLeafFunction = true; 6577 6578 // Is there a call in the outlined range? 6579 auto IsNonTailCall = [](const MachineInstr &MI) { 6580 return MI.isCall() && !MI.isReturn(); 6581 }; 6582 6583 if (std::any_of(MBB.instr_begin(), MBB.instr_end(), IsNonTailCall)) { 6584 // Fix up the instructions in the range, since we're going to modify the 6585 // stack. 6586 assert(OF.FrameConstructionID != MachineOutlinerDefault && 6587 "Can only fix up stack references once"); 6588 fixupPostOutline(MBB); 6589 6590 IsLeafFunction = false; 6591 6592 // LR has to be a live in so that we can save it. 
6593 if (!MBB.isLiveIn(AArch64::LR)) 6594 MBB.addLiveIn(AArch64::LR); 6595 6596 MachineBasicBlock::iterator It = MBB.begin(); 6597 MachineBasicBlock::iterator Et = MBB.end(); 6598 6599 if (OF.FrameConstructionID == MachineOutlinerTailCall || 6600 OF.FrameConstructionID == MachineOutlinerThunk) 6601 Et = std::prev(MBB.end()); 6602 6603 // Insert a save before the outlined region 6604 MachineInstr *STRXpre = BuildMI(MF, DebugLoc(), get(AArch64::STRXpre)) 6605 .addReg(AArch64::SP, RegState::Define) 6606 .addReg(AArch64::LR) 6607 .addReg(AArch64::SP) 6608 .addImm(-16); 6609 It = MBB.insert(It, STRXpre); 6610 6611 const TargetSubtargetInfo &STI = MF.getSubtarget(); 6612 const MCRegisterInfo *MRI = STI.getRegisterInfo(); 6613 unsigned DwarfReg = MRI->getDwarfRegNum(AArch64::LR, true); 6614 6615 // Add a CFI saying the stack was moved 16 B down. 6616 int64_t StackPosEntry = 6617 MF.addFrameInst(MCCFIInstruction::cfiDefCfaOffset(nullptr, 16)); 6618 BuildMI(MBB, It, DebugLoc(), get(AArch64::CFI_INSTRUCTION)) 6619 .addCFIIndex(StackPosEntry) 6620 .setMIFlags(MachineInstr::FrameSetup); 6621 6622 // Add a CFI saying that the LR that we want to find is now 16 B higher than 6623 // before. 6624 int64_t LRPosEntry = 6625 MF.addFrameInst(MCCFIInstruction::createOffset(nullptr, DwarfReg, -16)); 6626 BuildMI(MBB, It, DebugLoc(), get(AArch64::CFI_INSTRUCTION)) 6627 .addCFIIndex(LRPosEntry) 6628 .setMIFlags(MachineInstr::FrameSetup); 6629 6630 // Insert a restore before the terminator for the function. 6631 MachineInstr *LDRXpost = BuildMI(MF, DebugLoc(), get(AArch64::LDRXpost)) 6632 .addReg(AArch64::SP, RegState::Define) 6633 .addReg(AArch64::LR, RegState::Define) 6634 .addReg(AArch64::SP) 6635 .addImm(16); 6636 Et = MBB.insert(Et, LDRXpost); 6637 } 6638 6639 // If a bunch of candidates reach this point they must agree on their return 6640 // address signing. 
It is therefore enough to just consider the signing 6641 // behaviour of one of them 6642 const Function &CF = OF.Candidates.front().getMF()->getFunction(); 6643 bool ShouldSignReturnAddr = false; 6644 if (CF.hasFnAttribute("sign-return-address")) { 6645 StringRef Scope = 6646 CF.getFnAttribute("sign-return-address").getValueAsString(); 6647 if (Scope.equals("all")) 6648 ShouldSignReturnAddr = true; 6649 else if (Scope.equals("non-leaf") && !IsLeafFunction) 6650 ShouldSignReturnAddr = true; 6651 } 6652 6653 // a_key is the default 6654 bool ShouldSignReturnAddrWithAKey = true; 6655 if (CF.hasFnAttribute("sign-return-address-key")) { 6656 const StringRef Key = 6657 CF.getFnAttribute("sign-return-address-key").getValueAsString(); 6658 // Key can either be a_key or b_key 6659 assert((Key.equals_lower("a_key") || Key.equals_lower("b_key")) && 6660 "Return address signing key must be either a_key or b_key"); 6661 ShouldSignReturnAddrWithAKey = Key.equals_lower("a_key"); 6662 } 6663 6664 // If this is a tail call outlined function, then there's already a return. 6665 if (OF.FrameConstructionID == MachineOutlinerTailCall || 6666 OF.FrameConstructionID == MachineOutlinerThunk) { 6667 signOutlinedFunction(MF, MBB, ShouldSignReturnAddr, 6668 ShouldSignReturnAddrWithAKey); 6669 return; 6670 } 6671 6672 // It's not a tail call, so we have to insert the return ourselves. 6673 6674 // LR has to be a live in so that we can return to it. 6675 if (!MBB.isLiveIn(AArch64::LR)) 6676 MBB.addLiveIn(AArch64::LR); 6677 6678 MachineInstr *ret = BuildMI(MF, DebugLoc(), get(AArch64::RET)) 6679 .addReg(AArch64::LR); 6680 MBB.insert(MBB.end(), ret); 6681 6682 signOutlinedFunction(MF, MBB, ShouldSignReturnAddr, 6683 ShouldSignReturnAddrWithAKey); 6684 6685 FI->setOutliningStyle("Function"); 6686 6687 // Did we have to modify the stack by saving the link register? 6688 if (OF.FrameConstructionID != MachineOutlinerDefault) 6689 return; 6690 6691 // We modified the stack. 
6692 // Walk over the basic block and fix up all the stack accesses. 6693 fixupPostOutline(MBB); 6694 } 6695 6696 MachineBasicBlock::iterator AArch64InstrInfo::insertOutlinedCall( 6697 Module &M, MachineBasicBlock &MBB, MachineBasicBlock::iterator &It, 6698 MachineFunction &MF, const outliner::Candidate &C) const { 6699 6700 // Are we tail calling? 6701 if (C.CallConstructionID == MachineOutlinerTailCall) { 6702 // If yes, then we can just branch to the label. 6703 It = MBB.insert(It, BuildMI(MF, DebugLoc(), get(AArch64::TCRETURNdi)) 6704 .addGlobalAddress(M.getNamedValue(MF.getName())) 6705 .addImm(0)); 6706 return It; 6707 } 6708 6709 // Are we saving the link register? 6710 if (C.CallConstructionID == MachineOutlinerNoLRSave || 6711 C.CallConstructionID == MachineOutlinerThunk) { 6712 // No, so just insert the call. 6713 It = MBB.insert(It, BuildMI(MF, DebugLoc(), get(AArch64::BL)) 6714 .addGlobalAddress(M.getNamedValue(MF.getName()))); 6715 return It; 6716 } 6717 6718 // We want to return the spot where we inserted the call. 6719 MachineBasicBlock::iterator CallPt; 6720 6721 // Instructions for saving and restoring LR around the call instruction we're 6722 // going to insert. 6723 MachineInstr *Save; 6724 MachineInstr *Restore; 6725 // Can we save to a register? 6726 if (C.CallConstructionID == MachineOutlinerRegSave) { 6727 // FIXME: This logic should be sunk into a target-specific interface so that 6728 // we don't have to recompute the register. 6729 unsigned Reg = findRegisterToSaveLRTo(C); 6730 assert(Reg != 0 && "No callee-saved register available?"); 6731 6732 // Save and restore LR from that register. 6733 Save = BuildMI(MF, DebugLoc(), get(AArch64::ORRXrs), Reg) 6734 .addReg(AArch64::XZR) 6735 .addReg(AArch64::LR) 6736 .addImm(0); 6737 Restore = BuildMI(MF, DebugLoc(), get(AArch64::ORRXrs), AArch64::LR) 6738 .addReg(AArch64::XZR) 6739 .addReg(Reg) 6740 .addImm(0); 6741 } else { 6742 // We have the default case. Save and restore from SP. 
6743 Save = BuildMI(MF, DebugLoc(), get(AArch64::STRXpre)) 6744 .addReg(AArch64::SP, RegState::Define) 6745 .addReg(AArch64::LR) 6746 .addReg(AArch64::SP) 6747 .addImm(-16); 6748 Restore = BuildMI(MF, DebugLoc(), get(AArch64::LDRXpost)) 6749 .addReg(AArch64::SP, RegState::Define) 6750 .addReg(AArch64::LR, RegState::Define) 6751 .addReg(AArch64::SP) 6752 .addImm(16); 6753 } 6754 6755 It = MBB.insert(It, Save); 6756 It++; 6757 6758 // Insert the call. 6759 It = MBB.insert(It, BuildMI(MF, DebugLoc(), get(AArch64::BL)) 6760 .addGlobalAddress(M.getNamedValue(MF.getName()))); 6761 CallPt = It; 6762 It++; 6763 6764 It = MBB.insert(It, Restore); 6765 return CallPt; 6766 } 6767 6768 bool AArch64InstrInfo::shouldOutlineFromFunctionByDefault( 6769 MachineFunction &MF) const { 6770 return MF.getFunction().hasMinSize(); 6771 } 6772 6773 Optional<DestSourcePair> 6774 AArch64InstrInfo::isCopyInstrImpl(const MachineInstr &MI) const { 6775 6776 // AArch64::ORRWrs and AArch64::ORRXrs with WZR/XZR reg 6777 // and zero immediate operands used as an alias for mov instruction. 6778 if (MI.getOpcode() == AArch64::ORRWrs && 6779 MI.getOperand(1).getReg() == AArch64::WZR && 6780 MI.getOperand(3).getImm() == 0x0) { 6781 return DestSourcePair{MI.getOperand(0), MI.getOperand(2)}; 6782 } 6783 6784 if (MI.getOpcode() == AArch64::ORRXrs && 6785 MI.getOperand(1).getReg() == AArch64::XZR && 6786 MI.getOperand(3).getImm() == 0x0) { 6787 return DestSourcePair{MI.getOperand(0), MI.getOperand(2)}; 6788 } 6789 6790 return None; 6791 } 6792 6793 Optional<RegImmPair> AArch64InstrInfo::isAddImmediate(const MachineInstr &MI, 6794 Register Reg) const { 6795 int Sign = 1; 6796 int64_t Offset = 0; 6797 6798 // TODO: Handle cases where Reg is a super- or sub-register of the 6799 // destination register. 
6800 const MachineOperand &Op0 = MI.getOperand(0); 6801 if (!Op0.isReg() || Reg != Op0.getReg()) 6802 return None; 6803 6804 switch (MI.getOpcode()) { 6805 default: 6806 return None; 6807 case AArch64::SUBWri: 6808 case AArch64::SUBXri: 6809 case AArch64::SUBSWri: 6810 case AArch64::SUBSXri: 6811 Sign *= -1; 6812 LLVM_FALLTHROUGH; 6813 case AArch64::ADDSWri: 6814 case AArch64::ADDSXri: 6815 case AArch64::ADDWri: 6816 case AArch64::ADDXri: { 6817 // TODO: Third operand can be global address (usually some string). 6818 if (!MI.getOperand(0).isReg() || !MI.getOperand(1).isReg() || 6819 !MI.getOperand(2).isImm()) 6820 return None; 6821 Offset = MI.getOperand(2).getImm() * Sign; 6822 int Shift = MI.getOperand(3).getImm(); 6823 assert((Shift == 0 || Shift == 12) && "Shift can be either 0 or 12"); 6824 Offset = Offset << Shift; 6825 } 6826 } 6827 return RegImmPair{MI.getOperand(1).getReg(), Offset}; 6828 } 6829 6830 /// If the given ORR instruction is a copy, and \p DescribedReg overlaps with 6831 /// the destination register then, if possible, describe the value in terms of 6832 /// the source register. 6833 static Optional<ParamLoadedValue> 6834 describeORRLoadedValue(const MachineInstr &MI, Register DescribedReg, 6835 const TargetInstrInfo *TII, 6836 const TargetRegisterInfo *TRI) { 6837 auto DestSrc = TII->isCopyInstr(MI); 6838 if (!DestSrc) 6839 return None; 6840 6841 Register DestReg = DestSrc->Destination->getReg(); 6842 Register SrcReg = DestSrc->Source->getReg(); 6843 6844 auto Expr = DIExpression::get(MI.getMF()->getFunction().getContext(), {}); 6845 6846 // If the described register is the destination, just return the source. 6847 if (DestReg == DescribedReg) 6848 return ParamLoadedValue(MachineOperand::CreateReg(SrcReg, false), Expr); 6849 6850 // ORRWrs zero-extends to 64-bits, so we need to consider such cases. 
6851 if (MI.getOpcode() == AArch64::ORRWrs && 6852 TRI->isSuperRegister(DestReg, DescribedReg)) 6853 return ParamLoadedValue(MachineOperand::CreateReg(SrcReg, false), Expr); 6854 6855 // We may need to describe the lower part of a ORRXrs move. 6856 if (MI.getOpcode() == AArch64::ORRXrs && 6857 TRI->isSubRegister(DestReg, DescribedReg)) { 6858 Register SrcSubReg = TRI->getSubReg(SrcReg, AArch64::sub_32); 6859 return ParamLoadedValue(MachineOperand::CreateReg(SrcSubReg, false), Expr); 6860 } 6861 6862 assert(!TRI->isSuperOrSubRegisterEq(DestReg, DescribedReg) && 6863 "Unhandled ORR[XW]rs copy case"); 6864 6865 return None; 6866 } 6867 6868 Optional<ParamLoadedValue> 6869 AArch64InstrInfo::describeLoadedValue(const MachineInstr &MI, 6870 Register Reg) const { 6871 const MachineFunction *MF = MI.getMF(); 6872 const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo(); 6873 switch (MI.getOpcode()) { 6874 case AArch64::MOVZWi: 6875 case AArch64::MOVZXi: { 6876 // MOVZWi may be used for producing zero-extended 32-bit immediates in 6877 // 64-bit parameters, so we need to consider super-registers. 
6878 if (!TRI->isSuperRegisterEq(MI.getOperand(0).getReg(), Reg)) 6879 return None; 6880 6881 if (!MI.getOperand(1).isImm()) 6882 return None; 6883 int64_t Immediate = MI.getOperand(1).getImm(); 6884 int Shift = MI.getOperand(2).getImm(); 6885 return ParamLoadedValue(MachineOperand::CreateImm(Immediate << Shift), 6886 nullptr); 6887 } 6888 case AArch64::ORRWrs: 6889 case AArch64::ORRXrs: 6890 return describeORRLoadedValue(MI, Reg, this, TRI); 6891 } 6892 6893 return TargetInstrInfo::describeLoadedValue(MI, Reg); 6894 } 6895 6896 uint64_t AArch64InstrInfo::getElementSizeForOpcode(unsigned Opc) const { 6897 return get(Opc).TSFlags & AArch64::ElementSizeMask; 6898 } 6899 6900 unsigned llvm::getBLRCallOpcode(const MachineFunction &MF) { 6901 if (MF.getSubtarget<AArch64Subtarget>().hardenSlsBlr()) 6902 return AArch64::BLRNoIP; 6903 else 6904 return AArch64::BLR; 6905 } 6906 6907 #define GET_INSTRINFO_HELPERS 6908 #define GET_INSTRMAP_INFO 6909 #include "AArch64GenInstrInfo.inc" 6910