//===- AArch64InstrInfo.cpp - AArch64 Instruction Information -------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file contains the AArch64 implementation of the TargetInstrInfo class.
//
//===----------------------------------------------------------------------===//

#include "AArch64InstrInfo.h"
#include "AArch64Subtarget.h"
#include "MCTargetDesc/AArch64AddressingModes.h"
#include "Utils/AArch64BaseInfo.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/StackMaps.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CodeGen.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Target/TargetSubtargetInfo.h"
#include <cassert>
#include <cstdint>
#include <iterator>
#include <utility>

using namespace llvm;

#define GET_INSTRINFO_CTOR_DTOR
#include "AArch64GenInstrInfo.inc"

static const MachineMemOperand::Flags MOSuppressPair =
    MachineMemOperand::MOTargetFlag1;

static cl::opt<unsigned>
TBZDisplacementBits("aarch64-tbz-offset-bits", cl::Hidden, cl::init(14),
                    cl::desc("Restrict range of TB[N]Z instructions (DEBUG)"));

static cl::opt<unsigned>
CBZDisplacementBits("aarch64-cbz-offset-bits", cl::Hidden, cl::init(19),
                    cl::desc("Restrict range of CB[N]Z instructions (DEBUG)"));

static cl::opt<unsigned>
BCCDisplacementBits("aarch64-bcc-offset-bits", cl::Hidden, cl::init(19),
                    cl::desc("Restrict range of Bcc instructions (DEBUG)"));

AArch64InstrInfo::AArch64InstrInfo(const AArch64Subtarget &STI)
    : AArch64GenInstrInfo(AArch64::ADJCALLSTACKDOWN, AArch64::ADJCALLSTACKUP),
      RI(STI.getTargetTriple()), Subtarget(STI) {}

/// GetInstSize - Return the number of bytes of code the specified
/// instruction may occupy. This returns the maximum number of bytes.
unsigned AArch64InstrInfo::getInstSizeInBytes(const MachineInstr &MI) const {
  const MachineBasicBlock &MBB = *MI.getParent();
  const MachineFunction *MF = MBB.getParent();
  const MCAsmInfo *MAI = MF->getTarget().getMCAsmInfo();

  if (MI.getOpcode() == AArch64::INLINEASM)
    return getInlineAsmLength(MI.getOperand(0).getSymbolName(), *MAI);

  // FIXME: We currently only handle pseudoinstructions that don't get expanded
  // before the assembly printer.
  unsigned NumBytes = 0;
  const MCInstrDesc &Desc = MI.getDesc();
  switch (Desc.getOpcode()) {
  default:
    // Anything not explicitly designated otherwise is a normal 4-byte insn.
    NumBytes = 4;
    break;
  case TargetOpcode::DBG_VALUE:
  case TargetOpcode::EH_LABEL:
  case TargetOpcode::IMPLICIT_DEF:
  case TargetOpcode::KILL:
    NumBytes = 0;
    break;
  case TargetOpcode::STACKMAP:
    // The upper bound for a stackmap intrinsic is the full length of its shadow
    NumBytes = StackMapOpers(&MI).getNumPatchBytes();
    assert(NumBytes % 4 == 0 && "Invalid number of NOP bytes requested!");
    break;
  case TargetOpcode::PATCHPOINT:
    // The size of the patchpoint intrinsic is the number of bytes requested
    NumBytes = PatchPointOpers(&MI).getNumPatchBytes();
    assert(NumBytes % 4 == 0 && "Invalid number of NOP bytes requested!");
    break;
  case AArch64::TLSDESC_CALLSEQ:
    // This gets lowered to an instruction sequence which takes 16 bytes
    NumBytes = 16;
    break;
  }

  return NumBytes;
}

static void parseCondBranch(MachineInstr *LastInst, MachineBasicBlock *&Target,
                            SmallVectorImpl<MachineOperand> &Cond) {
  // Block ends with fall-through condbranch.
  switch (LastInst->getOpcode()) {
  default:
    llvm_unreachable("Unknown branch instruction?");
  case AArch64::Bcc:
    Target = LastInst->getOperand(1).getMBB();
    Cond.push_back(LastInst->getOperand(0));
    break;
  case AArch64::CBZW:
  case AArch64::CBZX:
  case AArch64::CBNZW:
  case AArch64::CBNZX:
    Target = LastInst->getOperand(1).getMBB();
    Cond.push_back(MachineOperand::CreateImm(-1));
    Cond.push_back(MachineOperand::CreateImm(LastInst->getOpcode()));
    Cond.push_back(LastInst->getOperand(0));
    break;
  case AArch64::TBZW:
  case AArch64::TBZX:
  case AArch64::TBNZW:
  case AArch64::TBNZX:
    Target = LastInst->getOperand(2).getMBB();
    Cond.push_back(MachineOperand::CreateImm(-1));
    Cond.push_back(MachineOperand::CreateImm(LastInst->getOpcode()));
    Cond.push_back(LastInst->getOperand(0));
    Cond.push_back(LastInst->getOperand(1));
  }
}

static unsigned getBranchDisplacementBits(unsigned Opc) {
  switch (Opc) {
  default:
    llvm_unreachable("unexpected opcode!");
  case AArch64::B:
    return 64;
  case AArch64::TBNZW:
  case AArch64::TBZW:
  case AArch64::TBNZX:
  case AArch64::TBZX:
    return TBZDisplacementBits;
  case AArch64::CBNZW:
  case AArch64::CBZW:
  case AArch64::CBNZX:
  case AArch64::CBZX:
    return CBZDisplacementBits;
  case AArch64::Bcc:
    return BCCDisplacementBits;
  }
}

bool AArch64InstrInfo::isBranchOffsetInRange(unsigned BranchOp,
                                             int64_t BrOffset) const {
  unsigned Bits = getBranchDisplacementBits(BranchOp);
  assert(Bits >= 3 && "max branch displacement must be enough to jump "
                      "over conditional branch expansion");
  return isIntN(Bits, BrOffset / 4);
}

MachineBasicBlock *AArch64InstrInfo::getBranchDestBlock(
    const MachineInstr &MI) const {
  switch (MI.getOpcode()) {
  default:
    llvm_unreachable("unexpected opcode!");
  case AArch64::B:
    return MI.getOperand(0).getMBB();
  case AArch64::TBZW:
  case AArch64::TBNZW:
  case AArch64::TBZX:
  case AArch64::TBNZX:
    return MI.getOperand(2).getMBB();
  case AArch64::CBZW:
  case AArch64::CBNZW:
  case AArch64::CBZX:
  case AArch64::CBNZX:
  case AArch64::Bcc:
    return MI.getOperand(1).getMBB();
  }
}

// Branch analysis.
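// The Cond vector filled in by analyzeBranch() below uses the encoding
// produced by parseCondBranch() above:
//   {cc}                   for Bcc,
//   {-1, opcode, reg}      for cbz/cbnz,
//   {-1, opcode, reg, bit} for tbz/tbnz.
// reverseBranchCondition(), instantiateCondBranch() and insertSelect() all
// rely on this layout.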
bool AArch64InstrInfo::analyzeBranch(MachineBasicBlock &MBB,
                                     MachineBasicBlock *&TBB,
                                     MachineBasicBlock *&FBB,
                                     SmallVectorImpl<MachineOperand> &Cond,
                                     bool AllowModify) const {
  // If the block has no terminators, it just falls into the block after it.
  MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr();
  if (I == MBB.end())
    return false;

  if (!isUnpredicatedTerminator(*I))
    return false;

  // Get the last instruction in the block.
  MachineInstr *LastInst = &*I;

  // If there is only one terminator instruction, process it.
  unsigned LastOpc = LastInst->getOpcode();
  if (I == MBB.begin() || !isUnpredicatedTerminator(*--I)) {
    if (isUncondBranchOpcode(LastOpc)) {
      TBB = LastInst->getOperand(0).getMBB();
      return false;
    }
    if (isCondBranchOpcode(LastOpc)) {
      // Block ends with fall-through condbranch.
      parseCondBranch(LastInst, TBB, Cond);
      return false;
    }
    return true; // Can't handle indirect branch.
  }

  // Get the instruction before it if it is a terminator.
  MachineInstr *SecondLastInst = &*I;
  unsigned SecondLastOpc = SecondLastInst->getOpcode();

  // If AllowModify is true and the block ends with two or more unconditional
  // branches, delete all but the first unconditional branch.
  if (AllowModify && isUncondBranchOpcode(LastOpc)) {
    while (isUncondBranchOpcode(SecondLastOpc)) {
      LastInst->eraseFromParent();
      LastInst = SecondLastInst;
      LastOpc = LastInst->getOpcode();
      if (I == MBB.begin() || !isUnpredicatedTerminator(*--I)) {
        // Return now that the only terminator is an unconditional branch.
        TBB = LastInst->getOperand(0).getMBB();
        return false;
      } else {
        SecondLastInst = &*I;
        SecondLastOpc = SecondLastInst->getOpcode();
      }
    }
  }

  // If there are three terminators, we don't know what sort of block this is.
  if (SecondLastInst && I != MBB.begin() && isUnpredicatedTerminator(*--I))
    return true;

  // If the block ends with a B and a Bcc, handle it.
  if (isCondBranchOpcode(SecondLastOpc) && isUncondBranchOpcode(LastOpc)) {
    parseCondBranch(SecondLastInst, TBB, Cond);
    FBB = LastInst->getOperand(0).getMBB();
    return false;
  }

  // If the block ends with two unconditional branches, handle it. The second
  // one is not executed, so remove it.
  if (isUncondBranchOpcode(SecondLastOpc) && isUncondBranchOpcode(LastOpc)) {
    TBB = SecondLastInst->getOperand(0).getMBB();
    I = LastInst;
    if (AllowModify)
      I->eraseFromParent();
    return false;
  }

  // ...likewise if it ends with an indirect branch followed by an unconditional
  // branch.
  if (isIndirectBranchOpcode(SecondLastOpc) && isUncondBranchOpcode(LastOpc)) {
    I = LastInst;
    if (AllowModify)
      I->eraseFromParent();
    return true;
  }

  // Otherwise, can't handle this.
  return true;
}

bool AArch64InstrInfo::reverseBranchCondition(
    SmallVectorImpl<MachineOperand> &Cond) const {
  if (Cond[0].getImm() != -1) {
    // Regular Bcc
    AArch64CC::CondCode CC = (AArch64CC::CondCode)(int)Cond[0].getImm();
    Cond[0].setImm(AArch64CC::getInvertedCondCode(CC));
  } else {
    // Folded compare-and-branch
    switch (Cond[1].getImm()) {
    default:
      llvm_unreachable("Unknown conditional branch!");
    case AArch64::CBZW:
      Cond[1].setImm(AArch64::CBNZW);
      break;
    case AArch64::CBNZW:
      Cond[1].setImm(AArch64::CBZW);
      break;
    case AArch64::CBZX:
      Cond[1].setImm(AArch64::CBNZX);
      break;
    case AArch64::CBNZX:
      Cond[1].setImm(AArch64::CBZX);
      break;
    case AArch64::TBZW:
      Cond[1].setImm(AArch64::TBNZW);
      break;
    case AArch64::TBNZW:
      Cond[1].setImm(AArch64::TBZW);
      break;
    case AArch64::TBZX:
      Cond[1].setImm(AArch64::TBNZX);
      break;
    case AArch64::TBNZX:
      Cond[1].setImm(AArch64::TBZX);
      break;
    }
  }

  return false;
}

unsigned AArch64InstrInfo::removeBranch(MachineBasicBlock &MBB,
                                        int *BytesRemoved) const {
  MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr();
  if (I == MBB.end())
    return 0;

  if (!isUncondBranchOpcode(I->getOpcode()) &&
      !isCondBranchOpcode(I->getOpcode()))
    return 0;

  // Remove the branch.
  I->eraseFromParent();

  I = MBB.end();

  if (I == MBB.begin()) {
    if (BytesRemoved)
      *BytesRemoved = 4;
    return 1;
  }
  --I;
  if (!isCondBranchOpcode(I->getOpcode())) {
    if (BytesRemoved)
      *BytesRemoved = 4;
    return 1;
  }

  // Remove the branch.
  I->eraseFromParent();
  if (BytesRemoved)
    *BytesRemoved = 8;

  return 2;
}

void AArch64InstrInfo::instantiateCondBranch(
    MachineBasicBlock &MBB, const DebugLoc &DL, MachineBasicBlock *TBB,
    ArrayRef<MachineOperand> Cond) const {
  if (Cond[0].getImm() != -1) {
    // Regular Bcc
    BuildMI(&MBB, DL, get(AArch64::Bcc)).addImm(Cond[0].getImm()).addMBB(TBB);
  } else {
    // Folded compare-and-branch
    // Note that we use addOperand instead of addReg to keep the flags.
    const MachineInstrBuilder MIB =
        BuildMI(&MBB, DL, get(Cond[1].getImm())).addOperand(Cond[2]);
    if (Cond.size() > 3)
      MIB.addImm(Cond[3].getImm());
    MIB.addMBB(TBB);
  }
}

unsigned AArch64InstrInfo::insertBranch(MachineBasicBlock &MBB,
                                        MachineBasicBlock *TBB,
                                        MachineBasicBlock *FBB,
                                        ArrayRef<MachineOperand> Cond,
                                        const DebugLoc &DL,
                                        int *BytesAdded) const {
  // Shouldn't be a fall through.
  assert(TBB && "insertBranch must not be told to insert a fallthrough");

  if (!FBB) {
    if (Cond.empty()) // Unconditional branch?
      BuildMI(&MBB, DL, get(AArch64::B)).addMBB(TBB);
    else
      instantiateCondBranch(MBB, DL, TBB, Cond);

    if (BytesAdded)
      *BytesAdded = 4;

    return 1;
  }

  // Two-way conditional branch.
  instantiateCondBranch(MBB, DL, TBB, Cond);
  BuildMI(&MBB, DL, get(AArch64::B)).addMBB(FBB);

  if (BytesAdded)
    *BytesAdded = 8;

  return 2;
}

// Find the original register that VReg is copied from.
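// For example, given the chain
//   %1 = ADDWri ...
//   %2 = COPY %1
//   %3 = COPY %2
// removeCopies(MRI, %3) follows the full copies back and returns %1.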
static unsigned removeCopies(const MachineRegisterInfo &MRI, unsigned VReg) {
  while (TargetRegisterInfo::isVirtualRegister(VReg)) {
    const MachineInstr *DefMI = MRI.getVRegDef(VReg);
    if (!DefMI->isFullCopy())
      return VReg;
    VReg = DefMI->getOperand(1).getReg();
  }
  return VReg;
}

// Determine if VReg is defined by an instruction that can be folded into a
// csel instruction. If so, return the folded opcode, and the replacement
// register.
static unsigned canFoldIntoCSel(const MachineRegisterInfo &MRI, unsigned VReg,
                                unsigned *NewVReg = nullptr) {
  VReg = removeCopies(MRI, VReg);
  if (!TargetRegisterInfo::isVirtualRegister(VReg))
    return 0;

  bool Is64Bit = AArch64::GPR64allRegClass.hasSubClassEq(MRI.getRegClass(VReg));
  const MachineInstr *DefMI = MRI.getVRegDef(VReg);
  unsigned Opc = 0;
  unsigned SrcOpNum = 0;
  switch (DefMI->getOpcode()) {
  case AArch64::ADDSXri:
  case AArch64::ADDSWri:
    // if NZCV is used, do not fold.
    if (DefMI->findRegisterDefOperandIdx(AArch64::NZCV, true) == -1)
      return 0;
    // fall-through to ADDXri and ADDWri.
    LLVM_FALLTHROUGH;
  case AArch64::ADDXri:
  case AArch64::ADDWri:
    // add x, 1 -> csinc.
    if (!DefMI->getOperand(2).isImm() || DefMI->getOperand(2).getImm() != 1 ||
        DefMI->getOperand(3).getImm() != 0)
      return 0;
    SrcOpNum = 1;
    Opc = Is64Bit ? AArch64::CSINCXr : AArch64::CSINCWr;
    break;

  case AArch64::ORNXrr:
  case AArch64::ORNWrr: {
    // not x -> csinv, represented as orn dst, xzr, src.
    unsigned ZReg = removeCopies(MRI, DefMI->getOperand(1).getReg());
    if (ZReg != AArch64::XZR && ZReg != AArch64::WZR)
      return 0;
    SrcOpNum = 2;
    Opc = Is64Bit ? AArch64::CSINVXr : AArch64::CSINVWr;
    break;
  }

  case AArch64::SUBSXrr:
  case AArch64::SUBSWrr:
    // if NZCV is used, do not fold.
    if (DefMI->findRegisterDefOperandIdx(AArch64::NZCV, true) == -1)
      return 0;
    // fall-through to SUBXrr and SUBWrr.
    LLVM_FALLTHROUGH;
  case AArch64::SUBXrr:
  case AArch64::SUBWrr: {
    // neg x -> csneg, represented as sub dst, xzr, src.
    unsigned ZReg = removeCopies(MRI, DefMI->getOperand(1).getReg());
    if (ZReg != AArch64::XZR && ZReg != AArch64::WZR)
      return 0;
    SrcOpNum = 2;
    Opc = Is64Bit ? AArch64::CSNEGXr : AArch64::CSNEGWr;
    break;
  }
  default:
    return 0;
  }
  assert(Opc && SrcOpNum && "Missing parameters");

  if (NewVReg)
    *NewVReg = DefMI->getOperand(SrcOpNum).getReg();
  return Opc;
}

bool AArch64InstrInfo::canInsertSelect(
    const MachineBasicBlock &MBB, ArrayRef<MachineOperand> Cond,
    unsigned TrueReg, unsigned FalseReg, int &CondCycles, int &TrueCycles,
    int &FalseCycles) const {
  // Check register classes.
  const MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
  const TargetRegisterClass *RC =
      RI.getCommonSubClass(MRI.getRegClass(TrueReg), MRI.getRegClass(FalseReg));
  if (!RC)
    return false;

  // Expanding cbz/tbz requires an extra cycle of latency on the condition.
  unsigned ExtraCondLat = Cond.size() != 1;

  // GPRs are handled by csel.
  // FIXME: Fold in x+1, -x, and ~x when applicable.
  if (AArch64::GPR64allRegClass.hasSubClassEq(RC) ||
      AArch64::GPR32allRegClass.hasSubClassEq(RC)) {
    // Single-cycle csel, csinc, csinv, and csneg.
    CondCycles = 1 + ExtraCondLat;
    TrueCycles = FalseCycles = 1;
    if (canFoldIntoCSel(MRI, TrueReg))
      TrueCycles = 0;
    else if (canFoldIntoCSel(MRI, FalseReg))
      FalseCycles = 0;
    return true;
  }

  // Scalar floating point is handled by fcsel.
  // FIXME: Form fabs, fmin, and fmax when applicable.
  if (AArch64::FPR64RegClass.hasSubClassEq(RC) ||
      AArch64::FPR32RegClass.hasSubClassEq(RC)) {
    CondCycles = 5 + ExtraCondLat;
    TrueCycles = FalseCycles = 2;
    return true;
  }

  // Can't do vectors.
  return false;
}

void AArch64InstrInfo::insertSelect(MachineBasicBlock &MBB,
                                    MachineBasicBlock::iterator I,
                                    const DebugLoc &DL, unsigned DstReg,
                                    ArrayRef<MachineOperand> Cond,
                                    unsigned TrueReg, unsigned FalseReg) const {
  MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();

  // Parse the condition code, see parseCondBranch() above.
  AArch64CC::CondCode CC;
  switch (Cond.size()) {
  default:
    llvm_unreachable("Unknown condition opcode in Cond");
  case 1: // b.cc
    CC = AArch64CC::CondCode(Cond[0].getImm());
    break;
  case 3: { // cbz/cbnz
    // We must insert a compare against 0.
    bool Is64Bit;
    switch (Cond[1].getImm()) {
    default:
      llvm_unreachable("Unknown branch opcode in Cond");
    case AArch64::CBZW:
      Is64Bit = false;
      CC = AArch64CC::EQ;
      break;
    case AArch64::CBZX:
      Is64Bit = true;
      CC = AArch64CC::EQ;
      break;
    case AArch64::CBNZW:
      Is64Bit = false;
      CC = AArch64CC::NE;
      break;
    case AArch64::CBNZX:
      Is64Bit = true;
      CC = AArch64CC::NE;
      break;
    }
    unsigned SrcReg = Cond[2].getReg();
    if (Is64Bit) {
      // cmp reg, #0 is actually subs xzr, reg, #0.
      MRI.constrainRegClass(SrcReg, &AArch64::GPR64spRegClass);
      BuildMI(MBB, I, DL, get(AArch64::SUBSXri), AArch64::XZR)
          .addReg(SrcReg)
          .addImm(0)
          .addImm(0);
    } else {
      MRI.constrainRegClass(SrcReg, &AArch64::GPR32spRegClass);
      BuildMI(MBB, I, DL, get(AArch64::SUBSWri), AArch64::WZR)
          .addReg(SrcReg)
          .addImm(0)
          .addImm(0);
    }
    break;
  }
  case 4: { // tbz/tbnz
    // We must insert a tst instruction.
    switch (Cond[1].getImm()) {
    default:
      llvm_unreachable("Unknown branch opcode in Cond");
    case AArch64::TBZW:
    case AArch64::TBZX:
      CC = AArch64CC::EQ;
      break;
    case AArch64::TBNZW:
    case AArch64::TBNZX:
      CC = AArch64CC::NE;
      break;
    }
    // cmp reg, #foo is actually ands xzr, reg, #1<<foo.
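    // For example, testing bit 3 of a W register (TBZW/TBNZW with bit #3)
    // becomes: ands wzr, reg, #0x8.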
    if (Cond[1].getImm() == AArch64::TBZW || Cond[1].getImm() == AArch64::TBNZW)
      BuildMI(MBB, I, DL, get(AArch64::ANDSWri), AArch64::WZR)
          .addReg(Cond[2].getReg())
          .addImm(
              AArch64_AM::encodeLogicalImmediate(1ull << Cond[3].getImm(), 32));
    else
      BuildMI(MBB, I, DL, get(AArch64::ANDSXri), AArch64::XZR)
          .addReg(Cond[2].getReg())
          .addImm(
              AArch64_AM::encodeLogicalImmediate(1ull << Cond[3].getImm(), 64));
    break;
  }
  }

  unsigned Opc = 0;
  const TargetRegisterClass *RC = nullptr;
  bool TryFold = false;
  if (MRI.constrainRegClass(DstReg, &AArch64::GPR64RegClass)) {
    RC = &AArch64::GPR64RegClass;
    Opc = AArch64::CSELXr;
    TryFold = true;
  } else if (MRI.constrainRegClass(DstReg, &AArch64::GPR32RegClass)) {
    RC = &AArch64::GPR32RegClass;
    Opc = AArch64::CSELWr;
    TryFold = true;
  } else if (MRI.constrainRegClass(DstReg, &AArch64::FPR64RegClass)) {
    RC = &AArch64::FPR64RegClass;
    Opc = AArch64::FCSELDrrr;
  } else if (MRI.constrainRegClass(DstReg, &AArch64::FPR32RegClass)) {
    RC = &AArch64::FPR32RegClass;
    Opc = AArch64::FCSELSrrr;
  }
  assert(RC && "Unsupported regclass");

  // Try folding simple instructions into the csel.
  if (TryFold) {
    unsigned NewVReg = 0;
    unsigned FoldedOpc = canFoldIntoCSel(MRI, TrueReg, &NewVReg);
    if (FoldedOpc) {
      // The folded opcodes csinc, csinv and csneg apply the operation to
      // FalseReg, so we need to invert the condition.
      CC = AArch64CC::getInvertedCondCode(CC);
      TrueReg = FalseReg;
    } else
      FoldedOpc = canFoldIntoCSel(MRI, FalseReg, &NewVReg);

    // Fold the operation. Leave any dead instructions for DCE to clean up.
    if (FoldedOpc) {
      FalseReg = NewVReg;
      Opc = FoldedOpc;
      // This extends the live range of NewVReg.
      MRI.clearKillFlags(NewVReg);
    }
  }

  // Pull all virtual registers into the appropriate class.
  MRI.constrainRegClass(TrueReg, RC);
  MRI.constrainRegClass(FalseReg, RC);

  // Insert the csel.
  BuildMI(MBB, I, DL, get(Opc), DstReg).addReg(TrueReg).addReg(FalseReg).addImm(
      CC);
}

/// Returns true if a MOVi32imm or MOVi64imm can be expanded to an ORRxx.
static bool canBeExpandedToORR(const MachineInstr &MI, unsigned BitSize) {
  uint64_t Imm = MI.getOperand(1).getImm();
  uint64_t UImm = Imm << (64 - BitSize) >> (64 - BitSize);
  uint64_t Encoding;
  return AArch64_AM::processLogicalImmediate(UImm, BitSize, Encoding);
}

// FIXME: this implementation should be micro-architecture dependent, so a
// micro-architecture target hook should be introduced here in future.
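// For example, in the checks below an add with a shifted register operand,
// such as add x0, x1, x2, lsl #2, is only considered cheap on Exynos M1, and
// only for shift amounts below 4.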
bool AArch64InstrInfo::isAsCheapAsAMove(const MachineInstr &MI) const {
  if (!Subtarget.hasCustomCheapAsMoveHandling())
    return MI.isAsCheapAsAMove();

  unsigned Imm;

  switch (MI.getOpcode()) {
  default:
    return false;

  // add/sub on register without shift
  case AArch64::ADDWri:
  case AArch64::ADDXri:
  case AArch64::SUBWri:
  case AArch64::SUBXri:
    return (Subtarget.getProcFamily() == AArch64Subtarget::ExynosM1 ||
            MI.getOperand(3).getImm() == 0);

  // add/sub on register with shift
  case AArch64::ADDWrs:
  case AArch64::ADDXrs:
  case AArch64::SUBWrs:
  case AArch64::SUBXrs:
    Imm = MI.getOperand(3).getImm();
    return (Subtarget.getProcFamily() == AArch64Subtarget::ExynosM1 &&
            AArch64_AM::getArithShiftValue(Imm) < 4);

  // logical ops on immediate
  case AArch64::ANDWri:
  case AArch64::ANDXri:
  case AArch64::EORWri:
  case AArch64::EORXri:
  case AArch64::ORRWri:
  case AArch64::ORRXri:
    return true;

  // logical ops on register without shift
  case AArch64::ANDWrr:
  case AArch64::ANDXrr:
  case AArch64::BICWrr:
  case AArch64::BICXrr:
  case AArch64::EONWrr:
  case AArch64::EONXrr:
  case AArch64::EORWrr:
  case AArch64::EORXrr:
  case AArch64::ORNWrr:
  case AArch64::ORNXrr:
  case AArch64::ORRWrr:
  case AArch64::ORRXrr:
    return true;

  // logical ops on register with shift
  case AArch64::ANDWrs:
  case AArch64::ANDXrs:
  case AArch64::BICWrs:
  case AArch64::BICXrs:
  case AArch64::EONWrs:
  case AArch64::EONXrs:
  case AArch64::EORWrs:
  case AArch64::EORXrs:
  case AArch64::ORNWrs:
  case AArch64::ORNXrs:
  case AArch64::ORRWrs:
  case AArch64::ORRXrs:
    Imm = MI.getOperand(3).getImm();
    return (Subtarget.getProcFamily() == AArch64Subtarget::ExynosM1 &&
            AArch64_AM::getShiftValue(Imm) < 4 &&
            AArch64_AM::getShiftType(Imm) == AArch64_AM::LSL);

  // If MOVi32imm or MOVi64imm can be expanded into ORRWri or
  // ORRXri, it is as cheap as MOV
  case AArch64::MOVi32imm:
    return canBeExpandedToORR(MI, 32);
  case AArch64::MOVi64imm:
    return canBeExpandedToORR(MI, 64);

  // It is cheap to zero out registers if the subtarget has ZeroCycleZeroing
  // feature.
  case AArch64::FMOVS0:
  case AArch64::FMOVD0:
    return Subtarget.hasZeroCycleZeroing();
  case TargetOpcode::COPY:
    return (Subtarget.hasZeroCycleZeroing() &&
            (MI.getOperand(1).getReg() == AArch64::WZR ||
             MI.getOperand(1).getReg() == AArch64::XZR));
  }

  llvm_unreachable("Unknown opcode to check as cheap as a move!");
}

bool AArch64InstrInfo::isCoalescableExtInstr(const MachineInstr &MI,
                                             unsigned &SrcReg, unsigned &DstReg,
                                             unsigned &SubIdx) const {
  switch (MI.getOpcode()) {
  default:
    return false;
  case AArch64::SBFMXri: // aka sxtw
  case AArch64::UBFMXri: // aka uxtw
    // Check for the 32 -> 64 bit extension case; these instructions can do
    // much more.
    if (MI.getOperand(2).getImm() != 0 || MI.getOperand(3).getImm() != 31)
      return false;
    // This is a signed or unsigned 32 -> 64 bit extension.
    SrcReg = MI.getOperand(1).getReg();
    DstReg = MI.getOperand(0).getReg();
    SubIdx = AArch64::sub_32;
    return true;
  }
}

bool AArch64InstrInfo::areMemAccessesTriviallyDisjoint(
    MachineInstr &MIa, MachineInstr &MIb, AliasAnalysis *AA) const {
  const TargetRegisterInfo *TRI = &getRegisterInfo();
  unsigned BaseRegA = 0, BaseRegB = 0;
  int64_t OffsetA = 0, OffsetB = 0;
  unsigned WidthA = 0, WidthB = 0;

  assert(MIa.mayLoadOrStore() && "MIa must be a load or store.");
  assert(MIb.mayLoadOrStore() && "MIb must be a load or store.");

  if (MIa.hasUnmodeledSideEffects() || MIb.hasUnmodeledSideEffects() ||
      MIa.hasOrderedMemoryRef() || MIb.hasOrderedMemoryRef())
    return false;

  // Retrieve the base register, offset from the base register and width. Width
  // is the size of memory that is being loaded/stored (e.g. 1, 2, 4, 8). If
  // base registers are identical, and the offset of a lower memory access +
  // the width doesn't overlap the offset of a higher memory access,
  // then the memory accesses are different.
  if (getMemOpBaseRegImmOfsWidth(MIa, BaseRegA, OffsetA, WidthA, TRI) &&
      getMemOpBaseRegImmOfsWidth(MIb, BaseRegB, OffsetB, WidthB, TRI)) {
    if (BaseRegA == BaseRegB) {
      int LowOffset = OffsetA < OffsetB ? OffsetA : OffsetB;
      int HighOffset = OffsetA < OffsetB ? OffsetB : OffsetA;
      int LowWidth = (LowOffset == OffsetA) ? WidthA : WidthB;
      if (LowOffset + LowWidth <= HighOffset)
        return true;
    }
  }
  return false;
}

/// analyzeCompare - For a comparison instruction, return the source registers
/// in SrcReg and SrcReg2, and the value it compares against in CmpValue.
/// Return true if the comparison instruction can be analyzed.
bool AArch64InstrInfo::analyzeCompare(const MachineInstr &MI, unsigned &SrcReg,
                                      unsigned &SrcReg2, int &CmpMask,
                                      int &CmpValue) const {
  switch (MI.getOpcode()) {
  default:
    break;
  case AArch64::SUBSWrr:
  case AArch64::SUBSWrs:
  case AArch64::SUBSWrx:
  case AArch64::SUBSXrr:
  case AArch64::SUBSXrs:
  case AArch64::SUBSXrx:
  case AArch64::ADDSWrr:
  case AArch64::ADDSWrs:
  case AArch64::ADDSWrx:
  case AArch64::ADDSXrr:
  case AArch64::ADDSXrs:
  case AArch64::ADDSXrx:
    // Replace SUBSWrr with SUBWrr if NZCV is not used.
    SrcReg = MI.getOperand(1).getReg();
    SrcReg2 = MI.getOperand(2).getReg();
    CmpMask = ~0;
    CmpValue = 0;
    return true;
  case AArch64::SUBSWri:
  case AArch64::ADDSWri:
  case AArch64::SUBSXri:
  case AArch64::ADDSXri:
    SrcReg = MI.getOperand(1).getReg();
    SrcReg2 = 0;
    CmpMask = ~0;
    // FIXME: CmpValue is reduced here to 0 or 1 (i.e. zero / non-zero).
    CmpValue = MI.getOperand(2).getImm() != 0;
    return true;
  case AArch64::ANDSWri:
  case AArch64::ANDSXri:
    // ANDS does not use the same encoding scheme as the other xxxS
    // instructions.
    SrcReg = MI.getOperand(1).getReg();
    SrcReg2 = 0;
    CmpMask = ~0;
    // FIXME: The return value type of decodeLogicalImmediate is uint64_t,
    // while the type of CmpValue is int. When converting uint64_t to int,
    // the high 32 bits of uint64_t will be lost.
    // In fact it causes a bug in spec2006-483.xalancbmk.
    // CmpValue is only used to compare with zero in OptimizeCompareInstr.
    CmpValue = AArch64_AM::decodeLogicalImmediate(
                   MI.getOperand(2).getImm(),
                   MI.getOpcode() == AArch64::ANDSWri ?
                       32 : 64) != 0;
    return true;
  }

  return false;
}

static bool UpdateOperandRegClass(MachineInstr &Instr) {
  MachineBasicBlock *MBB = Instr.getParent();
  assert(MBB && "Can't get MachineBasicBlock here");
  MachineFunction *MF = MBB->getParent();
  assert(MF && "Can't get MachineFunction here");
  const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
  const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();
  MachineRegisterInfo *MRI = &MF->getRegInfo();

  for (unsigned OpIdx = 0, EndIdx = Instr.getNumOperands(); OpIdx < EndIdx;
       ++OpIdx) {
    MachineOperand &MO = Instr.getOperand(OpIdx);
    const TargetRegisterClass *OpRegCstraints =
        Instr.getRegClassConstraint(OpIdx, TII, TRI);

    // If there's no constraint, there's nothing to do.
    if (!OpRegCstraints)
      continue;
    // If the operand is a frame index, there's nothing to do here.
    // A frame index operand will resolve correctly during PEI.
    if (MO.isFI())
      continue;

    assert(MO.isReg() &&
           "Operand has register constraints without being a register!");

    unsigned Reg = MO.getReg();
    if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
      if (!OpRegCstraints->contains(Reg))
        return false;
    } else if (!OpRegCstraints->hasSubClassEq(MRI->getRegClass(Reg)) &&
               !MRI->constrainRegClass(Reg, OpRegCstraints))
      return false;
  }

  return true;
}

/// \brief Return the opcode that does not set flags when possible - otherwise
/// return the original opcode. The caller is responsible for doing the actual
/// substitution and legality checking.
static unsigned convertFlagSettingOpcode(const MachineInstr &MI) {
  // Don't convert all compare instructions, because for some the zero register
  // encoding becomes the sp register.
  bool MIDefinesZeroReg = false;
  if (MI.definesRegister(AArch64::WZR) || MI.definesRegister(AArch64::XZR))
    MIDefinesZeroReg = true;

  switch (MI.getOpcode()) {
  default:
    return MI.getOpcode();
  case AArch64::ADDSWrr:
    return AArch64::ADDWrr;
  case AArch64::ADDSWri:
    return MIDefinesZeroReg ? AArch64::ADDSWri : AArch64::ADDWri;
  case AArch64::ADDSWrs:
    return MIDefinesZeroReg ? AArch64::ADDSWrs : AArch64::ADDWrs;
  case AArch64::ADDSWrx:
    return AArch64::ADDWrx;
  case AArch64::ADDSXrr:
    return AArch64::ADDXrr;
  case AArch64::ADDSXri:
    return MIDefinesZeroReg ? AArch64::ADDSXri : AArch64::ADDXri;
  case AArch64::ADDSXrs:
    return MIDefinesZeroReg ? AArch64::ADDSXrs : AArch64::ADDXrs;
  case AArch64::ADDSXrx:
    return AArch64::ADDXrx;
  case AArch64::SUBSWrr:
    return AArch64::SUBWrr;
  case AArch64::SUBSWri:
    return MIDefinesZeroReg ? AArch64::SUBSWri : AArch64::SUBWri;
  case AArch64::SUBSWrs:
    return MIDefinesZeroReg ? AArch64::SUBSWrs : AArch64::SUBWrs;
  case AArch64::SUBSWrx:
    return AArch64::SUBWrx;
  case AArch64::SUBSXrr:
    return AArch64::SUBXrr;
  case AArch64::SUBSXri:
    return MIDefinesZeroReg ? AArch64::SUBSXri : AArch64::SUBXri;
  case AArch64::SUBSXrs:
    return MIDefinesZeroReg ? AArch64::SUBSXrs : AArch64::SUBXrs;
  case AArch64::SUBSXrx:
    return AArch64::SUBXrx;
  }
}

enum AccessKind {
  AK_Write = 0x01,
  AK_Read = 0x10,
  AK_All = 0x11
};

/// True when condition flags are accessed (either by writing or reading)
/// on the instruction trace starting at From and ending at To.
///
/// Note: If From and To are from different blocks it's assumed CC are accessed
/// on the path.
static bool areCFlagsAccessedBetweenInstrs(
    MachineBasicBlock::iterator From, MachineBasicBlock::iterator To,
    const TargetRegisterInfo *TRI, const AccessKind AccessToCheck = AK_All) {
  // Early exit if To is at the beginning of the BB.
  if (To == To->getParent()->begin())
    return true;

  // Check whether the instructions are in the same basic block.
  // If not, assume the condition flags might get modified somewhere.
  if (To->getParent() != From->getParent())
    return true;

  // From must be above To.
  assert(std::find_if(++To.getReverse(), To->getParent()->rend(),
                      [From](MachineInstr &MI) {
                        return MI.getIterator() == From;
                      }) != To->getParent()->rend());

  // We iterate backward starting at \p To until we hit \p From.
  for (--To; To != From; --To) {
    const MachineInstr &Instr = *To;

    if (((AccessToCheck & AK_Write) &&
         Instr.modifiesRegister(AArch64::NZCV, TRI)) ||
        ((AccessToCheck & AK_Read) && Instr.readsRegister(AArch64::NZCV, TRI)))
      return true;
  }
  return false;
}

/// Try to optimize a compare instruction. A compare instruction is an
/// instruction which produces AArch64::NZCV. It is truly a compare instruction
/// only when there are no uses of its destination register.
///
/// The following steps are tried in order:
/// 1. Convert CmpInstr into an unconditional version.
/// 2. Remove CmpInstr if an instruction above it produces the needed condition
///    code, or can be converted into one that does.
///    Only comparison with zero is supported.
bool AArch64InstrInfo::optimizeCompareInstr(
    MachineInstr &CmpInstr, unsigned SrcReg, unsigned SrcReg2, int CmpMask,
    int CmpValue, const MachineRegisterInfo *MRI) const {
  assert(CmpInstr.getParent());
  assert(MRI);

  // Replace SUBSWrr with SUBWrr if NZCV is not used.
  int DeadNZCVIdx = CmpInstr.findRegisterDefOperandIdx(AArch64::NZCV, true);
  if (DeadNZCVIdx != -1) {
    if (CmpInstr.definesRegister(AArch64::WZR) ||
        CmpInstr.definesRegister(AArch64::XZR)) {
      CmpInstr.eraseFromParent();
      return true;
    }
    unsigned Opc = CmpInstr.getOpcode();
    unsigned NewOpc = convertFlagSettingOpcode(CmpInstr);
    if (NewOpc == Opc)
      return false;
    const MCInstrDesc &MCID = get(NewOpc);
    CmpInstr.setDesc(MCID);
    CmpInstr.RemoveOperand(DeadNZCVIdx);
    bool succeeded = UpdateOperandRegClass(CmpInstr);
    (void)succeeded;
    assert(succeeded && "Some operands reg class are incompatible!");
    return true;
  }

  // Continue only if we have a "ri" where immediate is zero.
  // FIXME: CmpValue has already been converted to 0 or 1 in analyzeCompare.
  assert((CmpValue == 0 || CmpValue == 1) && "CmpValue must be 0 or 1!");
  if (CmpValue != 0 || SrcReg2 != 0)
    return false;

  // CmpInstr is a Compare instruction if destination register is not used.
  if (!MRI->use_nodbg_empty(CmpInstr.getOperand(0).getReg()))
    return false;

  return substituteCmpToZero(CmpInstr, SrcReg, MRI);
}

/// Get opcode of S version of Instr.
/// If Instr is S version its opcode is returned.
/// AArch64::INSTRUCTION_LIST_END is returned if Instr does not have S version
/// or we are not interested in it.
static unsigned sForm(MachineInstr &Instr) {
  switch (Instr.getOpcode()) {
  default:
    return AArch64::INSTRUCTION_LIST_END;

  case AArch64::ADDSWrr:
  case AArch64::ADDSWri:
  case AArch64::ADDSXrr:
  case AArch64::ADDSXri:
  case AArch64::SUBSWrr:
  case AArch64::SUBSWri:
  case AArch64::SUBSXrr:
  case AArch64::SUBSXri:
    return Instr.getOpcode();

  case AArch64::ADDWrr: return AArch64::ADDSWrr;
  case AArch64::ADDWri: return AArch64::ADDSWri;
  case AArch64::ADDXrr: return AArch64::ADDSXrr;
  case AArch64::ADDXri: return AArch64::ADDSXri;
  case AArch64::ADCWr:  return AArch64::ADCSWr;
  case AArch64::ADCXr:  return AArch64::ADCSXr;
  case AArch64::SUBWrr: return AArch64::SUBSWrr;
  case AArch64::SUBWri: return AArch64::SUBSWri;
  case AArch64::SUBXrr: return AArch64::SUBSXrr;
  case AArch64::SUBXri: return AArch64::SUBSXri;
  case AArch64::SBCWr:  return AArch64::SBCSWr;
  case AArch64::SBCXr:  return AArch64::SBCSXr;
  case AArch64::ANDWri: return AArch64::ANDSWri;
  case AArch64::ANDXri: return AArch64::ANDSXri;
  }
}

/// Check if AArch64::NZCV should be alive in successors of MBB.
static bool areCFlagsAliveInSuccessors(MachineBasicBlock *MBB) {
  for (auto *BB : MBB->successors())
    if (BB->isLiveIn(AArch64::NZCV))
      return true;
  return false;
}

namespace {

struct UsedNZCV {
  bool N = false;
  bool Z = false;
  bool C = false;
  bool V = false;

  UsedNZCV() = default;

  UsedNZCV &operator|=(const UsedNZCV &UsedFlags) {
    this->N |= UsedFlags.N;
    this->Z |= UsedFlags.Z;
    this->C |= UsedFlags.C;
    this->V |= UsedFlags.V;
    return *this;
  }
};

} // end anonymous namespace

/// Find a condition code used by the instruction.
/// Returns AArch64CC::Invalid if either the instruction does not use condition
/// codes or we don't optimize CmpInstr in the presence of such instructions.
static AArch64CC::CondCode findCondCodeUsedByInstr(const MachineInstr &Instr) {
  switch (Instr.getOpcode()) {
  default:
    return AArch64CC::Invalid;

  case AArch64::Bcc: {
    int Idx = Instr.findRegisterUseOperandIdx(AArch64::NZCV);
    assert(Idx >= 2);
    return static_cast<AArch64CC::CondCode>(Instr.getOperand(Idx - 2).getImm());
  }

  case AArch64::CSINVWr:
  case AArch64::CSINVXr:
  case AArch64::CSINCWr:
  case AArch64::CSINCXr:
  case AArch64::CSELWr:
  case AArch64::CSELXr:
  case AArch64::CSNEGWr:
  case AArch64::CSNEGXr:
  case AArch64::FCSELSrrr:
  case AArch64::FCSELDrrr: {
    int Idx = Instr.findRegisterUseOperandIdx(AArch64::NZCV);
    assert(Idx >= 1);
    return static_cast<AArch64CC::CondCode>(Instr.getOperand(Idx - 1).getImm());
  }
  }
}

static UsedNZCV getUsedNZCV(AArch64CC::CondCode CC) {
  assert(CC != AArch64CC::Invalid);
  UsedNZCV UsedFlags;
  switch (CC) {
  default:
    break;

  case AArch64CC::EQ: // Z set
  case AArch64CC::NE: // Z clear
    UsedFlags.Z = true;
    break;

  case AArch64CC::HI: // Z clear and C set
  case AArch64CC::LS: // Z set or C clear
    UsedFlags.Z = true;
    LLVM_FALLTHROUGH; // These also use the C flag below.
  case AArch64CC::HS: // C set
  case AArch64CC::LO: // C clear
    UsedFlags.C = true;
    break;

  case AArch64CC::MI: // N set
  case AArch64CC::PL: // N clear
    UsedFlags.N = true;
    break;

  case AArch64CC::VS: // V set
  case AArch64CC::VC: // V clear
    UsedFlags.V = true;
    break;

  case AArch64CC::GT: // Z clear, N and V the same
  case AArch64CC::LE: // Z set, N and V differ
    UsedFlags.Z = true;
    LLVM_FALLTHROUGH; // These also use the N and V flags below.
  case AArch64CC::GE: // N and V the same
  case AArch64CC::LT: // N and V differ
    UsedFlags.N = true;
    UsedFlags.V = true;
    break;
  }
  return UsedFlags;
}

static bool isADDSRegImm(unsigned Opcode) {
  return Opcode == AArch64::ADDSWri || Opcode == AArch64::ADDSXri;
}

static bool isSUBSRegImm(unsigned Opcode) {
  return Opcode == AArch64::SUBSWri || Opcode == AArch64::SUBSXri;
}

/// Check if CmpInstr can be substituted by MI.
///
/// CmpInstr can be substituted:
/// - CmpInstr is either 'ADDS %vreg, 0' or 'SUBS %vreg, 0'
/// - and, MI and CmpInstr are from the same MachineBB
/// - and, condition flags are not alive in successors of the CmpInstr parent
/// - and, if MI opcode is the S form there must be no defs of flags between
///        MI and CmpInstr
///        or if MI opcode is not the S form there must be neither defs of flags
///        nor uses of flags between MI and CmpInstr.
/// - and, C/V flags are not used after CmpInstr
static bool canInstrSubstituteCmpInstr(MachineInstr *MI, MachineInstr *CmpInstr,
                                       const TargetRegisterInfo *TRI) {
  assert(MI);
  assert(sForm(*MI) != AArch64::INSTRUCTION_LIST_END);
  assert(CmpInstr);

  const unsigned CmpOpcode = CmpInstr->getOpcode();
  if (!isADDSRegImm(CmpOpcode) && !isSUBSRegImm(CmpOpcode))
    return false;

  if (MI->getParent() != CmpInstr->getParent())
    return false;

  if (areCFlagsAliveInSuccessors(CmpInstr->getParent()))
    return false;

  AccessKind AccessToCheck = AK_Write;
  if (sForm(*MI) != MI->getOpcode())
    AccessToCheck = AK_All;
  if (areCFlagsAccessedBetweenInstrs(MI, CmpInstr, TRI, AccessToCheck))
    return false;

  UsedNZCV NZCVUsedAfterCmp;
  for (auto I = std::next(CmpInstr->getIterator()),
            E = CmpInstr->getParent()->instr_end();
       I != E; ++I) {
    const MachineInstr &Instr = *I;
    if (Instr.readsRegister(AArch64::NZCV, TRI)) {
      AArch64CC::CondCode CC = findCondCodeUsedByInstr(Instr);
      if (CC == AArch64CC::Invalid) // Unsupported conditional instruction
        return false;
      NZCVUsedAfterCmp |= getUsedNZCV(CC);
    }

    if (Instr.modifiesRegister(AArch64::NZCV, TRI))
      break;
  }

  return !NZCVUsedAfterCmp.C && !NZCVUsedAfterCmp.V;
}

/// Substitute an instruction comparing to zero with another instruction
/// which produces needed condition flags.
///
/// Return true on success.
bool AArch64InstrInfo::substituteCmpToZero(
    MachineInstr &CmpInstr, unsigned SrcReg,
    const MachineRegisterInfo *MRI) const {
  assert(MRI);
  // Get the unique definition of SrcReg.
  MachineInstr *MI = MRI->getUniqueVRegDef(SrcReg);
  if (!MI)
    return false;

  const TargetRegisterInfo *TRI = &getRegisterInfo();

  unsigned NewOpc = sForm(*MI);
  if (NewOpc == AArch64::INSTRUCTION_LIST_END)
    return false;

  if (!canInstrSubstituteCmpInstr(MI, &CmpInstr, TRI))
    return false;

  // Update the instruction to set NZCV.
  MI->setDesc(get(NewOpc));
  CmpInstr.eraseFromParent();
  bool succeeded = UpdateOperandRegClass(*MI);
  (void)succeeded;
  assert(succeeded && "Some operands reg class are incompatible!");
  MI->addRegisterDefined(AArch64::NZCV, TRI);
  return true;
}

bool AArch64InstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
  if (MI.getOpcode() != TargetOpcode::LOAD_STACK_GUARD)
    return false;

  MachineBasicBlock &MBB = *MI.getParent();
  DebugLoc DL = MI.getDebugLoc();
  unsigned Reg = MI.getOperand(0).getReg();
  const GlobalValue *GV =
      cast<GlobalValue>((*MI.memoperands_begin())->getValue());
  const TargetMachine &TM = MBB.getParent()->getTarget();
  unsigned char OpFlags = Subtarget.ClassifyGlobalReference(GV, TM);
  const unsigned char MO_NC = AArch64II::MO_NC;

  if ((OpFlags & AArch64II::MO_GOT) != 0) {
    BuildMI(MBB, MI, DL, get(AArch64::LOADgot), Reg)
        .addGlobalAddress(GV, 0, AArch64II::MO_GOT);
    BuildMI(MBB, MI, DL, get(AArch64::LDRXui), Reg)
        .addReg(Reg, RegState::Kill)
        .addImm(0)
        .addMemOperand(*MI.memoperands_begin());
  } else if (TM.getCodeModel() == CodeModel::Large) {
    BuildMI(MBB, MI, DL, get(AArch64::MOVZXi), Reg)
        .addGlobalAddress(GV, 0, AArch64II::MO_G3)
        .addImm(48);
    BuildMI(MBB, MI, DL, get(AArch64::MOVKXi), Reg)
        .addReg(Reg, RegState::Kill)
        .addGlobalAddress(GV, 0, AArch64II::MO_G2 | MO_NC)
        .addImm(32);
    BuildMI(MBB, MI, DL, get(AArch64::MOVKXi), Reg)
        .addReg(Reg, RegState::Kill)
        .addGlobalAddress(GV, 0, AArch64II::MO_G1 | MO_NC)
        .addImm(16);
    BuildMI(MBB, MI, DL, get(AArch64::MOVKXi), Reg)
        .addReg(Reg, RegState::Kill)
        .addGlobalAddress(GV, 0, AArch64II::MO_G0 | MO_NC)
        .addImm(0);
    BuildMI(MBB, MI, DL, get(AArch64::LDRXui), Reg)
        .addReg(Reg, RegState::Kill)
        .addImm(0)
        .addMemOperand(*MI.memoperands_begin());
  } else {
    BuildMI(MBB, MI, DL, get(AArch64::ADRP), Reg)
        .addGlobalAddress(GV, 0, OpFlags | AArch64II::MO_PAGE);
    unsigned char LoFlags = OpFlags | AArch64II::MO_PAGEOFF | MO_NC;
    BuildMI(MBB, MI, DL, get(AArch64::LDRXui), Reg)
        .addReg(Reg, RegState::Kill)
        .addGlobalAddress(GV, 0, LoFlags)
        .addMemOperand(*MI.memoperands_begin());
  }

  MBB.erase(MI);

  return true;
}

/// Return true if this instruction has a shifted register operand with a
/// non-zero shift amount.
bool AArch64InstrInfo::hasShiftedReg(const MachineInstr &MI) const {
  switch (MI.getOpcode()) {
  default:
    break;
  case AArch64::ADDSWrs:
  case AArch64::ADDSXrs:
  case AArch64::ADDWrs:
  case AArch64::ADDXrs:
  case AArch64::ANDSWrs:
  case AArch64::ANDSXrs:
  case AArch64::ANDWrs:
  case AArch64::ANDXrs:
  case AArch64::BICSWrs:
  case AArch64::BICSXrs:
  case AArch64::BICWrs:
  case AArch64::BICXrs:
  case AArch64::CRC32Brr:
  case AArch64::CRC32CBrr:
  case AArch64::CRC32CHrr:
  case AArch64::CRC32CWrr:
  case AArch64::CRC32CXrr:
  case AArch64::CRC32Hrr:
  case AArch64::CRC32Wrr:
  case AArch64::CRC32Xrr:
  case AArch64::EONWrs:
  case AArch64::EONXrs:
  case AArch64::EORWrs:
  case AArch64::EORXrs:
  case AArch64::ORNWrs:
  case AArch64::ORNXrs:
  case AArch64::ORRWrs:
  case AArch64::ORRXrs:
  case AArch64::SUBSWrs:
  case AArch64::SUBSXrs:
  case AArch64::SUBWrs:
  case AArch64::SUBXrs:
    if (MI.getOperand(3).isImm()) {
      unsigned val = MI.getOperand(3).getImm();
      return (val != 0);
    }
    break;
  }
  return false;
}

/// Return true if this instruction has an extended register operand with a
/// non-zero extend/shift amount.
bool AArch64InstrInfo::hasExtendedReg(const MachineInstr &MI) const {
  switch (MI.getOpcode()) {
  default:
    break;
  case AArch64::ADDSWrx:
  case AArch64::ADDSXrx:
  case AArch64::ADDSXrx64:
  case AArch64::ADDWrx:
  case AArch64::ADDXrx:
  case AArch64::ADDXrx64:
  case AArch64::SUBSWrx:
  case AArch64::SUBSXrx:
  case AArch64::SUBSXrx64:
  case AArch64::SUBWrx:
  case AArch64::SUBXrx:
  case AArch64::SUBXrx64:
    if (MI.getOperand(3).isImm()) {
      unsigned val = MI.getOperand(3).getImm();
      return (val != 0);
    }
    break;
  }

  return false;
}

// Return true if this instruction simply sets its single destination register
// to zero. This is equivalent to a register rename of the zero-register.
bool AArch64InstrInfo::isGPRZero(const MachineInstr &MI) const {
  switch (MI.getOpcode()) {
  default:
    break;
  case AArch64::MOVZWi:
  case AArch64::MOVZXi: // movz Rd, #0 (LSL #0)
    if (MI.getOperand(1).isImm() && MI.getOperand(1).getImm() == 0) {
      assert(MI.getDesc().getNumOperands() == 3 &&
             MI.getOperand(2).getImm() == 0 && "invalid MOVZi operands");
      return true;
    }
    break;
  case AArch64::ANDWri: // and Rd, Rzr, #imm
    return MI.getOperand(1).getReg() == AArch64::WZR;
  case AArch64::ANDXri:
    return MI.getOperand(1).getReg() == AArch64::XZR;
  case TargetOpcode::COPY:
    return MI.getOperand(1).getReg() == AArch64::WZR;
  }
  return false;
}

// Return true if this instruction simply renames a general register without
// modifying bits.
bool AArch64InstrInfo::isGPRCopy(const MachineInstr &MI) const {
  switch (MI.getOpcode()) {
  default:
    break;
  case TargetOpcode::COPY: {
    // GPR32 copies will be lowered to ORRXrs
    unsigned DstReg = MI.getOperand(0).getReg();
    return (AArch64::GPR32RegClass.contains(DstReg) ||
            AArch64::GPR64RegClass.contains(DstReg));
  }
  case AArch64::ORRXrs: // orr Xd, Xzr, Xm (LSL #0)
    if (MI.getOperand(1).getReg() == AArch64::XZR) {
      assert(MI.getDesc().getNumOperands() == 4 &&
             MI.getOperand(3).getImm() == 0 && "invalid ORRrs operands");
      return true;
    }
    break;
  case AArch64::ADDXri: // add Xd, Xn, #0 (LSL #0)
    if (MI.getOperand(2).getImm() == 0) {
      assert(MI.getDesc().getNumOperands() == 4 &&
             MI.getOperand(3).getImm() == 0 && "invalid ADDXri operands");
      return true;
    }
    break;
  }
  return false;
}

// Return true if this instruction simply renames a floating-point register
// without modifying bits.
bool AArch64InstrInfo::isFPRCopy(const MachineInstr &MI) const {
  switch (MI.getOpcode()) {
  default:
    break;
  case TargetOpcode::COPY: {
    // FPR64 copies will be lowered to ORR.16b
    unsigned DstReg = MI.getOperand(0).getReg();
    return (AArch64::FPR64RegClass.contains(DstReg) ||
            AArch64::FPR128RegClass.contains(DstReg));
  }
  case AArch64::ORRv16i8:
    if (MI.getOperand(1).getReg() == MI.getOperand(2).getReg()) {
      assert(MI.getDesc().getNumOperands() == 3 && MI.getOperand(0).isReg() &&
             "invalid ORRv16i8 operands");
      return true;
    }
    break;
  }
  return false;
}

unsigned AArch64InstrInfo::isLoadFromStackSlot(const MachineInstr &MI,
                                               int &FrameIndex) const {
  switch (MI.getOpcode()) {
  default:
    break;
  case AArch64::LDRWui:
  case AArch64::LDRXui:
  case AArch64::LDRBui:
  case AArch64::LDRHui:
  case AArch64::LDRSui:
  case AArch64::LDRDui:
  case AArch64::LDRQui:
    if (MI.getOperand(0).getSubReg() == 0 && MI.getOperand(1).isFI() &&
        MI.getOperand(2).isImm() && MI.getOperand(2).getImm() == 0) {
      FrameIndex = MI.getOperand(1).getIndex();
      return MI.getOperand(0).getReg();
    }
    break;
  }

  return 0;
}

unsigned AArch64InstrInfo::isStoreToStackSlot(const MachineInstr &MI,
                                              int &FrameIndex) const {
  switch (MI.getOpcode()) {
  default:
    break;
  case AArch64::STRWui:
  case AArch64::STRXui:
  case AArch64::STRBui:
  case AArch64::STRHui:
  case AArch64::STRSui:
  case AArch64::STRDui:
  case AArch64::STRQui:
    if (MI.getOperand(0).getSubReg() == 0 && MI.getOperand(1).isFI() &&
        MI.getOperand(2).isImm() && MI.getOperand(2).getImm() == 0) {
      FrameIndex = MI.getOperand(1).getIndex();
      return MI.getOperand(0).getReg();
    }
    break;
  }
  return 0;
}

/// Return true if this load/store scales or extends its register offset.
/// This refers to scaling a dynamic index as opposed to scaled immediates.
/// MI should be a memory op that allows scaled addressing.
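/// For example, ldr x0, [x1, x2, lsl #3] (LDRXroX) is a scaled address, while
/// ldr x0, [x1, x2] is not.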
bool AArch64InstrInfo::isScaledAddr(const MachineInstr &MI) const {
  switch (MI.getOpcode()) {
  default:
    break;
  case AArch64::LDRBBroW:
  case AArch64::LDRBroW:
  case AArch64::LDRDroW:
  case AArch64::LDRHHroW:
  case AArch64::LDRHroW:
  case AArch64::LDRQroW:
  case AArch64::LDRSBWroW:
  case AArch64::LDRSBXroW:
  case AArch64::LDRSHWroW:
  case AArch64::LDRSHXroW:
  case AArch64::LDRSWroW:
  case AArch64::LDRSroW:
  case AArch64::LDRWroW:
  case AArch64::LDRXroW:
  case AArch64::STRBBroW:
  case AArch64::STRBroW:
  case AArch64::STRDroW:
  case AArch64::STRHHroW:
  case AArch64::STRHroW:
  case AArch64::STRQroW:
  case AArch64::STRSroW:
  case AArch64::STRWroW:
  case AArch64::STRXroW:
  case AArch64::LDRBBroX:
  case AArch64::LDRBroX:
  case AArch64::LDRDroX:
  case AArch64::LDRHHroX:
  case AArch64::LDRHroX:
  case AArch64::LDRQroX:
  case AArch64::LDRSBWroX:
  case AArch64::LDRSBXroX:
  case AArch64::LDRSHWroX:
  case AArch64::LDRSHXroX:
  case AArch64::LDRSWroX:
  case AArch64::LDRSroX:
  case AArch64::LDRWroX:
  case AArch64::LDRXroX:
  case AArch64::STRBBroX:
  case AArch64::STRBroX:
  case AArch64::STRDroX:
  case AArch64::STRHHroX:
  case AArch64::STRHroX:
  case AArch64::STRQroX:
  case AArch64::STRSroX:
  case AArch64::STRWroX:
  case AArch64::STRXroX:

    unsigned Val = MI.getOperand(3).getImm();
    AArch64_AM::ShiftExtendType ExtType = AArch64_AM::getMemExtendType(Val);
    return (ExtType != AArch64_AM::UXTX) || AArch64_AM::getMemDoShift(Val);
  }
  return false;
}

/// Check all MachineMemOperands for a hint to suppress pairing.
bool AArch64InstrInfo::isLdStPairSuppressed(const MachineInstr &MI) const {
  return llvm::any_of(MI.memoperands(), [](MachineMemOperand *MMO) {
    return MMO->getFlags() & MOSuppressPair;
  });
}

/// Set a flag on the first MachineMemOperand to suppress pairing.
void AArch64InstrInfo::suppressLdStPair(MachineInstr &MI) const {
  if (MI.memoperands_empty())
    return;
  (*MI.memoperands_begin())->setFlags(MOSuppressPair);
}

bool AArch64InstrInfo::isUnscaledLdSt(unsigned Opc) const {
  switch (Opc) {
  default:
    return false;
  case AArch64::STURSi:
  case AArch64::STURDi:
  case AArch64::STURQi:
  case AArch64::STURBBi:
  case AArch64::STURHHi:
  case AArch64::STURWi:
  case AArch64::STURXi:
  case AArch64::LDURSi:
  case AArch64::LDURDi:
  case AArch64::LDURQi:
  case AArch64::LDURWi:
  case AArch64::LDURXi:
  case AArch64::LDURSWi:
  case AArch64::LDURHHi:
  case AArch64::LDURBBi:
  case AArch64::LDURSBWi:
  case AArch64::LDURSHWi:
    return true;
  }
}

bool AArch64InstrInfo::isUnscaledLdSt(MachineInstr &MI) const {
  return isUnscaledLdSt(MI.getOpcode());
}

// Is this a candidate for ld/st merging or pairing? For example, we don't
// touch volatiles or load/stores that have a hint to avoid pair formation.
bool AArch64InstrInfo::isCandidateToMergeOrPair(MachineInstr &MI) const {
  // If this is a volatile load/store, don't mess with it.
  if (MI.hasOrderedMemoryRef())
    return false;

  // Make sure this is a reg+imm (as opposed to an address reloc).
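  // For example, ldr x1, [x0, #8] qualifies, while ldr x1, [x0, :lo12:sym]
  // does not (operand 2 is not an immediate).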
  assert(MI.getOperand(1).isReg() && "Expected a reg operand.");
  if (!MI.getOperand(2).isImm())
    return false;

  // Can't merge/pair if the instruction modifies the base register.
  // e.g., ldr x0, [x0]
  unsigned BaseReg = MI.getOperand(1).getReg();
  const TargetRegisterInfo *TRI = &getRegisterInfo();
  if (MI.modifiesRegister(BaseReg, TRI))
    return false;

  // Check if this load/store has a hint to avoid pair formation.
  // MachineMemOperands hints are set by the AArch64StorePairSuppress pass.
  if (isLdStPairSuppressed(MI))
    return false;

  // On some CPUs quad load/store pairs are slower than two single load/stores.
  if (Subtarget.avoidQuadLdStPairs()) {
    switch (MI.getOpcode()) {
    default:
      break;
    case AArch64::LDURQi:
    case AArch64::STURQi:
    case AArch64::LDRQui:
    case AArch64::STRQui:
      return false;
    }
  }

  return true;
}

bool AArch64InstrInfo::getMemOpBaseRegImmOfs(
    MachineInstr &LdSt, unsigned &BaseReg, int64_t &Offset,
    const TargetRegisterInfo *TRI) const {
  unsigned Width;
  return getMemOpBaseRegImmOfsWidth(LdSt, BaseReg, Offset, Width, TRI);
}

bool AArch64InstrInfo::getMemOpBaseRegImmOfsWidth(
    MachineInstr &LdSt, unsigned &BaseReg, int64_t &Offset, unsigned &Width,
    const TargetRegisterInfo *TRI) const {
  assert(LdSt.mayLoadOrStore() && "Expected a memory operation.");
  // Handle only loads/stores with base register followed by immediate offset.
  if (LdSt.getNumExplicitOperands() == 3) {
    // Non-paired instruction (e.g., ldr x1, [x0, #8]).
    if (!LdSt.getOperand(1).isReg() || !LdSt.getOperand(2).isImm())
      return false;
  } else if (LdSt.getNumExplicitOperands() == 4) {
    // Paired instruction (e.g., ldp x1, x2, [x0, #8]).
    if (!LdSt.getOperand(1).isReg() || !LdSt.getOperand(2).isReg() ||
        !LdSt.getOperand(3).isImm())
      return false;
  } else
    return false;

  // Offset is calculated as the immediate operand multiplied by the scaling
  // factor. Unscaled instructions have scaling factor set to 1.
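  // For example, ldp x1, x2, [x0, #16] (LDPXi) has Scale = 8 and an immediate
  // operand of 2, giving Offset = 16 with Width = 16.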
  unsigned Scale = 0;
  switch (LdSt.getOpcode()) {
  default:
    return false;
  case AArch64::LDURQi:
  case AArch64::STURQi:
    Width = 16;
    Scale = 1;
    break;
  case AArch64::LDURXi:
  case AArch64::LDURDi:
  case AArch64::STURXi:
  case AArch64::STURDi:
    Width = 8;
    Scale = 1;
    break;
  case AArch64::LDURWi:
  case AArch64::LDURSi:
  case AArch64::LDURSWi:
  case AArch64::STURWi:
  case AArch64::STURSi:
    Width = 4;
    Scale = 1;
    break;
  case AArch64::LDURHi:
  case AArch64::LDURHHi:
  case AArch64::LDURSHXi:
  case AArch64::LDURSHWi:
  case AArch64::STURHi:
  case AArch64::STURHHi:
    Width = 2;
    Scale = 1;
    break;
  case AArch64::LDURBi:
  case AArch64::LDURBBi:
  case AArch64::LDURSBXi:
  case AArch64::LDURSBWi:
  case AArch64::STURBi:
  case AArch64::STURBBi:
    Width = 1;
    Scale = 1;
    break;
  case AArch64::LDPQi:
  case AArch64::LDNPQi:
  case AArch64::STPQi:
  case AArch64::STNPQi:
    Scale = 16;
    Width = 32;
    break;
  case AArch64::LDRQui:
  case AArch64::STRQui:
    Scale = Width = 16;
    break;
  case AArch64::LDPXi:
  case AArch64::LDPDi:
  case AArch64::LDNPXi:
  case AArch64::LDNPDi:
  case AArch64::STPXi:
  case AArch64::STPDi:
  case AArch64::STNPXi:
  case AArch64::STNPDi:
    Scale = 8;
    Width = 16;
    break;
  case AArch64::LDRXui:
  case AArch64::LDRDui:
  case AArch64::STRXui:
  case AArch64::STRDui:
    Scale = Width = 8;
    break;
  case AArch64::LDPWi:
  case AArch64::LDPSi:
  case AArch64::LDNPWi:
  case AArch64::LDNPSi:
  case AArch64::STPWi:
  case AArch64::STPSi:
  case AArch64::STNPWi:
  case AArch64::STNPSi:
    Scale = 4;
    Width = 8;
    break;
  case AArch64::LDRWui:
  case AArch64::LDRSui:
  case AArch64::LDRSWui:
  case AArch64::STRWui:
  case AArch64::STRSui:
    Scale = Width = 4;
    break;
  case AArch64::LDRHui:
  case AArch64::LDRHHui:
  case AArch64::STRHui:
  case AArch64::STRHHui:
    Scale = Width = 2;
    break;
  case AArch64::LDRBui:
  case AArch64::LDRBBui:
  case AArch64::STRBui:
  case AArch64::STRBBui:
    Scale = Width = 1;
    break;
  }

  if (LdSt.getNumExplicitOperands() == 3) {
    BaseReg = LdSt.getOperand(1).getReg();
    Offset = LdSt.getOperand(2).getImm() * Scale;
  } else {
    assert(LdSt.getNumExplicitOperands() == 4 && "invalid number of operands");
    BaseReg = LdSt.getOperand(2).getReg();
    Offset = LdSt.getOperand(3).getImm() * Scale;
  }
  return true;
}

// Scale the unscaled offsets. Returns false if the unscaled offset can't be
// scaled.
static bool scaleOffset(unsigned Opc, int64_t &Offset) {
  unsigned OffsetStride = 1;
  switch (Opc) {
  default:
    return false;
  case AArch64::LDURQi:
  case AArch64::STURQi:
    OffsetStride = 16;
    break;
  case AArch64::LDURXi:
  case AArch64::LDURDi:
  case AArch64::STURXi:
  case AArch64::STURDi:
    OffsetStride = 8;
    break;
  case AArch64::LDURWi:
  case AArch64::LDURSi:
  case AArch64::LDURSWi:
  case AArch64::STURWi:
  case AArch64::STURSi:
    OffsetStride = 4;
    break;
  }
  // If the byte-offset isn't a multiple of the stride, we can't scale this
  // offset.
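  // For example, an LDURXi byte offset of 24 scales to an element offset of 3,
  // while a byte offset of 20 cannot be scaled (it is not a multiple of 8).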
1836 if (Offset % OffsetStride != 0) 1837 return false; 1838 1839 // Convert the byte-offset used by unscaled into an "element" offset used 1840 // by the scaled pair load/store instructions. 1841 Offset /= OffsetStride; 1842 return true; 1843 } 1844 1845 static bool canPairLdStOpc(unsigned FirstOpc, unsigned SecondOpc) { 1846 if (FirstOpc == SecondOpc) 1847 return true; 1848 // We can also pair sign-ext and zero-ext instructions. 1849 switch (FirstOpc) { 1850 default: 1851 return false; 1852 case AArch64::LDRWui: 1853 case AArch64::LDURWi: 1854 return SecondOpc == AArch64::LDRSWui || SecondOpc == AArch64::LDURSWi; 1855 case AArch64::LDRSWui: 1856 case AArch64::LDURSWi: 1857 return SecondOpc == AArch64::LDRWui || SecondOpc == AArch64::LDURWi; 1858 } 1859 // These instructions can't be paired based on their opcodes. 1860 return false; 1861 } 1862 1863 /// Detect opportunities for ldp/stp formation. 1864 /// 1865 /// Only called for LdSt for which getMemOpBaseRegImmOfs returns true. 1866 bool AArch64InstrInfo::shouldClusterMemOps(MachineInstr &FirstLdSt, 1867 MachineInstr &SecondLdSt, 1868 unsigned NumLoads) const { 1869 // Only cluster up to a single pair. 1870 if (NumLoads > 1) 1871 return false; 1872 1873 if (!isPairableLdStInst(FirstLdSt) || !isPairableLdStInst(SecondLdSt)) 1874 return false; 1875 1876 // Can we pair these instructions based on their opcodes? 1877 unsigned FirstOpc = FirstLdSt.getOpcode(); 1878 unsigned SecondOpc = SecondLdSt.getOpcode(); 1879 if (!canPairLdStOpc(FirstOpc, SecondOpc)) 1880 return false; 1881 1882 // Can't merge volatiles or load/stores that have a hint to avoid pair 1883 // formation, for example. 1884 if (!isCandidateToMergeOrPair(FirstLdSt) || 1885 !isCandidateToMergeOrPair(SecondLdSt)) 1886 return false; 1887 1888 // isCandidateToMergeOrPair guarantees that operand 2 is an immediate. 1889 int64_t Offset1 = FirstLdSt.getOperand(2).getImm(); 1890 if (isUnscaledLdSt(FirstOpc) && !scaleOffset(FirstOpc, Offset1)) 1891 return false; 1892 1893 int64_t Offset2 = SecondLdSt.getOperand(2).getImm(); 1894 if (isUnscaledLdSt(SecondOpc) && !scaleOffset(SecondOpc, Offset2)) 1895 return false; 1896 1897 // Pairwise instructions have a 7-bit signed offset field. 1898 if (Offset1 > 63 || Offset1 < -64) 1899 return false; 1900 1901 // The caller should already have ordered First/SecondLdSt by offset. 1902 assert(Offset1 <= Offset2 && "Caller should have ordered offsets."); 1903 return Offset1 + 1 == Offset2; 1904 } 1905 1906 bool AArch64InstrInfo::shouldScheduleAdjacent( 1907 const MachineInstr &First, const MachineInstr &Second) const { 1908 if (Subtarget.hasArithmeticBccFusion()) { 1909 // Fuse CMN, CMP, TST followed by Bcc. 
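    // For example, "subs wzr, w0, w1" (the expansion of "cmp w0, w1")
    // immediately followed by "b.eq <target>" is kept adjacent here so the
    // two instructions can macro-fuse.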
1910 unsigned SecondOpcode = Second.getOpcode(); 1911 if (SecondOpcode == AArch64::Bcc) { 1912 switch (First.getOpcode()) { 1913 default: 1914 return false; 1915 case AArch64::ADDSWri: 1916 case AArch64::ADDSWrr: 1917 case AArch64::ADDSXri: 1918 case AArch64::ADDSXrr: 1919 case AArch64::ANDSWri: 1920 case AArch64::ANDSWrr: 1921 case AArch64::ANDSXri: 1922 case AArch64::ANDSXrr: 1923 case AArch64::SUBSWri: 1924 case AArch64::SUBSWrr: 1925 case AArch64::SUBSXri: 1926 case AArch64::SUBSXrr: 1927 case AArch64::BICSWrr: 1928 case AArch64::BICSXrr: 1929 return true; 1930 case AArch64::ADDSWrs: 1931 case AArch64::ADDSXrs: 1932 case AArch64::ANDSWrs: 1933 case AArch64::ANDSXrs: 1934 case AArch64::SUBSWrs: 1935 case AArch64::SUBSXrs: 1936 case AArch64::BICSWrs: 1937 case AArch64::BICSXrs: 1938 // Shift value can be 0 making these behave like the "rr" variant... 1939 return !hasShiftedReg(Second); 1940 } 1941 } 1942 } 1943 if (Subtarget.hasArithmeticCbzFusion()) { 1944 // Fuse ALU operations followed by CBZ/CBNZ. 1945 unsigned SecondOpcode = Second.getOpcode(); 1946 if (SecondOpcode == AArch64::CBNZW || SecondOpcode == AArch64::CBNZX || 1947 SecondOpcode == AArch64::CBZW || SecondOpcode == AArch64::CBZX) { 1948 switch (First.getOpcode()) { 1949 default: 1950 return false; 1951 case AArch64::ADDWri: 1952 case AArch64::ADDWrr: 1953 case AArch64::ADDXri: 1954 case AArch64::ADDXrr: 1955 case AArch64::ANDWri: 1956 case AArch64::ANDWrr: 1957 case AArch64::ANDXri: 1958 case AArch64::ANDXrr: 1959 case AArch64::EORWri: 1960 case AArch64::EORWrr: 1961 case AArch64::EORXri: 1962 case AArch64::EORXrr: 1963 case AArch64::ORRWri: 1964 case AArch64::ORRWrr: 1965 case AArch64::ORRXri: 1966 case AArch64::ORRXrr: 1967 case AArch64::SUBWri: 1968 case AArch64::SUBWrr: 1969 case AArch64::SUBXri: 1970 case AArch64::SUBXrr: 1971 return true; 1972 case AArch64::ADDWrs: 1973 case AArch64::ADDXrs: 1974 case AArch64::ANDWrs: 1975 case AArch64::ANDXrs: 1976 case AArch64::SUBWrs: 1977 case AArch64::SUBXrs: 1978 case AArch64::BICWrs: 1979 case AArch64::BICXrs: 1980 // Shift value can be 0 making these behave like the "rr" variant... 1981 return !hasShiftedReg(Second); 1982 } 1983 } 1984 } 1985 return false; 1986 } 1987 1988 MachineInstr *AArch64InstrInfo::emitFrameIndexDebugValue( 1989 MachineFunction &MF, int FrameIx, uint64_t Offset, const MDNode *Var, 1990 const MDNode *Expr, const DebugLoc &DL) const { 1991 MachineInstrBuilder MIB = BuildMI(MF, DL, get(AArch64::DBG_VALUE)) 1992 .addFrameIndex(FrameIx) 1993 .addImm(0) 1994 .addImm(Offset) 1995 .addMetadata(Var) 1996 .addMetadata(Expr); 1997 return &*MIB; 1998 } 1999 2000 static const MachineInstrBuilder &AddSubReg(const MachineInstrBuilder &MIB, 2001 unsigned Reg, unsigned SubIdx, 2002 unsigned State, 2003 const TargetRegisterInfo *TRI) { 2004 if (!SubIdx) 2005 return MIB.addReg(Reg, State); 2006 2007 if (TargetRegisterInfo::isPhysicalRegister(Reg)) 2008 return MIB.addReg(TRI->getSubReg(Reg, SubIdx), State); 2009 return MIB.addReg(Reg, State, SubIdx); 2010 } 2011 2012 static bool forwardCopyWillClobberTuple(unsigned DestReg, unsigned SrcReg, 2013 unsigned NumRegs) { 2014 // We really want the positive remainder mod 32 here, that happens to be 2015 // easily obtainable with a mask. 
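  // For example, a D1_D2_D3 -> D2_D3_D4 tuple copy has ((2 - 1) & 0x1f) == 1,
  // which is < NumRegs == 3: copying sub-registers forward would overwrite D2
  // and D3 before they are read, so the caller copies backwards instead. The
  // mask also handles tuples that wrap around past register 31.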
2016 return ((DestReg - SrcReg) & 0x1f) < NumRegs; 2017 } 2018 2019 void AArch64InstrInfo::copyPhysRegTuple( 2020 MachineBasicBlock &MBB, MachineBasicBlock::iterator I, const DebugLoc &DL, 2021 unsigned DestReg, unsigned SrcReg, bool KillSrc, unsigned Opcode, 2022 ArrayRef<unsigned> Indices) const { 2023 assert(Subtarget.hasNEON() && 2024 "Unexpected register copy without NEON"); 2025 const TargetRegisterInfo *TRI = &getRegisterInfo(); 2026 uint16_t DestEncoding = TRI->getEncodingValue(DestReg); 2027 uint16_t SrcEncoding = TRI->getEncodingValue(SrcReg); 2028 unsigned NumRegs = Indices.size(); 2029 2030 int SubReg = 0, End = NumRegs, Incr = 1; 2031 if (forwardCopyWillClobberTuple(DestEncoding, SrcEncoding, NumRegs)) { 2032 SubReg = NumRegs - 1; 2033 End = -1; 2034 Incr = -1; 2035 } 2036 2037 for (; SubReg != End; SubReg += Incr) { 2038 const MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(Opcode)); 2039 AddSubReg(MIB, DestReg, Indices[SubReg], RegState::Define, TRI); 2040 AddSubReg(MIB, SrcReg, Indices[SubReg], 0, TRI); 2041 AddSubReg(MIB, SrcReg, Indices[SubReg], getKillRegState(KillSrc), TRI); 2042 } 2043 } 2044 2045 void AArch64InstrInfo::copyPhysReg(MachineBasicBlock &MBB, 2046 MachineBasicBlock::iterator I, 2047 const DebugLoc &DL, unsigned DestReg, 2048 unsigned SrcReg, bool KillSrc) const { 2049 if (AArch64::GPR32spRegClass.contains(DestReg) && 2050 (AArch64::GPR32spRegClass.contains(SrcReg) || SrcReg == AArch64::WZR)) { 2051 const TargetRegisterInfo *TRI = &getRegisterInfo(); 2052 2053 if (DestReg == AArch64::WSP || SrcReg == AArch64::WSP) { 2054 // If either operand is WSP, expand to ADD #0. 2055 if (Subtarget.hasZeroCycleRegMove()) { 2056 // Cyclone recognizes "ADD Xd, Xn, #0" as a zero-cycle register move. 2057 unsigned DestRegX = TRI->getMatchingSuperReg(DestReg, AArch64::sub_32, 2058 &AArch64::GPR64spRegClass); 2059 unsigned SrcRegX = TRI->getMatchingSuperReg(SrcReg, AArch64::sub_32, 2060 &AArch64::GPR64spRegClass); 2061 // This instruction is reading and writing X registers. This may upset 2062 // the register scavenger and machine verifier, so we need to indicate 2063 // that we are reading an undefined value from SrcRegX, but a proper 2064 // value from SrcReg. 2065 BuildMI(MBB, I, DL, get(AArch64::ADDXri), DestRegX) 2066 .addReg(SrcRegX, RegState::Undef) 2067 .addImm(0) 2068 .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0)) 2069 .addReg(SrcReg, RegState::Implicit | getKillRegState(KillSrc)); 2070 } else { 2071 BuildMI(MBB, I, DL, get(AArch64::ADDWri), DestReg) 2072 .addReg(SrcReg, getKillRegState(KillSrc)) 2073 .addImm(0) 2074 .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0)); 2075 } 2076 } else if (SrcReg == AArch64::WZR && Subtarget.hasZeroCycleZeroing()) { 2077 BuildMI(MBB, I, DL, get(AArch64::MOVZWi), DestReg).addImm(0).addImm( 2078 AArch64_AM::getShifterImm(AArch64_AM::LSL, 0)); 2079 } else { 2080 if (Subtarget.hasZeroCycleRegMove()) { 2081 // Cyclone recognizes "ORR Xd, XZR, Xm" as a zero-cycle register move. 2082 unsigned DestRegX = TRI->getMatchingSuperReg(DestReg, AArch64::sub_32, 2083 &AArch64::GPR64spRegClass); 2084 unsigned SrcRegX = TRI->getMatchingSuperReg(SrcReg, AArch64::sub_32, 2085 &AArch64::GPR64spRegClass); 2086 // This instruction is reading and writing X registers. This may upset 2087 // the register scavenger and machine verifier, so we need to indicate 2088 // that we are reading an undefined value from SrcRegX, but a proper 2089 // value from SrcReg. 
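        // For example, "mov w1, w0" is emitted here as
        // "orr x1, xzr, x0" plus an implicit read of w0.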
2090 BuildMI(MBB, I, DL, get(AArch64::ORRXrr), DestRegX) 2091 .addReg(AArch64::XZR) 2092 .addReg(SrcRegX, RegState::Undef) 2093 .addReg(SrcReg, RegState::Implicit | getKillRegState(KillSrc)); 2094 } else { 2095 // Otherwise, expand to ORR WZR. 2096 BuildMI(MBB, I, DL, get(AArch64::ORRWrr), DestReg) 2097 .addReg(AArch64::WZR) 2098 .addReg(SrcReg, getKillRegState(KillSrc)); 2099 } 2100 } 2101 return; 2102 } 2103 2104 if (AArch64::GPR64spRegClass.contains(DestReg) && 2105 (AArch64::GPR64spRegClass.contains(SrcReg) || SrcReg == AArch64::XZR)) { 2106 if (DestReg == AArch64::SP || SrcReg == AArch64::SP) { 2107 // If either operand is SP, expand to ADD #0. 2108 BuildMI(MBB, I, DL, get(AArch64::ADDXri), DestReg) 2109 .addReg(SrcReg, getKillRegState(KillSrc)) 2110 .addImm(0) 2111 .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0)); 2112 } else if (SrcReg == AArch64::XZR && Subtarget.hasZeroCycleZeroing()) { 2113 BuildMI(MBB, I, DL, get(AArch64::MOVZXi), DestReg).addImm(0).addImm( 2114 AArch64_AM::getShifterImm(AArch64_AM::LSL, 0)); 2115 } else { 2116 // Otherwise, expand to ORR XZR. 2117 BuildMI(MBB, I, DL, get(AArch64::ORRXrr), DestReg) 2118 .addReg(AArch64::XZR) 2119 .addReg(SrcReg, getKillRegState(KillSrc)); 2120 } 2121 return; 2122 } 2123 2124 // Copy a DDDD register quad by copying the individual sub-registers. 2125 if (AArch64::DDDDRegClass.contains(DestReg) && 2126 AArch64::DDDDRegClass.contains(SrcReg)) { 2127 static const unsigned Indices[] = { AArch64::dsub0, AArch64::dsub1, 2128 AArch64::dsub2, AArch64::dsub3 }; 2129 copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv8i8, 2130 Indices); 2131 return; 2132 } 2133 2134 // Copy a DDD register triple by copying the individual sub-registers. 2135 if (AArch64::DDDRegClass.contains(DestReg) && 2136 AArch64::DDDRegClass.contains(SrcReg)) { 2137 static const unsigned Indices[] = { AArch64::dsub0, AArch64::dsub1, 2138 AArch64::dsub2 }; 2139 copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv8i8, 2140 Indices); 2141 return; 2142 } 2143 2144 // Copy a DD register pair by copying the individual sub-registers. 2145 if (AArch64::DDRegClass.contains(DestReg) && 2146 AArch64::DDRegClass.contains(SrcReg)) { 2147 static const unsigned Indices[] = { AArch64::dsub0, AArch64::dsub1 }; 2148 copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv8i8, 2149 Indices); 2150 return; 2151 } 2152 2153 // Copy a QQQQ register quad by copying the individual sub-registers. 2154 if (AArch64::QQQQRegClass.contains(DestReg) && 2155 AArch64::QQQQRegClass.contains(SrcReg)) { 2156 static const unsigned Indices[] = { AArch64::qsub0, AArch64::qsub1, 2157 AArch64::qsub2, AArch64::qsub3 }; 2158 copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv16i8, 2159 Indices); 2160 return; 2161 } 2162 2163 // Copy a QQQ register triple by copying the individual sub-registers. 2164 if (AArch64::QQQRegClass.contains(DestReg) && 2165 AArch64::QQQRegClass.contains(SrcReg)) { 2166 static const unsigned Indices[] = { AArch64::qsub0, AArch64::qsub1, 2167 AArch64::qsub2 }; 2168 copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv16i8, 2169 Indices); 2170 return; 2171 } 2172 2173 // Copy a QQ register pair by copying the individual sub-registers. 
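  // (For example, a Q0_Q1 -> Q2_Q3 copy becomes "orr v2.16b, v0.16b, v0.16b"
  // followed by "orr v3.16b, v1.16b, v1.16b".)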
2174 if (AArch64::QQRegClass.contains(DestReg) && 2175 AArch64::QQRegClass.contains(SrcReg)) { 2176 static const unsigned Indices[] = { AArch64::qsub0, AArch64::qsub1 }; 2177 copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv16i8, 2178 Indices); 2179 return; 2180 } 2181 2182 if (AArch64::FPR128RegClass.contains(DestReg) && 2183 AArch64::FPR128RegClass.contains(SrcReg)) { 2184 if(Subtarget.hasNEON()) { 2185 BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestReg) 2186 .addReg(SrcReg) 2187 .addReg(SrcReg, getKillRegState(KillSrc)); 2188 } else { 2189 BuildMI(MBB, I, DL, get(AArch64::STRQpre)) 2190 .addReg(AArch64::SP, RegState::Define) 2191 .addReg(SrcReg, getKillRegState(KillSrc)) 2192 .addReg(AArch64::SP) 2193 .addImm(-16); 2194 BuildMI(MBB, I, DL, get(AArch64::LDRQpre)) 2195 .addReg(AArch64::SP, RegState::Define) 2196 .addReg(DestReg, RegState::Define) 2197 .addReg(AArch64::SP) 2198 .addImm(16); 2199 } 2200 return; 2201 } 2202 2203 if (AArch64::FPR64RegClass.contains(DestReg) && 2204 AArch64::FPR64RegClass.contains(SrcReg)) { 2205 if(Subtarget.hasNEON()) { 2206 DestReg = RI.getMatchingSuperReg(DestReg, AArch64::dsub, 2207 &AArch64::FPR128RegClass); 2208 SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::dsub, 2209 &AArch64::FPR128RegClass); 2210 BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestReg) 2211 .addReg(SrcReg) 2212 .addReg(SrcReg, getKillRegState(KillSrc)); 2213 } else { 2214 BuildMI(MBB, I, DL, get(AArch64::FMOVDr), DestReg) 2215 .addReg(SrcReg, getKillRegState(KillSrc)); 2216 } 2217 return; 2218 } 2219 2220 if (AArch64::FPR32RegClass.contains(DestReg) && 2221 AArch64::FPR32RegClass.contains(SrcReg)) { 2222 if(Subtarget.hasNEON()) { 2223 DestReg = RI.getMatchingSuperReg(DestReg, AArch64::ssub, 2224 &AArch64::FPR128RegClass); 2225 SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::ssub, 2226 &AArch64::FPR128RegClass); 2227 BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestReg) 2228 .addReg(SrcReg) 2229 .addReg(SrcReg, getKillRegState(KillSrc)); 2230 } else { 2231 BuildMI(MBB, I, DL, get(AArch64::FMOVSr), DestReg) 2232 .addReg(SrcReg, getKillRegState(KillSrc)); 2233 } 2234 return; 2235 } 2236 2237 if (AArch64::FPR16RegClass.contains(DestReg) && 2238 AArch64::FPR16RegClass.contains(SrcReg)) { 2239 if(Subtarget.hasNEON()) { 2240 DestReg = RI.getMatchingSuperReg(DestReg, AArch64::hsub, 2241 &AArch64::FPR128RegClass); 2242 SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::hsub, 2243 &AArch64::FPR128RegClass); 2244 BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestReg) 2245 .addReg(SrcReg) 2246 .addReg(SrcReg, getKillRegState(KillSrc)); 2247 } else { 2248 DestReg = RI.getMatchingSuperReg(DestReg, AArch64::hsub, 2249 &AArch64::FPR32RegClass); 2250 SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::hsub, 2251 &AArch64::FPR32RegClass); 2252 BuildMI(MBB, I, DL, get(AArch64::FMOVSr), DestReg) 2253 .addReg(SrcReg, getKillRegState(KillSrc)); 2254 } 2255 return; 2256 } 2257 2258 if (AArch64::FPR8RegClass.contains(DestReg) && 2259 AArch64::FPR8RegClass.contains(SrcReg)) { 2260 if(Subtarget.hasNEON()) { 2261 DestReg = RI.getMatchingSuperReg(DestReg, AArch64::bsub, 2262 &AArch64::FPR128RegClass); 2263 SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::bsub, 2264 &AArch64::FPR128RegClass); 2265 BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestReg) 2266 .addReg(SrcReg) 2267 .addReg(SrcReg, getKillRegState(KillSrc)); 2268 } else { 2269 DestReg = RI.getMatchingSuperReg(DestReg, AArch64::bsub, 2270 &AArch64::FPR32RegClass); 2271 SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::bsub, 2272 
&AArch64::FPR32RegClass); 2273 BuildMI(MBB, I, DL, get(AArch64::FMOVSr), DestReg) 2274 .addReg(SrcReg, getKillRegState(KillSrc)); 2275 } 2276 return; 2277 } 2278 2279 // Copies between GPR64 and FPR64. 2280 if (AArch64::FPR64RegClass.contains(DestReg) && 2281 AArch64::GPR64RegClass.contains(SrcReg)) { 2282 BuildMI(MBB, I, DL, get(AArch64::FMOVXDr), DestReg) 2283 .addReg(SrcReg, getKillRegState(KillSrc)); 2284 return; 2285 } 2286 if (AArch64::GPR64RegClass.contains(DestReg) && 2287 AArch64::FPR64RegClass.contains(SrcReg)) { 2288 BuildMI(MBB, I, DL, get(AArch64::FMOVDXr), DestReg) 2289 .addReg(SrcReg, getKillRegState(KillSrc)); 2290 return; 2291 } 2292 // Copies between GPR32 and FPR32. 2293 if (AArch64::FPR32RegClass.contains(DestReg) && 2294 AArch64::GPR32RegClass.contains(SrcReg)) { 2295 BuildMI(MBB, I, DL, get(AArch64::FMOVWSr), DestReg) 2296 .addReg(SrcReg, getKillRegState(KillSrc)); 2297 return; 2298 } 2299 if (AArch64::GPR32RegClass.contains(DestReg) && 2300 AArch64::FPR32RegClass.contains(SrcReg)) { 2301 BuildMI(MBB, I, DL, get(AArch64::FMOVSWr), DestReg) 2302 .addReg(SrcReg, getKillRegState(KillSrc)); 2303 return; 2304 } 2305 2306 if (DestReg == AArch64::NZCV) { 2307 assert(AArch64::GPR64RegClass.contains(SrcReg) && "Invalid NZCV copy"); 2308 BuildMI(MBB, I, DL, get(AArch64::MSR)) 2309 .addImm(AArch64SysReg::NZCV) 2310 .addReg(SrcReg, getKillRegState(KillSrc)) 2311 .addReg(AArch64::NZCV, RegState::Implicit | RegState::Define); 2312 return; 2313 } 2314 2315 if (SrcReg == AArch64::NZCV) { 2316 assert(AArch64::GPR64RegClass.contains(DestReg) && "Invalid NZCV copy"); 2317 BuildMI(MBB, I, DL, get(AArch64::MRS), DestReg) 2318 .addImm(AArch64SysReg::NZCV) 2319 .addReg(AArch64::NZCV, RegState::Implicit | getKillRegState(KillSrc)); 2320 return; 2321 } 2322 2323 llvm_unreachable("unimplemented reg-to-reg copy"); 2324 } 2325 2326 void AArch64InstrInfo::storeRegToStackSlot( 2327 MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, unsigned SrcReg, 2328 bool isKill, int FI, const TargetRegisterClass *RC, 2329 const TargetRegisterInfo *TRI) const { 2330 DebugLoc DL; 2331 if (MBBI != MBB.end()) 2332 DL = MBBI->getDebugLoc(); 2333 MachineFunction &MF = *MBB.getParent(); 2334 MachineFrameInfo &MFI = MF.getFrameInfo(); 2335 unsigned Align = MFI.getObjectAlignment(FI); 2336 2337 MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(MF, FI); 2338 MachineMemOperand *MMO = MF.getMachineMemOperand( 2339 PtrInfo, MachineMemOperand::MOStore, MFI.getObjectSize(FI), Align); 2340 unsigned Opc = 0; 2341 bool Offset = true; 2342 switch (RC->getSize()) { 2343 case 1: 2344 if (AArch64::FPR8RegClass.hasSubClassEq(RC)) 2345 Opc = AArch64::STRBui; 2346 break; 2347 case 2: 2348 if (AArch64::FPR16RegClass.hasSubClassEq(RC)) 2349 Opc = AArch64::STRHui; 2350 break; 2351 case 4: 2352 if (AArch64::GPR32allRegClass.hasSubClassEq(RC)) { 2353 Opc = AArch64::STRWui; 2354 if (TargetRegisterInfo::isVirtualRegister(SrcReg)) 2355 MF.getRegInfo().constrainRegClass(SrcReg, &AArch64::GPR32RegClass); 2356 else 2357 assert(SrcReg != AArch64::WSP); 2358 } else if (AArch64::FPR32RegClass.hasSubClassEq(RC)) 2359 Opc = AArch64::STRSui; 2360 break; 2361 case 8: 2362 if (AArch64::GPR64allRegClass.hasSubClassEq(RC)) { 2363 Opc = AArch64::STRXui; 2364 if (TargetRegisterInfo::isVirtualRegister(SrcReg)) 2365 MF.getRegInfo().constrainRegClass(SrcReg, &AArch64::GPR64RegClass); 2366 else 2367 assert(SrcReg != AArch64::SP); 2368 } else if (AArch64::FPR64RegClass.hasSubClassEq(RC)) 2369 Opc = AArch64::STRDui; 2370 break; 2371 case 16: 
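    // A 16-byte spill is either a single Q register (STRQui) or a DD tuple,
    // which has no immediate-offset store form and is stored with ST1 instead.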
2372 if (AArch64::FPR128RegClass.hasSubClassEq(RC)) 2373 Opc = AArch64::STRQui; 2374 else if (AArch64::DDRegClass.hasSubClassEq(RC)) { 2375 assert(Subtarget.hasNEON() && 2376 "Unexpected register store without NEON"); 2377 Opc = AArch64::ST1Twov1d; 2378 Offset = false; 2379 } 2380 break; 2381 case 24: 2382 if (AArch64::DDDRegClass.hasSubClassEq(RC)) { 2383 assert(Subtarget.hasNEON() && 2384 "Unexpected register store without NEON"); 2385 Opc = AArch64::ST1Threev1d; 2386 Offset = false; 2387 } 2388 break; 2389 case 32: 2390 if (AArch64::DDDDRegClass.hasSubClassEq(RC)) { 2391 assert(Subtarget.hasNEON() && 2392 "Unexpected register store without NEON"); 2393 Opc = AArch64::ST1Fourv1d; 2394 Offset = false; 2395 } else if (AArch64::QQRegClass.hasSubClassEq(RC)) { 2396 assert(Subtarget.hasNEON() && 2397 "Unexpected register store without NEON"); 2398 Opc = AArch64::ST1Twov2d; 2399 Offset = false; 2400 } 2401 break; 2402 case 48: 2403 if (AArch64::QQQRegClass.hasSubClassEq(RC)) { 2404 assert(Subtarget.hasNEON() && 2405 "Unexpected register store without NEON"); 2406 Opc = AArch64::ST1Threev2d; 2407 Offset = false; 2408 } 2409 break; 2410 case 64: 2411 if (AArch64::QQQQRegClass.hasSubClassEq(RC)) { 2412 assert(Subtarget.hasNEON() && 2413 "Unexpected register store without NEON"); 2414 Opc = AArch64::ST1Fourv2d; 2415 Offset = false; 2416 } 2417 break; 2418 } 2419 assert(Opc && "Unknown register class"); 2420 2421 const MachineInstrBuilder MI = BuildMI(MBB, MBBI, DL, get(Opc)) 2422 .addReg(SrcReg, getKillRegState(isKill)) 2423 .addFrameIndex(FI); 2424 2425 if (Offset) 2426 MI.addImm(0); 2427 MI.addMemOperand(MMO); 2428 } 2429 2430 void AArch64InstrInfo::loadRegFromStackSlot( 2431 MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, unsigned DestReg, 2432 int FI, const TargetRegisterClass *RC, 2433 const TargetRegisterInfo *TRI) const { 2434 DebugLoc DL; 2435 if (MBBI != MBB.end()) 2436 DL = MBBI->getDebugLoc(); 2437 MachineFunction &MF = *MBB.getParent(); 2438 MachineFrameInfo &MFI = MF.getFrameInfo(); 2439 unsigned Align = MFI.getObjectAlignment(FI); 2440 MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(MF, FI); 2441 MachineMemOperand *MMO = MF.getMachineMemOperand( 2442 PtrInfo, MachineMemOperand::MOLoad, MFI.getObjectSize(FI), Align); 2443 2444 unsigned Opc = 0; 2445 bool Offset = true; 2446 switch (RC->getSize()) { 2447 case 1: 2448 if (AArch64::FPR8RegClass.hasSubClassEq(RC)) 2449 Opc = AArch64::LDRBui; 2450 break; 2451 case 2: 2452 if (AArch64::FPR16RegClass.hasSubClassEq(RC)) 2453 Opc = AArch64::LDRHui; 2454 break; 2455 case 4: 2456 if (AArch64::GPR32allRegClass.hasSubClassEq(RC)) { 2457 Opc = AArch64::LDRWui; 2458 if (TargetRegisterInfo::isVirtualRegister(DestReg)) 2459 MF.getRegInfo().constrainRegClass(DestReg, &AArch64::GPR32RegClass); 2460 else 2461 assert(DestReg != AArch64::WSP); 2462 } else if (AArch64::FPR32RegClass.hasSubClassEq(RC)) 2463 Opc = AArch64::LDRSui; 2464 break; 2465 case 8: 2466 if (AArch64::GPR64allRegClass.hasSubClassEq(RC)) { 2467 Opc = AArch64::LDRXui; 2468 if (TargetRegisterInfo::isVirtualRegister(DestReg)) 2469 MF.getRegInfo().constrainRegClass(DestReg, &AArch64::GPR64RegClass); 2470 else 2471 assert(DestReg != AArch64::SP); 2472 } else if (AArch64::FPR64RegClass.hasSubClassEq(RC)) 2473 Opc = AArch64::LDRDui; 2474 break; 2475 case 16: 2476 if (AArch64::FPR128RegClass.hasSubClassEq(RC)) 2477 Opc = AArch64::LDRQui; 2478 else if (AArch64::DDRegClass.hasSubClassEq(RC)) { 2479 assert(Subtarget.hasNEON() && 2480 "Unexpected register load without NEON"); 
2481 Opc = AArch64::LD1Twov1d; 2482 Offset = false; 2483 } 2484 break; 2485 case 24: 2486 if (AArch64::DDDRegClass.hasSubClassEq(RC)) { 2487 assert(Subtarget.hasNEON() && 2488 "Unexpected register load without NEON"); 2489 Opc = AArch64::LD1Threev1d; 2490 Offset = false; 2491 } 2492 break; 2493 case 32: 2494 if (AArch64::DDDDRegClass.hasSubClassEq(RC)) { 2495 assert(Subtarget.hasNEON() && 2496 "Unexpected register load without NEON"); 2497 Opc = AArch64::LD1Fourv1d; 2498 Offset = false; 2499 } else if (AArch64::QQRegClass.hasSubClassEq(RC)) { 2500 assert(Subtarget.hasNEON() && 2501 "Unexpected register load without NEON"); 2502 Opc = AArch64::LD1Twov2d; 2503 Offset = false; 2504 } 2505 break; 2506 case 48: 2507 if (AArch64::QQQRegClass.hasSubClassEq(RC)) { 2508 assert(Subtarget.hasNEON() && 2509 "Unexpected register load without NEON"); 2510 Opc = AArch64::LD1Threev2d; 2511 Offset = false; 2512 } 2513 break; 2514 case 64: 2515 if (AArch64::QQQQRegClass.hasSubClassEq(RC)) { 2516 assert(Subtarget.hasNEON() && 2517 "Unexpected register load without NEON"); 2518 Opc = AArch64::LD1Fourv2d; 2519 Offset = false; 2520 } 2521 break; 2522 } 2523 assert(Opc && "Unknown register class"); 2524 2525 const MachineInstrBuilder MI = BuildMI(MBB, MBBI, DL, get(Opc)) 2526 .addReg(DestReg, getDefRegState(true)) 2527 .addFrameIndex(FI); 2528 if (Offset) 2529 MI.addImm(0); 2530 MI.addMemOperand(MMO); 2531 } 2532 2533 void llvm::emitFrameOffset(MachineBasicBlock &MBB, 2534 MachineBasicBlock::iterator MBBI, const DebugLoc &DL, 2535 unsigned DestReg, unsigned SrcReg, int Offset, 2536 const TargetInstrInfo *TII, 2537 MachineInstr::MIFlag Flag, bool SetNZCV) { 2538 if (DestReg == SrcReg && Offset == 0) 2539 return; 2540 2541 assert((DestReg != AArch64::SP || Offset % 16 == 0) && 2542 "SP increment/decrement not 16-byte aligned"); 2543 2544 bool isSub = Offset < 0; 2545 if (isSub) 2546 Offset = -Offset; 2547 2548 // FIXME: If the offset won't fit in 24-bits, compute the offset into a 2549 // scratch register. If DestReg is a virtual register, use it as the 2550 // scratch register; otherwise, create a new virtual register (to be 2551 // replaced by the scavenger at the end of PEI). That case can be optimized 2552 // slightly if DestReg is SP which is always 16-byte aligned, so the scratch 2553 // register can be loaded with offset%8 and the add/sub can use an extending 2554 // instruction with LSL#3. 2555 // Currently the function handles any offsets but generates a poor sequence 2556 // of code. 2557 // assert(Offset < (1 << 24) && "unimplemented reg plus immediate"); 2558 2559 unsigned Opc; 2560 if (SetNZCV) 2561 Opc = isSub ? AArch64::SUBSXri : AArch64::ADDSXri; 2562 else 2563 Opc = isSub ? 
AArch64::SUBXri : AArch64::ADDXri; 2564 const unsigned MaxEncoding = 0xfff; 2565 const unsigned ShiftSize = 12; 2566 const unsigned MaxEncodableValue = MaxEncoding << ShiftSize; 2567 while (((unsigned)Offset) >= (1 << ShiftSize)) { 2568 unsigned ThisVal; 2569 if (((unsigned)Offset) > MaxEncodableValue) { 2570 ThisVal = MaxEncodableValue; 2571 } else { 2572 ThisVal = Offset & MaxEncodableValue; 2573 } 2574 assert((ThisVal >> ShiftSize) <= MaxEncoding && 2575 "Encoding cannot handle value that big"); 2576 BuildMI(MBB, MBBI, DL, TII->get(Opc), DestReg) 2577 .addReg(SrcReg) 2578 .addImm(ThisVal >> ShiftSize) 2579 .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftSize)) 2580 .setMIFlag(Flag); 2581 2582 SrcReg = DestReg; 2583 Offset -= ThisVal; 2584 if (Offset == 0) 2585 return; 2586 } 2587 BuildMI(MBB, MBBI, DL, TII->get(Opc), DestReg) 2588 .addReg(SrcReg) 2589 .addImm(Offset) 2590 .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0)) 2591 .setMIFlag(Flag); 2592 } 2593 2594 MachineInstr *AArch64InstrInfo::foldMemoryOperandImpl( 2595 MachineFunction &MF, MachineInstr &MI, ArrayRef<unsigned> Ops, 2596 MachineBasicBlock::iterator InsertPt, int FrameIndex, 2597 LiveIntervals *LIS) const { 2598 // This is a bit of a hack. Consider this instruction: 2599 // 2600 // %vreg0<def> = COPY %SP; GPR64all:%vreg0 2601 // 2602 // We explicitly chose GPR64all for the virtual register so such a copy might 2603 // be eliminated by RegisterCoalescer. However, that may not be possible, and 2604 // %vreg0 may even spill. We can't spill %SP, and since it is in the GPR64all 2605 // register class, TargetInstrInfo::foldMemoryOperand() is going to try. 2606 // 2607 // To prevent that, we are going to constrain the %vreg0 register class here. 2608 // 2609 // <rdar://problem/11522048> 2610 // 2611 if (MI.isFullCopy()) { 2612 unsigned DstReg = MI.getOperand(0).getReg(); 2613 unsigned SrcReg = MI.getOperand(1).getReg(); 2614 if (SrcReg == AArch64::SP && 2615 TargetRegisterInfo::isVirtualRegister(DstReg)) { 2616 MF.getRegInfo().constrainRegClass(DstReg, &AArch64::GPR64RegClass); 2617 return nullptr; 2618 } 2619 if (DstReg == AArch64::SP && 2620 TargetRegisterInfo::isVirtualRegister(SrcReg)) { 2621 MF.getRegInfo().constrainRegClass(SrcReg, &AArch64::GPR64RegClass); 2622 return nullptr; 2623 } 2624 } 2625 2626 // Handle the case where a copy is being spilled or filled but the source 2627 // and destination register class don't match. For example: 2628 // 2629 // %vreg0<def> = COPY %XZR; GPR64common:%vreg0 2630 // 2631 // In this case we can still safely fold away the COPY and generate the 2632 // following spill code: 2633 // 2634 // STRXui %XZR, <fi#0> 2635 // 2636 // This also eliminates spilled cross register class COPYs (e.g. between x and 2637 // d regs) of the same size. For example: 2638 // 2639 // %vreg0<def> = COPY %vreg1; GPR64:%vreg0, FPR64:%vreg1 2640 // 2641 // will be filled as 2642 // 2643 // LDRDui %vreg0, fi<#0> 2644 // 2645 // instead of 2646 // 2647 // LDRXui %vregTemp, fi<#0> 2648 // %vreg0 = FMOV %vregTemp 2649 // 2650 if (MI.isCopy() && Ops.size() == 1 && 2651 // Make sure we're only folding the explicit COPY defs/uses. 
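      // (Operand 0 is the COPY's def, so folding it is a spill; operand 1 is
      // its use, so folding it is a fill.)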
2652 (Ops[0] == 0 || Ops[0] == 1)) { 2653 bool IsSpill = Ops[0] == 0; 2654 bool IsFill = !IsSpill; 2655 const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo(); 2656 const MachineRegisterInfo &MRI = MF.getRegInfo(); 2657 MachineBasicBlock &MBB = *MI.getParent(); 2658 const MachineOperand &DstMO = MI.getOperand(0); 2659 const MachineOperand &SrcMO = MI.getOperand(1); 2660 unsigned DstReg = DstMO.getReg(); 2661 unsigned SrcReg = SrcMO.getReg(); 2662 // This is slightly expensive to compute for physical regs since 2663 // getMinimalPhysRegClass is slow. 2664 auto getRegClass = [&](unsigned Reg) { 2665 return TargetRegisterInfo::isVirtualRegister(Reg) 2666 ? MRI.getRegClass(Reg) 2667 : TRI.getMinimalPhysRegClass(Reg); 2668 }; 2669 2670 if (DstMO.getSubReg() == 0 && SrcMO.getSubReg() == 0) { 2671 assert(getRegClass(DstReg)->getSize() == getRegClass(SrcReg)->getSize() && 2672 "Mismatched register size in non subreg COPY"); 2673 if (IsSpill) 2674 storeRegToStackSlot(MBB, InsertPt, SrcReg, SrcMO.isKill(), FrameIndex, 2675 getRegClass(SrcReg), &TRI); 2676 else 2677 loadRegFromStackSlot(MBB, InsertPt, DstReg, FrameIndex, 2678 getRegClass(DstReg), &TRI); 2679 return &*--InsertPt; 2680 } 2681 2682 // Handle cases like spilling def of: 2683 // 2684 // %vreg0:sub_32<def,read-undef> = COPY %WZR; GPR64common:%vreg0 2685 // 2686 // where the physical register source can be widened and stored to the full 2687 // virtual reg destination stack slot, in this case producing: 2688 // 2689 // STRXui %XZR, <fi#0> 2690 // 2691 if (IsSpill && DstMO.isUndef() && 2692 TargetRegisterInfo::isPhysicalRegister(SrcReg)) { 2693 assert(SrcMO.getSubReg() == 0 && 2694 "Unexpected subreg on physical register"); 2695 const TargetRegisterClass *SpillRC; 2696 unsigned SpillSubreg; 2697 switch (DstMO.getSubReg()) { 2698 default: 2699 SpillRC = nullptr; 2700 break; 2701 case AArch64::sub_32: 2702 case AArch64::ssub: 2703 if (AArch64::GPR32RegClass.contains(SrcReg)) { 2704 SpillRC = &AArch64::GPR64RegClass; 2705 SpillSubreg = AArch64::sub_32; 2706 } else if (AArch64::FPR32RegClass.contains(SrcReg)) { 2707 SpillRC = &AArch64::FPR64RegClass; 2708 SpillSubreg = AArch64::ssub; 2709 } else 2710 SpillRC = nullptr; 2711 break; 2712 case AArch64::dsub: 2713 if (AArch64::FPR64RegClass.contains(SrcReg)) { 2714 SpillRC = &AArch64::FPR128RegClass; 2715 SpillSubreg = AArch64::dsub; 2716 } else 2717 SpillRC = nullptr; 2718 break; 2719 } 2720 2721 if (SpillRC) 2722 if (unsigned WidenedSrcReg = 2723 TRI.getMatchingSuperReg(SrcReg, SpillSubreg, SpillRC)) { 2724 storeRegToStackSlot(MBB, InsertPt, WidenedSrcReg, SrcMO.isKill(), 2725 FrameIndex, SpillRC, &TRI); 2726 return &*--InsertPt; 2727 } 2728 } 2729 2730 // Handle cases like filling use of: 2731 // 2732 // %vreg0:sub_32<def,read-undef> = COPY %vreg1; GPR64:%vreg0, GPR32:%vreg1 2733 // 2734 // where we can load the full virtual reg source stack slot, into the subreg 2735 // destination, in this case producing: 2736 // 2737 // LDRWui %vreg0:sub_32<def,read-undef>, <fi#0> 2738 // 2739 if (IsFill && SrcMO.getSubReg() == 0 && DstMO.isUndef()) { 2740 const TargetRegisterClass *FillRC; 2741 switch (DstMO.getSubReg()) { 2742 default: 2743 FillRC = nullptr; 2744 break; 2745 case AArch64::sub_32: 2746 FillRC = &AArch64::GPR32RegClass; 2747 break; 2748 case AArch64::ssub: 2749 FillRC = &AArch64::FPR32RegClass; 2750 break; 2751 case AArch64::dsub: 2752 FillRC = &AArch64::FPR64RegClass; 2753 break; 2754 } 2755 2756 if (FillRC) { 2757 assert(getRegClass(SrcReg)->getSize() == FillRC->getSize() && 
2758 "Mismatched regclass size on folded subreg COPY"); 2759 loadRegFromStackSlot(MBB, InsertPt, DstReg, FrameIndex, FillRC, &TRI); 2760 MachineInstr &LoadMI = *--InsertPt; 2761 MachineOperand &LoadDst = LoadMI.getOperand(0); 2762 assert(LoadDst.getSubReg() == 0 && "unexpected subreg on fill load"); 2763 LoadDst.setSubReg(DstMO.getSubReg()); 2764 LoadDst.setIsUndef(); 2765 return &LoadMI; 2766 } 2767 } 2768 } 2769 2770 // Cannot fold. 2771 return nullptr; 2772 } 2773 2774 int llvm::isAArch64FrameOffsetLegal(const MachineInstr &MI, int &Offset, 2775 bool *OutUseUnscaledOp, 2776 unsigned *OutUnscaledOp, 2777 int *EmittableOffset) { 2778 int Scale = 1; 2779 bool IsSigned = false; 2780 // The ImmIdx should be changed case by case if it is not 2. 2781 unsigned ImmIdx = 2; 2782 unsigned UnscaledOp = 0; 2783 // Set output values in case of early exit. 2784 if (EmittableOffset) 2785 *EmittableOffset = 0; 2786 if (OutUseUnscaledOp) 2787 *OutUseUnscaledOp = false; 2788 if (OutUnscaledOp) 2789 *OutUnscaledOp = 0; 2790 switch (MI.getOpcode()) { 2791 default: 2792 llvm_unreachable("unhandled opcode in rewriteAArch64FrameIndex"); 2793 // Vector spills/fills can't take an immediate offset. 2794 case AArch64::LD1Twov2d: 2795 case AArch64::LD1Threev2d: 2796 case AArch64::LD1Fourv2d: 2797 case AArch64::LD1Twov1d: 2798 case AArch64::LD1Threev1d: 2799 case AArch64::LD1Fourv1d: 2800 case AArch64::ST1Twov2d: 2801 case AArch64::ST1Threev2d: 2802 case AArch64::ST1Fourv2d: 2803 case AArch64::ST1Twov1d: 2804 case AArch64::ST1Threev1d: 2805 case AArch64::ST1Fourv1d: 2806 return AArch64FrameOffsetCannotUpdate; 2807 case AArch64::PRFMui: 2808 Scale = 8; 2809 UnscaledOp = AArch64::PRFUMi; 2810 break; 2811 case AArch64::LDRXui: 2812 Scale = 8; 2813 UnscaledOp = AArch64::LDURXi; 2814 break; 2815 case AArch64::LDRWui: 2816 Scale = 4; 2817 UnscaledOp = AArch64::LDURWi; 2818 break; 2819 case AArch64::LDRBui: 2820 Scale = 1; 2821 UnscaledOp = AArch64::LDURBi; 2822 break; 2823 case AArch64::LDRHui: 2824 Scale = 2; 2825 UnscaledOp = AArch64::LDURHi; 2826 break; 2827 case AArch64::LDRSui: 2828 Scale = 4; 2829 UnscaledOp = AArch64::LDURSi; 2830 break; 2831 case AArch64::LDRDui: 2832 Scale = 8; 2833 UnscaledOp = AArch64::LDURDi; 2834 break; 2835 case AArch64::LDRQui: 2836 Scale = 16; 2837 UnscaledOp = AArch64::LDURQi; 2838 break; 2839 case AArch64::LDRBBui: 2840 Scale = 1; 2841 UnscaledOp = AArch64::LDURBBi; 2842 break; 2843 case AArch64::LDRHHui: 2844 Scale = 2; 2845 UnscaledOp = AArch64::LDURHHi; 2846 break; 2847 case AArch64::LDRSBXui: 2848 Scale = 1; 2849 UnscaledOp = AArch64::LDURSBXi; 2850 break; 2851 case AArch64::LDRSBWui: 2852 Scale = 1; 2853 UnscaledOp = AArch64::LDURSBWi; 2854 break; 2855 case AArch64::LDRSHXui: 2856 Scale = 2; 2857 UnscaledOp = AArch64::LDURSHXi; 2858 break; 2859 case AArch64::LDRSHWui: 2860 Scale = 2; 2861 UnscaledOp = AArch64::LDURSHWi; 2862 break; 2863 case AArch64::LDRSWui: 2864 Scale = 4; 2865 UnscaledOp = AArch64::LDURSWi; 2866 break; 2867 2868 case AArch64::STRXui: 2869 Scale = 8; 2870 UnscaledOp = AArch64::STURXi; 2871 break; 2872 case AArch64::STRWui: 2873 Scale = 4; 2874 UnscaledOp = AArch64::STURWi; 2875 break; 2876 case AArch64::STRBui: 2877 Scale = 1; 2878 UnscaledOp = AArch64::STURBi; 2879 break; 2880 case AArch64::STRHui: 2881 Scale = 2; 2882 UnscaledOp = AArch64::STURHi; 2883 break; 2884 case AArch64::STRSui: 2885 Scale = 4; 2886 UnscaledOp = AArch64::STURSi; 2887 break; 2888 case AArch64::STRDui: 2889 Scale = 8; 2890 UnscaledOp = AArch64::STURDi; 2891 break; 2892 case AArch64::STRQui: 
2893 Scale = 16; 2894 UnscaledOp = AArch64::STURQi; 2895 break; 2896 case AArch64::STRBBui: 2897 Scale = 1; 2898 UnscaledOp = AArch64::STURBBi; 2899 break; 2900 case AArch64::STRHHui: 2901 Scale = 2; 2902 UnscaledOp = AArch64::STURHHi; 2903 break; 2904 2905 case AArch64::LDPXi: 2906 case AArch64::LDPDi: 2907 case AArch64::STPXi: 2908 case AArch64::STPDi: 2909 case AArch64::LDNPXi: 2910 case AArch64::LDNPDi: 2911 case AArch64::STNPXi: 2912 case AArch64::STNPDi: 2913 ImmIdx = 3; 2914 IsSigned = true; 2915 Scale = 8; 2916 break; 2917 case AArch64::LDPQi: 2918 case AArch64::STPQi: 2919 case AArch64::LDNPQi: 2920 case AArch64::STNPQi: 2921 ImmIdx = 3; 2922 IsSigned = true; 2923 Scale = 16; 2924 break; 2925 case AArch64::LDPWi: 2926 case AArch64::LDPSi: 2927 case AArch64::STPWi: 2928 case AArch64::STPSi: 2929 case AArch64::LDNPWi: 2930 case AArch64::LDNPSi: 2931 case AArch64::STNPWi: 2932 case AArch64::STNPSi: 2933 ImmIdx = 3; 2934 IsSigned = true; 2935 Scale = 4; 2936 break; 2937 2938 case AArch64::LDURXi: 2939 case AArch64::LDURWi: 2940 case AArch64::LDURBi: 2941 case AArch64::LDURHi: 2942 case AArch64::LDURSi: 2943 case AArch64::LDURDi: 2944 case AArch64::LDURQi: 2945 case AArch64::LDURHHi: 2946 case AArch64::LDURBBi: 2947 case AArch64::LDURSBXi: 2948 case AArch64::LDURSBWi: 2949 case AArch64::LDURSHXi: 2950 case AArch64::LDURSHWi: 2951 case AArch64::LDURSWi: 2952 case AArch64::STURXi: 2953 case AArch64::STURWi: 2954 case AArch64::STURBi: 2955 case AArch64::STURHi: 2956 case AArch64::STURSi: 2957 case AArch64::STURDi: 2958 case AArch64::STURQi: 2959 case AArch64::STURBBi: 2960 case AArch64::STURHHi: 2961 Scale = 1; 2962 break; 2963 } 2964 2965 Offset += MI.getOperand(ImmIdx).getImm() * Scale; 2966 2967 bool useUnscaledOp = false; 2968 // If the offset doesn't match the scale, we rewrite the instruction to 2969 // use the unscaled instruction instead. Likewise, if we have a negative 2970 // offset (and have an unscaled op to use). 2971 if ((Offset & (Scale - 1)) != 0 || (Offset < 0 && UnscaledOp != 0)) 2972 useUnscaledOp = true; 2973 2974 // Use an unscaled addressing mode if the instruction has a negative offset 2975 // (or if the instruction is already using an unscaled addressing mode). 2976 unsigned MaskBits; 2977 if (IsSigned) { 2978 // ldp/stp instructions. 2979 MaskBits = 7; 2980 Offset /= Scale; 2981 } else if (UnscaledOp == 0 || useUnscaledOp) { 2982 MaskBits = 9; 2983 IsSigned = true; 2984 Scale = 1; 2985 } else { 2986 MaskBits = 12; 2987 IsSigned = false; 2988 Offset /= Scale; 2989 } 2990 2991 // Attempt to fold address computation. 2992 int MaxOff = (1 << (MaskBits - IsSigned)) - 1; 2993 int MinOff = (IsSigned ? (-MaxOff - 1) : 0); 2994 if (Offset >= MinOff && Offset <= MaxOff) { 2995 if (EmittableOffset) 2996 *EmittableOffset = Offset; 2997 Offset = 0; 2998 } else { 2999 int NewOff = Offset < 0 ? MinOff : MaxOff; 3000 if (EmittableOffset) 3001 *EmittableOffset = NewOff; 3002 Offset = (Offset - NewOff) * Scale; 3003 } 3004 if (OutUseUnscaledOp) 3005 *OutUseUnscaledOp = useUnscaledOp; 3006 if (OutUnscaledOp) 3007 *OutUnscaledOp = UnscaledOp; 3008 return AArch64FrameOffsetCanUpdate | 3009 (Offset == 0 ? 
AArch64FrameOffsetIsLegal : 0); 3010 } 3011 3012 bool llvm::rewriteAArch64FrameIndex(MachineInstr &MI, unsigned FrameRegIdx, 3013 unsigned FrameReg, int &Offset, 3014 const AArch64InstrInfo *TII) { 3015 unsigned Opcode = MI.getOpcode(); 3016 unsigned ImmIdx = FrameRegIdx + 1; 3017 3018 if (Opcode == AArch64::ADDSXri || Opcode == AArch64::ADDXri) { 3019 Offset += MI.getOperand(ImmIdx).getImm(); 3020 emitFrameOffset(*MI.getParent(), MI, MI.getDebugLoc(), 3021 MI.getOperand(0).getReg(), FrameReg, Offset, TII, 3022 MachineInstr::NoFlags, (Opcode == AArch64::ADDSXri)); 3023 MI.eraseFromParent(); 3024 Offset = 0; 3025 return true; 3026 } 3027 3028 int NewOffset; 3029 unsigned UnscaledOp; 3030 bool UseUnscaledOp; 3031 int Status = isAArch64FrameOffsetLegal(MI, Offset, &UseUnscaledOp, 3032 &UnscaledOp, &NewOffset); 3033 if (Status & AArch64FrameOffsetCanUpdate) { 3034 if (Status & AArch64FrameOffsetIsLegal) 3035 // Replace the FrameIndex with FrameReg. 3036 MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false); 3037 if (UseUnscaledOp) 3038 MI.setDesc(TII->get(UnscaledOp)); 3039 3040 MI.getOperand(ImmIdx).ChangeToImmediate(NewOffset); 3041 return Offset == 0; 3042 } 3043 3044 return false; 3045 } 3046 3047 void AArch64InstrInfo::getNoopForMachoTarget(MCInst &NopInst) const { 3048 NopInst.setOpcode(AArch64::HINT); 3049 NopInst.addOperand(MCOperand::createImm(0)); 3050 } 3051 3052 // AArch64 supports MachineCombiner. 3053 bool AArch64InstrInfo::useMachineCombiner() const { 3054 3055 return true; 3056 } 3057 3058 // True when Opc sets flag 3059 static bool isCombineInstrSettingFlag(unsigned Opc) { 3060 switch (Opc) { 3061 case AArch64::ADDSWrr: 3062 case AArch64::ADDSWri: 3063 case AArch64::ADDSXrr: 3064 case AArch64::ADDSXri: 3065 case AArch64::SUBSWrr: 3066 case AArch64::SUBSXrr: 3067 // Note: MSUB Wd,Wn,Wm,Wi -> Wd = Wi - WnxWm, not Wd=WnxWm - Wi. 3068 case AArch64::SUBSWri: 3069 case AArch64::SUBSXri: 3070 return true; 3071 default: 3072 break; 3073 } 3074 return false; 3075 } 3076 3077 // 32b Opcodes that can be combined with a MUL 3078 static bool isCombineInstrCandidate32(unsigned Opc) { 3079 switch (Opc) { 3080 case AArch64::ADDWrr: 3081 case AArch64::ADDWri: 3082 case AArch64::SUBWrr: 3083 case AArch64::ADDSWrr: 3084 case AArch64::ADDSWri: 3085 case AArch64::SUBSWrr: 3086 // Note: MSUB Wd,Wn,Wm,Wi -> Wd = Wi - WnxWm, not Wd=WnxWm - Wi. 3087 case AArch64::SUBWri: 3088 case AArch64::SUBSWri: 3089 return true; 3090 default: 3091 break; 3092 } 3093 return false; 3094 } 3095 3096 // 64b Opcodes that can be combined with a MUL 3097 static bool isCombineInstrCandidate64(unsigned Opc) { 3098 switch (Opc) { 3099 case AArch64::ADDXrr: 3100 case AArch64::ADDXri: 3101 case AArch64::SUBXrr: 3102 case AArch64::ADDSXrr: 3103 case AArch64::ADDSXri: 3104 case AArch64::SUBSXrr: 3105 // Note: MSUB Wd,Wn,Wm,Wi -> Wd = Wi - WnxWm, not Wd=WnxWm - Wi. 
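  // For example, "sub x0, x1, x2" with x2 = "mul x3, x4" becomes
  // "msub x0, x3, x4, x1", where the accumulator x1 is the minuend.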
  case AArch64::SUBXri:
  case AArch64::SUBSXri:
    return true;
  default:
    break;
  }
  return false;
}

// FP opcodes that can be combined with an FMUL.
static bool isCombineInstrCandidateFP(const MachineInstr &Inst) {
  switch (Inst.getOpcode()) {
  default:
    break;
  case AArch64::FADDSrr:
  case AArch64::FADDDrr:
  case AArch64::FADDv2f32:
  case AArch64::FADDv2f64:
  case AArch64::FADDv4f32:
  case AArch64::FSUBSrr:
  case AArch64::FSUBDrr:
  case AArch64::FSUBv2f32:
  case AArch64::FSUBv2f64:
  case AArch64::FSUBv4f32:
    TargetOptions Options = Inst.getParent()->getParent()->getTarget().Options;
    return (Options.UnsafeFPMath ||
            Options.AllowFPOpFusion == FPOpFusion::Fast);
  }
  return false;
}

// Opcodes that can be combined with a MUL.
static bool isCombineInstrCandidate(unsigned Opc) {
  return (isCombineInstrCandidate32(Opc) || isCombineInstrCandidate64(Opc));
}

//
// Utility routine that checks if \p MO is defined by a \p CombineOpc
// instruction in the basic block \p MBB.
static bool canCombine(MachineBasicBlock &MBB, MachineOperand &MO,
                       unsigned CombineOpc, unsigned ZeroReg = 0,
                       bool CheckZeroReg = false) {
  MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
  MachineInstr *MI = nullptr;

  if (MO.isReg() && TargetRegisterInfo::isVirtualRegister(MO.getReg()))
    MI = MRI.getUniqueVRegDef(MO.getReg());
  // And it needs to be in the trace (otherwise, it won't have a depth).
  if (!MI || MI->getParent() != &MBB || (unsigned)MI->getOpcode() != CombineOpc)
    return false;
  // Must be used only by the instruction we are combining with.
  if (!MRI.hasOneNonDBGUse(MI->getOperand(0).getReg()))
    return false;

  if (CheckZeroReg) {
    assert(MI->getNumOperands() >= 4 && MI->getOperand(0).isReg() &&
           MI->getOperand(1).isReg() && MI->getOperand(2).isReg() &&
           MI->getOperand(3).isReg() && "MAdd/MSub must have at least 4 regs");
    // The third input reg must be zero.
    if (MI->getOperand(3).getReg() != ZeroReg)
      return false;
  }

  return true;
}

//
// Is \p MO defined by an integer multiply, and can it be combined?
static bool canCombineWithMUL(MachineBasicBlock &MBB, MachineOperand &MO,
                              unsigned MulOpc, unsigned ZeroReg) {
  return canCombine(MBB, MO, MulOpc, ZeroReg, true);
}

//
// Is \p MO defined by a floating-point multiply, and can it be combined?
static bool canCombineWithFMUL(MachineBasicBlock &MBB, MachineOperand &MO,
                               unsigned MulOpc) {
  return canCombine(MBB, MO, MulOpc);
}

// TODO: There are many more machine instruction opcodes to match:
//       1. Other data types (integer, vectors)
//       2. Other math / logic operations (xor, or)
//       3.
Other forms of the same operation (intrinsics and other variants) 3190 bool AArch64InstrInfo::isAssociativeAndCommutative(const MachineInstr &Inst) const { 3191 switch (Inst.getOpcode()) { 3192 case AArch64::FADDDrr: 3193 case AArch64::FADDSrr: 3194 case AArch64::FADDv2f32: 3195 case AArch64::FADDv2f64: 3196 case AArch64::FADDv4f32: 3197 case AArch64::FMULDrr: 3198 case AArch64::FMULSrr: 3199 case AArch64::FMULX32: 3200 case AArch64::FMULX64: 3201 case AArch64::FMULXv2f32: 3202 case AArch64::FMULXv2f64: 3203 case AArch64::FMULXv4f32: 3204 case AArch64::FMULv2f32: 3205 case AArch64::FMULv2f64: 3206 case AArch64::FMULv4f32: 3207 return Inst.getParent()->getParent()->getTarget().Options.UnsafeFPMath; 3208 default: 3209 return false; 3210 } 3211 } 3212 3213 /// Find instructions that can be turned into madd. 3214 static bool getMaddPatterns(MachineInstr &Root, 3215 SmallVectorImpl<MachineCombinerPattern> &Patterns) { 3216 unsigned Opc = Root.getOpcode(); 3217 MachineBasicBlock &MBB = *Root.getParent(); 3218 bool Found = false; 3219 3220 if (!isCombineInstrCandidate(Opc)) 3221 return false; 3222 if (isCombineInstrSettingFlag(Opc)) { 3223 int Cmp_NZCV = Root.findRegisterDefOperandIdx(AArch64::NZCV, true); 3224 // When NZCV is live bail out. 3225 if (Cmp_NZCV == -1) 3226 return false; 3227 unsigned NewOpc = convertFlagSettingOpcode(Root); 3228 // When opcode can't change bail out. 3229 // CHECKME: do we miss any cases for opcode conversion? 3230 if (NewOpc == Opc) 3231 return false; 3232 Opc = NewOpc; 3233 } 3234 3235 switch (Opc) { 3236 default: 3237 break; 3238 case AArch64::ADDWrr: 3239 assert(Root.getOperand(1).isReg() && Root.getOperand(2).isReg() && 3240 "ADDWrr does not have register operands"); 3241 if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDWrrr, 3242 AArch64::WZR)) { 3243 Patterns.push_back(MachineCombinerPattern::MULADDW_OP1); 3244 Found = true; 3245 } 3246 if (canCombineWithMUL(MBB, Root.getOperand(2), AArch64::MADDWrrr, 3247 AArch64::WZR)) { 3248 Patterns.push_back(MachineCombinerPattern::MULADDW_OP2); 3249 Found = true; 3250 } 3251 break; 3252 case AArch64::ADDXrr: 3253 if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDXrrr, 3254 AArch64::XZR)) { 3255 Patterns.push_back(MachineCombinerPattern::MULADDX_OP1); 3256 Found = true; 3257 } 3258 if (canCombineWithMUL(MBB, Root.getOperand(2), AArch64::MADDXrrr, 3259 AArch64::XZR)) { 3260 Patterns.push_back(MachineCombinerPattern::MULADDX_OP2); 3261 Found = true; 3262 } 3263 break; 3264 case AArch64::SUBWrr: 3265 if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDWrrr, 3266 AArch64::WZR)) { 3267 Patterns.push_back(MachineCombinerPattern::MULSUBW_OP1); 3268 Found = true; 3269 } 3270 if (canCombineWithMUL(MBB, Root.getOperand(2), AArch64::MADDWrrr, 3271 AArch64::WZR)) { 3272 Patterns.push_back(MachineCombinerPattern::MULSUBW_OP2); 3273 Found = true; 3274 } 3275 break; 3276 case AArch64::SUBXrr: 3277 if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDXrrr, 3278 AArch64::XZR)) { 3279 Patterns.push_back(MachineCombinerPattern::MULSUBX_OP1); 3280 Found = true; 3281 } 3282 if (canCombineWithMUL(MBB, Root.getOperand(2), AArch64::MADDXrrr, 3283 AArch64::XZR)) { 3284 Patterns.push_back(MachineCombinerPattern::MULSUBX_OP2); 3285 Found = true; 3286 } 3287 break; 3288 case AArch64::ADDWri: 3289 if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDWrrr, 3290 AArch64::WZR)) { 3291 Patterns.push_back(MachineCombinerPattern::MULADDWI_OP1); 3292 Found = true; 3293 } 3294 break; 3295 case AArch64::ADDXri: 3296 
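    // For example, "add x0, x1, #8" where x1 is a MUL result can be rewritten
    // by materializing the immediate, roughly "orr xV, xzr, #8" followed by
    // "madd x0, xA, xB, xV" (xA, xB and xV are illustrative registers here);
    // see genMaddR below.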
    if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDXrrr,
                          AArch64::XZR)) {
      Patterns.push_back(MachineCombinerPattern::MULADDXI_OP1);
      Found = true;
    }
    break;
  case AArch64::SUBWri:
    if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDWrrr,
                          AArch64::WZR)) {
      Patterns.push_back(MachineCombinerPattern::MULSUBWI_OP1);
      Found = true;
    }
    break;
  case AArch64::SUBXri:
    if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDXrrr,
                          AArch64::XZR)) {
      Patterns.push_back(MachineCombinerPattern::MULSUBXI_OP1);
      Found = true;
    }
    break;
  }
  return Found;
}

/// Floating-Point Support

/// Find instructions that can be turned into fmadd.
static bool getFMAPatterns(MachineInstr &Root,
                           SmallVectorImpl<MachineCombinerPattern> &Patterns) {
  if (!isCombineInstrCandidateFP(Root))
    return false;

  MachineBasicBlock &MBB = *Root.getParent();
  bool Found = false;

  switch (Root.getOpcode()) {
  default:
    assert(false && "Unsupported FP instruction in combiner");
    break;
  case AArch64::FADDSrr:
    assert(Root.getOperand(1).isReg() && Root.getOperand(2).isReg() &&
           "FADDSrr does not have register operands");
    if (canCombineWithFMUL(MBB, Root.getOperand(1), AArch64::FMULSrr)) {
      Patterns.push_back(MachineCombinerPattern::FMULADDS_OP1);
      Found = true;
    } else if (canCombineWithFMUL(MBB, Root.getOperand(1),
                                  AArch64::FMULv1i32_indexed)) {
      Patterns.push_back(MachineCombinerPattern::FMLAv1i32_indexed_OP1);
      Found = true;
    }
    if (canCombineWithFMUL(MBB, Root.getOperand(2), AArch64::FMULSrr)) {
      Patterns.push_back(MachineCombinerPattern::FMULADDS_OP2);
      Found = true;
    } else if (canCombineWithFMUL(MBB, Root.getOperand(2),
                                  AArch64::FMULv1i32_indexed)) {
      Patterns.push_back(MachineCombinerPattern::FMLAv1i32_indexed_OP2);
      Found = true;
    }
    break;
  case AArch64::FADDDrr:
    if (canCombineWithFMUL(MBB, Root.getOperand(1), AArch64::FMULDrr)) {
      Patterns.push_back(MachineCombinerPattern::FMULADDD_OP1);
      Found = true;
    } else if (canCombineWithFMUL(MBB, Root.getOperand(1),
                                  AArch64::FMULv1i64_indexed)) {
      Patterns.push_back(MachineCombinerPattern::FMLAv1i64_indexed_OP1);
      Found = true;
    }
    if (canCombineWithFMUL(MBB, Root.getOperand(2), AArch64::FMULDrr)) {
      Patterns.push_back(MachineCombinerPattern::FMULADDD_OP2);
      Found = true;
    } else if (canCombineWithFMUL(MBB, Root.getOperand(2),
                                  AArch64::FMULv1i64_indexed)) {
      Patterns.push_back(MachineCombinerPattern::FMLAv1i64_indexed_OP2);
      Found = true;
    }
    break;
  case AArch64::FADDv2f32:
    if (canCombineWithFMUL(MBB, Root.getOperand(1),
                           AArch64::FMULv2i32_indexed)) {
      Patterns.push_back(MachineCombinerPattern::FMLAv2i32_indexed_OP1);
      Found = true;
    } else if (canCombineWithFMUL(MBB, Root.getOperand(1),
                                  AArch64::FMULv2f32)) {
      Patterns.push_back(MachineCombinerPattern::FMLAv2f32_OP1);
      Found = true;
    }
    if (canCombineWithFMUL(MBB, Root.getOperand(2),
                           AArch64::FMULv2i32_indexed)) {
      Patterns.push_back(MachineCombinerPattern::FMLAv2i32_indexed_OP2);
      Found = true;
    } else if (canCombineWithFMUL(MBB, Root.getOperand(2),
                                  AArch64::FMULv2f32)) {
      Patterns.push_back(MachineCombinerPattern::FMLAv2f32_OP2);
      Found = true;
    }
    break;
  case AArch64::FADDv2f64:
    if
(canCombineWithFMUL(MBB, Root.getOperand(1), 3395 AArch64::FMULv2i64_indexed)) { 3396 Patterns.push_back(MachineCombinerPattern::FMLAv2i64_indexed_OP1); 3397 Found = true; 3398 } else if (canCombineWithFMUL(MBB, Root.getOperand(1), 3399 AArch64::FMULv2f64)) { 3400 Patterns.push_back(MachineCombinerPattern::FMLAv2f64_OP1); 3401 Found = true; 3402 } 3403 if (canCombineWithFMUL(MBB, Root.getOperand(2), 3404 AArch64::FMULv2i64_indexed)) { 3405 Patterns.push_back(MachineCombinerPattern::FMLAv2i64_indexed_OP2); 3406 Found = true; 3407 } else if (canCombineWithFMUL(MBB, Root.getOperand(2), 3408 AArch64::FMULv2f64)) { 3409 Patterns.push_back(MachineCombinerPattern::FMLAv2f64_OP2); 3410 Found = true; 3411 } 3412 break; 3413 case AArch64::FADDv4f32: 3414 if (canCombineWithFMUL(MBB, Root.getOperand(1), 3415 AArch64::FMULv4i32_indexed)) { 3416 Patterns.push_back(MachineCombinerPattern::FMLAv4i32_indexed_OP1); 3417 Found = true; 3418 } else if (canCombineWithFMUL(MBB, Root.getOperand(1), 3419 AArch64::FMULv4f32)) { 3420 Patterns.push_back(MachineCombinerPattern::FMLAv4f32_OP1); 3421 Found = true; 3422 } 3423 if (canCombineWithFMUL(MBB, Root.getOperand(2), 3424 AArch64::FMULv4i32_indexed)) { 3425 Patterns.push_back(MachineCombinerPattern::FMLAv4i32_indexed_OP2); 3426 Found = true; 3427 } else if (canCombineWithFMUL(MBB, Root.getOperand(2), 3428 AArch64::FMULv4f32)) { 3429 Patterns.push_back(MachineCombinerPattern::FMLAv4f32_OP2); 3430 Found = true; 3431 } 3432 break; 3433 3434 case AArch64::FSUBSrr: 3435 if (canCombineWithFMUL(MBB, Root.getOperand(1), AArch64::FMULSrr)) { 3436 Patterns.push_back(MachineCombinerPattern::FMULSUBS_OP1); 3437 Found = true; 3438 } 3439 if (canCombineWithFMUL(MBB, Root.getOperand(2), AArch64::FMULSrr)) { 3440 Patterns.push_back(MachineCombinerPattern::FMULSUBS_OP2); 3441 Found = true; 3442 } else if (canCombineWithFMUL(MBB, Root.getOperand(2), 3443 AArch64::FMULv1i32_indexed)) { 3444 Patterns.push_back(MachineCombinerPattern::FMLSv1i32_indexed_OP2); 3445 Found = true; 3446 } 3447 break; 3448 case AArch64::FSUBDrr: 3449 if (canCombineWithFMUL(MBB, Root.getOperand(1), AArch64::FMULDrr)) { 3450 Patterns.push_back(MachineCombinerPattern::FMULSUBD_OP1); 3451 Found = true; 3452 } 3453 if (canCombineWithFMUL(MBB, Root.getOperand(2), AArch64::FMULDrr)) { 3454 Patterns.push_back(MachineCombinerPattern::FMULSUBD_OP2); 3455 Found = true; 3456 } else if (canCombineWithFMUL(MBB, Root.getOperand(2), 3457 AArch64::FMULv1i64_indexed)) { 3458 Patterns.push_back(MachineCombinerPattern::FMLSv1i64_indexed_OP2); 3459 Found = true; 3460 } 3461 break; 3462 case AArch64::FSUBv2f32: 3463 if (canCombineWithFMUL(MBB, Root.getOperand(2), 3464 AArch64::FMULv2i32_indexed)) { 3465 Patterns.push_back(MachineCombinerPattern::FMLSv2i32_indexed_OP2); 3466 Found = true; 3467 } else if (canCombineWithFMUL(MBB, Root.getOperand(2), 3468 AArch64::FMULv2f32)) { 3469 Patterns.push_back(MachineCombinerPattern::FMLSv2f32_OP2); 3470 Found = true; 3471 } 3472 break; 3473 case AArch64::FSUBv2f64: 3474 if (canCombineWithFMUL(MBB, Root.getOperand(2), 3475 AArch64::FMULv2i64_indexed)) { 3476 Patterns.push_back(MachineCombinerPattern::FMLSv2i64_indexed_OP2); 3477 Found = true; 3478 } else if (canCombineWithFMUL(MBB, Root.getOperand(2), 3479 AArch64::FMULv2f64)) { 3480 Patterns.push_back(MachineCombinerPattern::FMLSv2f64_OP2); 3481 Found = true; 3482 } 3483 break; 3484 case AArch64::FSUBv4f32: 3485 if (canCombineWithFMUL(MBB, Root.getOperand(2), 3486 AArch64::FMULv4i32_indexed)) { 3487 
Patterns.push_back(MachineCombinerPattern::FMLSv4i32_indexed_OP2); 3488 Found = true; 3489 } else if (canCombineWithFMUL(MBB, Root.getOperand(2), 3490 AArch64::FMULv4f32)) { 3491 Patterns.push_back(MachineCombinerPattern::FMLSv4f32_OP2); 3492 Found = true; 3493 } 3494 break; 3495 } 3496 return Found; 3497 } 3498 3499 /// Return true when a code sequence can improve throughput. It 3500 /// should be called only for instructions in loops. 3501 /// \param Pattern - combiner pattern 3502 bool 3503 AArch64InstrInfo::isThroughputPattern(MachineCombinerPattern Pattern) const { 3504 switch (Pattern) { 3505 default: 3506 break; 3507 case MachineCombinerPattern::FMULADDS_OP1: 3508 case MachineCombinerPattern::FMULADDS_OP2: 3509 case MachineCombinerPattern::FMULSUBS_OP1: 3510 case MachineCombinerPattern::FMULSUBS_OP2: 3511 case MachineCombinerPattern::FMULADDD_OP1: 3512 case MachineCombinerPattern::FMULADDD_OP2: 3513 case MachineCombinerPattern::FMULSUBD_OP1: 3514 case MachineCombinerPattern::FMULSUBD_OP2: 3515 case MachineCombinerPattern::FMLAv1i32_indexed_OP1: 3516 case MachineCombinerPattern::FMLAv1i32_indexed_OP2: 3517 case MachineCombinerPattern::FMLAv1i64_indexed_OP1: 3518 case MachineCombinerPattern::FMLAv1i64_indexed_OP2: 3519 case MachineCombinerPattern::FMLAv2f32_OP2: 3520 case MachineCombinerPattern::FMLAv2f32_OP1: 3521 case MachineCombinerPattern::FMLAv2f64_OP1: 3522 case MachineCombinerPattern::FMLAv2f64_OP2: 3523 case MachineCombinerPattern::FMLAv2i32_indexed_OP1: 3524 case MachineCombinerPattern::FMLAv2i32_indexed_OP2: 3525 case MachineCombinerPattern::FMLAv2i64_indexed_OP1: 3526 case MachineCombinerPattern::FMLAv2i64_indexed_OP2: 3527 case MachineCombinerPattern::FMLAv4f32_OP1: 3528 case MachineCombinerPattern::FMLAv4f32_OP2: 3529 case MachineCombinerPattern::FMLAv4i32_indexed_OP1: 3530 case MachineCombinerPattern::FMLAv4i32_indexed_OP2: 3531 case MachineCombinerPattern::FMLSv1i32_indexed_OP2: 3532 case MachineCombinerPattern::FMLSv1i64_indexed_OP2: 3533 case MachineCombinerPattern::FMLSv2i32_indexed_OP2: 3534 case MachineCombinerPattern::FMLSv2i64_indexed_OP2: 3535 case MachineCombinerPattern::FMLSv2f32_OP2: 3536 case MachineCombinerPattern::FMLSv2f64_OP2: 3537 case MachineCombinerPattern::FMLSv4i32_indexed_OP2: 3538 case MachineCombinerPattern::FMLSv4f32_OP2: 3539 return true; 3540 } // end switch (Pattern) 3541 return false; 3542 } 3543 /// Return true when there is potentially a faster code sequence for an 3544 /// instruction chain ending in \p Root. All potential patterns are listed in 3545 /// the \p Pattern vector. Pattern should be sorted in priority order since the 3546 /// pattern evaluator stops checking as soon as it finds a faster sequence. 3547 3548 bool AArch64InstrInfo::getMachineCombinerPatterns( 3549 MachineInstr &Root, 3550 SmallVectorImpl<MachineCombinerPattern> &Patterns) const { 3551 // Integer patterns 3552 if (getMaddPatterns(Root, Patterns)) 3553 return true; 3554 // Floating point patterns 3555 if (getFMAPatterns(Root, Patterns)) 3556 return true; 3557 3558 return TargetInstrInfo::getMachineCombinerPatterns(Root, Patterns); 3559 } 3560 3561 enum class FMAInstKind { Default, Indexed, Accumulator }; 3562 /// genFusedMultiply - Generate fused multiply instructions. 3563 /// This function supports both integer and floating point instructions. 
enum class FMAInstKind { Default, Indexed, Accumulator };

/// genFusedMultiply - Generate fused multiply instructions.
/// This function supports both integer and floating point instructions.
/// A typical example:
///   F|MUL I=A,B,0
///   F|ADD R,I,C
///   ==> F|MADD R,A,B,C
/// \param Root is the F|ADD instruction
/// \param [out] InsInstrs is a vector of machine instructions and will
/// contain the generated madd instruction
/// \param IdxMulOpd is the index of the operand in Root that is the result of
/// the F|MUL. In the example above IdxMulOpd is 1.
/// \param MaddOpc the opcode of the f|madd instruction
static MachineInstr *
genFusedMultiply(MachineFunction &MF, MachineRegisterInfo &MRI,
                 const TargetInstrInfo *TII, MachineInstr &Root,
                 SmallVectorImpl<MachineInstr *> &InsInstrs, unsigned IdxMulOpd,
                 unsigned MaddOpc, const TargetRegisterClass *RC,
                 FMAInstKind kind = FMAInstKind::Default) {
  assert(IdxMulOpd == 1 || IdxMulOpd == 2);

  unsigned IdxOtherOpd = IdxMulOpd == 1 ? 2 : 1;
  MachineInstr *MUL = MRI.getUniqueVRegDef(Root.getOperand(IdxMulOpd).getReg());
  unsigned ResultReg = Root.getOperand(0).getReg();
  unsigned SrcReg0 = MUL->getOperand(1).getReg();
  bool Src0IsKill = MUL->getOperand(1).isKill();
  unsigned SrcReg1 = MUL->getOperand(2).getReg();
  bool Src1IsKill = MUL->getOperand(2).isKill();
  unsigned SrcReg2 = Root.getOperand(IdxOtherOpd).getReg();
  bool Src2IsKill = Root.getOperand(IdxOtherOpd).isKill();

  if (TargetRegisterInfo::isVirtualRegister(ResultReg))
    MRI.constrainRegClass(ResultReg, RC);
  if (TargetRegisterInfo::isVirtualRegister(SrcReg0))
    MRI.constrainRegClass(SrcReg0, RC);
  if (TargetRegisterInfo::isVirtualRegister(SrcReg1))
    MRI.constrainRegClass(SrcReg1, RC);
  if (TargetRegisterInfo::isVirtualRegister(SrcReg2))
    MRI.constrainRegClass(SrcReg2, RC);

  MachineInstrBuilder MIB;
  if (kind == FMAInstKind::Default)
    MIB = BuildMI(MF, Root.getDebugLoc(), TII->get(MaddOpc), ResultReg)
              .addReg(SrcReg0, getKillRegState(Src0IsKill))
              .addReg(SrcReg1, getKillRegState(Src1IsKill))
              .addReg(SrcReg2, getKillRegState(Src2IsKill));
  else if (kind == FMAInstKind::Indexed)
    MIB = BuildMI(MF, Root.getDebugLoc(), TII->get(MaddOpc), ResultReg)
              .addReg(SrcReg2, getKillRegState(Src2IsKill))
              .addReg(SrcReg0, getKillRegState(Src0IsKill))
              .addReg(SrcReg1, getKillRegState(Src1IsKill))
              .addImm(MUL->getOperand(3).getImm());
  else if (kind == FMAInstKind::Accumulator)
    MIB = BuildMI(MF, Root.getDebugLoc(), TII->get(MaddOpc), ResultReg)
              .addReg(SrcReg2, getKillRegState(Src2IsKill))
              .addReg(SrcReg0, getKillRegState(Src0IsKill))
              .addReg(SrcReg1, getKillRegState(Src1IsKill));
  else
    assert(false && "Invalid FMA instruction kind");
  // Insert the MADD (MADD, FMA, FMS, FMLA, FMLS)
  InsInstrs.push_back(MIB);
  return MUL;
}
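
// For instance (simplified MIR, virtual register numbers invented), given
//   %3<def> = FMULv4i32_indexed %1, %2, 1
//   %4<def> = FADDv4f32 %0, %3
// a call genFusedMultiply(..., /*IdxMulOpd=*/2, AArch64::FMLAv4i32_indexed,
// RC, FMAInstKind::Indexed) builds
//   %4<def> = FMLAv4i32_indexed %0, %1, %2, 1
// and returns the FMUL so the caller can record it for deletion.
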
/// genMaddR - Generate madd instruction and combine mul and add using
/// an extra virtual register
/// Example - an ADD intermediate needs to be stored in a register:
///   MUL I=A,B,0
///   ADD R,I,Imm
///   ==> ORR  V, ZR, Imm
///   ==> MADD R,A,B,V
/// \param Root is the ADD instruction
/// \param [out] InsInstrs is a vector of machine instructions and will
/// contain the generated madd instruction
/// \param IdxMulOpd is the index of the operand in Root that is the result of
/// the MUL. In the example above IdxMulOpd is 1.
/// \param MaddOpc the opcode of the madd instruction
/// \param VR is a virtual register that holds the value of an ADD operand
/// (V in the example above).
static MachineInstr *genMaddR(MachineFunction &MF, MachineRegisterInfo &MRI,
                              const TargetInstrInfo *TII, MachineInstr &Root,
                              SmallVectorImpl<MachineInstr *> &InsInstrs,
                              unsigned IdxMulOpd, unsigned MaddOpc,
                              unsigned VR, const TargetRegisterClass *RC) {
  assert(IdxMulOpd == 1 || IdxMulOpd == 2);

  MachineInstr *MUL = MRI.getUniqueVRegDef(Root.getOperand(IdxMulOpd).getReg());
  unsigned ResultReg = Root.getOperand(0).getReg();
  unsigned SrcReg0 = MUL->getOperand(1).getReg();
  bool Src0IsKill = MUL->getOperand(1).isKill();
  unsigned SrcReg1 = MUL->getOperand(2).getReg();
  bool Src1IsKill = MUL->getOperand(2).isKill();

  if (TargetRegisterInfo::isVirtualRegister(ResultReg))
    MRI.constrainRegClass(ResultReg, RC);
  if (TargetRegisterInfo::isVirtualRegister(SrcReg0))
    MRI.constrainRegClass(SrcReg0, RC);
  if (TargetRegisterInfo::isVirtualRegister(SrcReg1))
    MRI.constrainRegClass(SrcReg1, RC);
  if (TargetRegisterInfo::isVirtualRegister(VR))
    MRI.constrainRegClass(VR, RC);

  MachineInstrBuilder MIB =
      BuildMI(MF, Root.getDebugLoc(), TII->get(MaddOpc), ResultReg)
          .addReg(SrcReg0, getKillRegState(Src0IsKill))
          .addReg(SrcReg1, getKillRegState(Src1IsKill))
          .addReg(VR);
  // Insert the MADD
  InsInstrs.push_back(MIB);
  return MUL;
}
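
// Note on the MULADDWI/MULSUBWI (immediate) patterns handled below: the
// immediate is first materialized into a fresh virtual register with
// "ORR Wd, WZR, #imm" (the usual alias for moving a logical immediate),
// so the rewrite only fires when AArch64_AM::processLogicalImmediate can
// encode the possibly shifted and/or negated value.
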
/// When getMachineCombinerPatterns() finds potential patterns,
/// this function generates the instructions that could replace the
/// original code sequence.
void AArch64InstrInfo::genAlternativeCodeSequence(
    MachineInstr &Root, MachineCombinerPattern Pattern,
    SmallVectorImpl<MachineInstr *> &InsInstrs,
    SmallVectorImpl<MachineInstr *> &DelInstrs,
    DenseMap<unsigned, unsigned> &InstrIdxForVirtReg) const {
  MachineBasicBlock &MBB = *Root.getParent();
  MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
  MachineFunction &MF = *MBB.getParent();
  const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();

  // MUL stays null when no multiply is folded (e.g. an immediate that
  // cannot be encoded), so initialize it to avoid an uninitialized read
  // at the DelInstrs bookkeeping below.
  MachineInstr *MUL = nullptr;
  const TargetRegisterClass *RC;
  unsigned Opc;
  switch (Pattern) {
  default:
    // Reassociate instructions.
    TargetInstrInfo::genAlternativeCodeSequence(Root, Pattern, InsInstrs,
                                                DelInstrs, InstrIdxForVirtReg);
    return;
  case MachineCombinerPattern::MULADDW_OP1:
  case MachineCombinerPattern::MULADDX_OP1:
    // MUL I=A,B,0
    // ADD R,I,C
    // ==> MADD R,A,B,C
    // --- Create(MADD);
    if (Pattern == MachineCombinerPattern::MULADDW_OP1) {
      Opc = AArch64::MADDWrrr;
      RC = &AArch64::GPR32RegClass;
    } else {
      Opc = AArch64::MADDXrrr;
      RC = &AArch64::GPR64RegClass;
    }
    MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
    break;
  case MachineCombinerPattern::MULADDW_OP2:
  case MachineCombinerPattern::MULADDX_OP2:
    // MUL I=A,B,0
    // ADD R,C,I
    // ==> MADD R,A,B,C
    // --- Create(MADD);
    if (Pattern == MachineCombinerPattern::MULADDW_OP2) {
      Opc = AArch64::MADDWrrr;
      RC = &AArch64::GPR32RegClass;
    } else {
      Opc = AArch64::MADDXrrr;
      RC = &AArch64::GPR64RegClass;
    }
    MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
    break;
  case MachineCombinerPattern::MULADDWI_OP1:
  case MachineCombinerPattern::MULADDXI_OP1: {
    // MUL I=A,B,0
    // ADD R,I,Imm
    // ==> ORR  V, ZR, Imm
    // ==> MADD R,A,B,V
    // --- Create(MADD);
    const TargetRegisterClass *OrrRC;
    unsigned BitSize, OrrOpc, ZeroReg;
    if (Pattern == MachineCombinerPattern::MULADDWI_OP1) {
      OrrOpc = AArch64::ORRWri;
      OrrRC = &AArch64::GPR32spRegClass;
      BitSize = 32;
      ZeroReg = AArch64::WZR;
      Opc = AArch64::MADDWrrr;
      RC = &AArch64::GPR32RegClass;
    } else {
      OrrOpc = AArch64::ORRXri;
      OrrRC = &AArch64::GPR64spRegClass;
      BitSize = 64;
      ZeroReg = AArch64::XZR;
      Opc = AArch64::MADDXrrr;
      RC = &AArch64::GPR64RegClass;
    }
    unsigned NewVR = MRI.createVirtualRegister(OrrRC);
    uint64_t Imm = Root.getOperand(2).getImm();

    if (Root.getOperand(3).isImm()) {
      unsigned Val = Root.getOperand(3).getImm();
      Imm = Imm << Val;
    }
    uint64_t UImm = SignExtend64(Imm, BitSize);
    uint64_t Encoding;
    if (AArch64_AM::processLogicalImmediate(UImm, BitSize, Encoding)) {
      MachineInstrBuilder MIB1 =
          BuildMI(MF, Root.getDebugLoc(), TII->get(OrrOpc), NewVR)
              .addReg(ZeroReg)
              .addImm(Encoding);
      InsInstrs.push_back(MIB1);
      InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
      MUL = genMaddR(MF, MRI, TII, Root, InsInstrs, 1, Opc, NewVR, RC);
    }
    break;
  }
  case MachineCombinerPattern::MULSUBW_OP1:
  case MachineCombinerPattern::MULSUBX_OP1: {
    // MUL I=A,B,0
    // SUB R,I,C
    // ==> SUB  V, 0, C
    // ==> MADD R,A,B,V // = -C + A*B
    // --- Create(MADD);
    const TargetRegisterClass *SubRC;
    unsigned SubOpc, ZeroReg;
    if (Pattern == MachineCombinerPattern::MULSUBW_OP1) {
      SubOpc = AArch64::SUBWrr;
      SubRC = &AArch64::GPR32spRegClass;
      ZeroReg = AArch64::WZR;
      Opc = AArch64::MADDWrrr;
      RC = &AArch64::GPR32RegClass;
    } else {
      SubOpc = AArch64::SUBXrr;
      SubRC = &AArch64::GPR64spRegClass;
      ZeroReg = AArch64::XZR;
      Opc = AArch64::MADDXrrr;
      RC = &AArch64::GPR64RegClass;
    }
    unsigned NewVR = MRI.createVirtualRegister(SubRC);
    // SUB NewVR, 0, C
    MachineInstrBuilder MIB1 =
        BuildMI(MF, Root.getDebugLoc(), TII->get(SubOpc), NewVR)
            .addReg(ZeroReg)
            .addOperand(Root.getOperand(2));
    InsInstrs.push_back(MIB1);
    InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
    MUL = genMaddR(MF, MRI, TII, Root, InsInstrs, 1, Opc, NewVR, RC);
    break;
  }
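  // Quick sanity check of the MULSUB?_OP1 rewrite above: with V = 0 - C,
  // MADD computes A*B + V = A*B - C, which is exactly the original
  // SUB R, (MUL A, B), C.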
  case MachineCombinerPattern::MULSUBW_OP2:
  case MachineCombinerPattern::MULSUBX_OP2:
    // MUL I=A,B,0
    // SUB R,C,I
    // ==> MSUB R,A,B,C (computes C - A*B)
    // --- Create(MSUB);
    if (Pattern == MachineCombinerPattern::MULSUBW_OP2) {
      Opc = AArch64::MSUBWrrr;
      RC = &AArch64::GPR32RegClass;
    } else {
      Opc = AArch64::MSUBXrrr;
      RC = &AArch64::GPR64RegClass;
    }
    MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
    break;
  case MachineCombinerPattern::MULSUBWI_OP1:
  case MachineCombinerPattern::MULSUBXI_OP1: {
    // MUL I=A,B,0
    // SUB R,I,Imm
    // ==> ORR  V, ZR, -Imm
    // ==> MADD R,A,B,V // = -Imm + A*B
    // --- Create(MADD);
    const TargetRegisterClass *OrrRC;
    unsigned BitSize, OrrOpc, ZeroReg;
    if (Pattern == MachineCombinerPattern::MULSUBWI_OP1) {
      OrrOpc = AArch64::ORRWri;
      OrrRC = &AArch64::GPR32spRegClass;
      BitSize = 32;
      ZeroReg = AArch64::WZR;
      Opc = AArch64::MADDWrrr;
      RC = &AArch64::GPR32RegClass;
    } else {
      OrrOpc = AArch64::ORRXri;
      OrrRC = &AArch64::GPR64spRegClass;
      BitSize = 64;
      ZeroReg = AArch64::XZR;
      Opc = AArch64::MADDXrrr;
      RC = &AArch64::GPR64RegClass;
    }
    unsigned NewVR = MRI.createVirtualRegister(OrrRC);
    uint64_t Imm = Root.getOperand(2).getImm();
    if (Root.getOperand(3).isImm()) {
      unsigned Val = Root.getOperand(3).getImm();
      Imm = Imm << Val;
    }
    // Negate the immediate so the MADD computes A*B + (-Imm) == A*B - Imm.
    uint64_t UImm = SignExtend64(-Imm, BitSize);
    uint64_t Encoding;
    if (AArch64_AM::processLogicalImmediate(UImm, BitSize, Encoding)) {
      MachineInstrBuilder MIB1 =
          BuildMI(MF, Root.getDebugLoc(), TII->get(OrrOpc), NewVR)
              .addReg(ZeroReg)
              .addImm(Encoding);
      InsInstrs.push_back(MIB1);
      InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
      MUL = genMaddR(MF, MRI, TII, Root, InsInstrs, 1, Opc, NewVR, RC);
    }
    break;
  }
  // Floating Point Support
  case MachineCombinerPattern::FMULADDS_OP1:
  case MachineCombinerPattern::FMULADDD_OP1:
    // FMUL I=A,B,0
    // FADD R,I,C
    // ==> FMADD R,A,B,C
    // --- Create(FMADD);
    if (Pattern == MachineCombinerPattern::FMULADDS_OP1) {
      Opc = AArch64::FMADDSrrr;
      RC = &AArch64::FPR32RegClass;
    } else {
      Opc = AArch64::FMADDDrrr;
      RC = &AArch64::FPR64RegClass;
    }
    MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
    break;
  case MachineCombinerPattern::FMULADDS_OP2:
  case MachineCombinerPattern::FMULADDD_OP2:
    // FMUL I=A,B,0
    // FADD R,C,I
    // ==> FMADD R,A,B,C
    // --- Create(FMADD);
    if (Pattern == MachineCombinerPattern::FMULADDS_OP2) {
      Opc = AArch64::FMADDSrrr;
      RC = &AArch64::FPR32RegClass;
    } else {
      Opc = AArch64::FMADDDrrr;
      RC = &AArch64::FPR64RegClass;
    }
    MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
    break;

  case MachineCombinerPattern::FMLAv1i32_indexed_OP1:
    Opc = AArch64::FMLAv1i32_indexed;
    RC = &AArch64::FPR32RegClass;
    MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
                           FMAInstKind::Indexed);
    break;
  case MachineCombinerPattern::FMLAv1i32_indexed_OP2:
    Opc = AArch64::FMLAv1i32_indexed;
    RC = &AArch64::FPR32RegClass;
    MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
                           FMAInstKind::Indexed);
    break;
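
  // For the vector FMLA/FMLS cases below, RC follows the total vector
  // width: 64-bit vectors (v1i64, v2f32/v2i32) are constrained to FPR64,
  // 128-bit vectors (v2f64/v2i64, v4f32/v4i32) to FPR128.
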
  case MachineCombinerPattern::FMLAv1i64_indexed_OP1:
    Opc = AArch64::FMLAv1i64_indexed;
    RC = &AArch64::FPR64RegClass;
    MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
                           FMAInstKind::Indexed);
    break;
  case MachineCombinerPattern::FMLAv1i64_indexed_OP2:
    Opc = AArch64::FMLAv1i64_indexed;
    RC = &AArch64::FPR64RegClass;
    MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
                           FMAInstKind::Indexed);
    break;

  case MachineCombinerPattern::FMLAv2i32_indexed_OP1:
  case MachineCombinerPattern::FMLAv2f32_OP1:
    RC = &AArch64::FPR64RegClass;
    if (Pattern == MachineCombinerPattern::FMLAv2i32_indexed_OP1) {
      Opc = AArch64::FMLAv2i32_indexed;
      MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
                             FMAInstKind::Indexed);
    } else {
      Opc = AArch64::FMLAv2f32;
      MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
                             FMAInstKind::Accumulator);
    }
    break;
  case MachineCombinerPattern::FMLAv2i32_indexed_OP2:
  case MachineCombinerPattern::FMLAv2f32_OP2:
    RC = &AArch64::FPR64RegClass;
    if (Pattern == MachineCombinerPattern::FMLAv2i32_indexed_OP2) {
      Opc = AArch64::FMLAv2i32_indexed;
      MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
                             FMAInstKind::Indexed);
    } else {
      Opc = AArch64::FMLAv2f32;
      MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
                             FMAInstKind::Accumulator);
    }
    break;

  case MachineCombinerPattern::FMLAv2i64_indexed_OP1:
  case MachineCombinerPattern::FMLAv2f64_OP1:
    RC = &AArch64::FPR128RegClass;
    if (Pattern == MachineCombinerPattern::FMLAv2i64_indexed_OP1) {
      Opc = AArch64::FMLAv2i64_indexed;
      MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
                             FMAInstKind::Indexed);
    } else {
      Opc = AArch64::FMLAv2f64;
      MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
                             FMAInstKind::Accumulator);
    }
    break;
  case MachineCombinerPattern::FMLAv2i64_indexed_OP2:
  case MachineCombinerPattern::FMLAv2f64_OP2:
    RC = &AArch64::FPR128RegClass;
    if (Pattern == MachineCombinerPattern::FMLAv2i64_indexed_OP2) {
      Opc = AArch64::FMLAv2i64_indexed;
      MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
                             FMAInstKind::Indexed);
    } else {
      Opc = AArch64::FMLAv2f64;
      MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
                             FMAInstKind::Accumulator);
    }
    break;

  case MachineCombinerPattern::FMLAv4i32_indexed_OP1:
  case MachineCombinerPattern::FMLAv4f32_OP1:
    RC = &AArch64::FPR128RegClass;
    if (Pattern == MachineCombinerPattern::FMLAv4i32_indexed_OP1) {
      Opc = AArch64::FMLAv4i32_indexed;
      MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
                             FMAInstKind::Indexed);
    } else {
      Opc = AArch64::FMLAv4f32;
      MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
                             FMAInstKind::Accumulator);
    }
    break;

  case MachineCombinerPattern::FMLAv4i32_indexed_OP2:
  case MachineCombinerPattern::FMLAv4f32_OP2:
    RC = &AArch64::FPR128RegClass;
    if (Pattern == MachineCombinerPattern::FMLAv4i32_indexed_OP2) {
      Opc = AArch64::FMLAv4i32_indexed;
      MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
                             FMAInstKind::Indexed);
    } else {
      Opc = AArch64::FMLAv4f32;
      MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
                             FMAInstKind::Accumulator);
    }
    break;
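
  // Reminder on the scalar FSUB rewrites below: on AArch64,
  //   FNMSUB d, n, m, a  computes  n*m - a, and
  //   FMSUB  d, n, m, a  computes  a - n*m,
  // so OP1 (FSUB R,(FMUL A,B),C) maps to FNMSUB and OP2 (FSUB R,C,(FMUL A,B))
  // maps to FMSUB. The fused forms skip the intermediate rounding step,
  // which is why these FP combines are only formed under relaxed FP
  // semantics (fast-math/contraction).
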
  case MachineCombinerPattern::FMULSUBS_OP1:
  case MachineCombinerPattern::FMULSUBD_OP1: {
    // FMUL I=A,B,0
    // FSUB R,I,C
    // ==> FNMSUB R,A,B,C // = -C + A*B
    // --- Create(FNMSUB);
    if (Pattern == MachineCombinerPattern::FMULSUBS_OP1) {
      Opc = AArch64::FNMSUBSrrr;
      RC = &AArch64::FPR32RegClass;
    } else {
      Opc = AArch64::FNMSUBDrrr;
      RC = &AArch64::FPR64RegClass;
    }
    MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
    break;
  }
  case MachineCombinerPattern::FMULSUBS_OP2:
  case MachineCombinerPattern::FMULSUBD_OP2: {
    // FMUL I=A,B,0
    // FSUB R,C,I
    // ==> FMSUB R,A,B,C (computes C - A*B)
    // --- Create(FMSUB);
    if (Pattern == MachineCombinerPattern::FMULSUBS_OP2) {
      Opc = AArch64::FMSUBSrrr;
      RC = &AArch64::FPR32RegClass;
    } else {
      Opc = AArch64::FMSUBDrrr;
      RC = &AArch64::FPR64RegClass;
    }
    MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
    break;
  }

  case MachineCombinerPattern::FMLSv1i32_indexed_OP2:
    Opc = AArch64::FMLSv1i32_indexed;
    RC = &AArch64::FPR32RegClass;
    MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
                           FMAInstKind::Indexed);
    break;

  case MachineCombinerPattern::FMLSv1i64_indexed_OP2:
    Opc = AArch64::FMLSv1i64_indexed;
    RC = &AArch64::FPR64RegClass;
    MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
                           FMAInstKind::Indexed);
    break;

  case MachineCombinerPattern::FMLSv2f32_OP2:
  case MachineCombinerPattern::FMLSv2i32_indexed_OP2:
    RC = &AArch64::FPR64RegClass;
    if (Pattern == MachineCombinerPattern::FMLSv2i32_indexed_OP2) {
      Opc = AArch64::FMLSv2i32_indexed;
      MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
                             FMAInstKind::Indexed);
    } else {
      Opc = AArch64::FMLSv2f32;
      MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
                             FMAInstKind::Accumulator);
    }
    break;

  case MachineCombinerPattern::FMLSv2f64_OP2:
  case MachineCombinerPattern::FMLSv2i64_indexed_OP2:
    RC = &AArch64::FPR128RegClass;
    if (Pattern == MachineCombinerPattern::FMLSv2i64_indexed_OP2) {
      Opc = AArch64::FMLSv2i64_indexed;
      MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
                             FMAInstKind::Indexed);
    } else {
      Opc = AArch64::FMLSv2f64;
      MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
                             FMAInstKind::Accumulator);
    }
    break;

  case MachineCombinerPattern::FMLSv4f32_OP2:
  case MachineCombinerPattern::FMLSv4i32_indexed_OP2:
    RC = &AArch64::FPR128RegClass;
    if (Pattern == MachineCombinerPattern::FMLSv4i32_indexed_OP2) {
      Opc = AArch64::FMLSv4i32_indexed;
      MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
                             FMAInstKind::Indexed);
    } else {
      Opc = AArch64::FMLSv4f32;
      MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
                             FMAInstKind::Accumulator);
    }
    break;
  } // end switch (Pattern)
  // Record MUL and ADD/SUB for deletion. MUL may still be null here if the
  // immediate could not be materialized above.
  if (MUL)
    DelInstrs.push_back(MUL);
  DelInstrs.push_back(&Root);
}
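
// The sequences produced above are advisory: the MachineCombiner pass
// weighs them against the original code (roughly by critical-path depth
// and latency, or by throughput for the patterns flagged in
// isThroughputPattern()) and only then commits InsInstrs and erases the
// instructions recorded in DelInstrs.
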
/// \brief Replace csinc-branch sequence by simple conditional branch
///
/// Examples:
/// 1.
///   csinc w9, wzr, wzr, <condition code>
///   tbnz  w9, #0, 0x44
/// to
///   b.<inverted condition code>
///
/// 2.
///   csinc w9, wzr, wzr, <condition code>
///   tbz   w9, #0, 0x44
/// to
///   b.<condition code>
///
/// Replace compare and branch sequence by TBZ/TBNZ instruction when the
/// compare's constant operand is a power of 2.
///
/// Examples:
///   and  w8, w8, #0x400
///   cbnz w8, L1
/// to
///   tbnz w8, #10, L1
///
/// \param MI Conditional Branch
/// \return True when the simple conditional branch is generated
///
bool AArch64InstrInfo::optimizeCondBranch(MachineInstr &MI) const {
  bool IsNegativeBranch = false;
  bool IsTestAndBranch = false;
  unsigned TargetBBInMI = 0;
  switch (MI.getOpcode()) {
  default:
    llvm_unreachable("Unknown branch instruction?");
  case AArch64::Bcc:
    return false;
  case AArch64::CBZW:
  case AArch64::CBZX:
    TargetBBInMI = 1;
    break;
  case AArch64::CBNZW:
  case AArch64::CBNZX:
    TargetBBInMI = 1;
    IsNegativeBranch = true;
    break;
  case AArch64::TBZW:
  case AArch64::TBZX:
    TargetBBInMI = 2;
    IsTestAndBranch = true;
    break;
  case AArch64::TBNZW:
  case AArch64::TBNZX:
    TargetBBInMI = 2;
    IsNegativeBranch = true;
    IsTestAndBranch = true;
    break;
  }
  // So we increment a zero register and test for bits other
  // than bit 0? Conservatively bail out in case the verifier
  // missed this case.
  if (IsTestAndBranch && MI.getOperand(1).getImm())
    return false;

  // Find Definition.
  assert(MI.getParent() && "Incomplete machine instruction\n");
  MachineBasicBlock *MBB = MI.getParent();
  MachineFunction *MF = MBB->getParent();
  MachineRegisterInfo *MRI = &MF->getRegInfo();
  unsigned VReg = MI.getOperand(0).getReg();
  if (!TargetRegisterInfo::isVirtualRegister(VReg))
    return false;

  MachineInstr *DefMI = MRI->getVRegDef(VReg);

  // Look through COPY instructions to find the definition.
  while (DefMI->isCopy()) {
    unsigned CopyVReg = DefMI->getOperand(1).getReg();
    if (!MRI->hasOneNonDBGUse(CopyVReg))
      return false;
    if (!MRI->hasOneDef(CopyVReg))
      return false;
    DefMI = MRI->getVRegDef(CopyVReg);
  }

  switch (DefMI->getOpcode()) {
  default:
    return false;
  // Fold AND into a TBZ/TBNZ if the constant operand is a power of 2.
  case AArch64::ANDWri:
  case AArch64::ANDXri: {
    if (IsTestAndBranch)
      return false;
    if (DefMI->getParent() != MBB)
      return false;
    if (!MRI->hasOneNonDBGUse(VReg))
      return false;

    bool Is32Bit = (DefMI->getOpcode() == AArch64::ANDWri);
    uint64_t Mask = AArch64_AM::decodeLogicalImmediate(
        DefMI->getOperand(2).getImm(), Is32Bit ? 32 : 64);
    if (!isPowerOf2_64(Mask))
      return false;

    MachineOperand &MO = DefMI->getOperand(1);
    unsigned NewReg = MO.getReg();
    if (!TargetRegisterInfo::isVirtualRegister(NewReg))
      return false;

    assert(!MRI->def_empty(NewReg) && "Register must be defined.");

    MachineBasicBlock &RefToMBB = *MBB;
    MachineBasicBlock *TBB = MI.getOperand(1).getMBB();
    DebugLoc DL = MI.getDebugLoc();
    unsigned Imm = Log2_64(Mask);
    unsigned Opc = (Imm < 32)
                       ? (IsNegativeBranch ? AArch64::TBNZW : AArch64::TBZW)
                       : (IsNegativeBranch ? AArch64::TBNZX : AArch64::TBZX);
    MachineInstr *NewMI = BuildMI(RefToMBB, MI, DL, get(Opc))
                              .addReg(NewReg)
                              .addImm(Imm)
                              .addMBB(TBB);
    // The register lives on to the new TBZ/TBNZ now.
    MO.setIsKill(false);

    // For immediates smaller than 32, we need to use the 32-bit
    // variant (W) in all cases: the 64-bit variant cannot encode them.
    // Therefore, if the input register is 64-bit, we need to take the
    // 32-bit sub-register.
    if (!Is32Bit && Imm < 32)
      NewMI->getOperand(0).setSubReg(AArch64::sub_32);
    MI.eraseFromParent();
    return true;
  }
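
  // Why the inversion below is correct: "csinc w9, wzr, wzr, <cc>" yields
  // 0 when <cc> holds and 1 otherwise, so a TBNZ/CBNZ on the result fires
  // exactly when <cc> is false and must become b.<inverted cc>, while a
  // TBZ/CBZ fires when <cc> holds and becomes b.<cc>.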
  // Look for CSINC.
  case AArch64::CSINCWr:
  case AArch64::CSINCXr: {
    if (!(DefMI->getOperand(1).getReg() == AArch64::WZR &&
          DefMI->getOperand(2).getReg() == AArch64::WZR) &&
        !(DefMI->getOperand(1).getReg() == AArch64::XZR &&
          DefMI->getOperand(2).getReg() == AArch64::XZR))
      return false;

    if (DefMI->findRegisterDefOperandIdx(AArch64::NZCV, true) != -1)
      return false;

    AArch64CC::CondCode CC = (AArch64CC::CondCode)DefMI->getOperand(3).getImm();
    // Convert only when the condition code is not modified between
    // the CSINC and the branch. The CC may be used by other
    // instructions in between.
    if (areCFlagsAccessedBetweenInstrs(DefMI, MI, &getRegisterInfo(), AK_Write))
      return false;
    MachineBasicBlock &RefToMBB = *MBB;
    MachineBasicBlock *TBB = MI.getOperand(TargetBBInMI).getMBB();
    DebugLoc DL = MI.getDebugLoc();
    if (IsNegativeBranch)
      CC = AArch64CC::getInvertedCondCode(CC);
    BuildMI(RefToMBB, MI, DL, get(AArch64::Bcc)).addImm(CC).addMBB(TBB);
    MI.eraseFromParent();
    return true;
  }
  }
}

std::pair<unsigned, unsigned>
AArch64InstrInfo::decomposeMachineOperandsTargetFlags(unsigned TF) const {
  const unsigned Mask = AArch64II::MO_FRAGMENT;
  return std::make_pair(TF & Mask, TF & ~Mask);
}

ArrayRef<std::pair<unsigned, const char *>>
AArch64InstrInfo::getSerializableDirectMachineOperandTargetFlags() const {
  using namespace AArch64II;

  static const std::pair<unsigned, const char *> TargetFlags[] = {
      {MO_PAGE, "aarch64-page"}, {MO_PAGEOFF, "aarch64-pageoff"},
      {MO_G3, "aarch64-g3"},     {MO_G2, "aarch64-g2"},
      {MO_G1, "aarch64-g1"},     {MO_G0, "aarch64-g0"},
      {MO_HI12, "aarch64-hi12"}};
  return makeArrayRef(TargetFlags);
}

ArrayRef<std::pair<unsigned, const char *>>
AArch64InstrInfo::getSerializableBitmaskMachineOperandTargetFlags() const {
  using namespace AArch64II;

  static const std::pair<unsigned, const char *> TargetFlags[] = {
      {MO_GOT, "aarch64-got"}, {MO_NC, "aarch64-nc"}, {MO_TLS, "aarch64-tls"}};
  return makeArrayRef(TargetFlags);
}