1 //===- AArch64InstrInfo.cpp - AArch64 Instruction Information -------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file contains the AArch64 implementation of the TargetInstrInfo class. 10 // 11 //===----------------------------------------------------------------------===// 12 13 #include "AArch64InstrInfo.h" 14 #include "AArch64MachineFunctionInfo.h" 15 #include "AArch64Subtarget.h" 16 #include "MCTargetDesc/AArch64AddressingModes.h" 17 #include "Utils/AArch64BaseInfo.h" 18 #include "llvm/ADT/ArrayRef.h" 19 #include "llvm/ADT/STLExtras.h" 20 #include "llvm/ADT/SmallVector.h" 21 #include "llvm/CodeGen/MachineBasicBlock.h" 22 #include "llvm/CodeGen/MachineFrameInfo.h" 23 #include "llvm/CodeGen/MachineFunction.h" 24 #include "llvm/CodeGen/MachineInstr.h" 25 #include "llvm/CodeGen/MachineInstrBuilder.h" 26 #include "llvm/CodeGen/MachineMemOperand.h" 27 #include "llvm/CodeGen/MachineOperand.h" 28 #include "llvm/CodeGen/MachineRegisterInfo.h" 29 #include "llvm/CodeGen/MachineModuleInfo.h" 30 #include "llvm/CodeGen/StackMaps.h" 31 #include "llvm/CodeGen/TargetRegisterInfo.h" 32 #include "llvm/CodeGen/TargetSubtargetInfo.h" 33 #include "llvm/IR/DebugLoc.h" 34 #include "llvm/IR/GlobalValue.h" 35 #include "llvm/MC/MCAsmInfo.h" 36 #include "llvm/MC/MCInst.h" 37 #include "llvm/MC/MCInstrDesc.h" 38 #include "llvm/Support/Casting.h" 39 #include "llvm/Support/CodeGen.h" 40 #include "llvm/Support/CommandLine.h" 41 #include "llvm/Support/Compiler.h" 42 #include "llvm/Support/ErrorHandling.h" 43 #include "llvm/Support/MathExtras.h" 44 #include "llvm/Target/TargetMachine.h" 45 #include "llvm/Target/TargetOptions.h" 46 #include <cassert> 47 #include <cstdint> 48 #include <iterator> 49 #include <utility> 
50 51 using namespace llvm; 52 53 #define GET_INSTRINFO_CTOR_DTOR 54 #include "AArch64GenInstrInfo.inc" 55 56 static cl::opt<unsigned> TBZDisplacementBits( 57 "aarch64-tbz-offset-bits", cl::Hidden, cl::init(14), 58 cl::desc("Restrict range of TB[N]Z instructions (DEBUG)")); 59 60 static cl::opt<unsigned> CBZDisplacementBits( 61 "aarch64-cbz-offset-bits", cl::Hidden, cl::init(19), 62 cl::desc("Restrict range of CB[N]Z instructions (DEBUG)")); 63 64 static cl::opt<unsigned> 65 BCCDisplacementBits("aarch64-bcc-offset-bits", cl::Hidden, cl::init(19), 66 cl::desc("Restrict range of Bcc instructions (DEBUG)")); 67 68 AArch64InstrInfo::AArch64InstrInfo(const AArch64Subtarget &STI) 69 : AArch64GenInstrInfo(AArch64::ADJCALLSTACKDOWN, AArch64::ADJCALLSTACKUP, 70 AArch64::CATCHRET), 71 RI(STI.getTargetTriple()), Subtarget(STI) {} 72 73 /// GetInstSize - Return the number of bytes of code the specified 74 /// instruction may be. This returns the maximum number of bytes. 75 unsigned AArch64InstrInfo::getInstSizeInBytes(const MachineInstr &MI) const { 76 const MachineBasicBlock &MBB = *MI.getParent(); 77 const MachineFunction *MF = MBB.getParent(); 78 const MCAsmInfo *MAI = MF->getTarget().getMCAsmInfo(); 79 80 { 81 auto Op = MI.getOpcode(); 82 if (Op == AArch64::INLINEASM || Op == AArch64::INLINEASM_BR) 83 return getInlineAsmLength(MI.getOperand(0).getSymbolName(), *MAI); 84 } 85 86 // Meta-instructions emit no code. 87 if (MI.isMetaInstruction()) 88 return 0; 89 90 // FIXME: We currently only handle pseudoinstructions that don't get expanded 91 // before the assembly printer. 92 unsigned NumBytes = 0; 93 const MCInstrDesc &Desc = MI.getDesc(); 94 switch (Desc.getOpcode()) { 95 default: 96 // Anything not explicitly designated otherwise is a normal 4-byte insn. 
97 NumBytes = 4; 98 break; 99 case TargetOpcode::STACKMAP: 100 // The upper bound for a stackmap intrinsic is the full length of its shadow 101 NumBytes = StackMapOpers(&MI).getNumPatchBytes(); 102 assert(NumBytes % 4 == 0 && "Invalid number of NOP bytes requested!"); 103 break; 104 case TargetOpcode::PATCHPOINT: 105 // The size of the patchpoint intrinsic is the number of bytes requested 106 NumBytes = PatchPointOpers(&MI).getNumPatchBytes(); 107 assert(NumBytes % 4 == 0 && "Invalid number of NOP bytes requested!"); 108 break; 109 case AArch64::TLSDESC_CALLSEQ: 110 // This gets lowered to an instruction sequence which takes 16 bytes 111 NumBytes = 16; 112 break; 113 case AArch64::JumpTableDest32: 114 case AArch64::JumpTableDest16: 115 case AArch64::JumpTableDest8: 116 NumBytes = 12; 117 break; 118 case AArch64::SPACE: 119 NumBytes = MI.getOperand(1).getImm(); 120 break; 121 } 122 123 return NumBytes; 124 } 125 126 static void parseCondBranch(MachineInstr *LastInst, MachineBasicBlock *&Target, 127 SmallVectorImpl<MachineOperand> &Cond) { 128 // Block ends with fall-through condbranch. 
129 switch (LastInst->getOpcode()) { 130 default: 131 llvm_unreachable("Unknown branch instruction?"); 132 case AArch64::Bcc: 133 Target = LastInst->getOperand(1).getMBB(); 134 Cond.push_back(LastInst->getOperand(0)); 135 break; 136 case AArch64::CBZW: 137 case AArch64::CBZX: 138 case AArch64::CBNZW: 139 case AArch64::CBNZX: 140 Target = LastInst->getOperand(1).getMBB(); 141 Cond.push_back(MachineOperand::CreateImm(-1)); 142 Cond.push_back(MachineOperand::CreateImm(LastInst->getOpcode())); 143 Cond.push_back(LastInst->getOperand(0)); 144 break; 145 case AArch64::TBZW: 146 case AArch64::TBZX: 147 case AArch64::TBNZW: 148 case AArch64::TBNZX: 149 Target = LastInst->getOperand(2).getMBB(); 150 Cond.push_back(MachineOperand::CreateImm(-1)); 151 Cond.push_back(MachineOperand::CreateImm(LastInst->getOpcode())); 152 Cond.push_back(LastInst->getOperand(0)); 153 Cond.push_back(LastInst->getOperand(1)); 154 } 155 } 156 157 static unsigned getBranchDisplacementBits(unsigned Opc) { 158 switch (Opc) { 159 default: 160 llvm_unreachable("unexpected opcode!"); 161 case AArch64::B: 162 return 64; 163 case AArch64::TBNZW: 164 case AArch64::TBZW: 165 case AArch64::TBNZX: 166 case AArch64::TBZX: 167 return TBZDisplacementBits; 168 case AArch64::CBNZW: 169 case AArch64::CBZW: 170 case AArch64::CBNZX: 171 case AArch64::CBZX: 172 return CBZDisplacementBits; 173 case AArch64::Bcc: 174 return BCCDisplacementBits; 175 } 176 } 177 178 bool AArch64InstrInfo::isBranchOffsetInRange(unsigned BranchOp, 179 int64_t BrOffset) const { 180 unsigned Bits = getBranchDisplacementBits(BranchOp); 181 assert(Bits >= 3 && "max branch displacement must be enough to jump" 182 "over conditional branch expansion"); 183 return isIntN(Bits, BrOffset / 4); 184 } 185 186 MachineBasicBlock * 187 AArch64InstrInfo::getBranchDestBlock(const MachineInstr &MI) const { 188 switch (MI.getOpcode()) { 189 default: 190 llvm_unreachable("unexpected opcode!"); 191 case AArch64::B: 192 return MI.getOperand(0).getMBB(); 193 case 
AArch64::TBZW: 194 case AArch64::TBNZW: 195 case AArch64::TBZX: 196 case AArch64::TBNZX: 197 return MI.getOperand(2).getMBB(); 198 case AArch64::CBZW: 199 case AArch64::CBNZW: 200 case AArch64::CBZX: 201 case AArch64::CBNZX: 202 case AArch64::Bcc: 203 return MI.getOperand(1).getMBB(); 204 } 205 } 206 207 // Branch analysis. 208 bool AArch64InstrInfo::analyzeBranch(MachineBasicBlock &MBB, 209 MachineBasicBlock *&TBB, 210 MachineBasicBlock *&FBB, 211 SmallVectorImpl<MachineOperand> &Cond, 212 bool AllowModify) const { 213 // If the block has no terminators, it just falls into the block after it. 214 MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr(); 215 if (I == MBB.end()) 216 return false; 217 218 if (!isUnpredicatedTerminator(*I)) 219 return false; 220 221 // Get the last instruction in the block. 222 MachineInstr *LastInst = &*I; 223 224 // If there is only one terminator instruction, process it. 225 unsigned LastOpc = LastInst->getOpcode(); 226 if (I == MBB.begin() || !isUnpredicatedTerminator(*--I)) { 227 if (isUncondBranchOpcode(LastOpc)) { 228 TBB = LastInst->getOperand(0).getMBB(); 229 return false; 230 } 231 if (isCondBranchOpcode(LastOpc)) { 232 // Block ends with fall-through condbranch. 233 parseCondBranch(LastInst, TBB, Cond); 234 return false; 235 } 236 return true; // Can't handle indirect branch. 237 } 238 239 // Get the instruction before it if it is a terminator. 240 MachineInstr *SecondLastInst = &*I; 241 unsigned SecondLastOpc = SecondLastInst->getOpcode(); 242 243 // If AllowModify is true and the block ends with two or more unconditional 244 // branches, delete all but the first unconditional branch. 245 if (AllowModify && isUncondBranchOpcode(LastOpc)) { 246 while (isUncondBranchOpcode(SecondLastOpc)) { 247 LastInst->eraseFromParent(); 248 LastInst = SecondLastInst; 249 LastOpc = LastInst->getOpcode(); 250 if (I == MBB.begin() || !isUnpredicatedTerminator(*--I)) { 251 // Return now the only terminator is an unconditional branch. 
252 TBB = LastInst->getOperand(0).getMBB(); 253 return false; 254 } else { 255 SecondLastInst = &*I; 256 SecondLastOpc = SecondLastInst->getOpcode(); 257 } 258 } 259 } 260 261 // If there are three terminators, we don't know what sort of block this is. 262 if (SecondLastInst && I != MBB.begin() && isUnpredicatedTerminator(*--I)) 263 return true; 264 265 // If the block ends with a B and a Bcc, handle it. 266 if (isCondBranchOpcode(SecondLastOpc) && isUncondBranchOpcode(LastOpc)) { 267 parseCondBranch(SecondLastInst, TBB, Cond); 268 FBB = LastInst->getOperand(0).getMBB(); 269 return false; 270 } 271 272 // If the block ends with two unconditional branches, handle it. The second 273 // one is not executed, so remove it. 274 if (isUncondBranchOpcode(SecondLastOpc) && isUncondBranchOpcode(LastOpc)) { 275 TBB = SecondLastInst->getOperand(0).getMBB(); 276 I = LastInst; 277 if (AllowModify) 278 I->eraseFromParent(); 279 return false; 280 } 281 282 // ...likewise if it ends with an indirect branch followed by an unconditional 283 // branch. 284 if (isIndirectBranchOpcode(SecondLastOpc) && isUncondBranchOpcode(LastOpc)) { 285 I = LastInst; 286 if (AllowModify) 287 I->eraseFromParent(); 288 return true; 289 } 290 291 // Otherwise, can't handle this. 
292 return true; 293 } 294 295 bool AArch64InstrInfo::reverseBranchCondition( 296 SmallVectorImpl<MachineOperand> &Cond) const { 297 if (Cond[0].getImm() != -1) { 298 // Regular Bcc 299 AArch64CC::CondCode CC = (AArch64CC::CondCode)(int)Cond[0].getImm(); 300 Cond[0].setImm(AArch64CC::getInvertedCondCode(CC)); 301 } else { 302 // Folded compare-and-branch 303 switch (Cond[1].getImm()) { 304 default: 305 llvm_unreachable("Unknown conditional branch!"); 306 case AArch64::CBZW: 307 Cond[1].setImm(AArch64::CBNZW); 308 break; 309 case AArch64::CBNZW: 310 Cond[1].setImm(AArch64::CBZW); 311 break; 312 case AArch64::CBZX: 313 Cond[1].setImm(AArch64::CBNZX); 314 break; 315 case AArch64::CBNZX: 316 Cond[1].setImm(AArch64::CBZX); 317 break; 318 case AArch64::TBZW: 319 Cond[1].setImm(AArch64::TBNZW); 320 break; 321 case AArch64::TBNZW: 322 Cond[1].setImm(AArch64::TBZW); 323 break; 324 case AArch64::TBZX: 325 Cond[1].setImm(AArch64::TBNZX); 326 break; 327 case AArch64::TBNZX: 328 Cond[1].setImm(AArch64::TBZX); 329 break; 330 } 331 } 332 333 return false; 334 } 335 336 unsigned AArch64InstrInfo::removeBranch(MachineBasicBlock &MBB, 337 int *BytesRemoved) const { 338 MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr(); 339 if (I == MBB.end()) 340 return 0; 341 342 if (!isUncondBranchOpcode(I->getOpcode()) && 343 !isCondBranchOpcode(I->getOpcode())) 344 return 0; 345 346 // Remove the branch. 347 I->eraseFromParent(); 348 349 I = MBB.end(); 350 351 if (I == MBB.begin()) { 352 if (BytesRemoved) 353 *BytesRemoved = 4; 354 return 1; 355 } 356 --I; 357 if (!isCondBranchOpcode(I->getOpcode())) { 358 if (BytesRemoved) 359 *BytesRemoved = 4; 360 return 1; 361 } 362 363 // Remove the branch. 
364 I->eraseFromParent(); 365 if (BytesRemoved) 366 *BytesRemoved = 8; 367 368 return 2; 369 } 370 371 void AArch64InstrInfo::instantiateCondBranch( 372 MachineBasicBlock &MBB, const DebugLoc &DL, MachineBasicBlock *TBB, 373 ArrayRef<MachineOperand> Cond) const { 374 if (Cond[0].getImm() != -1) { 375 // Regular Bcc 376 BuildMI(&MBB, DL, get(AArch64::Bcc)).addImm(Cond[0].getImm()).addMBB(TBB); 377 } else { 378 // Folded compare-and-branch 379 // Note that we use addOperand instead of addReg to keep the flags. 380 const MachineInstrBuilder MIB = 381 BuildMI(&MBB, DL, get(Cond[1].getImm())).add(Cond[2]); 382 if (Cond.size() > 3) 383 MIB.addImm(Cond[3].getImm()); 384 MIB.addMBB(TBB); 385 } 386 } 387 388 unsigned AArch64InstrInfo::insertBranch( 389 MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, 390 ArrayRef<MachineOperand> Cond, const DebugLoc &DL, int *BytesAdded) const { 391 // Shouldn't be a fall through. 392 assert(TBB && "insertBranch must not be told to insert a fallthrough"); 393 394 if (!FBB) { 395 if (Cond.empty()) // Unconditional branch? 396 BuildMI(&MBB, DL, get(AArch64::B)).addMBB(TBB); 397 else 398 instantiateCondBranch(MBB, DL, TBB, Cond); 399 400 if (BytesAdded) 401 *BytesAdded = 4; 402 403 return 1; 404 } 405 406 // Two-way conditional branch. 407 instantiateCondBranch(MBB, DL, TBB, Cond); 408 BuildMI(&MBB, DL, get(AArch64::B)).addMBB(FBB); 409 410 if (BytesAdded) 411 *BytesAdded = 8; 412 413 return 2; 414 } 415 416 // Find the original register that VReg is copied from. 417 static unsigned removeCopies(const MachineRegisterInfo &MRI, unsigned VReg) { 418 while (Register::isVirtualRegister(VReg)) { 419 const MachineInstr *DefMI = MRI.getVRegDef(VReg); 420 if (!DefMI->isFullCopy()) 421 return VReg; 422 VReg = DefMI->getOperand(1).getReg(); 423 } 424 return VReg; 425 } 426 427 // Determine if VReg is defined by an instruction that can be folded into a 428 // csel instruction. 
If so, return the folded opcode, and the replacement 429 // register. 430 static unsigned canFoldIntoCSel(const MachineRegisterInfo &MRI, unsigned VReg, 431 unsigned *NewVReg = nullptr) { 432 VReg = removeCopies(MRI, VReg); 433 if (!Register::isVirtualRegister(VReg)) 434 return 0; 435 436 bool Is64Bit = AArch64::GPR64allRegClass.hasSubClassEq(MRI.getRegClass(VReg)); 437 const MachineInstr *DefMI = MRI.getVRegDef(VReg); 438 unsigned Opc = 0; 439 unsigned SrcOpNum = 0; 440 switch (DefMI->getOpcode()) { 441 case AArch64::ADDSXri: 442 case AArch64::ADDSWri: 443 // if NZCV is used, do not fold. 444 if (DefMI->findRegisterDefOperandIdx(AArch64::NZCV, true) == -1) 445 return 0; 446 // fall-through to ADDXri and ADDWri. 447 LLVM_FALLTHROUGH; 448 case AArch64::ADDXri: 449 case AArch64::ADDWri: 450 // add x, 1 -> csinc. 451 if (!DefMI->getOperand(2).isImm() || DefMI->getOperand(2).getImm() != 1 || 452 DefMI->getOperand(3).getImm() != 0) 453 return 0; 454 SrcOpNum = 1; 455 Opc = Is64Bit ? AArch64::CSINCXr : AArch64::CSINCWr; 456 break; 457 458 case AArch64::ORNXrr: 459 case AArch64::ORNWrr: { 460 // not x -> csinv, represented as orn dst, xzr, src. 461 unsigned ZReg = removeCopies(MRI, DefMI->getOperand(1).getReg()); 462 if (ZReg != AArch64::XZR && ZReg != AArch64::WZR) 463 return 0; 464 SrcOpNum = 2; 465 Opc = Is64Bit ? AArch64::CSINVXr : AArch64::CSINVWr; 466 break; 467 } 468 469 case AArch64::SUBSXrr: 470 case AArch64::SUBSWrr: 471 // if NZCV is used, do not fold. 472 if (DefMI->findRegisterDefOperandIdx(AArch64::NZCV, true) == -1) 473 return 0; 474 // fall-through to SUBXrr and SUBWrr. 475 LLVM_FALLTHROUGH; 476 case AArch64::SUBXrr: 477 case AArch64::SUBWrr: { 478 // neg x -> csneg, represented as sub dst, xzr, src. 479 unsigned ZReg = removeCopies(MRI, DefMI->getOperand(1).getReg()); 480 if (ZReg != AArch64::XZR && ZReg != AArch64::WZR) 481 return 0; 482 SrcOpNum = 2; 483 Opc = Is64Bit ? 
AArch64::CSNEGXr : AArch64::CSNEGWr; 484 break; 485 } 486 default: 487 return 0; 488 } 489 assert(Opc && SrcOpNum && "Missing parameters"); 490 491 if (NewVReg) 492 *NewVReg = DefMI->getOperand(SrcOpNum).getReg(); 493 return Opc; 494 } 495 496 bool AArch64InstrInfo::canInsertSelect(const MachineBasicBlock &MBB, 497 ArrayRef<MachineOperand> Cond, 498 unsigned TrueReg, unsigned FalseReg, 499 int &CondCycles, int &TrueCycles, 500 int &FalseCycles) const { 501 // Check register classes. 502 const MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo(); 503 const TargetRegisterClass *RC = 504 RI.getCommonSubClass(MRI.getRegClass(TrueReg), MRI.getRegClass(FalseReg)); 505 if (!RC) 506 return false; 507 508 // Expanding cbz/tbz requires an extra cycle of latency on the condition. 509 unsigned ExtraCondLat = Cond.size() != 1; 510 511 // GPRs are handled by csel. 512 // FIXME: Fold in x+1, -x, and ~x when applicable. 513 if (AArch64::GPR64allRegClass.hasSubClassEq(RC) || 514 AArch64::GPR32allRegClass.hasSubClassEq(RC)) { 515 // Single-cycle csel, csinc, csinv, and csneg. 516 CondCycles = 1 + ExtraCondLat; 517 TrueCycles = FalseCycles = 1; 518 if (canFoldIntoCSel(MRI, TrueReg)) 519 TrueCycles = 0; 520 else if (canFoldIntoCSel(MRI, FalseReg)) 521 FalseCycles = 0; 522 return true; 523 } 524 525 // Scalar floating point is handled by fcsel. 526 // FIXME: Form fabs, fmin, and fmax when applicable. 527 if (AArch64::FPR64RegClass.hasSubClassEq(RC) || 528 AArch64::FPR32RegClass.hasSubClassEq(RC)) { 529 CondCycles = 5 + ExtraCondLat; 530 TrueCycles = FalseCycles = 2; 531 return true; 532 } 533 534 // Can't do vectors. 
535 return false; 536 } 537 538 void AArch64InstrInfo::insertSelect(MachineBasicBlock &MBB, 539 MachineBasicBlock::iterator I, 540 const DebugLoc &DL, unsigned DstReg, 541 ArrayRef<MachineOperand> Cond, 542 unsigned TrueReg, unsigned FalseReg) const { 543 MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo(); 544 545 // Parse the condition code, see parseCondBranch() above. 546 AArch64CC::CondCode CC; 547 switch (Cond.size()) { 548 default: 549 llvm_unreachable("Unknown condition opcode in Cond"); 550 case 1: // b.cc 551 CC = AArch64CC::CondCode(Cond[0].getImm()); 552 break; 553 case 3: { // cbz/cbnz 554 // We must insert a compare against 0. 555 bool Is64Bit; 556 switch (Cond[1].getImm()) { 557 default: 558 llvm_unreachable("Unknown branch opcode in Cond"); 559 case AArch64::CBZW: 560 Is64Bit = false; 561 CC = AArch64CC::EQ; 562 break; 563 case AArch64::CBZX: 564 Is64Bit = true; 565 CC = AArch64CC::EQ; 566 break; 567 case AArch64::CBNZW: 568 Is64Bit = false; 569 CC = AArch64CC::NE; 570 break; 571 case AArch64::CBNZX: 572 Is64Bit = true; 573 CC = AArch64CC::NE; 574 break; 575 } 576 Register SrcReg = Cond[2].getReg(); 577 if (Is64Bit) { 578 // cmp reg, #0 is actually subs xzr, reg, #0. 579 MRI.constrainRegClass(SrcReg, &AArch64::GPR64spRegClass); 580 BuildMI(MBB, I, DL, get(AArch64::SUBSXri), AArch64::XZR) 581 .addReg(SrcReg) 582 .addImm(0) 583 .addImm(0); 584 } else { 585 MRI.constrainRegClass(SrcReg, &AArch64::GPR32spRegClass); 586 BuildMI(MBB, I, DL, get(AArch64::SUBSWri), AArch64::WZR) 587 .addReg(SrcReg) 588 .addImm(0) 589 .addImm(0); 590 } 591 break; 592 } 593 case 4: { // tbz/tbnz 594 // We must insert a tst instruction. 595 switch (Cond[1].getImm()) { 596 default: 597 llvm_unreachable("Unknown branch opcode in Cond"); 598 case AArch64::TBZW: 599 case AArch64::TBZX: 600 CC = AArch64CC::EQ; 601 break; 602 case AArch64::TBNZW: 603 case AArch64::TBNZX: 604 CC = AArch64CC::NE; 605 break; 606 } 607 // cmp reg, #foo is actually ands xzr, reg, #1<<foo. 
608 if (Cond[1].getImm() == AArch64::TBZW || Cond[1].getImm() == AArch64::TBNZW) 609 BuildMI(MBB, I, DL, get(AArch64::ANDSWri), AArch64::WZR) 610 .addReg(Cond[2].getReg()) 611 .addImm( 612 AArch64_AM::encodeLogicalImmediate(1ull << Cond[3].getImm(), 32)); 613 else 614 BuildMI(MBB, I, DL, get(AArch64::ANDSXri), AArch64::XZR) 615 .addReg(Cond[2].getReg()) 616 .addImm( 617 AArch64_AM::encodeLogicalImmediate(1ull << Cond[3].getImm(), 64)); 618 break; 619 } 620 } 621 622 unsigned Opc = 0; 623 const TargetRegisterClass *RC = nullptr; 624 bool TryFold = false; 625 if (MRI.constrainRegClass(DstReg, &AArch64::GPR64RegClass)) { 626 RC = &AArch64::GPR64RegClass; 627 Opc = AArch64::CSELXr; 628 TryFold = true; 629 } else if (MRI.constrainRegClass(DstReg, &AArch64::GPR32RegClass)) { 630 RC = &AArch64::GPR32RegClass; 631 Opc = AArch64::CSELWr; 632 TryFold = true; 633 } else if (MRI.constrainRegClass(DstReg, &AArch64::FPR64RegClass)) { 634 RC = &AArch64::FPR64RegClass; 635 Opc = AArch64::FCSELDrrr; 636 } else if (MRI.constrainRegClass(DstReg, &AArch64::FPR32RegClass)) { 637 RC = &AArch64::FPR32RegClass; 638 Opc = AArch64::FCSELSrrr; 639 } 640 assert(RC && "Unsupported regclass"); 641 642 // Try folding simple instructions into the csel. 643 if (TryFold) { 644 unsigned NewVReg = 0; 645 unsigned FoldedOpc = canFoldIntoCSel(MRI, TrueReg, &NewVReg); 646 if (FoldedOpc) { 647 // The folded opcodes csinc, csinc and csneg apply the operation to 648 // FalseReg, so we need to invert the condition. 649 CC = AArch64CC::getInvertedCondCode(CC); 650 TrueReg = FalseReg; 651 } else 652 FoldedOpc = canFoldIntoCSel(MRI, FalseReg, &NewVReg); 653 654 // Fold the operation. Leave any dead instructions for DCE to clean up. 655 if (FoldedOpc) { 656 FalseReg = NewVReg; 657 Opc = FoldedOpc; 658 // The extends the live range of NewVReg. 659 MRI.clearKillFlags(NewVReg); 660 } 661 } 662 663 // Pull all virtual register into the appropriate class. 
664 MRI.constrainRegClass(TrueReg, RC); 665 MRI.constrainRegClass(FalseReg, RC); 666 667 // Insert the csel. 668 BuildMI(MBB, I, DL, get(Opc), DstReg) 669 .addReg(TrueReg) 670 .addReg(FalseReg) 671 .addImm(CC); 672 } 673 674 /// Returns true if a MOVi32imm or MOVi64imm can be expanded to an ORRxx. 675 static bool canBeExpandedToORR(const MachineInstr &MI, unsigned BitSize) { 676 uint64_t Imm = MI.getOperand(1).getImm(); 677 uint64_t UImm = Imm << (64 - BitSize) >> (64 - BitSize); 678 uint64_t Encoding; 679 return AArch64_AM::processLogicalImmediate(UImm, BitSize, Encoding); 680 } 681 682 // FIXME: this implementation should be micro-architecture dependent, so a 683 // micro-architecture target hook should be introduced here in future. 684 bool AArch64InstrInfo::isAsCheapAsAMove(const MachineInstr &MI) const { 685 if (!Subtarget.hasCustomCheapAsMoveHandling()) 686 return MI.isAsCheapAsAMove(); 687 688 const unsigned Opcode = MI.getOpcode(); 689 690 // Firstly, check cases gated by features. 691 692 if (Subtarget.hasZeroCycleZeroingFP()) { 693 if (Opcode == AArch64::FMOVH0 || 694 Opcode == AArch64::FMOVS0 || 695 Opcode == AArch64::FMOVD0) 696 return true; 697 } 698 699 if (Subtarget.hasZeroCycleZeroingGP()) { 700 if (Opcode == TargetOpcode::COPY && 701 (MI.getOperand(1).getReg() == AArch64::WZR || 702 MI.getOperand(1).getReg() == AArch64::XZR)) 703 return true; 704 } 705 706 // Secondly, check cases specific to sub-targets. 707 708 if (Subtarget.hasExynosCheapAsMoveHandling()) { 709 if (isExynosCheapAsMove(MI)) 710 return true; 711 712 return MI.isAsCheapAsAMove(); 713 } 714 715 // Finally, check generic cases. 
716 717 switch (Opcode) { 718 default: 719 return false; 720 721 // add/sub on register without shift 722 case AArch64::ADDWri: 723 case AArch64::ADDXri: 724 case AArch64::SUBWri: 725 case AArch64::SUBXri: 726 return (MI.getOperand(3).getImm() == 0); 727 728 // logical ops on immediate 729 case AArch64::ANDWri: 730 case AArch64::ANDXri: 731 case AArch64::EORWri: 732 case AArch64::EORXri: 733 case AArch64::ORRWri: 734 case AArch64::ORRXri: 735 return true; 736 737 // logical ops on register without shift 738 case AArch64::ANDWrr: 739 case AArch64::ANDXrr: 740 case AArch64::BICWrr: 741 case AArch64::BICXrr: 742 case AArch64::EONWrr: 743 case AArch64::EONXrr: 744 case AArch64::EORWrr: 745 case AArch64::EORXrr: 746 case AArch64::ORNWrr: 747 case AArch64::ORNXrr: 748 case AArch64::ORRWrr: 749 case AArch64::ORRXrr: 750 return true; 751 752 // If MOVi32imm or MOVi64imm can be expanded into ORRWri or 753 // ORRXri, it is as cheap as MOV 754 case AArch64::MOVi32imm: 755 return canBeExpandedToORR(MI, 32); 756 case AArch64::MOVi64imm: 757 return canBeExpandedToORR(MI, 64); 758 } 759 760 llvm_unreachable("Unknown opcode to check as cheap as a move!"); 761 } 762 763 bool AArch64InstrInfo::isFalkorShiftExtFast(const MachineInstr &MI) { 764 switch (MI.getOpcode()) { 765 default: 766 return false; 767 768 case AArch64::ADDWrs: 769 case AArch64::ADDXrs: 770 case AArch64::ADDSWrs: 771 case AArch64::ADDSXrs: { 772 unsigned Imm = MI.getOperand(3).getImm(); 773 unsigned ShiftVal = AArch64_AM::getShiftValue(Imm); 774 if (ShiftVal == 0) 775 return true; 776 return AArch64_AM::getShiftType(Imm) == AArch64_AM::LSL && ShiftVal <= 5; 777 } 778 779 case AArch64::ADDWrx: 780 case AArch64::ADDXrx: 781 case AArch64::ADDXrx64: 782 case AArch64::ADDSWrx: 783 case AArch64::ADDSXrx: 784 case AArch64::ADDSXrx64: { 785 unsigned Imm = MI.getOperand(3).getImm(); 786 switch (AArch64_AM::getArithExtendType(Imm)) { 787 default: 788 return false; 789 case AArch64_AM::UXTB: 790 case AArch64_AM::UXTH: 791 
case AArch64_AM::UXTW: 792 case AArch64_AM::UXTX: 793 return AArch64_AM::getArithShiftValue(Imm) <= 4; 794 } 795 } 796 797 case AArch64::SUBWrs: 798 case AArch64::SUBSWrs: { 799 unsigned Imm = MI.getOperand(3).getImm(); 800 unsigned ShiftVal = AArch64_AM::getShiftValue(Imm); 801 return ShiftVal == 0 || 802 (AArch64_AM::getShiftType(Imm) == AArch64_AM::ASR && ShiftVal == 31); 803 } 804 805 case AArch64::SUBXrs: 806 case AArch64::SUBSXrs: { 807 unsigned Imm = MI.getOperand(3).getImm(); 808 unsigned ShiftVal = AArch64_AM::getShiftValue(Imm); 809 return ShiftVal == 0 || 810 (AArch64_AM::getShiftType(Imm) == AArch64_AM::ASR && ShiftVal == 63); 811 } 812 813 case AArch64::SUBWrx: 814 case AArch64::SUBXrx: 815 case AArch64::SUBXrx64: 816 case AArch64::SUBSWrx: 817 case AArch64::SUBSXrx: 818 case AArch64::SUBSXrx64: { 819 unsigned Imm = MI.getOperand(3).getImm(); 820 switch (AArch64_AM::getArithExtendType(Imm)) { 821 default: 822 return false; 823 case AArch64_AM::UXTB: 824 case AArch64_AM::UXTH: 825 case AArch64_AM::UXTW: 826 case AArch64_AM::UXTX: 827 return AArch64_AM::getArithShiftValue(Imm) == 0; 828 } 829 } 830 831 case AArch64::LDRBBroW: 832 case AArch64::LDRBBroX: 833 case AArch64::LDRBroW: 834 case AArch64::LDRBroX: 835 case AArch64::LDRDroW: 836 case AArch64::LDRDroX: 837 case AArch64::LDRHHroW: 838 case AArch64::LDRHHroX: 839 case AArch64::LDRHroW: 840 case AArch64::LDRHroX: 841 case AArch64::LDRQroW: 842 case AArch64::LDRQroX: 843 case AArch64::LDRSBWroW: 844 case AArch64::LDRSBWroX: 845 case AArch64::LDRSBXroW: 846 case AArch64::LDRSBXroX: 847 case AArch64::LDRSHWroW: 848 case AArch64::LDRSHWroX: 849 case AArch64::LDRSHXroW: 850 case AArch64::LDRSHXroX: 851 case AArch64::LDRSWroW: 852 case AArch64::LDRSWroX: 853 case AArch64::LDRSroW: 854 case AArch64::LDRSroX: 855 case AArch64::LDRWroW: 856 case AArch64::LDRWroX: 857 case AArch64::LDRXroW: 858 case AArch64::LDRXroX: 859 case AArch64::PRFMroW: 860 case AArch64::PRFMroX: 861 case AArch64::STRBBroW: 862 case 
AArch64::STRBBroX: 863 case AArch64::STRBroW: 864 case AArch64::STRBroX: 865 case AArch64::STRDroW: 866 case AArch64::STRDroX: 867 case AArch64::STRHHroW: 868 case AArch64::STRHHroX: 869 case AArch64::STRHroW: 870 case AArch64::STRHroX: 871 case AArch64::STRQroW: 872 case AArch64::STRQroX: 873 case AArch64::STRSroW: 874 case AArch64::STRSroX: 875 case AArch64::STRWroW: 876 case AArch64::STRWroX: 877 case AArch64::STRXroW: 878 case AArch64::STRXroX: { 879 unsigned IsSigned = MI.getOperand(3).getImm(); 880 return !IsSigned; 881 } 882 } 883 } 884 885 bool AArch64InstrInfo::isSEHInstruction(const MachineInstr &MI) { 886 unsigned Opc = MI.getOpcode(); 887 switch (Opc) { 888 default: 889 return false; 890 case AArch64::SEH_StackAlloc: 891 case AArch64::SEH_SaveFPLR: 892 case AArch64::SEH_SaveFPLR_X: 893 case AArch64::SEH_SaveReg: 894 case AArch64::SEH_SaveReg_X: 895 case AArch64::SEH_SaveRegP: 896 case AArch64::SEH_SaveRegP_X: 897 case AArch64::SEH_SaveFReg: 898 case AArch64::SEH_SaveFReg_X: 899 case AArch64::SEH_SaveFRegP: 900 case AArch64::SEH_SaveFRegP_X: 901 case AArch64::SEH_SetFP: 902 case AArch64::SEH_AddFP: 903 case AArch64::SEH_Nop: 904 case AArch64::SEH_PrologEnd: 905 case AArch64::SEH_EpilogStart: 906 case AArch64::SEH_EpilogEnd: 907 return true; 908 } 909 } 910 911 bool AArch64InstrInfo::isCoalescableExtInstr(const MachineInstr &MI, 912 unsigned &SrcReg, unsigned &DstReg, 913 unsigned &SubIdx) const { 914 switch (MI.getOpcode()) { 915 default: 916 return false; 917 case AArch64::SBFMXri: // aka sxtw 918 case AArch64::UBFMXri: // aka uxtw 919 // Check for the 32 -> 64 bit extension case, these instructions can do 920 // much more. 921 if (MI.getOperand(2).getImm() != 0 || MI.getOperand(3).getImm() != 31) 922 return false; 923 // This is a signed or unsigned 32 -> 64 bit extension. 
924 SrcReg = MI.getOperand(1).getReg(); 925 DstReg = MI.getOperand(0).getReg(); 926 SubIdx = AArch64::sub_32; 927 return true; 928 } 929 } 930 931 bool AArch64InstrInfo::areMemAccessesTriviallyDisjoint( 932 const MachineInstr &MIa, const MachineInstr &MIb, AliasAnalysis *AA) const { 933 const TargetRegisterInfo *TRI = &getRegisterInfo(); 934 const MachineOperand *BaseOpA = nullptr, *BaseOpB = nullptr; 935 int64_t OffsetA = 0, OffsetB = 0; 936 unsigned WidthA = 0, WidthB = 0; 937 938 assert(MIa.mayLoadOrStore() && "MIa must be a load or store."); 939 assert(MIb.mayLoadOrStore() && "MIb must be a load or store."); 940 941 if (MIa.hasUnmodeledSideEffects() || MIb.hasUnmodeledSideEffects() || 942 MIa.hasOrderedMemoryRef() || MIb.hasOrderedMemoryRef()) 943 return false; 944 945 // Retrieve the base, offset from the base and width. Width 946 // is the size of memory that is being loaded/stored (e.g. 1, 2, 4, 8). If 947 // base are identical, and the offset of a lower memory access + 948 // the width doesn't overlap the offset of a higher memory access, 949 // then the memory accesses are different. 950 if (getMemOperandWithOffsetWidth(MIa, BaseOpA, OffsetA, WidthA, TRI) && 951 getMemOperandWithOffsetWidth(MIb, BaseOpB, OffsetB, WidthB, TRI)) { 952 if (BaseOpA->isIdenticalTo(*BaseOpB)) { 953 int LowOffset = OffsetA < OffsetB ? OffsetA : OffsetB; 954 int HighOffset = OffsetA < OffsetB ? OffsetB : OffsetA; 955 int LowWidth = (LowOffset == OffsetA) ? WidthA : WidthB; 956 if (LowOffset + LowWidth <= HighOffset) 957 return true; 958 } 959 } 960 return false; 961 } 962 963 bool AArch64InstrInfo::isSchedulingBoundary(const MachineInstr &MI, 964 const MachineBasicBlock *MBB, 965 const MachineFunction &MF) const { 966 if (TargetInstrInfo::isSchedulingBoundary(MI, MBB, MF)) 967 return true; 968 switch (MI.getOpcode()) { 969 case AArch64::HINT: 970 // CSDB hints are scheduling barriers. 
971 if (MI.getOperand(0).getImm() == 0x14) 972 return true; 973 break; 974 case AArch64::DSB: 975 case AArch64::ISB: 976 // DSB and ISB also are scheduling barriers. 977 return true; 978 default:; 979 } 980 return isSEHInstruction(MI); 981 } 982 983 /// analyzeCompare - For a comparison instruction, return the source registers 984 /// in SrcReg and SrcReg2, and the value it compares against in CmpValue. 985 /// Return true if the comparison instruction can be analyzed. 986 bool AArch64InstrInfo::analyzeCompare(const MachineInstr &MI, unsigned &SrcReg, 987 unsigned &SrcReg2, int &CmpMask, 988 int &CmpValue) const { 989 // The first operand can be a frame index where we'd normally expect a 990 // register. 991 assert(MI.getNumOperands() >= 2 && "All AArch64 cmps should have 2 operands"); 992 if (!MI.getOperand(1).isReg()) 993 return false; 994 995 switch (MI.getOpcode()) { 996 default: 997 break; 998 case AArch64::SUBSWrr: 999 case AArch64::SUBSWrs: 1000 case AArch64::SUBSWrx: 1001 case AArch64::SUBSXrr: 1002 case AArch64::SUBSXrs: 1003 case AArch64::SUBSXrx: 1004 case AArch64::ADDSWrr: 1005 case AArch64::ADDSWrs: 1006 case AArch64::ADDSWrx: 1007 case AArch64::ADDSXrr: 1008 case AArch64::ADDSXrs: 1009 case AArch64::ADDSXrx: 1010 // Replace SUBSWrr with SUBWrr if NZCV is not used. 1011 SrcReg = MI.getOperand(1).getReg(); 1012 SrcReg2 = MI.getOperand(2).getReg(); 1013 CmpMask = ~0; 1014 CmpValue = 0; 1015 return true; 1016 case AArch64::SUBSWri: 1017 case AArch64::ADDSWri: 1018 case AArch64::SUBSXri: 1019 case AArch64::ADDSXri: 1020 SrcReg = MI.getOperand(1).getReg(); 1021 SrcReg2 = 0; 1022 CmpMask = ~0; 1023 // FIXME: In order to convert CmpValue to 0 or 1 1024 CmpValue = MI.getOperand(2).getImm() != 0; 1025 return true; 1026 case AArch64::ANDSWri: 1027 case AArch64::ANDSXri: 1028 // ANDS does not use the same encoding scheme as the others xxxS 1029 // instructions. 
1030 SrcReg = MI.getOperand(1).getReg(); 1031 SrcReg2 = 0; 1032 CmpMask = ~0; 1033 // FIXME:The return val type of decodeLogicalImmediate is uint64_t, 1034 // while the type of CmpValue is int. When converting uint64_t to int, 1035 // the high 32 bits of uint64_t will be lost. 1036 // In fact it causes a bug in spec2006-483.xalancbmk 1037 // CmpValue is only used to compare with zero in OptimizeCompareInstr 1038 CmpValue = AArch64_AM::decodeLogicalImmediate( 1039 MI.getOperand(2).getImm(), 1040 MI.getOpcode() == AArch64::ANDSWri ? 32 : 64) != 0; 1041 return true; 1042 } 1043 1044 return false; 1045 } 1046 1047 static bool UpdateOperandRegClass(MachineInstr &Instr) { 1048 MachineBasicBlock *MBB = Instr.getParent(); 1049 assert(MBB && "Can't get MachineBasicBlock here"); 1050 MachineFunction *MF = MBB->getParent(); 1051 assert(MF && "Can't get MachineFunction here"); 1052 const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo(); 1053 const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo(); 1054 MachineRegisterInfo *MRI = &MF->getRegInfo(); 1055 1056 for (unsigned OpIdx = 0, EndIdx = Instr.getNumOperands(); OpIdx < EndIdx; 1057 ++OpIdx) { 1058 MachineOperand &MO = Instr.getOperand(OpIdx); 1059 const TargetRegisterClass *OpRegCstraints = 1060 Instr.getRegClassConstraint(OpIdx, TII, TRI); 1061 1062 // If there's no constraint, there's nothing to do. 1063 if (!OpRegCstraints) 1064 continue; 1065 // If the operand is a frame index, there's nothing to do here. 1066 // A frame index operand will resolve correctly during PEI. 
1067 if (MO.isFI()) 1068 continue; 1069 1070 assert(MO.isReg() && 1071 "Operand has register constraints without being a register!"); 1072 1073 Register Reg = MO.getReg(); 1074 if (Register::isPhysicalRegister(Reg)) { 1075 if (!OpRegCstraints->contains(Reg)) 1076 return false; 1077 } else if (!OpRegCstraints->hasSubClassEq(MRI->getRegClass(Reg)) && 1078 !MRI->constrainRegClass(Reg, OpRegCstraints)) 1079 return false; 1080 } 1081 1082 return true; 1083 } 1084 1085 /// Return the opcode that does not set flags when possible - otherwise 1086 /// return the original opcode. The caller is responsible to do the actual 1087 /// substitution and legality checking. 1088 static unsigned convertToNonFlagSettingOpc(const MachineInstr &MI) { 1089 // Don't convert all compare instructions, because for some the zero register 1090 // encoding becomes the sp register. 1091 bool MIDefinesZeroReg = false; 1092 if (MI.definesRegister(AArch64::WZR) || MI.definesRegister(AArch64::XZR)) 1093 MIDefinesZeroReg = true; 1094 1095 switch (MI.getOpcode()) { 1096 default: 1097 return MI.getOpcode(); 1098 case AArch64::ADDSWrr: 1099 return AArch64::ADDWrr; 1100 case AArch64::ADDSWri: 1101 return MIDefinesZeroReg ? AArch64::ADDSWri : AArch64::ADDWri; 1102 case AArch64::ADDSWrs: 1103 return MIDefinesZeroReg ? AArch64::ADDSWrs : AArch64::ADDWrs; 1104 case AArch64::ADDSWrx: 1105 return AArch64::ADDWrx; 1106 case AArch64::ADDSXrr: 1107 return AArch64::ADDXrr; 1108 case AArch64::ADDSXri: 1109 return MIDefinesZeroReg ? AArch64::ADDSXri : AArch64::ADDXri; 1110 case AArch64::ADDSXrs: 1111 return MIDefinesZeroReg ? AArch64::ADDSXrs : AArch64::ADDXrs; 1112 case AArch64::ADDSXrx: 1113 return AArch64::ADDXrx; 1114 case AArch64::SUBSWrr: 1115 return AArch64::SUBWrr; 1116 case AArch64::SUBSWri: 1117 return MIDefinesZeroReg ? AArch64::SUBSWri : AArch64::SUBWri; 1118 case AArch64::SUBSWrs: 1119 return MIDefinesZeroReg ? 
AArch64::SUBSWrs : AArch64::SUBWrs; 1120 case AArch64::SUBSWrx: 1121 return AArch64::SUBWrx; 1122 case AArch64::SUBSXrr: 1123 return AArch64::SUBXrr; 1124 case AArch64::SUBSXri: 1125 return MIDefinesZeroReg ? AArch64::SUBSXri : AArch64::SUBXri; 1126 case AArch64::SUBSXrs: 1127 return MIDefinesZeroReg ? AArch64::SUBSXrs : AArch64::SUBXrs; 1128 case AArch64::SUBSXrx: 1129 return AArch64::SUBXrx; 1130 } 1131 } 1132 1133 enum AccessKind { AK_Write = 0x01, AK_Read = 0x10, AK_All = 0x11 }; 1134 1135 /// True when condition flags are accessed (either by writing or reading) 1136 /// on the instruction trace starting at From and ending at To. 1137 /// 1138 /// Note: If From and To are from different blocks it's assumed CC are accessed 1139 /// on the path. 1140 static bool areCFlagsAccessedBetweenInstrs( 1141 MachineBasicBlock::iterator From, MachineBasicBlock::iterator To, 1142 const TargetRegisterInfo *TRI, const AccessKind AccessToCheck = AK_All) { 1143 // Early exit if To is at the beginning of the BB. 1144 if (To == To->getParent()->begin()) 1145 return true; 1146 1147 // Check whether the instructions are in the same basic block 1148 // If not, assume the condition flags might get modified somewhere. 1149 if (To->getParent() != From->getParent()) 1150 return true; 1151 1152 // From must be above To. 1153 assert(std::find_if(++To.getReverse(), To->getParent()->rend(), 1154 [From](MachineInstr &MI) { 1155 return MI.getIterator() == From; 1156 }) != To->getParent()->rend()); 1157 1158 // We iterate backward starting \p To until we hit \p From. 1159 for (--To; To != From; --To) { 1160 const MachineInstr &Instr = *To; 1161 1162 if (((AccessToCheck & AK_Write) && 1163 Instr.modifiesRegister(AArch64::NZCV, TRI)) || 1164 ((AccessToCheck & AK_Read) && Instr.readsRegister(AArch64::NZCV, TRI))) 1165 return true; 1166 } 1167 return false; 1168 } 1169 1170 /// Try to optimize a compare instruction. A compare instruction is an 1171 /// instruction which produces AArch64::NZCV. 
It can be truly compare 1172 /// instruction 1173 /// when there are no uses of its destination register. 1174 /// 1175 /// The following steps are tried in order: 1176 /// 1. Convert CmpInstr into an unconditional version. 1177 /// 2. Remove CmpInstr if above there is an instruction producing a needed 1178 /// condition code or an instruction which can be converted into such an 1179 /// instruction. 1180 /// Only comparison with zero is supported. 1181 bool AArch64InstrInfo::optimizeCompareInstr( 1182 MachineInstr &CmpInstr, unsigned SrcReg, unsigned SrcReg2, int CmpMask, 1183 int CmpValue, const MachineRegisterInfo *MRI) const { 1184 assert(CmpInstr.getParent()); 1185 assert(MRI); 1186 1187 // Replace SUBSWrr with SUBWrr if NZCV is not used. 1188 int DeadNZCVIdx = CmpInstr.findRegisterDefOperandIdx(AArch64::NZCV, true); 1189 if (DeadNZCVIdx != -1) { 1190 if (CmpInstr.definesRegister(AArch64::WZR) || 1191 CmpInstr.definesRegister(AArch64::XZR)) { 1192 CmpInstr.eraseFromParent(); 1193 return true; 1194 } 1195 unsigned Opc = CmpInstr.getOpcode(); 1196 unsigned NewOpc = convertToNonFlagSettingOpc(CmpInstr); 1197 if (NewOpc == Opc) 1198 return false; 1199 const MCInstrDesc &MCID = get(NewOpc); 1200 CmpInstr.setDesc(MCID); 1201 CmpInstr.RemoveOperand(DeadNZCVIdx); 1202 bool succeeded = UpdateOperandRegClass(CmpInstr); 1203 (void)succeeded; 1204 assert(succeeded && "Some operands reg class are incompatible!"); 1205 return true; 1206 } 1207 1208 // Continue only if we have a "ri" where immediate is zero. 1209 // FIXME:CmpValue has already been converted to 0 or 1 in analyzeCompare 1210 // function. 1211 assert((CmpValue == 0 || CmpValue == 1) && "CmpValue must be 0 or 1!"); 1212 if (CmpValue != 0 || SrcReg2 != 0) 1213 return false; 1214 1215 // CmpInstr is a Compare instruction if destination register is not used. 
1216 if (!MRI->use_nodbg_empty(CmpInstr.getOperand(0).getReg())) 1217 return false; 1218 1219 return substituteCmpToZero(CmpInstr, SrcReg, MRI); 1220 } 1221 1222 /// Get opcode of S version of Instr. 1223 /// If Instr is S version its opcode is returned. 1224 /// AArch64::INSTRUCTION_LIST_END is returned if Instr does not have S version 1225 /// or we are not interested in it. 1226 static unsigned sForm(MachineInstr &Instr) { 1227 switch (Instr.getOpcode()) { 1228 default: 1229 return AArch64::INSTRUCTION_LIST_END; 1230 1231 case AArch64::ADDSWrr: 1232 case AArch64::ADDSWri: 1233 case AArch64::ADDSXrr: 1234 case AArch64::ADDSXri: 1235 case AArch64::SUBSWrr: 1236 case AArch64::SUBSWri: 1237 case AArch64::SUBSXrr: 1238 case AArch64::SUBSXri: 1239 return Instr.getOpcode(); 1240 1241 case AArch64::ADDWrr: 1242 return AArch64::ADDSWrr; 1243 case AArch64::ADDWri: 1244 return AArch64::ADDSWri; 1245 case AArch64::ADDXrr: 1246 return AArch64::ADDSXrr; 1247 case AArch64::ADDXri: 1248 return AArch64::ADDSXri; 1249 case AArch64::ADCWr: 1250 return AArch64::ADCSWr; 1251 case AArch64::ADCXr: 1252 return AArch64::ADCSXr; 1253 case AArch64::SUBWrr: 1254 return AArch64::SUBSWrr; 1255 case AArch64::SUBWri: 1256 return AArch64::SUBSWri; 1257 case AArch64::SUBXrr: 1258 return AArch64::SUBSXrr; 1259 case AArch64::SUBXri: 1260 return AArch64::SUBSXri; 1261 case AArch64::SBCWr: 1262 return AArch64::SBCSWr; 1263 case AArch64::SBCXr: 1264 return AArch64::SBCSXr; 1265 case AArch64::ANDWri: 1266 return AArch64::ANDSWri; 1267 case AArch64::ANDXri: 1268 return AArch64::ANDSXri; 1269 } 1270 } 1271 1272 /// Check if AArch64::NZCV should be alive in successors of MBB. 
static bool areCFlagsAliveInSuccessors(MachineBasicBlock *MBB) {
  // NZCV counts as alive as soon as any one successor lists it as a live-in.
  for (auto *BB : MBB->successors())
    if (BB->isLiveIn(AArch64::NZCV))
      return true;
  return false;
}

namespace {

/// One boolean per NZCV condition flag; a set member means "this flag is
/// read". All flags start out unused.
struct UsedNZCV {
  bool N = false;
  bool Z = false;
  bool C = false;
  bool V = false;

  UsedNZCV() = default;

  /// Accumulate: a flag is used afterwards if it was used in either operand.
  UsedNZCV &operator|=(const UsedNZCV &UsedFlags) {
    this->N |= UsedFlags.N;
    this->Z |= UsedFlags.Z;
    this->C |= UsedFlags.C;
    this->V |= UsedFlags.V;
    return *this;
  }
};

} // end anonymous namespace

/// Find a condition code used by the instruction.
/// Returns AArch64CC::Invalid if either the instruction does not use condition
/// codes or we don't optimize CmpInstr in the presence of such instructions.
///
/// The condition code is read from an immediate operand located at a fixed
/// distance before the instruction's NZCV use operand.
static AArch64CC::CondCode findCondCodeUsedByInstr(const MachineInstr &Instr) {
  switch (Instr.getOpcode()) {
  default:
    return AArch64CC::Invalid;

  case AArch64::Bcc: {
    // For Bcc the condition immediate sits two operands before the NZCV use.
    int Idx = Instr.findRegisterUseOperandIdx(AArch64::NZCV);
    assert(Idx >= 2);
    return static_cast<AArch64CC::CondCode>(Instr.getOperand(Idx - 2).getImm());
  }

  case AArch64::CSINVWr:
  case AArch64::CSINVXr:
  case AArch64::CSINCWr:
  case AArch64::CSINCXr:
  case AArch64::CSELWr:
  case AArch64::CSELXr:
  case AArch64::CSNEGWr:
  case AArch64::CSNEGXr:
  case AArch64::FCSELSrrr:
  case AArch64::FCSELDrrr: {
    // For the conditional-select family the condition immediate is the
    // operand immediately before the NZCV use.
    int Idx = Instr.findRegisterUseOperandIdx(AArch64::NZCV);
    assert(Idx >= 1);
    return static_cast<AArch64CC::CondCode>(Instr.getOperand(Idx - 1).getImm());
  }
  }
}

/// Translate a condition code into the set of NZCV flags it reads.
///
/// \p CC must not be AArch64CC::Invalid. Condition codes not listed in the
/// switch report no flags as used.
static UsedNZCV getUsedNZCV(AArch64CC::CondCode CC) {
  assert(CC != AArch64CC::Invalid);
  UsedNZCV UsedFlags;
  switch (CC) {
  default:
    break;

  case AArch64CC::EQ: // Z set
  case AArch64CC::NE: // Z clear
    UsedFlags.Z = true;
    break;

  case AArch64CC::HI: // Z clear and C set
  case AArch64CC::LS: // Z set or C clear
    UsedFlags.Z = true;
    // HI/LS read C as well, so share the C handling below.
    LLVM_FALLTHROUGH;
  case AArch64CC::HS: // C set
  case AArch64CC::LO: // C clear
    UsedFlags.C = true;
    break;

  case AArch64CC::MI: // N set
  case AArch64CC::PL: // N clear
    UsedFlags.N = true;
    break;

  case AArch64CC::VS: // V set
  case AArch64CC::VC: // V clear
    UsedFlags.V = true;
    break;

  case AArch64CC::GT: // Z clear, N and V the same
  case AArch64CC::LE: // Z set, N and V differ
    UsedFlags.Z = true;
    // GT/LE read N and V as well, so share the N/V handling below.
    LLVM_FALLTHROUGH;
  case AArch64CC::GE: // N and V the same
  case AArch64CC::LT: // N and V differ
    UsedFlags.N = true;
    UsedFlags.V = true;
    break;
  }
  return UsedFlags;
}

/// True for the flag-setting add-immediate opcodes (ADDSWri / ADDSXri).
static bool isADDSRegImm(unsigned Opcode) {
  return Opcode == AArch64::ADDSWri || Opcode == AArch64::ADDSXri;
}

/// True for the flag-setting subtract-immediate opcodes (SUBSWri / SUBSXri).
static bool isSUBSRegImm(unsigned Opcode) {
  return Opcode == AArch64::SUBSWri || Opcode == AArch64::SUBSXri;
}

/// Check if CmpInstr can be substituted by MI.
///
/// CmpInstr can be substituted:
/// - CmpInstr is either 'ADDS %vreg, 0' or 'SUBS %vreg, 0'
/// - and, MI and CmpInstr are from the same MachineBB
/// - and, condition flags are not alive in successors of the CmpInstr parent
/// - and, if MI opcode is the S form there must be no defs of flags between
///        MI and CmpInstr
///        or if MI opcode is not the S form there must be neither defs of flags
///        nor uses of flags between MI and CmpInstr.
/// - and  C/V flags are not used after CmpInstr
///
/// Note that this function only checks the opcode of CmpInstr; it does not
/// itself inspect the immediate operand.
static bool canInstrSubstituteCmpInstr(MachineInstr *MI, MachineInstr *CmpInstr,
                                       const TargetRegisterInfo *TRI) {
  assert(MI);
  assert(sForm(*MI) != AArch64::INSTRUCTION_LIST_END);
  assert(CmpInstr);

  const unsigned CmpOpcode = CmpInstr->getOpcode();
  if (!isADDSRegImm(CmpOpcode) && !isSUBSRegImm(CmpOpcode))
    return false;

  if (MI->getParent() != CmpInstr->getParent())
    return false;

  if (areCFlagsAliveInSuccessors(CmpInstr->getParent()))
    return false;

  // If MI is already the S form only intervening flag writes matter; if it
  // still has to be converted, intervening flag reads matter too.
  AccessKind AccessToCheck = AK_Write;
  if (sForm(*MI) != MI->getOpcode())
    AccessToCheck = AK_All;
  if (areCFlagsAccessedBetweenInstrs(MI, CmpInstr, TRI, AccessToCheck))
    return false;

  // Collect every flag read after CmpInstr, up to and including the first
  // instruction that redefines NZCV (its own read is recorded before the
  // scan stops).
  UsedNZCV NZCVUsedAfterCmp;
  for (auto I = std::next(CmpInstr->getIterator()),
            E = CmpInstr->getParent()->instr_end();
       I != E; ++I) {
    const MachineInstr &Instr = *I;
    if (Instr.readsRegister(AArch64::NZCV, TRI)) {
      AArch64CC::CondCode CC = findCondCodeUsedByInstr(Instr);
      if (CC == AArch64CC::Invalid) // Unsupported conditional instruction
        return false;
      NZCVUsedAfterCmp |= getUsedNZCV(CC);
    }

    if (Instr.modifiesRegister(AArch64::NZCV, TRI))
      break;
  }

  return !NZCVUsedAfterCmp.C && !NZCVUsedAfterCmp.V;
}

/// Substitute an instruction comparing to zero with another instruction
/// which produces needed condition flags.
///
/// The unique definition of SrcReg is switched to its flag-setting opcode,
/// CmpInstr is erased, and NZCV is added as a def of the rewritten
/// instruction.
///
/// Return true on success.
bool AArch64InstrInfo::substituteCmpToZero(
    MachineInstr &CmpInstr, unsigned SrcReg,
    const MachineRegisterInfo *MRI) const {
  assert(MRI);
  // Get the unique definition of SrcReg.
  MachineInstr *MI = MRI->getUniqueVRegDef(SrcReg);
  if (!MI)
    return false;

  const TargetRegisterInfo *TRI = &getRegisterInfo();

  unsigned NewOpc = sForm(*MI);
  if (NewOpc == AArch64::INSTRUCTION_LIST_END)
    return false;

  if (!canInstrSubstituteCmpInstr(MI, &CmpInstr, TRI))
    return false;

  // Update the instruction to set NZCV.
  MI->setDesc(get(NewOpc));
  CmpInstr.eraseFromParent();
  bool succeeded = UpdateOperandRegClass(*MI);
  (void)succeeded;
  assert(succeeded && "Some operands reg class are incompatible!");
  MI->addRegisterDefined(AArch64::NZCV, TRI);
  return true;
}

/// Expand the post-RA pseudos handled here: LOAD_STACK_GUARD and CATCHRET.
///
/// Returns false (and changes nothing) for any other opcode. For CATCHRET,
/// instructions materialising the target block's address in X0 are inserted
/// and the pseudo itself is left in place. For LOAD_STACK_GUARD, an
/// address-materialisation sequence chosen by the global's reference
/// classification and the code model is emitted, and the pseudo is erased.
bool AArch64InstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
  if (MI.getOpcode() != TargetOpcode::LOAD_STACK_GUARD &&
      MI.getOpcode() != AArch64::CATCHRET)
    return false;

  MachineBasicBlock &MBB = *MI.getParent();
  DebugLoc DL = MI.getDebugLoc();

  if (MI.getOpcode() == AArch64::CATCHRET) {
    // Skip to the first instruction before the epilog.
    const TargetInstrInfo *TII =
        MBB.getParent()->getSubtarget().getInstrInfo();
    MachineBasicBlock *TargetMBB = MI.getOperand(0).getMBB();
    auto MBBI = MachineBasicBlock::iterator(MI);
    MachineBasicBlock::iterator FirstEpilogSEH = std::prev(MBBI);
    // Walk backwards over the run of FrameDestroy instructions preceding MI.
    while (FirstEpilogSEH->getFlag(MachineInstr::FrameDestroy) &&
           FirstEpilogSEH != MBB.begin())
      FirstEpilogSEH = std::prev(FirstEpilogSEH);
    // Unless the walk stopped at the block start, step forward one
    // instruction to obtain the insertion point.
    if (FirstEpilogSEH != MBB.begin())
      FirstEpilogSEH = std::next(FirstEpilogSEH);
    // ADRP + ADD pair that puts the address of TargetMBB into X0.
    BuildMI(MBB, FirstEpilogSEH, DL, TII->get(AArch64::ADRP))
        .addReg(AArch64::X0, RegState::Define)
        .addMBB(TargetMBB);
    BuildMI(MBB, FirstEpilogSEH, DL, TII->get(AArch64::ADDXri))
        .addReg(AArch64::X0, RegState::Define)
        .addReg(AArch64::X0)
        .addMBB(TargetMBB)
        .addImm(0);
    return true;
  }

  // LOAD_STACK_GUARD: the global to load comes from the instruction's first
  // memory operand; the destination register is operand 0.
  Register Reg = MI.getOperand(0).getReg();
  const GlobalValue *GV =
      cast<GlobalValue>((*MI.memoperands_begin())->getValue());
  const TargetMachine &TM = MBB.getParent()->getTarget();
  unsigned OpFlags = Subtarget.ClassifyGlobalReference(GV, TM);
  const unsigned char MO_NC = AArch64II::MO_NC;

  if ((OpFlags & AArch64II::MO_GOT) != 0) {
    // GOT reference: LOADgot for the global, then a load through the result.
    BuildMI(MBB, MI, DL, get(AArch64::LOADgot), Reg)
        .addGlobalAddress(GV, 0, OpFlags);
    BuildMI(MBB, MI, DL, get(AArch64::LDRXui), Reg)
        .addReg(Reg, RegState::Kill)
        .addImm(0)
        .addMemOperand(*MI.memoperands_begin());
  } else if (TM.getCodeModel() == CodeModel::Large) {
    // Large code model: assemble the address with MOVZ + three MOVKs, using
    // the G0..G3 address fragments at shifts 0/16/32/48, then load through
    // it.
    BuildMI(MBB, MI, DL, get(AArch64::MOVZXi), Reg)
        .addGlobalAddress(GV, 0, AArch64II::MO_G0 | MO_NC)
        .addImm(0);
    BuildMI(MBB, MI, DL, get(AArch64::MOVKXi), Reg)
        .addReg(Reg, RegState::Kill)
        .addGlobalAddress(GV, 0, AArch64II::MO_G1 | MO_NC)
        .addImm(16);
    BuildMI(MBB, MI, DL, get(AArch64::MOVKXi), Reg)
        .addReg(Reg, RegState::Kill)
        .addGlobalAddress(GV, 0, AArch64II::MO_G2 | MO_NC)
        .addImm(32);
    BuildMI(MBB, MI, DL, get(AArch64::MOVKXi), Reg)
        .addReg(Reg, RegState::Kill)
        .addGlobalAddress(GV, 0, AArch64II::MO_G3)
        .addImm(48);
    BuildMI(MBB, MI, DL, get(AArch64::LDRXui), Reg)
        .addReg(Reg, RegState::Kill)
        .addImm(0)
        .addMemOperand(*MI.memoperands_begin());
  } else if (TM.getCodeModel() == CodeModel::Tiny) {
    // Tiny code model: a single ADR of the global is emitted here.
    BuildMI(MBB, MI, DL, get(AArch64::ADR), Reg)
        .addGlobalAddress(GV, 0, OpFlags);
  } else {
    // Otherwise: ADRP for the page, then a load using the page offset.
    BuildMI(MBB, MI, DL, get(AArch64::ADRP), Reg)
        .addGlobalAddress(GV, 0, OpFlags | AArch64II::MO_PAGE);
    unsigned char LoFlags = OpFlags | AArch64II::MO_PAGEOFF | MO_NC;
    BuildMI(MBB, MI, DL, get(AArch64::LDRXui), Reg)
        .addReg(Reg, RegState::Kill)
        .addGlobalAddress(GV, 0, LoFlags)
        .addMemOperand(*MI.memoperands_begin());
  }

  MBB.erase(MI);

  return true;
}

// Return true if this instruction simply sets its single destination register
// to zero. This is equivalent to a register rename of the zero-register.
bool AArch64InstrInfo::isGPRZero(const MachineInstr &MI) {
  switch (MI.getOpcode()) {
  default:
    break;
  case AArch64::MOVZWi:
  case AArch64::MOVZXi: // movz Rd, #0 (LSL #0)
    // Operand 1 is only read as an immediate after checking that it is one.
    if (MI.getOperand(1).isImm() && MI.getOperand(1).getImm() == 0) {
      assert(MI.getDesc().getNumOperands() == 3 &&
             MI.getOperand(2).getImm() == 0 && "invalid MOVZi operands");
      return true;
    }
    break;
  case AArch64::ANDWri: // and Rd, Rzr, #imm
    return MI.getOperand(1).getReg() == AArch64::WZR;
  case AArch64::ANDXri:
    return MI.getOperand(1).getReg() == AArch64::XZR;
  case TargetOpcode::COPY:
    // NOTE(review): only a WZR source is recognised for COPY; an XZR source
    // is not treated as a zeroing copy here - confirm this is intentional.
    return MI.getOperand(1).getReg() == AArch64::WZR;
  }
  return false;
}

// Return true if this instruction simply renames a general register without
// modifying bits.
1577 bool AArch64InstrInfo::isGPRCopy(const MachineInstr &MI) { 1578 switch (MI.getOpcode()) { 1579 default: 1580 break; 1581 case TargetOpcode::COPY: { 1582 // GPR32 copies will by lowered to ORRXrs 1583 Register DstReg = MI.getOperand(0).getReg(); 1584 return (AArch64::GPR32RegClass.contains(DstReg) || 1585 AArch64::GPR64RegClass.contains(DstReg)); 1586 } 1587 case AArch64::ORRXrs: // orr Xd, Xzr, Xm (LSL #0) 1588 if (MI.getOperand(1).getReg() == AArch64::XZR) { 1589 assert(MI.getDesc().getNumOperands() == 4 && 1590 MI.getOperand(3).getImm() == 0 && "invalid ORRrs operands"); 1591 return true; 1592 } 1593 break; 1594 case AArch64::ADDXri: // add Xd, Xn, #0 (LSL #0) 1595 if (MI.getOperand(2).getImm() == 0) { 1596 assert(MI.getDesc().getNumOperands() == 4 && 1597 MI.getOperand(3).getImm() == 0 && "invalid ADDXri operands"); 1598 return true; 1599 } 1600 break; 1601 } 1602 return false; 1603 } 1604 1605 // Return true if this instruction simply renames a general register without 1606 // modifying bits. 
1607 bool AArch64InstrInfo::isFPRCopy(const MachineInstr &MI) { 1608 switch (MI.getOpcode()) { 1609 default: 1610 break; 1611 case TargetOpcode::COPY: { 1612 // FPR64 copies will by lowered to ORR.16b 1613 Register DstReg = MI.getOperand(0).getReg(); 1614 return (AArch64::FPR64RegClass.contains(DstReg) || 1615 AArch64::FPR128RegClass.contains(DstReg)); 1616 } 1617 case AArch64::ORRv16i8: 1618 if (MI.getOperand(1).getReg() == MI.getOperand(2).getReg()) { 1619 assert(MI.getDesc().getNumOperands() == 3 && MI.getOperand(0).isReg() && 1620 "invalid ORRv16i8 operands"); 1621 return true; 1622 } 1623 break; 1624 } 1625 return false; 1626 } 1627 1628 unsigned AArch64InstrInfo::isLoadFromStackSlot(const MachineInstr &MI, 1629 int &FrameIndex) const { 1630 switch (MI.getOpcode()) { 1631 default: 1632 break; 1633 case AArch64::LDRWui: 1634 case AArch64::LDRXui: 1635 case AArch64::LDRBui: 1636 case AArch64::LDRHui: 1637 case AArch64::LDRSui: 1638 case AArch64::LDRDui: 1639 case AArch64::LDRQui: 1640 if (MI.getOperand(0).getSubReg() == 0 && MI.getOperand(1).isFI() && 1641 MI.getOperand(2).isImm() && MI.getOperand(2).getImm() == 0) { 1642 FrameIndex = MI.getOperand(1).getIndex(); 1643 return MI.getOperand(0).getReg(); 1644 } 1645 break; 1646 } 1647 1648 return 0; 1649 } 1650 1651 unsigned AArch64InstrInfo::isStoreToStackSlot(const MachineInstr &MI, 1652 int &FrameIndex) const { 1653 switch (MI.getOpcode()) { 1654 default: 1655 break; 1656 case AArch64::STRWui: 1657 case AArch64::STRXui: 1658 case AArch64::STRBui: 1659 case AArch64::STRHui: 1660 case AArch64::STRSui: 1661 case AArch64::STRDui: 1662 case AArch64::STRQui: 1663 if (MI.getOperand(0).getSubReg() == 0 && MI.getOperand(1).isFI() && 1664 MI.getOperand(2).isImm() && MI.getOperand(2).getImm() == 0) { 1665 FrameIndex = MI.getOperand(1).getIndex(); 1666 return MI.getOperand(0).getReg(); 1667 } 1668 break; 1669 } 1670 return 0; 1671 } 1672 1673 /// Check all MachineMemOperands for a hint to suppress pairing. 
bool AArch64InstrInfo::isLdStPairSuppressed(const MachineInstr &MI) {
  // True as soon as any memory operand carries the MOSuppressPair flag.
  return llvm::any_of(MI.memoperands(), [](MachineMemOperand *MMO) {
    return MMO->getFlags() & MOSuppressPair;
  });
}

/// Set a flag on the first MachineMemOperand to suppress pairing.
/// Does nothing when the instruction has no memory operands.
void AArch64InstrInfo::suppressLdStPair(MachineInstr &MI) {
  if (MI.memoperands_empty())
    return;
  (*MI.memoperands_begin())->setFlags(MOSuppressPair);
}

/// Check all MachineMemOperands for a hint that the load/store is strided.
bool AArch64InstrInfo::isStridedAccess(const MachineInstr &MI) {
  // True as soon as any memory operand carries the MOStridedAccess flag.
  return llvm::any_of(MI.memoperands(), [](MachineMemOperand *MMO) {
    return MMO->getFlags() & MOStridedAccess;
  });
}

/// Return true if \p Opc is one of the unscaled (LDUR/STUR) load/store
/// opcodes listed below; every other opcode yields false.
bool AArch64InstrInfo::isUnscaledLdSt(unsigned Opc) {
  switch (Opc) {
  default:
    return false;
  case AArch64::STURSi:
  case AArch64::STURDi:
  case AArch64::STURQi:
  case AArch64::STURBBi:
  case AArch64::STURHHi:
  case AArch64::STURWi:
  case AArch64::STURXi:
  case AArch64::LDURSi:
  case AArch64::LDURDi:
  case AArch64::LDURQi:
  case AArch64::LDURWi:
  case AArch64::LDURXi:
  case AArch64::LDURSWi:
  case AArch64::LDURHHi:
  case AArch64::LDURBBi:
  case AArch64::LDURSBWi:
  case AArch64::LDURSHWi:
    return true;
  }
}

/// Map a scaled, unsigned-offset load/store/prefetch opcode to its unscaled
/// counterpart. Returns an empty Optional for opcodes not in the table.
Optional<unsigned> AArch64InstrInfo::getUnscaledLdSt(unsigned Opc) {
  switch (Opc) {
  default: return {};
  case AArch64::PRFMui: return AArch64::PRFUMi;
  case AArch64::LDRXui: return AArch64::LDURXi;
  case AArch64::LDRWui: return AArch64::LDURWi;
  case AArch64::LDRBui: return AArch64::LDURBi;
  case AArch64::LDRHui: return AArch64::LDURHi;
  case AArch64::LDRSui: return AArch64::LDURSi;
  case AArch64::LDRDui: return AArch64::LDURDi;
  case AArch64::LDRQui: return AArch64::LDURQi;
  case AArch64::LDRBBui: return AArch64::LDURBBi;
  case AArch64::LDRHHui: return AArch64::LDURHHi;
  case AArch64::LDRSBXui: return AArch64::LDURSBXi;
  case AArch64::LDRSBWui: return AArch64::LDURSBWi;
  case AArch64::LDRSHXui: return AArch64::LDURSHXi;
  case AArch64::LDRSHWui: return AArch64::LDURSHWi;
  case AArch64::LDRSWui: return AArch64::LDURSWi;
  case AArch64::STRXui: return AArch64::STURXi;
  case AArch64::STRWui: return AArch64::STURWi;
  case AArch64::STRBui: return AArch64::STURBi;
  case AArch64::STRHui: return AArch64::STURHi;
  case AArch64::STRSui: return AArch64::STURSi;
  case AArch64::STRDui: return AArch64::STURDi;
  case AArch64::STRQui: return AArch64::STURQi;
  case AArch64::STRBBui: return AArch64::STURBBi;
  case AArch64::STRHHui: return AArch64::STURHHi;
  }
}

/// Return the operand index of the immediate offset for opcode \p Opc:
/// 3 for the pair opcodes (and LDG / STGPi) listed below, 2 otherwise.
unsigned AArch64InstrInfo::getLoadStoreImmIdx(unsigned Opc) {
  switch (Opc) {
  default:
    return 2;
  case AArch64::LDPXi:
  case AArch64::LDPDi:
  case AArch64::STPXi:
  case AArch64::STPDi:
  case AArch64::LDNPXi:
  case AArch64::LDNPDi:
  case AArch64::STNPXi:
  case AArch64::STNPDi:
  case AArch64::LDPQi:
  case AArch64::STPQi:
  case AArch64::LDNPQi:
  case AArch64::STNPQi:
  case AArch64::LDPWi:
  case AArch64::LDPSi:
  case AArch64::STPWi:
  case AArch64::STPSi:
  case AArch64::LDNPWi:
  case AArch64::LDNPSi:
  case AArch64::STNPWi:
  case AArch64::STNPSi:
  case AArch64::LDG:
  case AArch64::STGPi:
    return 3;
  // Listed explicitly even though the result equals the default.
  case AArch64::ADDG:
  case AArch64::STGOffset:
    return 2;
  }
}

/// Return true if the opcode of \p MI is one of the scaled or unscaled
/// load/store opcodes listed below. Only the opcode is inspected.
bool AArch64InstrInfo::isPairableLdStInst(const MachineInstr &MI) {
  switch (MI.getOpcode()) {
  default:
    return false;
  // Scaled instructions.
  case AArch64::STRSui:
  case AArch64::STRDui:
  case AArch64::STRQui:
  case AArch64::STRXui:
  case AArch64::STRWui:
  case AArch64::LDRSui:
  case AArch64::LDRDui:
  case AArch64::LDRQui:
  case AArch64::LDRXui:
  case AArch64::LDRWui:
  case AArch64::LDRSWui:
  // Unscaled instructions.
  case AArch64::STURSi:
  case AArch64::STURDi:
  case AArch64::STURQi:
  case AArch64::STURWi:
  case AArch64::STURXi:
  case AArch64::LDURSi:
  case AArch64::LDURDi:
  case AArch64::LDURQi:
  case AArch64::LDURWi:
  case AArch64::LDURXi:
  case AArch64::LDURSWi:
    return true;
  }
}

/// Return the flag-setting counterpart of \p Opc and report through
/// \p Is64Bit whether it is the 64-bit (X) form.
///
/// Reaching the default case is treated as unreachable, so callers must
/// only pass one of the opcodes listed below.
unsigned AArch64InstrInfo::convertToFlagSettingOpc(unsigned Opc,
                                                   bool &Is64Bit) {
  switch (Opc) {
  default:
    llvm_unreachable("Opcode has no flag setting equivalent!");
  // 32-bit cases:
  case AArch64::ADDWri:
    Is64Bit = false;
    return AArch64::ADDSWri;
  case AArch64::ADDWrr:
    Is64Bit = false;
    return AArch64::ADDSWrr;
  case AArch64::ADDWrs:
    Is64Bit = false;
    return AArch64::ADDSWrs;
  case AArch64::ADDWrx:
    Is64Bit = false;
    return AArch64::ADDSWrx;
  case AArch64::ANDWri:
    Is64Bit = false;
    return AArch64::ANDSWri;
  case AArch64::ANDWrr:
    Is64Bit = false;
    return AArch64::ANDSWrr;
  case AArch64::ANDWrs:
    Is64Bit = false;
    return AArch64::ANDSWrs;
  case AArch64::BICWrr:
    Is64Bit = false;
    return AArch64::BICSWrr;
  case AArch64::BICWrs:
    Is64Bit = false;
    return AArch64::BICSWrs;
  case AArch64::SUBWri:
    Is64Bit = false;
    return AArch64::SUBSWri;
  case AArch64::SUBWrr:
    Is64Bit = false;
    return AArch64::SUBSWrr;
  case AArch64::SUBWrs:
    Is64Bit = false;
    return AArch64::SUBSWrs;
  case AArch64::SUBWrx:
    Is64Bit = false;
    return AArch64::SUBSWrx;
  // 64-bit cases:
  case AArch64::ADDXri:
    Is64Bit = true;
    return AArch64::ADDSXri;
  case AArch64::ADDXrr:
    Is64Bit = true;
    return AArch64::ADDSXrr;
  case AArch64::ADDXrs:
    Is64Bit = true;
    return AArch64::ADDSXrs;
  case AArch64::ADDXrx:
    Is64Bit = true;
    return AArch64::ADDSXrx;
  case AArch64::ANDXri:
    Is64Bit = true;
    return AArch64::ANDSXri;
  case AArch64::ANDXrr:
    Is64Bit = true;
    return AArch64::ANDSXrr;
  case AArch64::ANDXrs:
    Is64Bit = true;
    return AArch64::ANDSXrs;
  case AArch64::BICXrr:
    Is64Bit = true;
    return AArch64::BICSXrr;
  case AArch64::BICXrs:
    Is64Bit = true;
    return AArch64::BICSXrs;
  case AArch64::SUBXri:
    Is64Bit = true;
    return AArch64::SUBSXri;
  case AArch64::SUBXrr:
    Is64Bit = true;
    return AArch64::SUBSXrr;
  case AArch64::SUBXrs:
    Is64Bit = true;
    return AArch64::SUBSXrs;
  case AArch64::SUBXrx:
    Is64Bit = true;
    return AArch64::SUBSXrx;
  }
}

// Is this a candidate for ld/st merging or pairing?  For example, we don't
// touch volatiles or load/stores that have a hint to avoid pair formation.
//
// The checks below are applied in order; the first one that fails rejects
// the instruction.
bool AArch64InstrInfo::isCandidateToMergeOrPair(const MachineInstr &MI) const {
  // If this is a volatile load/store, don't mess with it.
  if (MI.hasOrderedMemoryRef())
    return false;

  // Make sure this is a reg/fi+imm (as opposed to an address reloc).
  assert((MI.getOperand(1).isReg() || MI.getOperand(1).isFI()) &&
         "Expected a reg or frame index operand.");
  if (!MI.getOperand(2).isImm())
    return false;

  // Can't merge/pair if the instruction modifies the base register.
  // e.g., ldr x0, [x0]
  // This case will never occur with an FI base.
  if (MI.getOperand(1).isReg()) {
    Register BaseReg = MI.getOperand(1).getReg();
    const TargetRegisterInfo *TRI = &getRegisterInfo();
    if (MI.modifiesRegister(BaseReg, TRI))
      return false;
  }

  // Check if this load/store has a hint to avoid pair formation.
  // MachineMemOperands hints are set by the AArch64StorePairSuppress pass.
  if (isLdStPairSuppressed(MI))
    return false;

  // Do not pair any callee-save store/reload instructions in the
  // prologue/epilogue if the CFI information encoded the operations as separate
  // instructions, as that will cause the size of the actual prologue to mismatch
  // with the prologue size recorded in the Windows CFI.
  const MCAsmInfo *MAI = MI.getMF()->getTarget().getMCAsmInfo();
  bool NeedsWinCFI = MAI->usesWindowsCFI() &&
                     MI.getMF()->getFunction().needsUnwindTableEntry();
  if (NeedsWinCFI && (MI.getFlag(MachineInstr::FrameSetup) ||
                      MI.getFlag(MachineInstr::FrameDestroy)))
    return false;

  // On some CPUs quad load/store pairs are slower than two single load/stores.
  if (Subtarget.isPaired128Slow()) {
    switch (MI.getOpcode()) {
    default:
      break;
    case AArch64::LDURQi:
    case AArch64::STURQi:
    case AArch64::LDRQui:
    case AArch64::STRQui:
      return false;
    }
  }

  return true;
}

/// Convenience wrapper around getMemOperandWithOffsetWidth that discards the
/// access width.
bool AArch64InstrInfo::getMemOperandWithOffset(const MachineInstr &LdSt,
                                          const MachineOperand *&BaseOp,
                                          int64_t &Offset,
                                          const TargetRegisterInfo *TRI) const {
  unsigned Width;
  return getMemOperandWithOffsetWidth(LdSt, BaseOp, Offset, Width, TRI);
}

/// Decompose a base + immediate memory instruction.
///
/// On success \p BaseOp points at the base operand (a register or frame
/// index), \p Offset is the immediate multiplied by the opcode's scale, and
/// \p Width is the value getMemOpInfo reports for the opcode. Returns false
/// for instructions with an unexpected operand layout or an opcode that
/// getMemOpInfo does not handle.
bool AArch64InstrInfo::getMemOperandWithOffsetWidth(
    const MachineInstr &LdSt, const MachineOperand *&BaseOp, int64_t &Offset,
    unsigned &Width, const TargetRegisterInfo *TRI) const {
  assert(LdSt.mayLoadOrStore() && "Expected a memory operation.");
  // Handle only loads/stores with base register followed by immediate offset.
  if (LdSt.getNumExplicitOperands() == 3) {
    // Non-paired instruction (e.g., ldr x1, [x0, #8]).
    if ((!LdSt.getOperand(1).isReg() && !LdSt.getOperand(1).isFI()) ||
        !LdSt.getOperand(2).isImm())
      return false;
  } else if (LdSt.getNumExplicitOperands() == 4) {
    // Paired instruction (e.g., ldp x1, x2, [x0, #8]).
    if (!LdSt.getOperand(1).isReg() ||
        (!LdSt.getOperand(2).isReg() && !LdSt.getOperand(2).isFI()) ||
        !LdSt.getOperand(3).isImm())
      return false;
  } else
    return false;

  // Get the scaling factor for the instruction and set the width for the
  // instruction.
  unsigned Scale = 0;
  int64_t Dummy1, Dummy2;

  // If this returns false, then it's an instruction we don't want to handle.
  if (!getMemOpInfo(LdSt.getOpcode(), Scale, Width, Dummy1, Dummy2))
    return false;

  // Compute the offset. Offset is calculated as the immediate operand
  // multiplied by the scaling factor. Unscaled instructions have scaling factor
  // set to 1.
  if (LdSt.getNumExplicitOperands() == 3) {
    BaseOp = &LdSt.getOperand(1);
    Offset = LdSt.getOperand(2).getImm() * Scale;
  } else {
    assert(LdSt.getNumExplicitOperands() == 4 && "invalid number of operands");
    BaseOp = &LdSt.getOperand(2);
    Offset = LdSt.getOperand(3).getImm() * Scale;
  }

  assert((BaseOp->isReg() || BaseOp->isFI()) &&
         "getMemOperandWithOffset only supports base "
         "operands of type register or frame index.");

  return true;
}

/// Return the immediate offset operand of \p LdSt, taken to be its last
/// explicit operand. Asserts that the operand is an immediate.
MachineOperand &
AArch64InstrInfo::getMemOpBaseRegImmOfsOffsetOperand(MachineInstr &LdSt) const {
  assert(LdSt.mayLoadOrStore() && "Expected a memory operation.");
  MachineOperand &OfsOp = LdSt.getOperand(LdSt.getNumExplicitOperands() - 1);
  assert(OfsOp.isImm() && "Offset operand wasn't immediate.");
  return OfsOp;
}

/// Per-opcode table of addressing properties: \p Scale, \p Width and the
/// \p MinOffset / \p MaxOffset range are filled in for each handled opcode.
/// For an unhandled opcode all four outputs are zeroed and false is
/// returned.
bool AArch64InstrInfo::getMemOpInfo(unsigned Opcode, unsigned &Scale,
                                    unsigned &Width, int64_t &MinOffset,
                                    int64_t &MaxOffset) {
  switch (Opcode) {
  // Not a memory operation or something we want to handle.
  default:
    Scale = Width = 0;
    MinOffset = MaxOffset = 0;
    return false;
  case AArch64::STRWpost:
  case AArch64::LDRWpost:
    // NOTE(review): Width is 32 here while the other W-register accesses
    // below use 4 - confirm the intended unit.
    Width = 32;
    Scale = 4;
    MinOffset = -256;
    MaxOffset = 255;
    break;
  case AArch64::LDURQi:
  case AArch64::STURQi:
    Width = 16;
    Scale = 1;
    MinOffset = -256;
    MaxOffset = 255;
    break;
  case AArch64::PRFUMi:
  case AArch64::LDURXi:
  case AArch64::LDURDi:
  case AArch64::STURXi:
  case AArch64::STURDi:
    Width = 8;
    Scale = 1;
    MinOffset = -256;
    MaxOffset = 255;
    break;
  case AArch64::LDURWi:
  case AArch64::LDURSi:
  case AArch64::LDURSWi:
  case AArch64::STURWi:
  case AArch64::STURSi:
    Width = 4;
    Scale = 1;
    MinOffset = -256;
    MaxOffset = 255;
    break;
  case AArch64::LDURHi:
  case AArch64::LDURHHi:
  case AArch64::LDURSHXi:
  case AArch64::LDURSHWi:
  case AArch64::STURHi:
  case AArch64::STURHHi:
    Width = 2;
    Scale = 1;
    MinOffset = -256;
    MaxOffset = 255;
    break;
  case AArch64::LDURBi:
  case AArch64::LDURBBi:
  case AArch64::LDURSBXi:
  case AArch64::LDURSBWi:
  case AArch64::STURBi:
  case AArch64::STURBBi:
    Width = 1;
    Scale = 1;
    MinOffset = -256;
    MaxOffset = 255;
    break;
  case AArch64::LDPQi:
  case AArch64::LDNPQi:
  case AArch64::STPQi:
  case AArch64::STNPQi:
    Scale = 16;
    Width = 32;
    MinOffset = -64;
    MaxOffset = 63;
    break;
  case AArch64::LDRQui:
  case AArch64::STRQui:
    Scale = Width = 16;
    MinOffset = 0;
    MaxOffset = 4095;
    break;
  case AArch64::LDPXi:
  case AArch64::LDPDi:
  case AArch64::LDNPXi:
  case AArch64::LDNPDi:
  case AArch64::STPXi:
  case AArch64::STPDi:
  case AArch64::STNPXi:
  case AArch64::STNPDi:
    Scale = 8;
    Width = 16;
    MinOffset = -64;
    MaxOffset = 63;
    break;
  case AArch64::PRFMui:
2114 case AArch64::LDRXui: 2115 case AArch64::LDRDui: 2116 case AArch64::STRXui: 2117 case AArch64::STRDui: 2118 Scale = Width = 8; 2119 MinOffset = 0; 2120 MaxOffset = 4095; 2121 break; 2122 case AArch64::LDPWi: 2123 case AArch64::LDPSi: 2124 case AArch64::LDNPWi: 2125 case AArch64::LDNPSi: 2126 case AArch64::STPWi: 2127 case AArch64::STPSi: 2128 case AArch64::STNPWi: 2129 case AArch64::STNPSi: 2130 Scale = 4; 2131 Width = 8; 2132 MinOffset = -64; 2133 MaxOffset = 63; 2134 break; 2135 case AArch64::LDRWui: 2136 case AArch64::LDRSui: 2137 case AArch64::LDRSWui: 2138 case AArch64::STRWui: 2139 case AArch64::STRSui: 2140 Scale = Width = 4; 2141 MinOffset = 0; 2142 MaxOffset = 4095; 2143 break; 2144 case AArch64::LDRHui: 2145 case AArch64::LDRHHui: 2146 case AArch64::LDRSHWui: 2147 case AArch64::LDRSHXui: 2148 case AArch64::STRHui: 2149 case AArch64::STRHHui: 2150 Scale = Width = 2; 2151 MinOffset = 0; 2152 MaxOffset = 4095; 2153 break; 2154 case AArch64::LDRBui: 2155 case AArch64::LDRBBui: 2156 case AArch64::LDRSBWui: 2157 case AArch64::LDRSBXui: 2158 case AArch64::STRBui: 2159 case AArch64::STRBBui: 2160 Scale = Width = 1; 2161 MinOffset = 0; 2162 MaxOffset = 4095; 2163 break; 2164 case AArch64::ADDG: 2165 case AArch64::TAGPstack: 2166 Scale = 16; 2167 Width = 0; 2168 MinOffset = 0; 2169 MaxOffset = 63; 2170 break; 2171 case AArch64::LDG: 2172 case AArch64::STGOffset: 2173 case AArch64::STZGOffset: 2174 Scale = Width = 16; 2175 MinOffset = -256; 2176 MaxOffset = 255; 2177 break; 2178 case AArch64::ST2GOffset: 2179 case AArch64::STZ2GOffset: 2180 Scale = 16; 2181 Width = 32; 2182 MinOffset = -256; 2183 MaxOffset = 255; 2184 break; 2185 case AArch64::STGPi: 2186 Scale = Width = 16; 2187 MinOffset = -64; 2188 MaxOffset = 63; 2189 break; 2190 } 2191 2192 return true; 2193 } 2194 2195 static unsigned getOffsetStride(unsigned Opc) { 2196 switch (Opc) { 2197 default: 2198 return 0; 2199 case AArch64::LDURQi: 2200 case AArch64::STURQi: 2201 return 16; 2202 case 
AArch64::LDURXi: 2203 case AArch64::LDURDi: 2204 case AArch64::STURXi: 2205 case AArch64::STURDi: 2206 return 8; 2207 case AArch64::LDURWi: 2208 case AArch64::LDURSi: 2209 case AArch64::LDURSWi: 2210 case AArch64::STURWi: 2211 case AArch64::STURSi: 2212 return 4; 2213 } 2214 } 2215 2216 // Scale the unscaled offsets. Returns false if the unscaled offset can't be 2217 // scaled. 2218 static bool scaleOffset(unsigned Opc, int64_t &Offset) { 2219 unsigned OffsetStride = getOffsetStride(Opc); 2220 if (OffsetStride == 0) 2221 return false; 2222 // If the byte-offset isn't a multiple of the stride, we can't scale this 2223 // offset. 2224 if (Offset % OffsetStride != 0) 2225 return false; 2226 2227 // Convert the byte-offset used by unscaled into an "element" offset used 2228 // by the scaled pair load/store instructions. 2229 Offset /= OffsetStride; 2230 return true; 2231 } 2232 2233 // Unscale the scaled offsets. Returns false if the scaled offset can't be 2234 // unscaled. 2235 static bool unscaleOffset(unsigned Opc, int64_t &Offset) { 2236 unsigned OffsetStride = getOffsetStride(Opc); 2237 if (OffsetStride == 0) 2238 return false; 2239 2240 // Convert the "element" offset used by scaled pair load/store instructions 2241 // into the byte-offset used by unscaled. 2242 Offset *= OffsetStride; 2243 return true; 2244 } 2245 2246 static bool canPairLdStOpc(unsigned FirstOpc, unsigned SecondOpc) { 2247 if (FirstOpc == SecondOpc) 2248 return true; 2249 // We can also pair sign-ext and zero-ext instructions. 2250 switch (FirstOpc) { 2251 default: 2252 return false; 2253 case AArch64::LDRWui: 2254 case AArch64::LDURWi: 2255 return SecondOpc == AArch64::LDRSWui || SecondOpc == AArch64::LDURSWi; 2256 case AArch64::LDRSWui: 2257 case AArch64::LDURSWi: 2258 return SecondOpc == AArch64::LDRWui || SecondOpc == AArch64::LDURWi; 2259 } 2260 // These instructions can't be paired based on their opcodes. 
2261 return false; 2262 } 2263 2264 static bool shouldClusterFI(const MachineFrameInfo &MFI, int FI1, 2265 int64_t Offset1, unsigned Opcode1, int FI2, 2266 int64_t Offset2, unsigned Opcode2) { 2267 // Accesses through fixed stack object frame indices may access a different 2268 // fixed stack slot. Check that the object offsets + offsets match. 2269 if (MFI.isFixedObjectIndex(FI1) && MFI.isFixedObjectIndex(FI2)) { 2270 int64_t ObjectOffset1 = MFI.getObjectOffset(FI1); 2271 int64_t ObjectOffset2 = MFI.getObjectOffset(FI2); 2272 assert(ObjectOffset1 <= ObjectOffset2 && "Object offsets are not ordered."); 2273 // Get the byte-offset from the object offset. 2274 if (!unscaleOffset(Opcode1, Offset1) || !unscaleOffset(Opcode2, Offset2)) 2275 return false; 2276 ObjectOffset1 += Offset1; 2277 ObjectOffset2 += Offset2; 2278 // Get the "element" index in the object. 2279 if (!scaleOffset(Opcode1, ObjectOffset1) || 2280 !scaleOffset(Opcode2, ObjectOffset2)) 2281 return false; 2282 return ObjectOffset1 + 1 == ObjectOffset2; 2283 } 2284 2285 return FI1 == FI2; 2286 } 2287 2288 /// Detect opportunities for ldp/stp formation. 2289 /// 2290 /// Only called for LdSt for which getMemOperandWithOffset returns true. 2291 bool AArch64InstrInfo::shouldClusterMemOps(const MachineOperand &BaseOp1, 2292 const MachineOperand &BaseOp2, 2293 unsigned NumLoads) const { 2294 const MachineInstr &FirstLdSt = *BaseOp1.getParent(); 2295 const MachineInstr &SecondLdSt = *BaseOp2.getParent(); 2296 if (BaseOp1.getType() != BaseOp2.getType()) 2297 return false; 2298 2299 assert((BaseOp1.isReg() || BaseOp1.isFI()) && 2300 "Only base registers and frame indices are supported."); 2301 2302 // Check for both base regs and base FI. 2303 if (BaseOp1.isReg() && BaseOp1.getReg() != BaseOp2.getReg()) 2304 return false; 2305 2306 // Only cluster up to a single pair. 
2307 if (NumLoads > 1) 2308 return false; 2309 2310 if (!isPairableLdStInst(FirstLdSt) || !isPairableLdStInst(SecondLdSt)) 2311 return false; 2312 2313 // Can we pair these instructions based on their opcodes? 2314 unsigned FirstOpc = FirstLdSt.getOpcode(); 2315 unsigned SecondOpc = SecondLdSt.getOpcode(); 2316 if (!canPairLdStOpc(FirstOpc, SecondOpc)) 2317 return false; 2318 2319 // Can't merge volatiles or load/stores that have a hint to avoid pair 2320 // formation, for example. 2321 if (!isCandidateToMergeOrPair(FirstLdSt) || 2322 !isCandidateToMergeOrPair(SecondLdSt)) 2323 return false; 2324 2325 // isCandidateToMergeOrPair guarantees that operand 2 is an immediate. 2326 int64_t Offset1 = FirstLdSt.getOperand(2).getImm(); 2327 if (isUnscaledLdSt(FirstOpc) && !scaleOffset(FirstOpc, Offset1)) 2328 return false; 2329 2330 int64_t Offset2 = SecondLdSt.getOperand(2).getImm(); 2331 if (isUnscaledLdSt(SecondOpc) && !scaleOffset(SecondOpc, Offset2)) 2332 return false; 2333 2334 // Pairwise instructions have a 7-bit signed offset field. 2335 if (Offset1 > 63 || Offset1 < -64) 2336 return false; 2337 2338 // The caller should already have ordered First/SecondLdSt by offset. 
2339 // Note: except for non-equal frame index bases 2340 if (BaseOp1.isFI()) { 2341 assert((!BaseOp1.isIdenticalTo(BaseOp2) || Offset1 >= Offset2) && 2342 "Caller should have ordered offsets."); 2343 2344 const MachineFrameInfo &MFI = 2345 FirstLdSt.getParent()->getParent()->getFrameInfo(); 2346 return shouldClusterFI(MFI, BaseOp1.getIndex(), Offset1, FirstOpc, 2347 BaseOp2.getIndex(), Offset2, SecondOpc); 2348 } 2349 2350 assert((!BaseOp1.isIdenticalTo(BaseOp2) || Offset1 <= Offset2) && 2351 "Caller should have ordered offsets."); 2352 2353 return Offset1 + 1 == Offset2; 2354 } 2355 2356 static const MachineInstrBuilder &AddSubReg(const MachineInstrBuilder &MIB, 2357 unsigned Reg, unsigned SubIdx, 2358 unsigned State, 2359 const TargetRegisterInfo *TRI) { 2360 if (!SubIdx) 2361 return MIB.addReg(Reg, State); 2362 2363 if (Register::isPhysicalRegister(Reg)) 2364 return MIB.addReg(TRI->getSubReg(Reg, SubIdx), State); 2365 return MIB.addReg(Reg, State, SubIdx); 2366 } 2367 2368 static bool forwardCopyWillClobberTuple(unsigned DestReg, unsigned SrcReg, 2369 unsigned NumRegs) { 2370 // We really want the positive remainder mod 32 here, that happens to be 2371 // easily obtainable with a mask. 
2372 return ((DestReg - SrcReg) & 0x1f) < NumRegs; 2373 } 2374 2375 void AArch64InstrInfo::copyPhysRegTuple(MachineBasicBlock &MBB, 2376 MachineBasicBlock::iterator I, 2377 const DebugLoc &DL, unsigned DestReg, 2378 unsigned SrcReg, bool KillSrc, 2379 unsigned Opcode, 2380 ArrayRef<unsigned> Indices) const { 2381 assert(Subtarget.hasNEON() && "Unexpected register copy without NEON"); 2382 const TargetRegisterInfo *TRI = &getRegisterInfo(); 2383 uint16_t DestEncoding = TRI->getEncodingValue(DestReg); 2384 uint16_t SrcEncoding = TRI->getEncodingValue(SrcReg); 2385 unsigned NumRegs = Indices.size(); 2386 2387 int SubReg = 0, End = NumRegs, Incr = 1; 2388 if (forwardCopyWillClobberTuple(DestEncoding, SrcEncoding, NumRegs)) { 2389 SubReg = NumRegs - 1; 2390 End = -1; 2391 Incr = -1; 2392 } 2393 2394 for (; SubReg != End; SubReg += Incr) { 2395 const MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(Opcode)); 2396 AddSubReg(MIB, DestReg, Indices[SubReg], RegState::Define, TRI); 2397 AddSubReg(MIB, SrcReg, Indices[SubReg], 0, TRI); 2398 AddSubReg(MIB, SrcReg, Indices[SubReg], getKillRegState(KillSrc), TRI); 2399 } 2400 } 2401 2402 void AArch64InstrInfo::copyGPRRegTuple(MachineBasicBlock &MBB, 2403 MachineBasicBlock::iterator I, 2404 DebugLoc DL, unsigned DestReg, 2405 unsigned SrcReg, bool KillSrc, 2406 unsigned Opcode, unsigned ZeroReg, 2407 llvm::ArrayRef<unsigned> Indices) const { 2408 const TargetRegisterInfo *TRI = &getRegisterInfo(); 2409 unsigned NumRegs = Indices.size(); 2410 2411 #ifndef NDEBUG 2412 uint16_t DestEncoding = TRI->getEncodingValue(DestReg); 2413 uint16_t SrcEncoding = TRI->getEncodingValue(SrcReg); 2414 assert(DestEncoding % NumRegs == 0 && SrcEncoding % NumRegs == 0 && 2415 "GPR reg sequences should not be able to overlap"); 2416 #endif 2417 2418 for (unsigned SubReg = 0; SubReg != NumRegs; ++SubReg) { 2419 const MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(Opcode)); 2420 AddSubReg(MIB, DestReg, Indices[SubReg], RegState::Define, TRI); 2421 
MIB.addReg(ZeroReg); 2422 AddSubReg(MIB, SrcReg, Indices[SubReg], getKillRegState(KillSrc), TRI); 2423 MIB.addImm(0); 2424 } 2425 } 2426 2427 void AArch64InstrInfo::copyPhysReg(MachineBasicBlock &MBB, 2428 MachineBasicBlock::iterator I, 2429 const DebugLoc &DL, unsigned DestReg, 2430 unsigned SrcReg, bool KillSrc) const { 2431 if (AArch64::GPR32spRegClass.contains(DestReg) && 2432 (AArch64::GPR32spRegClass.contains(SrcReg) || SrcReg == AArch64::WZR)) { 2433 const TargetRegisterInfo *TRI = &getRegisterInfo(); 2434 2435 if (DestReg == AArch64::WSP || SrcReg == AArch64::WSP) { 2436 // If either operand is WSP, expand to ADD #0. 2437 if (Subtarget.hasZeroCycleRegMove()) { 2438 // Cyclone recognizes "ADD Xd, Xn, #0" as a zero-cycle register move. 2439 unsigned DestRegX = TRI->getMatchingSuperReg(DestReg, AArch64::sub_32, 2440 &AArch64::GPR64spRegClass); 2441 unsigned SrcRegX = TRI->getMatchingSuperReg(SrcReg, AArch64::sub_32, 2442 &AArch64::GPR64spRegClass); 2443 // This instruction is reading and writing X registers. This may upset 2444 // the register scavenger and machine verifier, so we need to indicate 2445 // that we are reading an undefined value from SrcRegX, but a proper 2446 // value from SrcReg. 
2447 BuildMI(MBB, I, DL, get(AArch64::ADDXri), DestRegX) 2448 .addReg(SrcRegX, RegState::Undef) 2449 .addImm(0) 2450 .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0)) 2451 .addReg(SrcReg, RegState::Implicit | getKillRegState(KillSrc)); 2452 } else { 2453 BuildMI(MBB, I, DL, get(AArch64::ADDWri), DestReg) 2454 .addReg(SrcReg, getKillRegState(KillSrc)) 2455 .addImm(0) 2456 .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0)); 2457 } 2458 } else if (SrcReg == AArch64::WZR && Subtarget.hasZeroCycleZeroingGP()) { 2459 BuildMI(MBB, I, DL, get(AArch64::MOVZWi), DestReg) 2460 .addImm(0) 2461 .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0)); 2462 } else { 2463 if (Subtarget.hasZeroCycleRegMove()) { 2464 // Cyclone recognizes "ORR Xd, XZR, Xm" as a zero-cycle register move. 2465 unsigned DestRegX = TRI->getMatchingSuperReg(DestReg, AArch64::sub_32, 2466 &AArch64::GPR64spRegClass); 2467 unsigned SrcRegX = TRI->getMatchingSuperReg(SrcReg, AArch64::sub_32, 2468 &AArch64::GPR64spRegClass); 2469 // This instruction is reading and writing X registers. This may upset 2470 // the register scavenger and machine verifier, so we need to indicate 2471 // that we are reading an undefined value from SrcRegX, but a proper 2472 // value from SrcReg. 2473 BuildMI(MBB, I, DL, get(AArch64::ORRXrr), DestRegX) 2474 .addReg(AArch64::XZR) 2475 .addReg(SrcRegX, RegState::Undef) 2476 .addReg(SrcReg, RegState::Implicit | getKillRegState(KillSrc)); 2477 } else { 2478 // Otherwise, expand to ORR WZR. 2479 BuildMI(MBB, I, DL, get(AArch64::ORRWrr), DestReg) 2480 .addReg(AArch64::WZR) 2481 .addReg(SrcReg, getKillRegState(KillSrc)); 2482 } 2483 } 2484 return; 2485 } 2486 2487 if (AArch64::GPR64spRegClass.contains(DestReg) && 2488 (AArch64::GPR64spRegClass.contains(SrcReg) || SrcReg == AArch64::XZR)) { 2489 if (DestReg == AArch64::SP || SrcReg == AArch64::SP) { 2490 // If either operand is SP, expand to ADD #0. 
2491 BuildMI(MBB, I, DL, get(AArch64::ADDXri), DestReg) 2492 .addReg(SrcReg, getKillRegState(KillSrc)) 2493 .addImm(0) 2494 .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0)); 2495 } else if (SrcReg == AArch64::XZR && Subtarget.hasZeroCycleZeroingGP()) { 2496 BuildMI(MBB, I, DL, get(AArch64::MOVZXi), DestReg) 2497 .addImm(0) 2498 .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0)); 2499 } else { 2500 // Otherwise, expand to ORR XZR. 2501 BuildMI(MBB, I, DL, get(AArch64::ORRXrr), DestReg) 2502 .addReg(AArch64::XZR) 2503 .addReg(SrcReg, getKillRegState(KillSrc)); 2504 } 2505 return; 2506 } 2507 2508 // Copy a DDDD register quad by copying the individual sub-registers. 2509 if (AArch64::DDDDRegClass.contains(DestReg) && 2510 AArch64::DDDDRegClass.contains(SrcReg)) { 2511 static const unsigned Indices[] = {AArch64::dsub0, AArch64::dsub1, 2512 AArch64::dsub2, AArch64::dsub3}; 2513 copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv8i8, 2514 Indices); 2515 return; 2516 } 2517 2518 // Copy a DDD register triple by copying the individual sub-registers. 2519 if (AArch64::DDDRegClass.contains(DestReg) && 2520 AArch64::DDDRegClass.contains(SrcReg)) { 2521 static const unsigned Indices[] = {AArch64::dsub0, AArch64::dsub1, 2522 AArch64::dsub2}; 2523 copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv8i8, 2524 Indices); 2525 return; 2526 } 2527 2528 // Copy a DD register pair by copying the individual sub-registers. 2529 if (AArch64::DDRegClass.contains(DestReg) && 2530 AArch64::DDRegClass.contains(SrcReg)) { 2531 static const unsigned Indices[] = {AArch64::dsub0, AArch64::dsub1}; 2532 copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv8i8, 2533 Indices); 2534 return; 2535 } 2536 2537 // Copy a QQQQ register quad by copying the individual sub-registers. 
2538 if (AArch64::QQQQRegClass.contains(DestReg) && 2539 AArch64::QQQQRegClass.contains(SrcReg)) { 2540 static const unsigned Indices[] = {AArch64::qsub0, AArch64::qsub1, 2541 AArch64::qsub2, AArch64::qsub3}; 2542 copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv16i8, 2543 Indices); 2544 return; 2545 } 2546 2547 // Copy a QQQ register triple by copying the individual sub-registers. 2548 if (AArch64::QQQRegClass.contains(DestReg) && 2549 AArch64::QQQRegClass.contains(SrcReg)) { 2550 static const unsigned Indices[] = {AArch64::qsub0, AArch64::qsub1, 2551 AArch64::qsub2}; 2552 copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv16i8, 2553 Indices); 2554 return; 2555 } 2556 2557 // Copy a QQ register pair by copying the individual sub-registers. 2558 if (AArch64::QQRegClass.contains(DestReg) && 2559 AArch64::QQRegClass.contains(SrcReg)) { 2560 static const unsigned Indices[] = {AArch64::qsub0, AArch64::qsub1}; 2561 copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv16i8, 2562 Indices); 2563 return; 2564 } 2565 2566 if (AArch64::XSeqPairsClassRegClass.contains(DestReg) && 2567 AArch64::XSeqPairsClassRegClass.contains(SrcReg)) { 2568 static const unsigned Indices[] = {AArch64::sube64, AArch64::subo64}; 2569 copyGPRRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRXrs, 2570 AArch64::XZR, Indices); 2571 return; 2572 } 2573 2574 if (AArch64::WSeqPairsClassRegClass.contains(DestReg) && 2575 AArch64::WSeqPairsClassRegClass.contains(SrcReg)) { 2576 static const unsigned Indices[] = {AArch64::sube32, AArch64::subo32}; 2577 copyGPRRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRWrs, 2578 AArch64::WZR, Indices); 2579 return; 2580 } 2581 2582 if (AArch64::FPR128RegClass.contains(DestReg) && 2583 AArch64::FPR128RegClass.contains(SrcReg)) { 2584 if (Subtarget.hasNEON()) { 2585 BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestReg) 2586 .addReg(SrcReg) 2587 .addReg(SrcReg, getKillRegState(KillSrc)); 2588 } else { 
2589 BuildMI(MBB, I, DL, get(AArch64::STRQpre)) 2590 .addReg(AArch64::SP, RegState::Define) 2591 .addReg(SrcReg, getKillRegState(KillSrc)) 2592 .addReg(AArch64::SP) 2593 .addImm(-16); 2594 BuildMI(MBB, I, DL, get(AArch64::LDRQpre)) 2595 .addReg(AArch64::SP, RegState::Define) 2596 .addReg(DestReg, RegState::Define) 2597 .addReg(AArch64::SP) 2598 .addImm(16); 2599 } 2600 return; 2601 } 2602 2603 if (AArch64::FPR64RegClass.contains(DestReg) && 2604 AArch64::FPR64RegClass.contains(SrcReg)) { 2605 if (Subtarget.hasNEON()) { 2606 DestReg = RI.getMatchingSuperReg(DestReg, AArch64::dsub, 2607 &AArch64::FPR128RegClass); 2608 SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::dsub, 2609 &AArch64::FPR128RegClass); 2610 BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestReg) 2611 .addReg(SrcReg) 2612 .addReg(SrcReg, getKillRegState(KillSrc)); 2613 } else { 2614 BuildMI(MBB, I, DL, get(AArch64::FMOVDr), DestReg) 2615 .addReg(SrcReg, getKillRegState(KillSrc)); 2616 } 2617 return; 2618 } 2619 2620 if (AArch64::FPR32RegClass.contains(DestReg) && 2621 AArch64::FPR32RegClass.contains(SrcReg)) { 2622 if (Subtarget.hasNEON()) { 2623 DestReg = RI.getMatchingSuperReg(DestReg, AArch64::ssub, 2624 &AArch64::FPR128RegClass); 2625 SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::ssub, 2626 &AArch64::FPR128RegClass); 2627 BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestReg) 2628 .addReg(SrcReg) 2629 .addReg(SrcReg, getKillRegState(KillSrc)); 2630 } else { 2631 BuildMI(MBB, I, DL, get(AArch64::FMOVSr), DestReg) 2632 .addReg(SrcReg, getKillRegState(KillSrc)); 2633 } 2634 return; 2635 } 2636 2637 if (AArch64::FPR16RegClass.contains(DestReg) && 2638 AArch64::FPR16RegClass.contains(SrcReg)) { 2639 if (Subtarget.hasNEON()) { 2640 DestReg = RI.getMatchingSuperReg(DestReg, AArch64::hsub, 2641 &AArch64::FPR128RegClass); 2642 SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::hsub, 2643 &AArch64::FPR128RegClass); 2644 BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestReg) 2645 .addReg(SrcReg) 2646 
.addReg(SrcReg, getKillRegState(KillSrc)); 2647 } else { 2648 DestReg = RI.getMatchingSuperReg(DestReg, AArch64::hsub, 2649 &AArch64::FPR32RegClass); 2650 SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::hsub, 2651 &AArch64::FPR32RegClass); 2652 BuildMI(MBB, I, DL, get(AArch64::FMOVSr), DestReg) 2653 .addReg(SrcReg, getKillRegState(KillSrc)); 2654 } 2655 return; 2656 } 2657 2658 if (AArch64::FPR8RegClass.contains(DestReg) && 2659 AArch64::FPR8RegClass.contains(SrcReg)) { 2660 if (Subtarget.hasNEON()) { 2661 DestReg = RI.getMatchingSuperReg(DestReg, AArch64::bsub, 2662 &AArch64::FPR128RegClass); 2663 SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::bsub, 2664 &AArch64::FPR128RegClass); 2665 BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestReg) 2666 .addReg(SrcReg) 2667 .addReg(SrcReg, getKillRegState(KillSrc)); 2668 } else { 2669 DestReg = RI.getMatchingSuperReg(DestReg, AArch64::bsub, 2670 &AArch64::FPR32RegClass); 2671 SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::bsub, 2672 &AArch64::FPR32RegClass); 2673 BuildMI(MBB, I, DL, get(AArch64::FMOVSr), DestReg) 2674 .addReg(SrcReg, getKillRegState(KillSrc)); 2675 } 2676 return; 2677 } 2678 2679 // Copies between GPR64 and FPR64. 2680 if (AArch64::FPR64RegClass.contains(DestReg) && 2681 AArch64::GPR64RegClass.contains(SrcReg)) { 2682 BuildMI(MBB, I, DL, get(AArch64::FMOVXDr), DestReg) 2683 .addReg(SrcReg, getKillRegState(KillSrc)); 2684 return; 2685 } 2686 if (AArch64::GPR64RegClass.contains(DestReg) && 2687 AArch64::FPR64RegClass.contains(SrcReg)) { 2688 BuildMI(MBB, I, DL, get(AArch64::FMOVDXr), DestReg) 2689 .addReg(SrcReg, getKillRegState(KillSrc)); 2690 return; 2691 } 2692 // Copies between GPR32 and FPR32. 
2693 if (AArch64::FPR32RegClass.contains(DestReg) && 2694 AArch64::GPR32RegClass.contains(SrcReg)) { 2695 BuildMI(MBB, I, DL, get(AArch64::FMOVWSr), DestReg) 2696 .addReg(SrcReg, getKillRegState(KillSrc)); 2697 return; 2698 } 2699 if (AArch64::GPR32RegClass.contains(DestReg) && 2700 AArch64::FPR32RegClass.contains(SrcReg)) { 2701 BuildMI(MBB, I, DL, get(AArch64::FMOVSWr), DestReg) 2702 .addReg(SrcReg, getKillRegState(KillSrc)); 2703 return; 2704 } 2705 2706 if (DestReg == AArch64::NZCV) { 2707 assert(AArch64::GPR64RegClass.contains(SrcReg) && "Invalid NZCV copy"); 2708 BuildMI(MBB, I, DL, get(AArch64::MSR)) 2709 .addImm(AArch64SysReg::NZCV) 2710 .addReg(SrcReg, getKillRegState(KillSrc)) 2711 .addReg(AArch64::NZCV, RegState::Implicit | RegState::Define); 2712 return; 2713 } 2714 2715 if (SrcReg == AArch64::NZCV) { 2716 assert(AArch64::GPR64RegClass.contains(DestReg) && "Invalid NZCV copy"); 2717 BuildMI(MBB, I, DL, get(AArch64::MRS), DestReg) 2718 .addImm(AArch64SysReg::NZCV) 2719 .addReg(AArch64::NZCV, RegState::Implicit | getKillRegState(KillSrc)); 2720 return; 2721 } 2722 2723 llvm_unreachable("unimplemented reg-to-reg copy"); 2724 } 2725 2726 static void storeRegPairToStackSlot(const TargetRegisterInfo &TRI, 2727 MachineBasicBlock &MBB, 2728 MachineBasicBlock::iterator InsertBefore, 2729 const MCInstrDesc &MCID, 2730 unsigned SrcReg, bool IsKill, 2731 unsigned SubIdx0, unsigned SubIdx1, int FI, 2732 MachineMemOperand *MMO) { 2733 unsigned SrcReg0 = SrcReg; 2734 unsigned SrcReg1 = SrcReg; 2735 if (Register::isPhysicalRegister(SrcReg)) { 2736 SrcReg0 = TRI.getSubReg(SrcReg, SubIdx0); 2737 SubIdx0 = 0; 2738 SrcReg1 = TRI.getSubReg(SrcReg, SubIdx1); 2739 SubIdx1 = 0; 2740 } 2741 BuildMI(MBB, InsertBefore, DebugLoc(), MCID) 2742 .addReg(SrcReg0, getKillRegState(IsKill), SubIdx0) 2743 .addReg(SrcReg1, getKillRegState(IsKill), SubIdx1) 2744 .addFrameIndex(FI) 2745 .addImm(0) 2746 .addMemOperand(MMO); 2747 } 2748 2749 void AArch64InstrInfo::storeRegToStackSlot( 2750 
MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, unsigned SrcReg, 2751 bool isKill, int FI, const TargetRegisterClass *RC, 2752 const TargetRegisterInfo *TRI) const { 2753 MachineFunction &MF = *MBB.getParent(); 2754 MachineFrameInfo &MFI = MF.getFrameInfo(); 2755 unsigned Align = MFI.getObjectAlignment(FI); 2756 2757 MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(MF, FI); 2758 MachineMemOperand *MMO = MF.getMachineMemOperand( 2759 PtrInfo, MachineMemOperand::MOStore, MFI.getObjectSize(FI), Align); 2760 unsigned Opc = 0; 2761 bool Offset = true; 2762 switch (TRI->getSpillSize(*RC)) { 2763 case 1: 2764 if (AArch64::FPR8RegClass.hasSubClassEq(RC)) 2765 Opc = AArch64::STRBui; 2766 break; 2767 case 2: 2768 if (AArch64::FPR16RegClass.hasSubClassEq(RC)) 2769 Opc = AArch64::STRHui; 2770 break; 2771 case 4: 2772 if (AArch64::GPR32allRegClass.hasSubClassEq(RC)) { 2773 Opc = AArch64::STRWui; 2774 if (Register::isVirtualRegister(SrcReg)) 2775 MF.getRegInfo().constrainRegClass(SrcReg, &AArch64::GPR32RegClass); 2776 else 2777 assert(SrcReg != AArch64::WSP); 2778 } else if (AArch64::FPR32RegClass.hasSubClassEq(RC)) 2779 Opc = AArch64::STRSui; 2780 break; 2781 case 8: 2782 if (AArch64::GPR64allRegClass.hasSubClassEq(RC)) { 2783 Opc = AArch64::STRXui; 2784 if (Register::isVirtualRegister(SrcReg)) 2785 MF.getRegInfo().constrainRegClass(SrcReg, &AArch64::GPR64RegClass); 2786 else 2787 assert(SrcReg != AArch64::SP); 2788 } else if (AArch64::FPR64RegClass.hasSubClassEq(RC)) { 2789 Opc = AArch64::STRDui; 2790 } else if (AArch64::WSeqPairsClassRegClass.hasSubClassEq(RC)) { 2791 storeRegPairToStackSlot(getRegisterInfo(), MBB, MBBI, 2792 get(AArch64::STPWi), SrcReg, isKill, 2793 AArch64::sube32, AArch64::subo32, FI, MMO); 2794 return; 2795 } 2796 break; 2797 case 16: 2798 if (AArch64::FPR128RegClass.hasSubClassEq(RC)) 2799 Opc = AArch64::STRQui; 2800 else if (AArch64::DDRegClass.hasSubClassEq(RC)) { 2801 assert(Subtarget.hasNEON() && "Unexpected register store 
without NEON"); 2802 Opc = AArch64::ST1Twov1d; 2803 Offset = false; 2804 } else if (AArch64::XSeqPairsClassRegClass.hasSubClassEq(RC)) { 2805 storeRegPairToStackSlot(getRegisterInfo(), MBB, MBBI, 2806 get(AArch64::STPXi), SrcReg, isKill, 2807 AArch64::sube64, AArch64::subo64, FI, MMO); 2808 return; 2809 } 2810 break; 2811 case 24: 2812 if (AArch64::DDDRegClass.hasSubClassEq(RC)) { 2813 assert(Subtarget.hasNEON() && "Unexpected register store without NEON"); 2814 Opc = AArch64::ST1Threev1d; 2815 Offset = false; 2816 } 2817 break; 2818 case 32: 2819 if (AArch64::DDDDRegClass.hasSubClassEq(RC)) { 2820 assert(Subtarget.hasNEON() && "Unexpected register store without NEON"); 2821 Opc = AArch64::ST1Fourv1d; 2822 Offset = false; 2823 } else if (AArch64::QQRegClass.hasSubClassEq(RC)) { 2824 assert(Subtarget.hasNEON() && "Unexpected register store without NEON"); 2825 Opc = AArch64::ST1Twov2d; 2826 Offset = false; 2827 } 2828 break; 2829 case 48: 2830 if (AArch64::QQQRegClass.hasSubClassEq(RC)) { 2831 assert(Subtarget.hasNEON() && "Unexpected register store without NEON"); 2832 Opc = AArch64::ST1Threev2d; 2833 Offset = false; 2834 } 2835 break; 2836 case 64: 2837 if (AArch64::QQQQRegClass.hasSubClassEq(RC)) { 2838 assert(Subtarget.hasNEON() && "Unexpected register store without NEON"); 2839 Opc = AArch64::ST1Fourv2d; 2840 Offset = false; 2841 } 2842 break; 2843 } 2844 assert(Opc && "Unknown register class"); 2845 2846 const MachineInstrBuilder MI = BuildMI(MBB, MBBI, DebugLoc(), get(Opc)) 2847 .addReg(SrcReg, getKillRegState(isKill)) 2848 .addFrameIndex(FI); 2849 2850 if (Offset) 2851 MI.addImm(0); 2852 MI.addMemOperand(MMO); 2853 } 2854 2855 static void loadRegPairFromStackSlot(const TargetRegisterInfo &TRI, 2856 MachineBasicBlock &MBB, 2857 MachineBasicBlock::iterator InsertBefore, 2858 const MCInstrDesc &MCID, 2859 unsigned DestReg, unsigned SubIdx0, 2860 unsigned SubIdx1, int FI, 2861 MachineMemOperand *MMO) { 2862 unsigned DestReg0 = DestReg; 2863 unsigned DestReg1 = 
DestReg; 2864 bool IsUndef = true; 2865 if (Register::isPhysicalRegister(DestReg)) { 2866 DestReg0 = TRI.getSubReg(DestReg, SubIdx0); 2867 SubIdx0 = 0; 2868 DestReg1 = TRI.getSubReg(DestReg, SubIdx1); 2869 SubIdx1 = 0; 2870 IsUndef = false; 2871 } 2872 BuildMI(MBB, InsertBefore, DebugLoc(), MCID) 2873 .addReg(DestReg0, RegState::Define | getUndefRegState(IsUndef), SubIdx0) 2874 .addReg(DestReg1, RegState::Define | getUndefRegState(IsUndef), SubIdx1) 2875 .addFrameIndex(FI) 2876 .addImm(0) 2877 .addMemOperand(MMO); 2878 } 2879 2880 void AArch64InstrInfo::loadRegFromStackSlot( 2881 MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, unsigned DestReg, 2882 int FI, const TargetRegisterClass *RC, 2883 const TargetRegisterInfo *TRI) const { 2884 MachineFunction &MF = *MBB.getParent(); 2885 MachineFrameInfo &MFI = MF.getFrameInfo(); 2886 unsigned Align = MFI.getObjectAlignment(FI); 2887 MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(MF, FI); 2888 MachineMemOperand *MMO = MF.getMachineMemOperand( 2889 PtrInfo, MachineMemOperand::MOLoad, MFI.getObjectSize(FI), Align); 2890 2891 unsigned Opc = 0; 2892 bool Offset = true; 2893 switch (TRI->getSpillSize(*RC)) { 2894 case 1: 2895 if (AArch64::FPR8RegClass.hasSubClassEq(RC)) 2896 Opc = AArch64::LDRBui; 2897 break; 2898 case 2: 2899 if (AArch64::FPR16RegClass.hasSubClassEq(RC)) 2900 Opc = AArch64::LDRHui; 2901 break; 2902 case 4: 2903 if (AArch64::GPR32allRegClass.hasSubClassEq(RC)) { 2904 Opc = AArch64::LDRWui; 2905 if (Register::isVirtualRegister(DestReg)) 2906 MF.getRegInfo().constrainRegClass(DestReg, &AArch64::GPR32RegClass); 2907 else 2908 assert(DestReg != AArch64::WSP); 2909 } else if (AArch64::FPR32RegClass.hasSubClassEq(RC)) 2910 Opc = AArch64::LDRSui; 2911 break; 2912 case 8: 2913 if (AArch64::GPR64allRegClass.hasSubClassEq(RC)) { 2914 Opc = AArch64::LDRXui; 2915 if (Register::isVirtualRegister(DestReg)) 2916 MF.getRegInfo().constrainRegClass(DestReg, &AArch64::GPR64RegClass); 2917 else 2918 
assert(DestReg != AArch64::SP); 2919 } else if (AArch64::FPR64RegClass.hasSubClassEq(RC)) { 2920 Opc = AArch64::LDRDui; 2921 } else if (AArch64::WSeqPairsClassRegClass.hasSubClassEq(RC)) { 2922 loadRegPairFromStackSlot(getRegisterInfo(), MBB, MBBI, 2923 get(AArch64::LDPWi), DestReg, AArch64::sube32, 2924 AArch64::subo32, FI, MMO); 2925 return; 2926 } 2927 break; 2928 case 16: 2929 if (AArch64::FPR128RegClass.hasSubClassEq(RC)) 2930 Opc = AArch64::LDRQui; 2931 else if (AArch64::DDRegClass.hasSubClassEq(RC)) { 2932 assert(Subtarget.hasNEON() && "Unexpected register load without NEON"); 2933 Opc = AArch64::LD1Twov1d; 2934 Offset = false; 2935 } else if (AArch64::XSeqPairsClassRegClass.hasSubClassEq(RC)) { 2936 loadRegPairFromStackSlot(getRegisterInfo(), MBB, MBBI, 2937 get(AArch64::LDPXi), DestReg, AArch64::sube64, 2938 AArch64::subo64, FI, MMO); 2939 return; 2940 } 2941 break; 2942 case 24: 2943 if (AArch64::DDDRegClass.hasSubClassEq(RC)) { 2944 assert(Subtarget.hasNEON() && "Unexpected register load without NEON"); 2945 Opc = AArch64::LD1Threev1d; 2946 Offset = false; 2947 } 2948 break; 2949 case 32: 2950 if (AArch64::DDDDRegClass.hasSubClassEq(RC)) { 2951 assert(Subtarget.hasNEON() && "Unexpected register load without NEON"); 2952 Opc = AArch64::LD1Fourv1d; 2953 Offset = false; 2954 } else if (AArch64::QQRegClass.hasSubClassEq(RC)) { 2955 assert(Subtarget.hasNEON() && "Unexpected register load without NEON"); 2956 Opc = AArch64::LD1Twov2d; 2957 Offset = false; 2958 } 2959 break; 2960 case 48: 2961 if (AArch64::QQQRegClass.hasSubClassEq(RC)) { 2962 assert(Subtarget.hasNEON() && "Unexpected register load without NEON"); 2963 Opc = AArch64::LD1Threev2d; 2964 Offset = false; 2965 } 2966 break; 2967 case 64: 2968 if (AArch64::QQQQRegClass.hasSubClassEq(RC)) { 2969 assert(Subtarget.hasNEON() && "Unexpected register load without NEON"); 2970 Opc = AArch64::LD1Fourv2d; 2971 Offset = false; 2972 } 2973 break; 2974 } 2975 assert(Opc && "Unknown register class"); 2976 2977 
const MachineInstrBuilder MI = BuildMI(MBB, MBBI, DebugLoc(), get(Opc)) 2978 .addReg(DestReg, getDefRegState(true)) 2979 .addFrameIndex(FI); 2980 if (Offset) 2981 MI.addImm(0); 2982 MI.addMemOperand(MMO); 2983 } 2984 2985 // Helper function to emit a frame offset adjustment from a given 2986 // pointer (SrcReg), stored into DestReg. This function is explicit 2987 // in that it requires the opcode. 2988 static void emitFrameOffsetAdj(MachineBasicBlock &MBB, 2989 MachineBasicBlock::iterator MBBI, 2990 const DebugLoc &DL, unsigned DestReg, 2991 unsigned SrcReg, int64_t Offset, unsigned Opc, 2992 const TargetInstrInfo *TII, 2993 MachineInstr::MIFlag Flag, bool NeedsWinCFI, 2994 bool *HasWinCFI) { 2995 int Sign = 1; 2996 unsigned MaxEncoding, ShiftSize; 2997 switch (Opc) { 2998 case AArch64::ADDXri: 2999 case AArch64::ADDSXri: 3000 case AArch64::SUBXri: 3001 case AArch64::SUBSXri: 3002 MaxEncoding = 0xfff; 3003 ShiftSize = 12; 3004 break; 3005 default: 3006 llvm_unreachable("Unsupported opcode"); 3007 } 3008 3009 // FIXME: If the offset won't fit in 24-bits, compute the offset into a 3010 // scratch register. If DestReg is a virtual register, use it as the 3011 // scratch register; otherwise, create a new virtual register (to be 3012 // replaced by the scavenger at the end of PEI). That case can be optimized 3013 // slightly if DestReg is SP which is always 16-byte aligned, so the scratch 3014 // register can be loaded with offset%8 and the add/sub can use an extending 3015 // instruction with LSL#3. 3016 // Currently the function handles any offsets but generates a poor sequence 3017 // of code. 
3018 // assert(Offset < (1 << 24) && "unimplemented reg plus immediate"); 3019 3020 const unsigned MaxEncodableValue = MaxEncoding << ShiftSize; 3021 do { 3022 unsigned ThisVal = std::min<unsigned>(Offset, MaxEncodableValue); 3023 unsigned LocalShiftSize = 0; 3024 if (ThisVal > MaxEncoding) { 3025 ThisVal = ThisVal >> ShiftSize; 3026 LocalShiftSize = ShiftSize; 3027 } 3028 assert((ThisVal >> ShiftSize) <= MaxEncoding && 3029 "Encoding cannot handle value that big"); 3030 auto MBI = BuildMI(MBB, MBBI, DL, TII->get(Opc), DestReg) 3031 .addReg(SrcReg) 3032 .addImm(Sign * (int)ThisVal); 3033 if (ShiftSize) 3034 MBI = MBI.addImm( 3035 AArch64_AM::getShifterImm(AArch64_AM::LSL, LocalShiftSize)); 3036 MBI = MBI.setMIFlag(Flag); 3037 3038 if (NeedsWinCFI) { 3039 assert(Sign == 1 && "SEH directives should always have a positive sign"); 3040 int Imm = (int)(ThisVal << LocalShiftSize); 3041 if ((DestReg == AArch64::FP && SrcReg == AArch64::SP) || 3042 (SrcReg == AArch64::FP && DestReg == AArch64::SP)) { 3043 if (HasWinCFI) 3044 *HasWinCFI = true; 3045 if (Imm == 0) 3046 BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_SetFP)).setMIFlag(Flag); 3047 else 3048 BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_AddFP)) 3049 .addImm(Imm) 3050 .setMIFlag(Flag); 3051 assert((Offset - Imm) == 0 && "Expected remaining offset to be zero to " 3052 "emit a single SEH directive"); 3053 } else if (DestReg == AArch64::SP) { 3054 if (HasWinCFI) 3055 *HasWinCFI = true; 3056 assert(SrcReg == AArch64::SP && "Unexpected SrcReg for SEH_StackAlloc"); 3057 BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_StackAlloc)) 3058 .addImm(Imm) 3059 .setMIFlag(Flag); 3060 } 3061 if (HasWinCFI) 3062 *HasWinCFI = true; 3063 } 3064 3065 SrcReg = DestReg; 3066 Offset -= ThisVal << LocalShiftSize; 3067 } while (Offset); 3068 } 3069 3070 void llvm::emitFrameOffset(MachineBasicBlock &MBB, 3071 MachineBasicBlock::iterator MBBI, const DebugLoc &DL, 3072 unsigned DestReg, unsigned SrcReg, 3073 StackOffset Offset, const 
TargetInstrInfo *TII, 3074 MachineInstr::MIFlag Flag, bool SetNZCV, 3075 bool NeedsWinCFI, bool *HasWinCFI) { 3076 int64_t Bytes; 3077 Offset.getForFrameOffset(Bytes); 3078 3079 // First emit non-scalable frame offsets, or a simple 'mov'. 3080 if (Bytes || (!Offset && SrcReg != DestReg)) { 3081 assert((DestReg != AArch64::SP || Bytes % 16 == 0) && 3082 "SP increment/decrement not 16-byte aligned"); 3083 unsigned Opc = SetNZCV ? AArch64::ADDSXri : AArch64::ADDXri; 3084 if (Bytes < 0) { 3085 Bytes = -Bytes; 3086 Opc = SetNZCV ? AArch64::SUBSXri : AArch64::SUBXri; 3087 } 3088 emitFrameOffsetAdj(MBB, MBBI, DL, DestReg, SrcReg, Bytes, Opc, TII, Flag, 3089 NeedsWinCFI, HasWinCFI); 3090 SrcReg = DestReg; 3091 } 3092 } 3093 3094 MachineInstr *AArch64InstrInfo::foldMemoryOperandImpl( 3095 MachineFunction &MF, MachineInstr &MI, ArrayRef<unsigned> Ops, 3096 MachineBasicBlock::iterator InsertPt, int FrameIndex, 3097 LiveIntervals *LIS, VirtRegMap *VRM) const { 3098 // This is a bit of a hack. Consider this instruction: 3099 // 3100 // %0 = COPY %sp; GPR64all:%0 3101 // 3102 // We explicitly chose GPR64all for the virtual register so such a copy might 3103 // be eliminated by RegisterCoalescer. However, that may not be possible, and 3104 // %0 may even spill. We can't spill %sp, and since it is in the GPR64all 3105 // register class, TargetInstrInfo::foldMemoryOperand() is going to try. 3106 // 3107 // To prevent that, we are going to constrain the %0 register class here. 
3108 // 3109 // <rdar://problem/11522048> 3110 // 3111 if (MI.isFullCopy()) { 3112 Register DstReg = MI.getOperand(0).getReg(); 3113 Register SrcReg = MI.getOperand(1).getReg(); 3114 if (SrcReg == AArch64::SP && Register::isVirtualRegister(DstReg)) { 3115 MF.getRegInfo().constrainRegClass(DstReg, &AArch64::GPR64RegClass); 3116 return nullptr; 3117 } 3118 if (DstReg == AArch64::SP && Register::isVirtualRegister(SrcReg)) { 3119 MF.getRegInfo().constrainRegClass(SrcReg, &AArch64::GPR64RegClass); 3120 return nullptr; 3121 } 3122 } 3123 3124 // Handle the case where a copy is being spilled or filled but the source 3125 // and destination register class don't match. For example: 3126 // 3127 // %0 = COPY %xzr; GPR64common:%0 3128 // 3129 // In this case we can still safely fold away the COPY and generate the 3130 // following spill code: 3131 // 3132 // STRXui %xzr, %stack.0 3133 // 3134 // This also eliminates spilled cross register class COPYs (e.g. between x and 3135 // d regs) of the same size. For example: 3136 // 3137 // %0 = COPY %1; GPR64:%0, FPR64:%1 3138 // 3139 // will be filled as 3140 // 3141 // LDRDui %0, fi<#0> 3142 // 3143 // instead of 3144 // 3145 // LDRXui %Temp, fi<#0> 3146 // %0 = FMOV %Temp 3147 // 3148 if (MI.isCopy() && Ops.size() == 1 && 3149 // Make sure we're only folding the explicit COPY defs/uses. 3150 (Ops[0] == 0 || Ops[0] == 1)) { 3151 bool IsSpill = Ops[0] == 0; 3152 bool IsFill = !IsSpill; 3153 const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo(); 3154 const MachineRegisterInfo &MRI = MF.getRegInfo(); 3155 MachineBasicBlock &MBB = *MI.getParent(); 3156 const MachineOperand &DstMO = MI.getOperand(0); 3157 const MachineOperand &SrcMO = MI.getOperand(1); 3158 Register DstReg = DstMO.getReg(); 3159 Register SrcReg = SrcMO.getReg(); 3160 // This is slightly expensive to compute for physical regs since 3161 // getMinimalPhysRegClass is slow. 
3162 auto getRegClass = [&](unsigned Reg) { 3163 return Register::isVirtualRegister(Reg) ? MRI.getRegClass(Reg) 3164 : TRI.getMinimalPhysRegClass(Reg); 3165 }; 3166 3167 if (DstMO.getSubReg() == 0 && SrcMO.getSubReg() == 0) { 3168 assert(TRI.getRegSizeInBits(*getRegClass(DstReg)) == 3169 TRI.getRegSizeInBits(*getRegClass(SrcReg)) && 3170 "Mismatched register size in non subreg COPY"); 3171 if (IsSpill) 3172 storeRegToStackSlot(MBB, InsertPt, SrcReg, SrcMO.isKill(), FrameIndex, 3173 getRegClass(SrcReg), &TRI); 3174 else 3175 loadRegFromStackSlot(MBB, InsertPt, DstReg, FrameIndex, 3176 getRegClass(DstReg), &TRI); 3177 return &*--InsertPt; 3178 } 3179 3180 // Handle cases like spilling def of: 3181 // 3182 // %0:sub_32<def,read-undef> = COPY %wzr; GPR64common:%0 3183 // 3184 // where the physical register source can be widened and stored to the full 3185 // virtual reg destination stack slot, in this case producing: 3186 // 3187 // STRXui %xzr, %stack.0 3188 // 3189 if (IsSpill && DstMO.isUndef() && Register::isPhysicalRegister(SrcReg)) { 3190 assert(SrcMO.getSubReg() == 0 && 3191 "Unexpected subreg on physical register"); 3192 const TargetRegisterClass *SpillRC; 3193 unsigned SpillSubreg; 3194 switch (DstMO.getSubReg()) { 3195 default: 3196 SpillRC = nullptr; 3197 break; 3198 case AArch64::sub_32: 3199 case AArch64::ssub: 3200 if (AArch64::GPR32RegClass.contains(SrcReg)) { 3201 SpillRC = &AArch64::GPR64RegClass; 3202 SpillSubreg = AArch64::sub_32; 3203 } else if (AArch64::FPR32RegClass.contains(SrcReg)) { 3204 SpillRC = &AArch64::FPR64RegClass; 3205 SpillSubreg = AArch64::ssub; 3206 } else 3207 SpillRC = nullptr; 3208 break; 3209 case AArch64::dsub: 3210 if (AArch64::FPR64RegClass.contains(SrcReg)) { 3211 SpillRC = &AArch64::FPR128RegClass; 3212 SpillSubreg = AArch64::dsub; 3213 } else 3214 SpillRC = nullptr; 3215 break; 3216 } 3217 3218 if (SpillRC) 3219 if (unsigned WidenedSrcReg = 3220 TRI.getMatchingSuperReg(SrcReg, SpillSubreg, SpillRC)) { 3221 
storeRegToStackSlot(MBB, InsertPt, WidenedSrcReg, SrcMO.isKill(), 3222 FrameIndex, SpillRC, &TRI); 3223 return &*--InsertPt; 3224 } 3225 } 3226 3227 // Handle cases like filling use of: 3228 // 3229 // %0:sub_32<def,read-undef> = COPY %1; GPR64:%0, GPR32:%1 3230 // 3231 // where we can load the full virtual reg source stack slot, into the subreg 3232 // destination, in this case producing: 3233 // 3234 // LDRWui %0:sub_32<def,read-undef>, %stack.0 3235 // 3236 if (IsFill && SrcMO.getSubReg() == 0 && DstMO.isUndef()) { 3237 const TargetRegisterClass *FillRC; 3238 switch (DstMO.getSubReg()) { 3239 default: 3240 FillRC = nullptr; 3241 break; 3242 case AArch64::sub_32: 3243 FillRC = &AArch64::GPR32RegClass; 3244 break; 3245 case AArch64::ssub: 3246 FillRC = &AArch64::FPR32RegClass; 3247 break; 3248 case AArch64::dsub: 3249 FillRC = &AArch64::FPR64RegClass; 3250 break; 3251 } 3252 3253 if (FillRC) { 3254 assert(TRI.getRegSizeInBits(*getRegClass(SrcReg)) == 3255 TRI.getRegSizeInBits(*FillRC) && 3256 "Mismatched regclass size on folded subreg COPY"); 3257 loadRegFromStackSlot(MBB, InsertPt, DstReg, FrameIndex, FillRC, &TRI); 3258 MachineInstr &LoadMI = *--InsertPt; 3259 MachineOperand &LoadDst = LoadMI.getOperand(0); 3260 assert(LoadDst.getSubReg() == 0 && "unexpected subreg on fill load"); 3261 LoadDst.setSubReg(DstMO.getSubReg()); 3262 LoadDst.setIsUndef(); 3263 return &LoadMI; 3264 } 3265 } 3266 } 3267 3268 // Cannot fold. 3269 return nullptr; 3270 } 3271 3272 int llvm::isAArch64FrameOffsetLegal(const MachineInstr &MI, 3273 StackOffset &SOffset, 3274 bool *OutUseUnscaledOp, 3275 unsigned *OutUnscaledOp, 3276 int *EmittableOffset) { 3277 // Set output values in case of early exit. 3278 if (EmittableOffset) 3279 *EmittableOffset = 0; 3280 if (OutUseUnscaledOp) 3281 *OutUseUnscaledOp = false; 3282 if (OutUnscaledOp) 3283 *OutUnscaledOp = 0; 3284 3285 // Exit early for structured vector spills/fills as they can't take an 3286 // immediate offset. 
3287 switch (MI.getOpcode()) { 3288 default: 3289 break; 3290 case AArch64::LD1Twov2d: 3291 case AArch64::LD1Threev2d: 3292 case AArch64::LD1Fourv2d: 3293 case AArch64::LD1Twov1d: 3294 case AArch64::LD1Threev1d: 3295 case AArch64::LD1Fourv1d: 3296 case AArch64::ST1Twov2d: 3297 case AArch64::ST1Threev2d: 3298 case AArch64::ST1Fourv2d: 3299 case AArch64::ST1Twov1d: 3300 case AArch64::ST1Threev1d: 3301 case AArch64::ST1Fourv1d: 3302 case AArch64::IRG: 3303 case AArch64::IRGstack: 3304 return AArch64FrameOffsetCannotUpdate; 3305 } 3306 3307 // Get the min/max offset and the scale. 3308 unsigned Scale, Width; 3309 int64_t MinOff, MaxOff; 3310 if (!AArch64InstrInfo::getMemOpInfo(MI.getOpcode(), Scale, Width, MinOff, 3311 MaxOff)) 3312 llvm_unreachable("unhandled opcode in isAArch64FrameOffsetLegal"); 3313 3314 // Construct the complete offset. 3315 const MachineOperand &ImmOpnd = 3316 MI.getOperand(AArch64InstrInfo::getLoadStoreImmIdx(MI.getOpcode())); 3317 int Offset = SOffset.getBytes() + ImmOpnd.getImm() * Scale; 3318 3319 // If the offset doesn't match the scale, we rewrite the instruction to 3320 // use the unscaled instruction instead. Likewise, if we have a negative 3321 // offset and there is an unscaled op to use. 3322 Optional<unsigned> UnscaledOp = 3323 AArch64InstrInfo::getUnscaledLdSt(MI.getOpcode()); 3324 bool useUnscaledOp = UnscaledOp && (Offset % Scale || Offset < 0); 3325 if (useUnscaledOp && 3326 !AArch64InstrInfo::getMemOpInfo(*UnscaledOp, Scale, Width, MinOff, MaxOff)) 3327 llvm_unreachable("unhandled opcode in isAArch64FrameOffsetLegal"); 3328 3329 int64_t Remainder = Offset % Scale; 3330 assert(!(Remainder && useUnscaledOp) && 3331 "Cannot have remainder when using unscaled op"); 3332 3333 assert(MinOff < MaxOff && "Unexpected Min/Max offsets"); 3334 int NewOffset = Offset / Scale; 3335 if (MinOff <= NewOffset && NewOffset <= MaxOff) 3336 Offset = Remainder; 3337 else { 3338 NewOffset = NewOffset < 0 ? 
MinOff : MaxOff; 3339 Offset = Offset - NewOffset * Scale + Remainder; 3340 } 3341 3342 if (EmittableOffset) 3343 *EmittableOffset = NewOffset; 3344 if (OutUseUnscaledOp) 3345 *OutUseUnscaledOp = useUnscaledOp; 3346 if (OutUnscaledOp && UnscaledOp) 3347 *OutUnscaledOp = *UnscaledOp; 3348 3349 SOffset = StackOffset(Offset, MVT::i8); 3350 return AArch64FrameOffsetCanUpdate | 3351 (Offset == 0 ? AArch64FrameOffsetIsLegal : 0); 3352 } 3353 3354 bool llvm::rewriteAArch64FrameIndex(MachineInstr &MI, unsigned FrameRegIdx, 3355 unsigned FrameReg, StackOffset &Offset, 3356 const AArch64InstrInfo *TII) { 3357 unsigned Opcode = MI.getOpcode(); 3358 unsigned ImmIdx = FrameRegIdx + 1; 3359 3360 if (Opcode == AArch64::ADDSXri || Opcode == AArch64::ADDXri) { 3361 Offset += StackOffset(MI.getOperand(ImmIdx).getImm(), MVT::i8); 3362 emitFrameOffset(*MI.getParent(), MI, MI.getDebugLoc(), 3363 MI.getOperand(0).getReg(), FrameReg, Offset, TII, 3364 MachineInstr::NoFlags, (Opcode == AArch64::ADDSXri)); 3365 MI.eraseFromParent(); 3366 Offset = StackOffset(); 3367 return true; 3368 } 3369 3370 int NewOffset; 3371 unsigned UnscaledOp; 3372 bool UseUnscaledOp; 3373 int Status = isAArch64FrameOffsetLegal(MI, Offset, &UseUnscaledOp, 3374 &UnscaledOp, &NewOffset); 3375 if (Status & AArch64FrameOffsetCanUpdate) { 3376 if (Status & AArch64FrameOffsetIsLegal) 3377 // Replace the FrameIndex with FrameReg. 3378 MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false); 3379 if (UseUnscaledOp) 3380 MI.setDesc(TII->get(UnscaledOp)); 3381 3382 MI.getOperand(ImmIdx).ChangeToImmediate(NewOffset); 3383 return !Offset; 3384 } 3385 3386 return false; 3387 } 3388 3389 void AArch64InstrInfo::getNoop(MCInst &NopInst) const { 3390 NopInst.setOpcode(AArch64::HINT); 3391 NopInst.addOperand(MCOperand::createImm(0)); 3392 } 3393 3394 // AArch64 supports MachineCombiner. 
3395 bool AArch64InstrInfo::useMachineCombiner() const { return true; } 3396 3397 // True when Opc sets flag 3398 static bool isCombineInstrSettingFlag(unsigned Opc) { 3399 switch (Opc) { 3400 case AArch64::ADDSWrr: 3401 case AArch64::ADDSWri: 3402 case AArch64::ADDSXrr: 3403 case AArch64::ADDSXri: 3404 case AArch64::SUBSWrr: 3405 case AArch64::SUBSXrr: 3406 // Note: MSUB Wd,Wn,Wm,Wi -> Wd = Wi - WnxWm, not Wd=WnxWm - Wi. 3407 case AArch64::SUBSWri: 3408 case AArch64::SUBSXri: 3409 return true; 3410 default: 3411 break; 3412 } 3413 return false; 3414 } 3415 3416 // 32b Opcodes that can be combined with a MUL 3417 static bool isCombineInstrCandidate32(unsigned Opc) { 3418 switch (Opc) { 3419 case AArch64::ADDWrr: 3420 case AArch64::ADDWri: 3421 case AArch64::SUBWrr: 3422 case AArch64::ADDSWrr: 3423 case AArch64::ADDSWri: 3424 case AArch64::SUBSWrr: 3425 // Note: MSUB Wd,Wn,Wm,Wi -> Wd = Wi - WnxWm, not Wd=WnxWm - Wi. 3426 case AArch64::SUBWri: 3427 case AArch64::SUBSWri: 3428 return true; 3429 default: 3430 break; 3431 } 3432 return false; 3433 } 3434 3435 // 64b Opcodes that can be combined with a MUL 3436 static bool isCombineInstrCandidate64(unsigned Opc) { 3437 switch (Opc) { 3438 case AArch64::ADDXrr: 3439 case AArch64::ADDXri: 3440 case AArch64::SUBXrr: 3441 case AArch64::ADDSXrr: 3442 case AArch64::ADDSXri: 3443 case AArch64::SUBSXrr: 3444 // Note: MSUB Wd,Wn,Wm,Wi -> Wd = Wi - WnxWm, not Wd=WnxWm - Wi. 
3445 case AArch64::SUBXri: 3446 case AArch64::SUBSXri: 3447 return true; 3448 default: 3449 break; 3450 } 3451 return false; 3452 } 3453 3454 // FP Opcodes that can be combined with a FMUL 3455 static bool isCombineInstrCandidateFP(const MachineInstr &Inst) { 3456 switch (Inst.getOpcode()) { 3457 default: 3458 break; 3459 case AArch64::FADDSrr: 3460 case AArch64::FADDDrr: 3461 case AArch64::FADDv2f32: 3462 case AArch64::FADDv2f64: 3463 case AArch64::FADDv4f32: 3464 case AArch64::FSUBSrr: 3465 case AArch64::FSUBDrr: 3466 case AArch64::FSUBv2f32: 3467 case AArch64::FSUBv2f64: 3468 case AArch64::FSUBv4f32: 3469 TargetOptions Options = Inst.getParent()->getParent()->getTarget().Options; 3470 return (Options.UnsafeFPMath || 3471 Options.AllowFPOpFusion == FPOpFusion::Fast); 3472 } 3473 return false; 3474 } 3475 3476 // Opcodes that can be combined with a MUL 3477 static bool isCombineInstrCandidate(unsigned Opc) { 3478 return (isCombineInstrCandidate32(Opc) || isCombineInstrCandidate64(Opc)); 3479 } 3480 3481 // 3482 // Utility routine that checks if \param MO is defined by an 3483 // \param CombineOpc instruction in the basic block \param MBB 3484 static bool canCombine(MachineBasicBlock &MBB, MachineOperand &MO, 3485 unsigned CombineOpc, unsigned ZeroReg = 0, 3486 bool CheckZeroReg = false) { 3487 MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo(); 3488 MachineInstr *MI = nullptr; 3489 3490 if (MO.isReg() && Register::isVirtualRegister(MO.getReg())) 3491 MI = MRI.getUniqueVRegDef(MO.getReg()); 3492 // And it needs to be in the trace (otherwise, it won't have a depth). 3493 if (!MI || MI->getParent() != &MBB || (unsigned)MI->getOpcode() != CombineOpc) 3494 return false; 3495 // Must only used by the user we combine with. 
3496 if (!MRI.hasOneNonDBGUse(MI->getOperand(0).getReg())) 3497 return false; 3498 3499 if (CheckZeroReg) { 3500 assert(MI->getNumOperands() >= 4 && MI->getOperand(0).isReg() && 3501 MI->getOperand(1).isReg() && MI->getOperand(2).isReg() && 3502 MI->getOperand(3).isReg() && "MAdd/MSub must have a least 4 regs"); 3503 // The third input reg must be zero. 3504 if (MI->getOperand(3).getReg() != ZeroReg) 3505 return false; 3506 } 3507 3508 return true; 3509 } 3510 3511 // 3512 // Is \param MO defined by an integer multiply and can be combined? 3513 static bool canCombineWithMUL(MachineBasicBlock &MBB, MachineOperand &MO, 3514 unsigned MulOpc, unsigned ZeroReg) { 3515 return canCombine(MBB, MO, MulOpc, ZeroReg, true); 3516 } 3517 3518 // 3519 // Is \param MO defined by a floating-point multiply and can be combined? 3520 static bool canCombineWithFMUL(MachineBasicBlock &MBB, MachineOperand &MO, 3521 unsigned MulOpc) { 3522 return canCombine(MBB, MO, MulOpc); 3523 } 3524 3525 // TODO: There are many more machine instruction opcodes to match: 3526 // 1. Other data types (integer, vectors) 3527 // 2. Other math / logic operations (xor, or) 3528 // 3. Other forms of the same operation (intrinsics and other variants) 3529 bool AArch64InstrInfo::isAssociativeAndCommutative( 3530 const MachineInstr &Inst) const { 3531 switch (Inst.getOpcode()) { 3532 case AArch64::FADDDrr: 3533 case AArch64::FADDSrr: 3534 case AArch64::FADDv2f32: 3535 case AArch64::FADDv2f64: 3536 case AArch64::FADDv4f32: 3537 case AArch64::FMULDrr: 3538 case AArch64::FMULSrr: 3539 case AArch64::FMULX32: 3540 case AArch64::FMULX64: 3541 case AArch64::FMULXv2f32: 3542 case AArch64::FMULXv2f64: 3543 case AArch64::FMULXv4f32: 3544 case AArch64::FMULv2f32: 3545 case AArch64::FMULv2f64: 3546 case AArch64::FMULv4f32: 3547 return Inst.getParent()->getParent()->getTarget().Options.UnsafeFPMath; 3548 default: 3549 return false; 3550 } 3551 } 3552 3553 /// Find instructions that can be turned into madd. 
3554 static bool getMaddPatterns(MachineInstr &Root, 3555 SmallVectorImpl<MachineCombinerPattern> &Patterns) { 3556 unsigned Opc = Root.getOpcode(); 3557 MachineBasicBlock &MBB = *Root.getParent(); 3558 bool Found = false; 3559 3560 if (!isCombineInstrCandidate(Opc)) 3561 return false; 3562 if (isCombineInstrSettingFlag(Opc)) { 3563 int Cmp_NZCV = Root.findRegisterDefOperandIdx(AArch64::NZCV, true); 3564 // When NZCV is live bail out. 3565 if (Cmp_NZCV == -1) 3566 return false; 3567 unsigned NewOpc = convertToNonFlagSettingOpc(Root); 3568 // When opcode can't change bail out. 3569 // CHECKME: do we miss any cases for opcode conversion? 3570 if (NewOpc == Opc) 3571 return false; 3572 Opc = NewOpc; 3573 } 3574 3575 switch (Opc) { 3576 default: 3577 break; 3578 case AArch64::ADDWrr: 3579 assert(Root.getOperand(1).isReg() && Root.getOperand(2).isReg() && 3580 "ADDWrr does not have register operands"); 3581 if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDWrrr, 3582 AArch64::WZR)) { 3583 Patterns.push_back(MachineCombinerPattern::MULADDW_OP1); 3584 Found = true; 3585 } 3586 if (canCombineWithMUL(MBB, Root.getOperand(2), AArch64::MADDWrrr, 3587 AArch64::WZR)) { 3588 Patterns.push_back(MachineCombinerPattern::MULADDW_OP2); 3589 Found = true; 3590 } 3591 break; 3592 case AArch64::ADDXrr: 3593 if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDXrrr, 3594 AArch64::XZR)) { 3595 Patterns.push_back(MachineCombinerPattern::MULADDX_OP1); 3596 Found = true; 3597 } 3598 if (canCombineWithMUL(MBB, Root.getOperand(2), AArch64::MADDXrrr, 3599 AArch64::XZR)) { 3600 Patterns.push_back(MachineCombinerPattern::MULADDX_OP2); 3601 Found = true; 3602 } 3603 break; 3604 case AArch64::SUBWrr: 3605 if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDWrrr, 3606 AArch64::WZR)) { 3607 Patterns.push_back(MachineCombinerPattern::MULSUBW_OP1); 3608 Found = true; 3609 } 3610 if (canCombineWithMUL(MBB, Root.getOperand(2), AArch64::MADDWrrr, 3611 AArch64::WZR)) { 3612 
Patterns.push_back(MachineCombinerPattern::MULSUBW_OP2); 3613 Found = true; 3614 } 3615 break; 3616 case AArch64::SUBXrr: 3617 if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDXrrr, 3618 AArch64::XZR)) { 3619 Patterns.push_back(MachineCombinerPattern::MULSUBX_OP1); 3620 Found = true; 3621 } 3622 if (canCombineWithMUL(MBB, Root.getOperand(2), AArch64::MADDXrrr, 3623 AArch64::XZR)) { 3624 Patterns.push_back(MachineCombinerPattern::MULSUBX_OP2); 3625 Found = true; 3626 } 3627 break; 3628 case AArch64::ADDWri: 3629 if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDWrrr, 3630 AArch64::WZR)) { 3631 Patterns.push_back(MachineCombinerPattern::MULADDWI_OP1); 3632 Found = true; 3633 } 3634 break; 3635 case AArch64::ADDXri: 3636 if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDXrrr, 3637 AArch64::XZR)) { 3638 Patterns.push_back(MachineCombinerPattern::MULADDXI_OP1); 3639 Found = true; 3640 } 3641 break; 3642 case AArch64::SUBWri: 3643 if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDWrrr, 3644 AArch64::WZR)) { 3645 Patterns.push_back(MachineCombinerPattern::MULSUBWI_OP1); 3646 Found = true; 3647 } 3648 break; 3649 case AArch64::SUBXri: 3650 if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDXrrr, 3651 AArch64::XZR)) { 3652 Patterns.push_back(MachineCombinerPattern::MULSUBXI_OP1); 3653 Found = true; 3654 } 3655 break; 3656 } 3657 return Found; 3658 } 3659 /// Floating-Point Support 3660 3661 /// Find instructions that can be turned into madd. 
3662 static bool getFMAPatterns(MachineInstr &Root, 3663 SmallVectorImpl<MachineCombinerPattern> &Patterns) { 3664 3665 if (!isCombineInstrCandidateFP(Root)) 3666 return false; 3667 3668 MachineBasicBlock &MBB = *Root.getParent(); 3669 bool Found = false; 3670 3671 switch (Root.getOpcode()) { 3672 default: 3673 assert(false && "Unsupported FP instruction in combiner\n"); 3674 break; 3675 case AArch64::FADDSrr: 3676 assert(Root.getOperand(1).isReg() && Root.getOperand(2).isReg() && 3677 "FADDWrr does not have register operands"); 3678 if (canCombineWithFMUL(MBB, Root.getOperand(1), AArch64::FMULSrr)) { 3679 Patterns.push_back(MachineCombinerPattern::FMULADDS_OP1); 3680 Found = true; 3681 } else if (canCombineWithFMUL(MBB, Root.getOperand(1), 3682 AArch64::FMULv1i32_indexed)) { 3683 Patterns.push_back(MachineCombinerPattern::FMLAv1i32_indexed_OP1); 3684 Found = true; 3685 } 3686 if (canCombineWithFMUL(MBB, Root.getOperand(2), AArch64::FMULSrr)) { 3687 Patterns.push_back(MachineCombinerPattern::FMULADDS_OP2); 3688 Found = true; 3689 } else if (canCombineWithFMUL(MBB, Root.getOperand(2), 3690 AArch64::FMULv1i32_indexed)) { 3691 Patterns.push_back(MachineCombinerPattern::FMLAv1i32_indexed_OP2); 3692 Found = true; 3693 } 3694 break; 3695 case AArch64::FADDDrr: 3696 if (canCombineWithFMUL(MBB, Root.getOperand(1), AArch64::FMULDrr)) { 3697 Patterns.push_back(MachineCombinerPattern::FMULADDD_OP1); 3698 Found = true; 3699 } else if (canCombineWithFMUL(MBB, Root.getOperand(1), 3700 AArch64::FMULv1i64_indexed)) { 3701 Patterns.push_back(MachineCombinerPattern::FMLAv1i64_indexed_OP1); 3702 Found = true; 3703 } 3704 if (canCombineWithFMUL(MBB, Root.getOperand(2), AArch64::FMULDrr)) { 3705 Patterns.push_back(MachineCombinerPattern::FMULADDD_OP2); 3706 Found = true; 3707 } else if (canCombineWithFMUL(MBB, Root.getOperand(2), 3708 AArch64::FMULv1i64_indexed)) { 3709 Patterns.push_back(MachineCombinerPattern::FMLAv1i64_indexed_OP2); 3710 Found = true; 3711 } 3712 break; 3713 case 
AArch64::FADDv2f32: 3714 if (canCombineWithFMUL(MBB, Root.getOperand(1), 3715 AArch64::FMULv2i32_indexed)) { 3716 Patterns.push_back(MachineCombinerPattern::FMLAv2i32_indexed_OP1); 3717 Found = true; 3718 } else if (canCombineWithFMUL(MBB, Root.getOperand(1), 3719 AArch64::FMULv2f32)) { 3720 Patterns.push_back(MachineCombinerPattern::FMLAv2f32_OP1); 3721 Found = true; 3722 } 3723 if (canCombineWithFMUL(MBB, Root.getOperand(2), 3724 AArch64::FMULv2i32_indexed)) { 3725 Patterns.push_back(MachineCombinerPattern::FMLAv2i32_indexed_OP2); 3726 Found = true; 3727 } else if (canCombineWithFMUL(MBB, Root.getOperand(2), 3728 AArch64::FMULv2f32)) { 3729 Patterns.push_back(MachineCombinerPattern::FMLAv2f32_OP2); 3730 Found = true; 3731 } 3732 break; 3733 case AArch64::FADDv2f64: 3734 if (canCombineWithFMUL(MBB, Root.getOperand(1), 3735 AArch64::FMULv2i64_indexed)) { 3736 Patterns.push_back(MachineCombinerPattern::FMLAv2i64_indexed_OP1); 3737 Found = true; 3738 } else if (canCombineWithFMUL(MBB, Root.getOperand(1), 3739 AArch64::FMULv2f64)) { 3740 Patterns.push_back(MachineCombinerPattern::FMLAv2f64_OP1); 3741 Found = true; 3742 } 3743 if (canCombineWithFMUL(MBB, Root.getOperand(2), 3744 AArch64::FMULv2i64_indexed)) { 3745 Patterns.push_back(MachineCombinerPattern::FMLAv2i64_indexed_OP2); 3746 Found = true; 3747 } else if (canCombineWithFMUL(MBB, Root.getOperand(2), 3748 AArch64::FMULv2f64)) { 3749 Patterns.push_back(MachineCombinerPattern::FMLAv2f64_OP2); 3750 Found = true; 3751 } 3752 break; 3753 case AArch64::FADDv4f32: 3754 if (canCombineWithFMUL(MBB, Root.getOperand(1), 3755 AArch64::FMULv4i32_indexed)) { 3756 Patterns.push_back(MachineCombinerPattern::FMLAv4i32_indexed_OP1); 3757 Found = true; 3758 } else if (canCombineWithFMUL(MBB, Root.getOperand(1), 3759 AArch64::FMULv4f32)) { 3760 Patterns.push_back(MachineCombinerPattern::FMLAv4f32_OP1); 3761 Found = true; 3762 } 3763 if (canCombineWithFMUL(MBB, Root.getOperand(2), 3764 AArch64::FMULv4i32_indexed)) { 3765 
Patterns.push_back(MachineCombinerPattern::FMLAv4i32_indexed_OP2); 3766 Found = true; 3767 } else if (canCombineWithFMUL(MBB, Root.getOperand(2), 3768 AArch64::FMULv4f32)) { 3769 Patterns.push_back(MachineCombinerPattern::FMLAv4f32_OP2); 3770 Found = true; 3771 } 3772 break; 3773 3774 case AArch64::FSUBSrr: 3775 if (canCombineWithFMUL(MBB, Root.getOperand(1), AArch64::FMULSrr)) { 3776 Patterns.push_back(MachineCombinerPattern::FMULSUBS_OP1); 3777 Found = true; 3778 } 3779 if (canCombineWithFMUL(MBB, Root.getOperand(2), AArch64::FMULSrr)) { 3780 Patterns.push_back(MachineCombinerPattern::FMULSUBS_OP2); 3781 Found = true; 3782 } else if (canCombineWithFMUL(MBB, Root.getOperand(2), 3783 AArch64::FMULv1i32_indexed)) { 3784 Patterns.push_back(MachineCombinerPattern::FMLSv1i32_indexed_OP2); 3785 Found = true; 3786 } 3787 if (canCombineWithFMUL(MBB, Root.getOperand(1), AArch64::FNMULSrr)) { 3788 Patterns.push_back(MachineCombinerPattern::FNMULSUBS_OP1); 3789 Found = true; 3790 } 3791 break; 3792 case AArch64::FSUBDrr: 3793 if (canCombineWithFMUL(MBB, Root.getOperand(1), AArch64::FMULDrr)) { 3794 Patterns.push_back(MachineCombinerPattern::FMULSUBD_OP1); 3795 Found = true; 3796 } 3797 if (canCombineWithFMUL(MBB, Root.getOperand(2), AArch64::FMULDrr)) { 3798 Patterns.push_back(MachineCombinerPattern::FMULSUBD_OP2); 3799 Found = true; 3800 } else if (canCombineWithFMUL(MBB, Root.getOperand(2), 3801 AArch64::FMULv1i64_indexed)) { 3802 Patterns.push_back(MachineCombinerPattern::FMLSv1i64_indexed_OP2); 3803 Found = true; 3804 } 3805 if (canCombineWithFMUL(MBB, Root.getOperand(1), AArch64::FNMULDrr)) { 3806 Patterns.push_back(MachineCombinerPattern::FNMULSUBD_OP1); 3807 Found = true; 3808 } 3809 break; 3810 case AArch64::FSUBv2f32: 3811 if (canCombineWithFMUL(MBB, Root.getOperand(2), 3812 AArch64::FMULv2i32_indexed)) { 3813 Patterns.push_back(MachineCombinerPattern::FMLSv2i32_indexed_OP2); 3814 Found = true; 3815 } else if (canCombineWithFMUL(MBB, Root.getOperand(2), 3816 
AArch64::FMULv2f32)) { 3817 Patterns.push_back(MachineCombinerPattern::FMLSv2f32_OP2); 3818 Found = true; 3819 } 3820 if (canCombineWithFMUL(MBB, Root.getOperand(1), 3821 AArch64::FMULv2i32_indexed)) { 3822 Patterns.push_back(MachineCombinerPattern::FMLSv2i32_indexed_OP1); 3823 Found = true; 3824 } else if (canCombineWithFMUL(MBB, Root.getOperand(1), 3825 AArch64::FMULv2f32)) { 3826 Patterns.push_back(MachineCombinerPattern::FMLSv2f32_OP1); 3827 Found = true; 3828 } 3829 break; 3830 case AArch64::FSUBv2f64: 3831 if (canCombineWithFMUL(MBB, Root.getOperand(2), 3832 AArch64::FMULv2i64_indexed)) { 3833 Patterns.push_back(MachineCombinerPattern::FMLSv2i64_indexed_OP2); 3834 Found = true; 3835 } else if (canCombineWithFMUL(MBB, Root.getOperand(2), 3836 AArch64::FMULv2f64)) { 3837 Patterns.push_back(MachineCombinerPattern::FMLSv2f64_OP2); 3838 Found = true; 3839 } 3840 if (canCombineWithFMUL(MBB, Root.getOperand(1), 3841 AArch64::FMULv2i64_indexed)) { 3842 Patterns.push_back(MachineCombinerPattern::FMLSv2i64_indexed_OP1); 3843 Found = true; 3844 } else if (canCombineWithFMUL(MBB, Root.getOperand(1), 3845 AArch64::FMULv2f64)) { 3846 Patterns.push_back(MachineCombinerPattern::FMLSv2f64_OP1); 3847 Found = true; 3848 } 3849 break; 3850 case AArch64::FSUBv4f32: 3851 if (canCombineWithFMUL(MBB, Root.getOperand(2), 3852 AArch64::FMULv4i32_indexed)) { 3853 Patterns.push_back(MachineCombinerPattern::FMLSv4i32_indexed_OP2); 3854 Found = true; 3855 } else if (canCombineWithFMUL(MBB, Root.getOperand(2), 3856 AArch64::FMULv4f32)) { 3857 Patterns.push_back(MachineCombinerPattern::FMLSv4f32_OP2); 3858 Found = true; 3859 } 3860 if (canCombineWithFMUL(MBB, Root.getOperand(1), 3861 AArch64::FMULv4i32_indexed)) { 3862 Patterns.push_back(MachineCombinerPattern::FMLSv4i32_indexed_OP1); 3863 Found = true; 3864 } else if (canCombineWithFMUL(MBB, Root.getOperand(1), 3865 AArch64::FMULv4f32)) { 3866 Patterns.push_back(MachineCombinerPattern::FMLSv4f32_OP1); 3867 Found = true; 3868 } 3869 break; 
3870 } 3871 return Found; 3872 } 3873 3874 /// Return true when a code sequence can improve throughput. It 3875 /// should be called only for instructions in loops. 3876 /// \param Pattern - combiner pattern 3877 bool AArch64InstrInfo::isThroughputPattern( 3878 MachineCombinerPattern Pattern) const { 3879 switch (Pattern) { 3880 default: 3881 break; 3882 case MachineCombinerPattern::FMULADDS_OP1: 3883 case MachineCombinerPattern::FMULADDS_OP2: 3884 case MachineCombinerPattern::FMULSUBS_OP1: 3885 case MachineCombinerPattern::FMULSUBS_OP2: 3886 case MachineCombinerPattern::FMULADDD_OP1: 3887 case MachineCombinerPattern::FMULADDD_OP2: 3888 case MachineCombinerPattern::FMULSUBD_OP1: 3889 case MachineCombinerPattern::FMULSUBD_OP2: 3890 case MachineCombinerPattern::FNMULSUBS_OP1: 3891 case MachineCombinerPattern::FNMULSUBD_OP1: 3892 case MachineCombinerPattern::FMLAv1i32_indexed_OP1: 3893 case MachineCombinerPattern::FMLAv1i32_indexed_OP2: 3894 case MachineCombinerPattern::FMLAv1i64_indexed_OP1: 3895 case MachineCombinerPattern::FMLAv1i64_indexed_OP2: 3896 case MachineCombinerPattern::FMLAv2f32_OP2: 3897 case MachineCombinerPattern::FMLAv2f32_OP1: 3898 case MachineCombinerPattern::FMLAv2f64_OP1: 3899 case MachineCombinerPattern::FMLAv2f64_OP2: 3900 case MachineCombinerPattern::FMLAv2i32_indexed_OP1: 3901 case MachineCombinerPattern::FMLAv2i32_indexed_OP2: 3902 case MachineCombinerPattern::FMLAv2i64_indexed_OP1: 3903 case MachineCombinerPattern::FMLAv2i64_indexed_OP2: 3904 case MachineCombinerPattern::FMLAv4f32_OP1: 3905 case MachineCombinerPattern::FMLAv4f32_OP2: 3906 case MachineCombinerPattern::FMLAv4i32_indexed_OP1: 3907 case MachineCombinerPattern::FMLAv4i32_indexed_OP2: 3908 case MachineCombinerPattern::FMLSv1i32_indexed_OP2: 3909 case MachineCombinerPattern::FMLSv1i64_indexed_OP2: 3910 case MachineCombinerPattern::FMLSv2i32_indexed_OP2: 3911 case MachineCombinerPattern::FMLSv2i64_indexed_OP2: 3912 case MachineCombinerPattern::FMLSv2f32_OP2: 3913 case 
MachineCombinerPattern::FMLSv2f64_OP2: 3914 case MachineCombinerPattern::FMLSv4i32_indexed_OP2: 3915 case MachineCombinerPattern::FMLSv4f32_OP2: 3916 return true; 3917 } // end switch (Pattern) 3918 return false; 3919 } 3920 /// Return true when there is potentially a faster code sequence for an 3921 /// instruction chain ending in \p Root. All potential patterns are listed in 3922 /// the \p Pattern vector. Pattern should be sorted in priority order since the 3923 /// pattern evaluator stops checking as soon as it finds a faster sequence. 3924 3925 bool AArch64InstrInfo::getMachineCombinerPatterns( 3926 MachineInstr &Root, 3927 SmallVectorImpl<MachineCombinerPattern> &Patterns) const { 3928 // Integer patterns 3929 if (getMaddPatterns(Root, Patterns)) 3930 return true; 3931 // Floating point patterns 3932 if (getFMAPatterns(Root, Patterns)) 3933 return true; 3934 3935 return TargetInstrInfo::getMachineCombinerPatterns(Root, Patterns); 3936 } 3937 3938 enum class FMAInstKind { Default, Indexed, Accumulator }; 3939 /// genFusedMultiply - Generate fused multiply instructions. 3940 /// This function supports both integer and floating point instructions. 3941 /// A typical example: 3942 /// F|MUL I=A,B,0 3943 /// F|ADD R,I,C 3944 /// ==> F|MADD R,A,B,C 3945 /// \param MF Containing MachineFunction 3946 /// \param MRI Register information 3947 /// \param TII Target information 3948 /// \param Root is the F|ADD instruction 3949 /// \param [out] InsInstrs is a vector of machine instructions and will 3950 /// contain the generated madd instruction 3951 /// \param IdxMulOpd is index of operand in Root that is the result of 3952 /// the F|MUL. In the example above IdxMulOpd is 1. 
3953 /// \param MaddOpc the opcode fo the f|madd instruction 3954 /// \param RC Register class of operands 3955 /// \param kind of fma instruction (addressing mode) to be generated 3956 /// \param ReplacedAddend is the result register from the instruction 3957 /// replacing the non-combined operand, if any. 3958 static MachineInstr * 3959 genFusedMultiply(MachineFunction &MF, MachineRegisterInfo &MRI, 3960 const TargetInstrInfo *TII, MachineInstr &Root, 3961 SmallVectorImpl<MachineInstr *> &InsInstrs, unsigned IdxMulOpd, 3962 unsigned MaddOpc, const TargetRegisterClass *RC, 3963 FMAInstKind kind = FMAInstKind::Default, 3964 const Register *ReplacedAddend = nullptr) { 3965 assert(IdxMulOpd == 1 || IdxMulOpd == 2); 3966 3967 unsigned IdxOtherOpd = IdxMulOpd == 1 ? 2 : 1; 3968 MachineInstr *MUL = MRI.getUniqueVRegDef(Root.getOperand(IdxMulOpd).getReg()); 3969 Register ResultReg = Root.getOperand(0).getReg(); 3970 Register SrcReg0 = MUL->getOperand(1).getReg(); 3971 bool Src0IsKill = MUL->getOperand(1).isKill(); 3972 Register SrcReg1 = MUL->getOperand(2).getReg(); 3973 bool Src1IsKill = MUL->getOperand(2).isKill(); 3974 3975 unsigned SrcReg2; 3976 bool Src2IsKill; 3977 if (ReplacedAddend) { 3978 // If we just generated a new addend, we must be it's only use. 
3979 SrcReg2 = *ReplacedAddend; 3980 Src2IsKill = true; 3981 } else { 3982 SrcReg2 = Root.getOperand(IdxOtherOpd).getReg(); 3983 Src2IsKill = Root.getOperand(IdxOtherOpd).isKill(); 3984 } 3985 3986 if (Register::isVirtualRegister(ResultReg)) 3987 MRI.constrainRegClass(ResultReg, RC); 3988 if (Register::isVirtualRegister(SrcReg0)) 3989 MRI.constrainRegClass(SrcReg0, RC); 3990 if (Register::isVirtualRegister(SrcReg1)) 3991 MRI.constrainRegClass(SrcReg1, RC); 3992 if (Register::isVirtualRegister(SrcReg2)) 3993 MRI.constrainRegClass(SrcReg2, RC); 3994 3995 MachineInstrBuilder MIB; 3996 if (kind == FMAInstKind::Default) 3997 MIB = BuildMI(MF, Root.getDebugLoc(), TII->get(MaddOpc), ResultReg) 3998 .addReg(SrcReg0, getKillRegState(Src0IsKill)) 3999 .addReg(SrcReg1, getKillRegState(Src1IsKill)) 4000 .addReg(SrcReg2, getKillRegState(Src2IsKill)); 4001 else if (kind == FMAInstKind::Indexed) 4002 MIB = BuildMI(MF, Root.getDebugLoc(), TII->get(MaddOpc), ResultReg) 4003 .addReg(SrcReg2, getKillRegState(Src2IsKill)) 4004 .addReg(SrcReg0, getKillRegState(Src0IsKill)) 4005 .addReg(SrcReg1, getKillRegState(Src1IsKill)) 4006 .addImm(MUL->getOperand(3).getImm()); 4007 else if (kind == FMAInstKind::Accumulator) 4008 MIB = BuildMI(MF, Root.getDebugLoc(), TII->get(MaddOpc), ResultReg) 4009 .addReg(SrcReg2, getKillRegState(Src2IsKill)) 4010 .addReg(SrcReg0, getKillRegState(Src0IsKill)) 4011 .addReg(SrcReg1, getKillRegState(Src1IsKill)); 4012 else 4013 assert(false && "Invalid FMA instruction kind \n"); 4014 // Insert the MADD (MADD, FMA, FMS, FMLA, FMSL) 4015 InsInstrs.push_back(MIB); 4016 return MUL; 4017 } 4018 4019 /// genMaddR - Generate madd instruction and combine mul and add using 4020 /// an extra virtual register 4021 /// Example - an ADD intermediate needs to be stored in a register: 4022 /// MUL I=A,B,0 4023 /// ADD R,I,Imm 4024 /// ==> ORR V, ZR, Imm 4025 /// ==> MADD R,A,B,V 4026 /// \param MF Containing MachineFunction 4027 /// \param MRI Register information 4028 /// 
\param TII Target information 4029 /// \param Root is the ADD instruction 4030 /// \param [out] InsInstrs is a vector of machine instructions and will 4031 /// contain the generated madd instruction 4032 /// \param IdxMulOpd is index of operand in Root that is the result of 4033 /// the MUL. In the example above IdxMulOpd is 1. 4034 /// \param MaddOpc the opcode fo the madd instruction 4035 /// \param VR is a virtual register that holds the value of an ADD operand 4036 /// (V in the example above). 4037 /// \param RC Register class of operands 4038 static MachineInstr *genMaddR(MachineFunction &MF, MachineRegisterInfo &MRI, 4039 const TargetInstrInfo *TII, MachineInstr &Root, 4040 SmallVectorImpl<MachineInstr *> &InsInstrs, 4041 unsigned IdxMulOpd, unsigned MaddOpc, unsigned VR, 4042 const TargetRegisterClass *RC) { 4043 assert(IdxMulOpd == 1 || IdxMulOpd == 2); 4044 4045 MachineInstr *MUL = MRI.getUniqueVRegDef(Root.getOperand(IdxMulOpd).getReg()); 4046 Register ResultReg = Root.getOperand(0).getReg(); 4047 Register SrcReg0 = MUL->getOperand(1).getReg(); 4048 bool Src0IsKill = MUL->getOperand(1).isKill(); 4049 Register SrcReg1 = MUL->getOperand(2).getReg(); 4050 bool Src1IsKill = MUL->getOperand(2).isKill(); 4051 4052 if (Register::isVirtualRegister(ResultReg)) 4053 MRI.constrainRegClass(ResultReg, RC); 4054 if (Register::isVirtualRegister(SrcReg0)) 4055 MRI.constrainRegClass(SrcReg0, RC); 4056 if (Register::isVirtualRegister(SrcReg1)) 4057 MRI.constrainRegClass(SrcReg1, RC); 4058 if (Register::isVirtualRegister(VR)) 4059 MRI.constrainRegClass(VR, RC); 4060 4061 MachineInstrBuilder MIB = 4062 BuildMI(MF, Root.getDebugLoc(), TII->get(MaddOpc), ResultReg) 4063 .addReg(SrcReg0, getKillRegState(Src0IsKill)) 4064 .addReg(SrcReg1, getKillRegState(Src1IsKill)) 4065 .addReg(VR); 4066 // Insert the MADD 4067 InsInstrs.push_back(MIB); 4068 return MUL; 4069 } 4070 4071 /// When getMachineCombinerPatterns() finds potential patterns, 4072 /// this function generates the 
instructions that could replace the 4073 /// original code sequence 4074 void AArch64InstrInfo::genAlternativeCodeSequence( 4075 MachineInstr &Root, MachineCombinerPattern Pattern, 4076 SmallVectorImpl<MachineInstr *> &InsInstrs, 4077 SmallVectorImpl<MachineInstr *> &DelInstrs, 4078 DenseMap<unsigned, unsigned> &InstrIdxForVirtReg) const { 4079 MachineBasicBlock &MBB = *Root.getParent(); 4080 MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo(); 4081 MachineFunction &MF = *MBB.getParent(); 4082 const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo(); 4083 4084 MachineInstr *MUL; 4085 const TargetRegisterClass *RC; 4086 unsigned Opc; 4087 switch (Pattern) { 4088 default: 4089 // Reassociate instructions. 4090 TargetInstrInfo::genAlternativeCodeSequence(Root, Pattern, InsInstrs, 4091 DelInstrs, InstrIdxForVirtReg); 4092 return; 4093 case MachineCombinerPattern::MULADDW_OP1: 4094 case MachineCombinerPattern::MULADDX_OP1: 4095 // MUL I=A,B,0 4096 // ADD R,I,C 4097 // ==> MADD R,A,B,C 4098 // --- Create(MADD); 4099 if (Pattern == MachineCombinerPattern::MULADDW_OP1) { 4100 Opc = AArch64::MADDWrrr; 4101 RC = &AArch64::GPR32RegClass; 4102 } else { 4103 Opc = AArch64::MADDXrrr; 4104 RC = &AArch64::GPR64RegClass; 4105 } 4106 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC); 4107 break; 4108 case MachineCombinerPattern::MULADDW_OP2: 4109 case MachineCombinerPattern::MULADDX_OP2: 4110 // MUL I=A,B,0 4111 // ADD R,C,I 4112 // ==> MADD R,A,B,C 4113 // --- Create(MADD); 4114 if (Pattern == MachineCombinerPattern::MULADDW_OP2) { 4115 Opc = AArch64::MADDWrrr; 4116 RC = &AArch64::GPR32RegClass; 4117 } else { 4118 Opc = AArch64::MADDXrrr; 4119 RC = &AArch64::GPR64RegClass; 4120 } 4121 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC); 4122 break; 4123 case MachineCombinerPattern::MULADDWI_OP1: 4124 case MachineCombinerPattern::MULADDXI_OP1: { 4125 // MUL I=A,B,0 4126 // ADD R,I,Imm 4127 // ==> ORR V, ZR, Imm 4128 // ==> MADD R,A,B,V 4129 // 
--- Create(MADD); 4130 const TargetRegisterClass *OrrRC; 4131 unsigned BitSize, OrrOpc, ZeroReg; 4132 if (Pattern == MachineCombinerPattern::MULADDWI_OP1) { 4133 OrrOpc = AArch64::ORRWri; 4134 OrrRC = &AArch64::GPR32spRegClass; 4135 BitSize = 32; 4136 ZeroReg = AArch64::WZR; 4137 Opc = AArch64::MADDWrrr; 4138 RC = &AArch64::GPR32RegClass; 4139 } else { 4140 OrrOpc = AArch64::ORRXri; 4141 OrrRC = &AArch64::GPR64spRegClass; 4142 BitSize = 64; 4143 ZeroReg = AArch64::XZR; 4144 Opc = AArch64::MADDXrrr; 4145 RC = &AArch64::GPR64RegClass; 4146 } 4147 Register NewVR = MRI.createVirtualRegister(OrrRC); 4148 uint64_t Imm = Root.getOperand(2).getImm(); 4149 4150 if (Root.getOperand(3).isImm()) { 4151 unsigned Val = Root.getOperand(3).getImm(); 4152 Imm = Imm << Val; 4153 } 4154 uint64_t UImm = SignExtend64(Imm, BitSize); 4155 uint64_t Encoding; 4156 if (AArch64_AM::processLogicalImmediate(UImm, BitSize, Encoding)) { 4157 MachineInstrBuilder MIB1 = 4158 BuildMI(MF, Root.getDebugLoc(), TII->get(OrrOpc), NewVR) 4159 .addReg(ZeroReg) 4160 .addImm(Encoding); 4161 InsInstrs.push_back(MIB1); 4162 InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0)); 4163 MUL = genMaddR(MF, MRI, TII, Root, InsInstrs, 1, Opc, NewVR, RC); 4164 } 4165 break; 4166 } 4167 case MachineCombinerPattern::MULSUBW_OP1: 4168 case MachineCombinerPattern::MULSUBX_OP1: { 4169 // MUL I=A,B,0 4170 // SUB R,I, C 4171 // ==> SUB V, 0, C 4172 // ==> MADD R,A,B,V // = -C + A*B 4173 // --- Create(MADD); 4174 const TargetRegisterClass *SubRC; 4175 unsigned SubOpc, ZeroReg; 4176 if (Pattern == MachineCombinerPattern::MULSUBW_OP1) { 4177 SubOpc = AArch64::SUBWrr; 4178 SubRC = &AArch64::GPR32spRegClass; 4179 ZeroReg = AArch64::WZR; 4180 Opc = AArch64::MADDWrrr; 4181 RC = &AArch64::GPR32RegClass; 4182 } else { 4183 SubOpc = AArch64::SUBXrr; 4184 SubRC = &AArch64::GPR64spRegClass; 4185 ZeroReg = AArch64::XZR; 4186 Opc = AArch64::MADDXrrr; 4187 RC = &AArch64::GPR64RegClass; 4188 } 4189 Register NewVR = 
MRI.createVirtualRegister(SubRC); 4190 // SUB NewVR, 0, C 4191 MachineInstrBuilder MIB1 = 4192 BuildMI(MF, Root.getDebugLoc(), TII->get(SubOpc), NewVR) 4193 .addReg(ZeroReg) 4194 .add(Root.getOperand(2)); 4195 InsInstrs.push_back(MIB1); 4196 InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0)); 4197 MUL = genMaddR(MF, MRI, TII, Root, InsInstrs, 1, Opc, NewVR, RC); 4198 break; 4199 } 4200 case MachineCombinerPattern::MULSUBW_OP2: 4201 case MachineCombinerPattern::MULSUBX_OP2: 4202 // MUL I=A,B,0 4203 // SUB R,C,I 4204 // ==> MSUB R,A,B,C (computes C - A*B) 4205 // --- Create(MSUB); 4206 if (Pattern == MachineCombinerPattern::MULSUBW_OP2) { 4207 Opc = AArch64::MSUBWrrr; 4208 RC = &AArch64::GPR32RegClass; 4209 } else { 4210 Opc = AArch64::MSUBXrrr; 4211 RC = &AArch64::GPR64RegClass; 4212 } 4213 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC); 4214 break; 4215 case MachineCombinerPattern::MULSUBWI_OP1: 4216 case MachineCombinerPattern::MULSUBXI_OP1: { 4217 // MUL I=A,B,0 4218 // SUB R,I, Imm 4219 // ==> ORR V, ZR, -Imm 4220 // ==> MADD R,A,B,V // = -Imm + A*B 4221 // --- Create(MADD); 4222 const TargetRegisterClass *OrrRC; 4223 unsigned BitSize, OrrOpc, ZeroReg; 4224 if (Pattern == MachineCombinerPattern::MULSUBWI_OP1) { 4225 OrrOpc = AArch64::ORRWri; 4226 OrrRC = &AArch64::GPR32spRegClass; 4227 BitSize = 32; 4228 ZeroReg = AArch64::WZR; 4229 Opc = AArch64::MADDWrrr; 4230 RC = &AArch64::GPR32RegClass; 4231 } else { 4232 OrrOpc = AArch64::ORRXri; 4233 OrrRC = &AArch64::GPR64spRegClass; 4234 BitSize = 64; 4235 ZeroReg = AArch64::XZR; 4236 Opc = AArch64::MADDXrrr; 4237 RC = &AArch64::GPR64RegClass; 4238 } 4239 Register NewVR = MRI.createVirtualRegister(OrrRC); 4240 uint64_t Imm = Root.getOperand(2).getImm(); 4241 if (Root.getOperand(3).isImm()) { 4242 unsigned Val = Root.getOperand(3).getImm(); 4243 Imm = Imm << Val; 4244 } 4245 uint64_t UImm = SignExtend64(-Imm, BitSize); 4246 uint64_t Encoding; 4247 if (AArch64_AM::processLogicalImmediate(UImm, BitSize, 
Encoding)) { 4248 MachineInstrBuilder MIB1 = 4249 BuildMI(MF, Root.getDebugLoc(), TII->get(OrrOpc), NewVR) 4250 .addReg(ZeroReg) 4251 .addImm(Encoding); 4252 InsInstrs.push_back(MIB1); 4253 InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0)); 4254 MUL = genMaddR(MF, MRI, TII, Root, InsInstrs, 1, Opc, NewVR, RC); 4255 } 4256 break; 4257 } 4258 // Floating Point Support 4259 case MachineCombinerPattern::FMULADDS_OP1: 4260 case MachineCombinerPattern::FMULADDD_OP1: 4261 // MUL I=A,B,0 4262 // ADD R,I,C 4263 // ==> MADD R,A,B,C 4264 // --- Create(MADD); 4265 if (Pattern == MachineCombinerPattern::FMULADDS_OP1) { 4266 Opc = AArch64::FMADDSrrr; 4267 RC = &AArch64::FPR32RegClass; 4268 } else { 4269 Opc = AArch64::FMADDDrrr; 4270 RC = &AArch64::FPR64RegClass; 4271 } 4272 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC); 4273 break; 4274 case MachineCombinerPattern::FMULADDS_OP2: 4275 case MachineCombinerPattern::FMULADDD_OP2: 4276 // FMUL I=A,B,0 4277 // FADD R,C,I 4278 // ==> FMADD R,A,B,C 4279 // --- Create(FMADD); 4280 if (Pattern == MachineCombinerPattern::FMULADDS_OP2) { 4281 Opc = AArch64::FMADDSrrr; 4282 RC = &AArch64::FPR32RegClass; 4283 } else { 4284 Opc = AArch64::FMADDDrrr; 4285 RC = &AArch64::FPR64RegClass; 4286 } 4287 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC); 4288 break; 4289 4290 case MachineCombinerPattern::FMLAv1i32_indexed_OP1: 4291 Opc = AArch64::FMLAv1i32_indexed; 4292 RC = &AArch64::FPR32RegClass; 4293 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC, 4294 FMAInstKind::Indexed); 4295 break; 4296 case MachineCombinerPattern::FMLAv1i32_indexed_OP2: 4297 Opc = AArch64::FMLAv1i32_indexed; 4298 RC = &AArch64::FPR32RegClass; 4299 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC, 4300 FMAInstKind::Indexed); 4301 break; 4302 4303 case MachineCombinerPattern::FMLAv1i64_indexed_OP1: 4304 Opc = AArch64::FMLAv1i64_indexed; 4305 RC = &AArch64::FPR64RegClass; 4306 MUL = genFusedMultiply(MF, 
MRI, TII, Root, InsInstrs, 1, Opc, RC, 4307 FMAInstKind::Indexed); 4308 break; 4309 case MachineCombinerPattern::FMLAv1i64_indexed_OP2: 4310 Opc = AArch64::FMLAv1i64_indexed; 4311 RC = &AArch64::FPR64RegClass; 4312 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC, 4313 FMAInstKind::Indexed); 4314 break; 4315 4316 case MachineCombinerPattern::FMLAv2i32_indexed_OP1: 4317 case MachineCombinerPattern::FMLAv2f32_OP1: 4318 RC = &AArch64::FPR64RegClass; 4319 if (Pattern == MachineCombinerPattern::FMLAv2i32_indexed_OP1) { 4320 Opc = AArch64::FMLAv2i32_indexed; 4321 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC, 4322 FMAInstKind::Indexed); 4323 } else { 4324 Opc = AArch64::FMLAv2f32; 4325 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC, 4326 FMAInstKind::Accumulator); 4327 } 4328 break; 4329 case MachineCombinerPattern::FMLAv2i32_indexed_OP2: 4330 case MachineCombinerPattern::FMLAv2f32_OP2: 4331 RC = &AArch64::FPR64RegClass; 4332 if (Pattern == MachineCombinerPattern::FMLAv2i32_indexed_OP2) { 4333 Opc = AArch64::FMLAv2i32_indexed; 4334 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC, 4335 FMAInstKind::Indexed); 4336 } else { 4337 Opc = AArch64::FMLAv2f32; 4338 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC, 4339 FMAInstKind::Accumulator); 4340 } 4341 break; 4342 4343 case MachineCombinerPattern::FMLAv2i64_indexed_OP1: 4344 case MachineCombinerPattern::FMLAv2f64_OP1: 4345 RC = &AArch64::FPR128RegClass; 4346 if (Pattern == MachineCombinerPattern::FMLAv2i64_indexed_OP1) { 4347 Opc = AArch64::FMLAv2i64_indexed; 4348 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC, 4349 FMAInstKind::Indexed); 4350 } else { 4351 Opc = AArch64::FMLAv2f64; 4352 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC, 4353 FMAInstKind::Accumulator); 4354 } 4355 break; 4356 case MachineCombinerPattern::FMLAv2i64_indexed_OP2: 4357 case MachineCombinerPattern::FMLAv2f64_OP2: 4358 RC = 
&AArch64::FPR128RegClass; 4359 if (Pattern == MachineCombinerPattern::FMLAv2i64_indexed_OP2) { 4360 Opc = AArch64::FMLAv2i64_indexed; 4361 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC, 4362 FMAInstKind::Indexed); 4363 } else { 4364 Opc = AArch64::FMLAv2f64; 4365 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC, 4366 FMAInstKind::Accumulator); 4367 } 4368 break; 4369 4370 case MachineCombinerPattern::FMLAv4i32_indexed_OP1: 4371 case MachineCombinerPattern::FMLAv4f32_OP1: 4372 RC = &AArch64::FPR128RegClass; 4373 if (Pattern == MachineCombinerPattern::FMLAv4i32_indexed_OP1) { 4374 Opc = AArch64::FMLAv4i32_indexed; 4375 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC, 4376 FMAInstKind::Indexed); 4377 } else { 4378 Opc = AArch64::FMLAv4f32; 4379 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC, 4380 FMAInstKind::Accumulator); 4381 } 4382 break; 4383 4384 case MachineCombinerPattern::FMLAv4i32_indexed_OP2: 4385 case MachineCombinerPattern::FMLAv4f32_OP2: 4386 RC = &AArch64::FPR128RegClass; 4387 if (Pattern == MachineCombinerPattern::FMLAv4i32_indexed_OP2) { 4388 Opc = AArch64::FMLAv4i32_indexed; 4389 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC, 4390 FMAInstKind::Indexed); 4391 } else { 4392 Opc = AArch64::FMLAv4f32; 4393 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC, 4394 FMAInstKind::Accumulator); 4395 } 4396 break; 4397 4398 case MachineCombinerPattern::FMULSUBS_OP1: 4399 case MachineCombinerPattern::FMULSUBD_OP1: { 4400 // FMUL I=A,B,0 4401 // FSUB R,I,C 4402 // ==> FNMSUB R,A,B,C // = -C + A*B 4403 // --- Create(FNMSUB); 4404 if (Pattern == MachineCombinerPattern::FMULSUBS_OP1) { 4405 Opc = AArch64::FNMSUBSrrr; 4406 RC = &AArch64::FPR32RegClass; 4407 } else { 4408 Opc = AArch64::FNMSUBDrrr; 4409 RC = &AArch64::FPR64RegClass; 4410 } 4411 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC); 4412 break; 4413 } 4414 4415 case 
MachineCombinerPattern::FNMULSUBS_OP1: 4416 case MachineCombinerPattern::FNMULSUBD_OP1: { 4417 // FNMUL I=A,B,0 4418 // FSUB R,I,C 4419 // ==> FNMADD R,A,B,C // = -A*B - C 4420 // --- Create(FNMADD); 4421 if (Pattern == MachineCombinerPattern::FNMULSUBS_OP1) { 4422 Opc = AArch64::FNMADDSrrr; 4423 RC = &AArch64::FPR32RegClass; 4424 } else { 4425 Opc = AArch64::FNMADDDrrr; 4426 RC = &AArch64::FPR64RegClass; 4427 } 4428 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC); 4429 break; 4430 } 4431 4432 case MachineCombinerPattern::FMULSUBS_OP2: 4433 case MachineCombinerPattern::FMULSUBD_OP2: { 4434 // FMUL I=A,B,0 4435 // FSUB R,C,I 4436 // ==> FMSUB R,A,B,C (computes C - A*B) 4437 // --- Create(FMSUB); 4438 if (Pattern == MachineCombinerPattern::FMULSUBS_OP2) { 4439 Opc = AArch64::FMSUBSrrr; 4440 RC = &AArch64::FPR32RegClass; 4441 } else { 4442 Opc = AArch64::FMSUBDrrr; 4443 RC = &AArch64::FPR64RegClass; 4444 } 4445 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC); 4446 break; 4447 } 4448 4449 case MachineCombinerPattern::FMLSv1i32_indexed_OP2: 4450 Opc = AArch64::FMLSv1i32_indexed; 4451 RC = &AArch64::FPR32RegClass; 4452 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC, 4453 FMAInstKind::Indexed); 4454 break; 4455 4456 case MachineCombinerPattern::FMLSv1i64_indexed_OP2: 4457 Opc = AArch64::FMLSv1i64_indexed; 4458 RC = &AArch64::FPR64RegClass; 4459 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC, 4460 FMAInstKind::Indexed); 4461 break; 4462 4463 case MachineCombinerPattern::FMLSv2f32_OP2: 4464 case MachineCombinerPattern::FMLSv2i32_indexed_OP2: 4465 RC = &AArch64::FPR64RegClass; 4466 if (Pattern == MachineCombinerPattern::FMLSv2i32_indexed_OP2) { 4467 Opc = AArch64::FMLSv2i32_indexed; 4468 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC, 4469 FMAInstKind::Indexed); 4470 } else { 4471 Opc = AArch64::FMLSv2f32; 4472 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC, 4473 
FMAInstKind::Accumulator); 4474 } 4475 break; 4476 4477 case MachineCombinerPattern::FMLSv2f64_OP2: 4478 case MachineCombinerPattern::FMLSv2i64_indexed_OP2: 4479 RC = &AArch64::FPR128RegClass; 4480 if (Pattern == MachineCombinerPattern::FMLSv2i64_indexed_OP2) { 4481 Opc = AArch64::FMLSv2i64_indexed; 4482 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC, 4483 FMAInstKind::Indexed); 4484 } else { 4485 Opc = AArch64::FMLSv2f64; 4486 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC, 4487 FMAInstKind::Accumulator); 4488 } 4489 break; 4490 4491 case MachineCombinerPattern::FMLSv4f32_OP2: 4492 case MachineCombinerPattern::FMLSv4i32_indexed_OP2: 4493 RC = &AArch64::FPR128RegClass; 4494 if (Pattern == MachineCombinerPattern::FMLSv4i32_indexed_OP2) { 4495 Opc = AArch64::FMLSv4i32_indexed; 4496 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC, 4497 FMAInstKind::Indexed); 4498 } else { 4499 Opc = AArch64::FMLSv4f32; 4500 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC, 4501 FMAInstKind::Accumulator); 4502 } 4503 break; 4504 case MachineCombinerPattern::FMLSv2f32_OP1: 4505 case MachineCombinerPattern::FMLSv2i32_indexed_OP1: { 4506 RC = &AArch64::FPR64RegClass; 4507 Register NewVR = MRI.createVirtualRegister(RC); 4508 MachineInstrBuilder MIB1 = 4509 BuildMI(MF, Root.getDebugLoc(), TII->get(AArch64::FNEGv2f32), NewVR) 4510 .add(Root.getOperand(2)); 4511 InsInstrs.push_back(MIB1); 4512 InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0)); 4513 if (Pattern == MachineCombinerPattern::FMLSv2i32_indexed_OP1) { 4514 Opc = AArch64::FMLAv2i32_indexed; 4515 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC, 4516 FMAInstKind::Indexed, &NewVR); 4517 } else { 4518 Opc = AArch64::FMLAv2f32; 4519 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC, 4520 FMAInstKind::Accumulator, &NewVR); 4521 } 4522 break; 4523 } 4524 case MachineCombinerPattern::FMLSv4f32_OP1: 4525 case 
MachineCombinerPattern::FMLSv4i32_indexed_OP1: { 4526 RC = &AArch64::FPR128RegClass; 4527 Register NewVR = MRI.createVirtualRegister(RC); 4528 MachineInstrBuilder MIB1 = 4529 BuildMI(MF, Root.getDebugLoc(), TII->get(AArch64::FNEGv4f32), NewVR) 4530 .add(Root.getOperand(2)); 4531 InsInstrs.push_back(MIB1); 4532 InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0)); 4533 if (Pattern == MachineCombinerPattern::FMLSv4i32_indexed_OP1) { 4534 Opc = AArch64::FMLAv4i32_indexed; 4535 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC, 4536 FMAInstKind::Indexed, &NewVR); 4537 } else { 4538 Opc = AArch64::FMLAv4f32; 4539 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC, 4540 FMAInstKind::Accumulator, &NewVR); 4541 } 4542 break; 4543 } 4544 case MachineCombinerPattern::FMLSv2f64_OP1: 4545 case MachineCombinerPattern::FMLSv2i64_indexed_OP1: { 4546 RC = &AArch64::FPR128RegClass; 4547 Register NewVR = MRI.createVirtualRegister(RC); 4548 MachineInstrBuilder MIB1 = 4549 BuildMI(MF, Root.getDebugLoc(), TII->get(AArch64::FNEGv2f64), NewVR) 4550 .add(Root.getOperand(2)); 4551 InsInstrs.push_back(MIB1); 4552 InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0)); 4553 if (Pattern == MachineCombinerPattern::FMLSv2i64_indexed_OP1) { 4554 Opc = AArch64::FMLAv2i64_indexed; 4555 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC, 4556 FMAInstKind::Indexed, &NewVR); 4557 } else { 4558 Opc = AArch64::FMLAv2f64; 4559 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC, 4560 FMAInstKind::Accumulator, &NewVR); 4561 } 4562 break; 4563 } 4564 } // end switch (Pattern) 4565 // Record MUL and ADD/SUB for deletion 4566 DelInstrs.push_back(MUL); 4567 DelInstrs.push_back(&Root); 4568 } 4569 4570 /// Replace csincr-branch sequence by simple conditional branch 4571 /// 4572 /// Examples: 4573 /// 1. 
\code 4574 /// csinc w9, wzr, wzr, <condition code> 4575 /// tbnz w9, #0, 0x44 4576 /// \endcode 4577 /// to 4578 /// \code 4579 /// b.<inverted condition code> 4580 /// \endcode 4581 /// 4582 /// 2. \code 4583 /// csinc w9, wzr, wzr, <condition code> 4584 /// tbz w9, #0, 0x44 4585 /// \endcode 4586 /// to 4587 /// \code 4588 /// b.<condition code> 4589 /// \endcode 4590 /// 4591 /// Replace compare and branch sequence by TBZ/TBNZ instruction when the 4592 /// compare's constant operand is power of 2. 4593 /// 4594 /// Examples: 4595 /// \code 4596 /// and w8, w8, #0x400 4597 /// cbnz w8, L1 4598 /// \endcode 4599 /// to 4600 /// \code 4601 /// tbnz w8, #10, L1 4602 /// \endcode 4603 /// 4604 /// \param MI Conditional Branch 4605 /// \return True when the simple conditional branch is generated 4606 /// 4607 bool AArch64InstrInfo::optimizeCondBranch(MachineInstr &MI) const { 4608 bool IsNegativeBranch = false; 4609 bool IsTestAndBranch = false; 4610 unsigned TargetBBInMI = 0; 4611 switch (MI.getOpcode()) { 4612 default: 4613 llvm_unreachable("Unknown branch instruction?"); 4614 case AArch64::Bcc: 4615 return false; 4616 case AArch64::CBZW: 4617 case AArch64::CBZX: 4618 TargetBBInMI = 1; 4619 break; 4620 case AArch64::CBNZW: 4621 case AArch64::CBNZX: 4622 TargetBBInMI = 1; 4623 IsNegativeBranch = true; 4624 break; 4625 case AArch64::TBZW: 4626 case AArch64::TBZX: 4627 TargetBBInMI = 2; 4628 IsTestAndBranch = true; 4629 break; 4630 case AArch64::TBNZW: 4631 case AArch64::TBNZX: 4632 TargetBBInMI = 2; 4633 IsNegativeBranch = true; 4634 IsTestAndBranch = true; 4635 break; 4636 } 4637 // So we increment a zero register and test for bits other 4638 // than bit 0? Conservatively bail out in case the verifier 4639 // missed this case. 4640 if (IsTestAndBranch && MI.getOperand(1).getImm()) 4641 return false; 4642 4643 // Find Definition. 
4644 assert(MI.getParent() && "Incomplete machine instruciton\n"); 4645 MachineBasicBlock *MBB = MI.getParent(); 4646 MachineFunction *MF = MBB->getParent(); 4647 MachineRegisterInfo *MRI = &MF->getRegInfo(); 4648 Register VReg = MI.getOperand(0).getReg(); 4649 if (!Register::isVirtualRegister(VReg)) 4650 return false; 4651 4652 MachineInstr *DefMI = MRI->getVRegDef(VReg); 4653 4654 // Look through COPY instructions to find definition. 4655 while (DefMI->isCopy()) { 4656 Register CopyVReg = DefMI->getOperand(1).getReg(); 4657 if (!MRI->hasOneNonDBGUse(CopyVReg)) 4658 return false; 4659 if (!MRI->hasOneDef(CopyVReg)) 4660 return false; 4661 DefMI = MRI->getVRegDef(CopyVReg); 4662 } 4663 4664 switch (DefMI->getOpcode()) { 4665 default: 4666 return false; 4667 // Fold AND into a TBZ/TBNZ if constant operand is power of 2. 4668 case AArch64::ANDWri: 4669 case AArch64::ANDXri: { 4670 if (IsTestAndBranch) 4671 return false; 4672 if (DefMI->getParent() != MBB) 4673 return false; 4674 if (!MRI->hasOneNonDBGUse(VReg)) 4675 return false; 4676 4677 bool Is32Bit = (DefMI->getOpcode() == AArch64::ANDWri); 4678 uint64_t Mask = AArch64_AM::decodeLogicalImmediate( 4679 DefMI->getOperand(2).getImm(), Is32Bit ? 32 : 64); 4680 if (!isPowerOf2_64(Mask)) 4681 return false; 4682 4683 MachineOperand &MO = DefMI->getOperand(1); 4684 Register NewReg = MO.getReg(); 4685 if (!Register::isVirtualRegister(NewReg)) 4686 return false; 4687 4688 assert(!MRI->def_empty(NewReg) && "Register must be defined."); 4689 4690 MachineBasicBlock &RefToMBB = *MBB; 4691 MachineBasicBlock *TBB = MI.getOperand(1).getMBB(); 4692 DebugLoc DL = MI.getDebugLoc(); 4693 unsigned Imm = Log2_64(Mask); 4694 unsigned Opc = (Imm < 32) 4695 ? (IsNegativeBranch ? AArch64::TBNZW : AArch64::TBZW) 4696 : (IsNegativeBranch ? AArch64::TBNZX : AArch64::TBZX); 4697 MachineInstr *NewMI = BuildMI(RefToMBB, MI, DL, get(Opc)) 4698 .addReg(NewReg) 4699 .addImm(Imm) 4700 .addMBB(TBB); 4701 // Register lives on to the CBZ now. 
4702 MO.setIsKill(false); 4703 4704 // For immediate smaller than 32, we need to use the 32-bit 4705 // variant (W) in all cases. Indeed the 64-bit variant does not 4706 // allow to encode them. 4707 // Therefore, if the input register is 64-bit, we need to take the 4708 // 32-bit sub-part. 4709 if (!Is32Bit && Imm < 32) 4710 NewMI->getOperand(0).setSubReg(AArch64::sub_32); 4711 MI.eraseFromParent(); 4712 return true; 4713 } 4714 // Look for CSINC 4715 case AArch64::CSINCWr: 4716 case AArch64::CSINCXr: { 4717 if (!(DefMI->getOperand(1).getReg() == AArch64::WZR && 4718 DefMI->getOperand(2).getReg() == AArch64::WZR) && 4719 !(DefMI->getOperand(1).getReg() == AArch64::XZR && 4720 DefMI->getOperand(2).getReg() == AArch64::XZR)) 4721 return false; 4722 4723 if (DefMI->findRegisterDefOperandIdx(AArch64::NZCV, true) != -1) 4724 return false; 4725 4726 AArch64CC::CondCode CC = (AArch64CC::CondCode)DefMI->getOperand(3).getImm(); 4727 // Convert only when the condition code is not modified between 4728 // the CSINC and the branch. The CC may be used by other 4729 // instructions in between. 
if (areCFlagsAccessedBetweenInstrs(DefMI, MI, &getRegisterInfo(), AK_Write))
  return false;
MachineBasicBlock &RefToMBB = *MBB;
MachineBasicBlock *TBB = MI.getOperand(TargetBBInMI).getMBB();
DebugLoc DL = MI.getDebugLoc();
if (IsNegativeBranch)
  CC = AArch64CC::getInvertedCondCode(CC);
BuildMI(RefToMBB, MI, DL, get(AArch64::Bcc)).addImm(CC).addMBB(TBB);
MI.eraseFromParent();
return true;
}
}
}

/// Split the operand target flags \p TF into two parts: the bits selected by
/// AArch64II::MO_FRAGMENT (first element of the pair) and all remaining bits
/// (second element of the pair).
std::pair<unsigned, unsigned>
AArch64InstrInfo::decomposeMachineOperandsTargetFlags(unsigned TF) const {
  const unsigned Mask = AArch64II::MO_FRAGMENT;
  return std::make_pair(TF & Mask, TF & ~Mask);
}

/// Return a static table that pairs each direct (fragment) operand target flag
/// with its textual name.
ArrayRef<std::pair<unsigned, const char *>>
AArch64InstrInfo::getSerializableDirectMachineOperandTargetFlags() const {
  using namespace AArch64II;

  static const std::pair<unsigned, const char *> TargetFlags[] = {
      {MO_PAGE, "aarch64-page"}, {MO_PAGEOFF, "aarch64-pageoff"},
      {MO_G3, "aarch64-g3"},     {MO_G2, "aarch64-g2"},
      {MO_G1, "aarch64-g1"},     {MO_G0, "aarch64-g0"},
      {MO_HI12, "aarch64-hi12"}};
  return makeArrayRef(TargetFlags);
}

/// Return a static table that pairs each bitmask operand target flag with its
/// textual name.
ArrayRef<std::pair<unsigned, const char *>>
AArch64InstrInfo::getSerializableBitmaskMachineOperandTargetFlags() const {
  using namespace AArch64II;

  static const std::pair<unsigned, const char *> TargetFlags[] = {
      {MO_COFFSTUB, "aarch64-coffstub"},
      {MO_GOT, "aarch64-got"},
      {MO_NC, "aarch64-nc"},
      {MO_S, "aarch64-s"},
      {MO_TLS, "aarch64-tls"},
      {MO_DLLIMPORT, "aarch64-dllimport"},
      {MO_PREL, "aarch64-prel"},
      {MO_TAGGED, "aarch64-tagged"}};
  return makeArrayRef(TargetFlags);
}

/// Return a static table that pairs each target-specific MachineMemOperand
/// flag with its textual name.
ArrayRef<std::pair<MachineMemOperand::Flags, const char *>>
AArch64InstrInfo::getSerializableMachineMemOperandTargetFlags() const {
  static const std::pair<MachineMemOperand::Flags, const char *> TargetFlags[] =
      {{MOSuppressPair, "aarch64-suppress-pair"},
       {MOStridedAccess, "aarch64-strided-access"}};
  return makeArrayRef(TargetFlags);
}

/// Constants defining how certain sequences should be outlined.
/// This encompasses how an outlined function should be called, and what kind of
/// frame should be emitted for that outlined function.
///
/// \p MachineOutlinerDefault implies that the function should be called with
/// a save and restore of LR to the stack.
///
/// That is,
///
/// I1     Save LR                    OUTLINED_FUNCTION:
/// I2 --> BL OUTLINED_FUNCTION       I1
/// I3     Restore LR                 I2
///                                   I3
///                                   RET
///
/// * Call construction overhead: 3 (save + BL + restore)
/// * Frame construction overhead: 1 (ret)
/// * Requires stack fixups? Yes
///
/// \p MachineOutlinerTailCall implies that the function is being created from
/// a sequence of instructions ending in a return.
///
/// That is,
///
/// I1                             OUTLINED_FUNCTION:
/// I2 --> B OUTLINED_FUNCTION     I1
/// RET                            I2
///                                RET
///
/// * Call construction overhead: 1 (B)
/// * Frame construction overhead: 0 (Return included in sequence)
/// * Requires stack fixups? No
///
/// \p MachineOutlinerNoLRSave implies that the function should be called using
/// a BL instruction, but doesn't require LR to be saved and restored. This
/// happens when LR is known to be dead.
///
/// That is,
///
/// I1                                OUTLINED_FUNCTION:
/// I2 --> BL OUTLINED_FUNCTION       I1
/// I3                                I2
///                                   I3
///                                   RET
///
/// * Call construction overhead: 1 (BL)
/// * Frame construction overhead: 1 (RET)
/// * Requires stack fixups? No
///
/// \p MachineOutlinerThunk implies that the function is being created from
/// a sequence of instructions ending in a call. The outlined function is
/// called with a BL instruction, and the outlined function tail-calls the
/// original call destination.
///
/// That is,
///
/// I1                                OUTLINED_FUNCTION:
/// I2 --> BL OUTLINED_FUNCTION       I1
/// BL f                              I2
///                                   B f
///
/// * Call construction overhead: 1 (BL)
/// * Frame construction overhead: 0
/// * Requires stack fixups? No
///
/// \p MachineOutlinerRegSave implies that the function should be called with a
/// save and restore of LR to an available register. This allows us to avoid
/// stack fixups. Note that this outlining variant is compatible with the
/// NoLRSave case.
///
/// That is,
///
/// I1     Save LR                    OUTLINED_FUNCTION:
/// I2 --> BL OUTLINED_FUNCTION       I1
/// I3     Restore LR                 I2
///                                   I3
///                                   RET
///
/// * Call construction overhead: 3 (save + BL + restore)
/// * Frame construction overhead: 1 (ret)
/// * Requires stack fixups? No
enum MachineOutlinerClass {
  MachineOutlinerDefault,  /// Emit a save, restore, call, and return.
  MachineOutlinerTailCall, /// Only emit a branch.
  MachineOutlinerNoLRSave, /// Emit a call and return.
  MachineOutlinerThunk,    /// Emit a call and tail-call.
  MachineOutlinerRegSave   /// Same as default, but save to a register.
};

/// Per-basic-block properties. They are set in isMBBSafeToOutlineFrom;
/// UnsafeRegsDead and HasCalls are read back from the candidates' flags in
/// getOutliningCandidateInfo.
enum MachineOutlinerMBBFlags {
  LRUnavailableSomewhere = 0x2,
  HasCalls = 0x4,
  UnsafeRegsDead = 0x8
};

/// Find a register that LR can be saved to around the call to the outlined
/// function for candidate \p C.
///
/// \returns the first GPR64 register that is not reserved, is none of LR, X16
/// or X17, and is reported as available by both C.LRU and C.UsedInSequence;
/// 0 if there is no such register.
unsigned
AArch64InstrInfo::findRegisterToSaveLRTo(const outliner::Candidate &C) const {
  assert(C.LRUWasSet && "LRU wasn't set?");
  MachineFunction *MF = C.getMF();
  const AArch64RegisterInfo *ARI = static_cast<const AArch64RegisterInfo *>(
      MF->getSubtarget().getRegisterInfo());

  // Check if there is an available register across the sequence that we can
  // use.
  for (unsigned Reg : AArch64::GPR64RegClass) {
    if (!ARI->isReservedReg(*MF, Reg) &&
        Reg != AArch64::LR &&  // LR is not reserved, but don't use it.
        Reg != AArch64::X16 && // X16 is not guaranteed to be preserved.
        Reg != AArch64::X17 && // Ditto for X17.
        C.LRU.available(Reg) && C.UsedInSequence.available(Reg))
      return Reg;
  }

  // No suitable register. Return 0.
  return 0u;
}

/// Decide how the repeated sequence described by \p RepeatedSequenceLocs is
/// outlined: which call variant each candidate uses and which frame variant
/// the outlined function gets (see MachineOutlinerClass).
///
/// Candidates that cannot be outlined are removed from
/// \p RepeatedSequenceLocs. A default-constructed OutlinedFunction is returned
/// when fewer than two candidates remain or when a required stack fixup is not
/// possible.
outliner::OutlinedFunction
AArch64InstrInfo::getOutliningCandidateInfo(
    std::vector<outliner::Candidate> &RepeatedSequenceLocs) const {
  outliner::Candidate &FirstCand = RepeatedSequenceLocs[0];
  // Size of the sequence in bytes, measured on the first candidate.
  unsigned SequenceSize =
      std::accumulate(FirstCand.front(), std::next(FirstCand.back()), 0,
                      [this](unsigned Sum, const MachineInstr &MI) {
                        return Sum + getInstSizeInBytes(MI);
                      });

  // Properties about candidate MBBs that hold for all of them.
  unsigned FlagsSetInAll = 0xF;

  // Intersect the MBB flags of all candidates into FlagsSetInAll. (Liveness
  // itself is computed lazily further down, via Candidate::initLRU.)
  const TargetRegisterInfo &TRI = getRegisterInfo();
  std::for_each(RepeatedSequenceLocs.begin(), RepeatedSequenceLocs.end(),
                [&FlagsSetInAll](outliner::Candidate &C) {
                  FlagsSetInAll &= C.Flags;
                });

  // According to the AArch64 Procedure Call Standard, the following are
  // undefined on entry/exit from a function call:
  //
  // * Registers x16, x17, (and thus w16, w17)
  // * Condition codes (and thus the NZCV register)
  //
  // Because of this, we can't outline any sequence of instructions where one
  // of these registers is live into/across it. Thus, we need to delete those
  // candidates.
  auto CantGuaranteeValueAcrossCall = [&TRI](outliner::Candidate &C) {
    // If the unsafe registers in this block are all dead, then we don't need
    // to compute liveness here.
    if (C.Flags & UnsafeRegsDead)
      return false;
    C.initLRU(TRI);
    LiveRegUnits LRU = C.LRU;
    return (!LRU.available(AArch64::W16) || !LRU.available(AArch64::W17) ||
            !LRU.available(AArch64::NZCV));
  };

  // Are there any candidates where those registers are live?
  if (!(FlagsSetInAll & UnsafeRegsDead)) {
    // Erase every candidate that violates the restrictions above. (It could be
    // true that we have viable candidates, so it's not worth bailing out in
    // the case that, say, 1 out of 20 candidates violate the restrictions.)
    RepeatedSequenceLocs.erase(std::remove_if(RepeatedSequenceLocs.begin(),
                                              RepeatedSequenceLocs.end(),
                                              CantGuaranteeValueAcrossCall),
                               RepeatedSequenceLocs.end());

    // If the sequence doesn't have enough candidates left, then we're done.
    if (RepeatedSequenceLocs.size() < 2)
      return outliner::OutlinedFunction();
  }

  // At this point, we have only "safe" candidates to outline. Figure out
  // frame + call instruction information.

  unsigned LastInstrOpcode = RepeatedSequenceLocs[0].back()->getOpcode();

  // Helper lambda which sets call information for every candidate.
  auto SetCandidateCallInfo =
      [&RepeatedSequenceLocs](unsigned CallID, unsigned NumBytesForCall) {
        for (outliner::Candidate &C : RepeatedSequenceLocs)
          C.setCallInfo(CallID, NumBytesForCall);
      };

  // Start from the default frame: 4 bytes of frame overhead.
  unsigned FrameID = MachineOutlinerDefault;
  unsigned NumBytesToCreateFrame = 4;

  // True if any candidate's function carries the "branch-target-enforcement"
  // attribute; BLR-terminated sequences are then not turned into thunks.
  bool HasBTI = any_of(RepeatedSequenceLocs, [](outliner::Candidate &C) {
    return C.getMF()->getFunction().hasFnAttribute("branch-target-enforcement");
  });

  // Returns true if an instructions is safe to fix up, false otherwise.
  auto IsSafeToFixup = [this, &TRI](MachineInstr &MI) {
    if (MI.isCall())
      return true;

    // Instructions that neither read nor write SP need no fixup.
    if (!MI.modifiesRegister(AArch64::SP, &TRI) &&
        !MI.readsRegister(AArch64::SP, &TRI))
      return true;

    // Any modification of SP will break our code to save/restore LR.
    // FIXME: We could handle some instructions which add a constant
    // offset to SP, with a bit more work.
    if (MI.modifiesRegister(AArch64::SP, &TRI))
      return false;

    // At this point, we have a stack instruction that we might need to
    // fix up. We'll handle it if it's a load or store.
    if (MI.mayLoadOrStore()) {
      const MachineOperand *Base; // Filled with the base operand of MI.
      int64_t Offset;             // Filled with the offset of MI.

      // Does it allow us to offset the base operand and is the base the
      // register SP?
      if (!getMemOperandWithOffset(MI, Base, Offset, &TRI) || !Base->isReg() ||
          Base->getReg() != AArch64::SP)
        return false;

      // Find the minimum/maximum offset for this instruction and check
      // if fixing it up would be in range.
      int64_t MinOffset,
          MaxOffset;  // Unscaled offsets for the instruction.
      unsigned Scale; // The scale to multiply the offsets by.
      unsigned DummyWidth;
      getMemOpInfo(MI.getOpcode(), Scale, DummyWidth, MinOffset, MaxOffset);

      Offset += 16; // Update the offset to what it would be if we outlined.
      if (Offset < MinOffset * Scale || Offset > MaxOffset * Scale)
        return false;

      // It's in range, so we can outline it.
      return true;
    }

    // FIXME: Add handling for instructions like "add x0, sp, #8".

    // We can't fix it up, so don't outline it.
    return false;
  };

  // True if it's possible to fix up each stack instruction in this sequence.
  // Important for frames/call variants that modify the stack.
  bool AllStackInstrsSafe = std::all_of(
      FirstCand.front(), std::next(FirstCand.back()), IsSafeToFixup);

  // If the last instruction in any candidate is a terminator, then we should
  // tail call all of the candidates.
  if (RepeatedSequenceLocs[0].back()->isTerminator()) {
    FrameID = MachineOutlinerTailCall;
    NumBytesToCreateFrame = 0;
    SetCandidateCallInfo(MachineOutlinerTailCall, 4);
  }

  else if (LastInstrOpcode == AArch64::BL ||
           (LastInstrOpcode == AArch64::BLR && !HasBTI)) {
    // FIXME: Do we need to check if the code after this uses the value of LR?
    FrameID = MachineOutlinerThunk;
    NumBytesToCreateFrame = 0;
    SetCandidateCallInfo(MachineOutlinerThunk, 4);
  }

  else {
    // We need to decide how to emit calls + frames. We can always emit the same
    // frame if we don't need to save to the stack. If we have to save to the
    // stack, then we need a different frame.
    unsigned NumBytesNoStackCalls = 0;
    std::vector<outliner::Candidate> CandidatesWithoutStackFixups;

    for (outliner::Candidate &C : RepeatedSequenceLocs) {
      C.initLRU(TRI);

      // Is LR available? If so, we don't need a save.
      if (C.LRU.available(AArch64::LR)) {
        NumBytesNoStackCalls += 4;
        C.setCallInfo(MachineOutlinerNoLRSave, 4);
        CandidatesWithoutStackFixups.push_back(C);
      }

      // Is an unused register available? If so, we won't modify the stack, so
      // we can outline with the same frame type as those that don't save LR.
      else if (findRegisterToSaveLRTo(C)) {
        NumBytesNoStackCalls += 12;
        C.setCallInfo(MachineOutlinerRegSave, 12);
        CandidatesWithoutStackFixups.push_back(C);
      }

      // Is SP used in the sequence at all? If not, we don't have to modify
      // the stack, so we are guaranteed to get the same frame.
      else if (C.UsedInSequence.available(AArch64::SP)) {
        NumBytesNoStackCalls += 12;
        C.setCallInfo(MachineOutlinerDefault, 12);
        CandidatesWithoutStackFixups.push_back(C);
      }

      // If we outline this, we need to modify the stack. Pretend we don't
      // outline this by saving all of its bytes.
      else {
        NumBytesNoStackCalls += SequenceSize;
      }
    }

    // If there are no places where we have to save LR, then note that we
    // don't have to update the stack. Otherwise, give every candidate the
    // default call type, as long as it's safe to do so.
    if (!AllStackInstrsSafe ||
        NumBytesNoStackCalls <= RepeatedSequenceLocs.size() * 12) {
      RepeatedSequenceLocs = CandidatesWithoutStackFixups;
      FrameID = MachineOutlinerNoLRSave;
    } else {
      SetCandidateCallInfo(MachineOutlinerDefault, 12);
    }

    // If we dropped all of the candidates, bail out here.
    if (RepeatedSequenceLocs.size() < 2) {
      RepeatedSequenceLocs.clear();
      return outliner::OutlinedFunction();
    }
  }

  // Does every candidate's MBB contain a call? If so, then we might have a call
  // in the range.
  if (FlagsSetInAll & MachineOutlinerMBBFlags::HasCalls) {
    // Check if the range contains a call. These require a save + restore of the
    // link register.
    bool ModStackToSaveLR = false;
    // Note: this scan stops before the last instruction of the sequence.
    if (std::any_of(FirstCand.front(), FirstCand.back(),
                    [](const MachineInstr &MI) { return MI.isCall(); }))
      ModStackToSaveLR = true;

    // Handle the last instruction separately. If this is a tail call, then the
    // last instruction is a call. We don't want to save + restore in this case.
    // However, it could be possible that the last instruction is a call without
    // it being valid to tail call this sequence. We should consider this as
    // well.
    else if (FrameID != MachineOutlinerThunk &&
             FrameID != MachineOutlinerTailCall && FirstCand.back()->isCall())
      ModStackToSaveLR = true;

    if (ModStackToSaveLR) {
      // We can't fix up the stack. Bail out.
      if (!AllStackInstrsSafe) {
        RepeatedSequenceLocs.clear();
        return outliner::OutlinedFunction();
      }

      // Save + restore LR.
      NumBytesToCreateFrame += 8;
    }
  }

  return outliner::OutlinedFunction(RepeatedSequenceLocs, SequenceSize,
                                    NumBytesToCreateFrame, FrameID);
}

/// Return true if instructions may be outlined from \p MF.
///
/// Outlining is refused for linkonce_odr functions (unless
/// \p OutlineFromLinkOnceODRs is set), for functions with an explicit section,
/// and for functions whose AArch64FunctionInfo is missing or reports that a
/// red zone is used or that red-zone usage is unknown.
bool AArch64InstrInfo::isFunctionSafeToOutlineFrom(
    MachineFunction &MF, bool OutlineFromLinkOnceODRs) const {
  const Function &F = MF.getFunction();

  // Can F be deduplicated by the linker? If it can, don't outline from it.
  if (!OutlineFromLinkOnceODRs && F.hasLinkOnceODRLinkage())
    return false;

  // Don't outline from functions with section markings; the program could
  // expect that all the code is in the named section.
  // FIXME: Allow outlining from multiple functions with the same section
  // marking.
  if (F.hasSection())
    return false;

  // Outlining from functions with redzones is unsafe since the outliner may
  // modify the stack. Check if hasRedZone is true or unknown; if yes, don't
  // outline from it.
  AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
  if (!AFI || AFI->hasRedZone().getValueOr(true))
    return false;

  // It's safe to outline from MF.
  return true;
}

/// Return true if instructions may be outlined from \p MBB, and record
/// properties of the block in \p Flags (see MachineOutlinerMBBFlags).
///
/// Returns false when W16, W17 or NZCV is untouched inside the block but is a
/// live-out of it.
bool AArch64InstrInfo::isMBBSafeToOutlineFrom(MachineBasicBlock &MBB,
                                              unsigned &Flags) const {
  // Check if LR is available through all of the MBB. If it's not, then set
  // a flag.
  assert(MBB.getParent()->getRegInfo().tracksLiveness() &&
         "Suitable Machine Function for outlining must track liveness");
  LiveRegUnits LRU(getRegisterInfo());

  // Accumulate the register units touched by every instruction in the block.
  std::for_each(MBB.rbegin(), MBB.rend(),
                [&LRU](MachineInstr &MI) { LRU.accumulate(MI); });

  // Check if each of the unsafe registers are available...
  bool W16AvailableInBlock = LRU.available(AArch64::W16);
  bool W17AvailableInBlock = LRU.available(AArch64::W17);
  bool NZCVAvailableInBlock = LRU.available(AArch64::NZCV);

  // If all of these are dead (and not live out), we know we don't have to check
  // them later.
  if (W16AvailableInBlock && W17AvailableInBlock && NZCVAvailableInBlock)
    Flags |= MachineOutlinerMBBFlags::UnsafeRegsDead;

  // Now, add the live outs to the set.
  LRU.addLiveOuts(MBB);

  // If any of these registers is available in the MBB, but also a live out of
  // the block, then we know outlining is unsafe.
  if (W16AvailableInBlock && !LRU.available(AArch64::W16))
    return false;
  if (W17AvailableInBlock && !LRU.available(AArch64::W17))
    return false;
  if (NZCVAvailableInBlock && !LRU.available(AArch64::NZCV))
    return false;

  // Check if there's a call inside this MachineBasicBlock. If there is, then
  // set a flag.
  if (any_of(MBB, [](MachineInstr &MI) { return MI.isCall(); }))
    Flags |= MachineOutlinerMBBFlags::HasCalls;

  MachineFunction *MF = MBB.getParent();

  // In the event that we outline, we may have to save LR. If there is an
  // available register in the MBB, then we'll always save LR there. Check if
  // this is true.
  bool CanSaveLR = false;
  const AArch64RegisterInfo *ARI = static_cast<const AArch64RegisterInfo *>(
      MF->getSubtarget().getRegisterInfo());

  // Check if there is an available register across the sequence that we can
  // use.
  for (unsigned Reg : AArch64::GPR64RegClass) {
    if (!ARI->isReservedReg(*MF, Reg) && Reg != AArch64::LR &&
        Reg != AArch64::X16 && Reg != AArch64::X17 && LRU.available(Reg)) {
      CanSaveLR = true;
      break;
    }
  }

  // If there is no register we can save LR to, and LR is not available
  // throughout the block, then we need to evaluate the safety of outlining
  // stack instructions later. Record that in the flags.
  if (!CanSaveLR && !LRU.available(AArch64::LR))
    Flags |= MachineOutlinerMBBFlags::LRUnavailableSomewhere;

  return true;
}

/// Classify the instruction at \p MIT for outlining: Legal, LegalTerminator,
/// Illegal, or Invisible.
outliner::InstrType
AArch64InstrInfo::getOutliningType(MachineBasicBlock::iterator &MIT,
                                   unsigned Flags) const {
  MachineInstr &MI = *MIT;
  MachineBasicBlock *MBB = MI.getParent();
  MachineFunction *MF = MBB->getParent();
  AArch64FunctionInfo *FuncInfo = MF->getInfo<AArch64FunctionInfo>();

  // Don't outline LOHs.
  if (FuncInfo->getLOHRelated().count(&MI))
    return outliner::InstrType::Illegal;

  // Don't allow debug values to impact outlining type.
  if (MI.isDebugInstr() || MI.isIndirectDebugValue())
    return outliner::InstrType::Invisible;

  // At this point, KILL instructions don't really tell us much so we can go
  // ahead and skip over them.
  if (MI.isKill())
    return outliner::InstrType::Invisible;

  // Is this a terminator for a basic block?
  if (MI.isTerminator()) {

    // Is this the end of a function?
    if (MI.getParent()->succ_empty())
      return outliner::InstrType::Legal;

    // It's not, so don't outline it.
    return outliner::InstrType::Illegal;
  }

  // Make sure none of the operands are un-outlinable.
  for (const MachineOperand &MOP : MI.operands()) {
    if (MOP.isCPI() || MOP.isJTI() || MOP.isCFIIndex() || MOP.isFI() ||
        MOP.isTargetIndex())
      return outliner::InstrType::Illegal;

    // If it uses LR or W30 explicitly, then don't touch it.
    if (MOP.isReg() && !MOP.isImplicit() &&
        (MOP.getReg() == AArch64::LR || MOP.getReg() == AArch64::W30))
      return outliner::InstrType::Illegal;
  }

  // Special cases for instructions that can always be outlined, but will fail
  // the later tests. e.g, ADRPs, which are PC-relative use LR, but can always
  // be outlined because they don't require a *specific* value to be in LR.
  if (MI.getOpcode() == AArch64::ADRP)
    return outliner::InstrType::Legal;

  // If MI is a call we might be able to outline it. We don't want to outline
  // any calls that rely on the position of items on the stack. When we outline
  // something containing a call, we have to emit a save and restore of LR in
  // the outlined function. Currently, this always happens by saving LR to the
  // stack. Thus, if we outline, say, half the parameters for a function call
  // plus the call, then we'll break the callee's expectations for the layout
  // of the stack.
  //
  // FIXME: Allow calls to functions which construct a stack frame, as long
  // as they don't access arguments on the stack.
  // FIXME: Figure out some way to analyze functions defined in other modules.
  // We should be able to compute the memory usage based on the IR calling
  // convention, even if we can't see the definition.
  if (MI.isCall()) {
    // Get the function associated with the call. Look at each operand and find
    // the one that represents the callee and get its name.
    const Function *Callee = nullptr;
    for (const MachineOperand &MOP : MI.operands()) {
      if (MOP.isGlobal()) {
        Callee = dyn_cast<Function>(MOP.getGlobal());
        break;
      }
    }

    // Never outline calls to mcount. There isn't any rule that would require
    // this, but the Linux kernel's "ftrace" feature depends on it.
    if (Callee && Callee->getName() == "\01_mcount")
      return outliner::InstrType::Illegal;

    // If we don't know anything about the callee, assume it depends on the
    // stack layout of the caller. In that case, it's only legal to outline
    // as a tail-call. Whitelist the call instructions we know about so we
    // don't get unexpected results with call pseudo-instructions.
    auto UnknownCallOutlineType = outliner::InstrType::Illegal;
    if (MI.getOpcode() == AArch64::BLR || MI.getOpcode() == AArch64::BL)
      UnknownCallOutlineType = outliner::InstrType::LegalTerminator;

    if (!Callee)
      return UnknownCallOutlineType;

    // We have a function we have information about. Check it if it's something
    // can safely outline.
    MachineFunction *CalleeMF = MF->getMMI().getMachineFunction(*Callee);

    // We don't know what's going on with the callee at all. Don't touch it.
    if (!CalleeMF)
      return UnknownCallOutlineType;

    // Check if we know anything about the callee saves on the function. If we
    // don't, then don't touch it, since that implies that we haven't
    // computed anything about its stack frame yet.
    MachineFrameInfo &MFI = CalleeMF->getFrameInfo();
    if (!MFI.isCalleeSavedInfoValid() || MFI.getStackSize() > 0 ||
        MFI.getNumObjects() > 0)
      return UnknownCallOutlineType;

    // At this point, we can say that CalleeMF ought to not pass anything on the
    // stack. Therefore, we can outline it.
    return outliner::InstrType::Legal;
  }

  // Don't outline positions.
  if (MI.isPosition())
    return outliner::InstrType::Illegal;

  // Don't touch the link register or W30.
  if (MI.readsRegister(AArch64::W30, &getRegisterInfo()) ||
      MI.modifiesRegister(AArch64::W30, &getRegisterInfo()))
    return outliner::InstrType::Illegal;

  // Don't outline BTI instructions, because that will prevent the outlining
  // site from being indirectly callable.
  if (MI.getOpcode() == AArch64::HINT) {
    int64_t Imm = MI.getOperand(0).getImm();
    if (Imm == 32 || Imm == 34 || Imm == 36 || Imm == 38)
      return outliner::InstrType::Illegal;
  }

  return outliner::InstrType::Legal;
}

/// Adjust the SP-relative loads and stores in \p MBB for an outlined function
/// that has pushed LR onto the stack: each such instruction gets its immediate
/// rewritten to (Offset + 16) / Scale.
void AArch64InstrInfo::fixupPostOutline(MachineBasicBlock &MBB) const {
  for (MachineInstr &MI : MBB) {
    const MachineOperand *Base;
    unsigned Width;
    int64_t Offset;

    // Is this a load or store with an immediate offset with SP as the base?
    if (!MI.mayLoadOrStore() ||
        !getMemOperandWithOffsetWidth(MI, Base, Offset, Width, &RI) ||
        (Base->isReg() && Base->getReg() != AArch64::SP))
      continue;

    // It is, so we have to fix it up.
    unsigned Scale;
    int64_t Dummy1, Dummy2;

    MachineOperand &StackOffsetOperand = getMemOpBaseRegImmOfsOffsetOperand(MI);
    assert(StackOffsetOperand.isImm() && "Stack offset wasn't immediate!");
    getMemOpInfo(MI.getOpcode(), Scale, Width, Dummy1, Dummy2);
    assert(Scale != 0 && "Unexpected opcode!");

    // We've pushed the return address to the stack, so add 16 to the offset.
    // This is safe, since we already checked if it would overflow when we
    // checked if this instruction was legal to outline.
    int64_t NewImm = (Offset + 16) / Scale;
    StackOffsetOperand.setImm(NewImm);
  }
}

/// Finish the body of an outlined function in \p MBB according to
/// \p OF.FrameConstructionID: turn a trailing call into a tail call for
/// thunks, wrap the body in an LR save/restore (with CFI) if it contains a
/// non-tail call, append a RET unless the function already ends in a
/// return/tail call, and fix up stack accesses for the default frame.
void AArch64InstrInfo::buildOutlinedFrame(
    MachineBasicBlock &MBB, MachineFunction &MF,
    const outliner::OutlinedFunction &OF) const {
  // For thunk outlining, rewrite the last instruction from a call to a
  // tail-call.
  if (OF.FrameConstructionID == MachineOutlinerThunk) {
    MachineInstr *Call = &*--MBB.instr_end();
    unsigned TailOpcode;
    if (Call->getOpcode() == AArch64::BL) {
      TailOpcode = AArch64::TCRETURNdi;
    } else {
      assert(Call->getOpcode() == AArch64::BLR);
      TailOpcode = AArch64::TCRETURNriALL;
    }
    MachineInstr *TC = BuildMI(MF, DebugLoc(), get(TailOpcode))
                            .add(Call->getOperand(0))
                            .addImm(0);
    MBB.insert(MBB.end(), TC);
    Call->eraseFromParent();
  }

  // Is there a call in the outlined range?
  auto IsNonTailCall = [](MachineInstr &MI) {
    return MI.isCall() && !MI.isReturn();
  };
  if (std::any_of(MBB.instr_begin(), MBB.instr_end(), IsNonTailCall)) {
    // Fix up the instructions in the range, since we're going to modify the
    // stack.
    assert(OF.FrameConstructionID != MachineOutlinerDefault &&
           "Can only fix up stack references once");
    fixupPostOutline(MBB);

    // LR has to be a live in so that we can save it.
    MBB.addLiveIn(AArch64::LR);

    MachineBasicBlock::iterator It = MBB.begin();
    MachineBasicBlock::iterator Et = MBB.end();

    // For tail-call and thunk frames the restore must go before the final
    // instruction rather than at the very end of the block.
    if (OF.FrameConstructionID == MachineOutlinerTailCall ||
        OF.FrameConstructionID == MachineOutlinerThunk)
      Et = std::prev(MBB.end());

    // Insert a save before the outlined region
    MachineInstr *STRXpre = BuildMI(MF, DebugLoc(), get(AArch64::STRXpre))
                                .addReg(AArch64::SP, RegState::Define)
                                .addReg(AArch64::LR)
                                .addReg(AArch64::SP)
                                .addImm(-16);
    It = MBB.insert(It, STRXpre);

    const TargetSubtargetInfo &STI = MF.getSubtarget();
    const MCRegisterInfo *MRI = STI.getRegisterInfo();
    unsigned DwarfReg = MRI->getDwarfRegNum(AArch64::LR, true);

    // Add a CFI saying the stack was moved 16 B down.
    int64_t StackPosEntry =
        MF.addFrameInst(MCCFIInstruction::createDefCfaOffset(nullptr, 16));
    BuildMI(MBB, It, DebugLoc(), get(AArch64::CFI_INSTRUCTION))
        .addCFIIndex(StackPosEntry)
        .setMIFlags(MachineInstr::FrameSetup);

    // Add a CFI saying that the LR that we want to find is now 16 B higher than
    // before.
    int64_t LRPosEntry =
        MF.addFrameInst(MCCFIInstruction::createOffset(nullptr, DwarfReg, 16));
    BuildMI(MBB, It, DebugLoc(), get(AArch64::CFI_INSTRUCTION))
        .addCFIIndex(LRPosEntry)
        .setMIFlags(MachineInstr::FrameSetup);

    // Insert a restore before the terminator for the function.
    MachineInstr *LDRXpost = BuildMI(MF, DebugLoc(), get(AArch64::LDRXpost))
                                 .addReg(AArch64::SP, RegState::Define)
                                 .addReg(AArch64::LR, RegState::Define)
                                 .addReg(AArch64::SP)
                                 .addImm(16);
    Et = MBB.insert(Et, LDRXpost);
  }

  // If this is a tail call outlined function, then there's already a return.
  if (OF.FrameConstructionID == MachineOutlinerTailCall ||
      OF.FrameConstructionID == MachineOutlinerThunk)
    return;

  // It's not a tail call, so we have to insert the return ourselves.
  MachineInstr *ret = BuildMI(MF, DebugLoc(), get(AArch64::RET))
                          .addReg(AArch64::LR, RegState::Undef);
  MBB.insert(MBB.end(), ret);

  // Did we have to modify the stack by saving the link register?
  if (OF.FrameConstructionID != MachineOutlinerDefault)
    return;

  // We modified the stack.
  // Walk over the basic block and fix up all the stack accesses.
  fixupPostOutline(MBB);
}

/// Insert, at \p It in \p MBB, the call to the outlined function \p MF (looked
/// up by name in \p M) using the call variant recorded in
/// \p C.CallConstructionID.
///
/// \returns an iterator to the inserted call/branch instruction. When LR is
/// saved and restored around the call, \p It is left pointing at the restore.
MachineBasicBlock::iterator AArch64InstrInfo::insertOutlinedCall(
    Module &M, MachineBasicBlock &MBB, MachineBasicBlock::iterator &It,
    MachineFunction &MF, const outliner::Candidate &C) const {

  // Are we tail calling?
  if (C.CallConstructionID == MachineOutlinerTailCall) {
    // If yes, then we can just branch to the label.
    It = MBB.insert(It, BuildMI(MF, DebugLoc(), get(AArch64::TCRETURNdi))
                            .addGlobalAddress(M.getNamedValue(MF.getName()))
                            .addImm(0));
    return It;
  }

  // Are we saving the link register?
  if (C.CallConstructionID == MachineOutlinerNoLRSave ||
      C.CallConstructionID == MachineOutlinerThunk) {
    // No, so just insert the call.
    It = MBB.insert(It, BuildMI(MF, DebugLoc(), get(AArch64::BL))
                            .addGlobalAddress(M.getNamedValue(MF.getName())));
    return It;
  }

  // We want to return the spot where we inserted the call.
  MachineBasicBlock::iterator CallPt;

  // Instructions for saving and restoring LR around the call instruction we're
  // going to insert.
  MachineInstr *Save;
  MachineInstr *Restore;
  // Can we save to a register?
  if (C.CallConstructionID == MachineOutlinerRegSave) {
    // FIXME: This logic should be sunk into a target-specific interface so that
    // we don't have to recompute the register.
    unsigned Reg = findRegisterToSaveLRTo(C);
    assert(Reg != 0 && "No callee-saved register available?");

    // Save and restore LR from that register.
    Save = BuildMI(MF, DebugLoc(), get(AArch64::ORRXrs), Reg)
               .addReg(AArch64::XZR)
               .addReg(AArch64::LR)
               .addImm(0);
    Restore = BuildMI(MF, DebugLoc(), get(AArch64::ORRXrs), AArch64::LR)
                  .addReg(AArch64::XZR)
                  .addReg(Reg)
                  .addImm(0);
  } else {
    // We have the default case. Save and restore from SP.
    Save = BuildMI(MF, DebugLoc(), get(AArch64::STRXpre))
               .addReg(AArch64::SP, RegState::Define)
               .addReg(AArch64::LR)
               .addReg(AArch64::SP)
               .addImm(-16);
    Restore = BuildMI(MF, DebugLoc(), get(AArch64::LDRXpost))
                  .addReg(AArch64::SP, RegState::Define)
                  .addReg(AArch64::LR, RegState::Define)
                  .addReg(AArch64::SP)
                  .addImm(16);
  }

  It = MBB.insert(It, Save);
  It++;

  // Insert the call.
  It = MBB.insert(It, BuildMI(MF, DebugLoc(), get(AArch64::BL))
                          .addGlobalAddress(M.getNamedValue(MF.getName())));
  CallPt = It;
  It++;

  It = MBB.insert(It, Restore);
  return CallPt;
}

/// Return true if \p MF's function has the minsize attribute, i.e. outlining
/// is enabled by default only for such functions.
bool AArch64InstrInfo::shouldOutlineFromFunctionByDefault(
    MachineFunction &MF) const {
  return MF.getFunction().hasMinSize();
}

/// If \p MI is a register-to-register move, set \p Source and \p Destination
/// to its source and destination operands and return true; otherwise return
/// false and leave both untouched.
bool AArch64InstrInfo::isCopyInstrImpl(
    const MachineInstr &MI, const MachineOperand *&Source,
    const MachineOperand *&Destination) const {

  // AArch64::ORRWrs and AArch64::ORRXrs with WZR/XZR reg
  // and zero immediate operands used as an alias for mov instruction.
  if (MI.getOpcode() == AArch64::ORRWrs &&
      MI.getOperand(1).getReg() == AArch64::WZR &&
      MI.getOperand(3).getImm() == 0x0) {
    Destination = &MI.getOperand(0);
    Source = &MI.getOperand(2);
    return true;
  }

  if (MI.getOpcode() == AArch64::ORRXrs &&
      MI.getOperand(1).getReg() == AArch64::XZR &&
      MI.getOperand(3).getImm() == 0x0) {
    Destination = &MI.getOperand(0);
    Source = &MI.getOperand(2);
    return true;
  }

  return false;
}

#define GET_INSTRINFO_HELPERS
#include "AArch64GenInstrInfo.inc"