1 //===- AArch64InstrInfo.cpp - AArch64 Instruction Information -------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file contains the AArch64 implementation of the TargetInstrInfo class.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "AArch64InstrInfo.h"
14 #include "AArch64MachineFunctionInfo.h"
15 #include "AArch64Subtarget.h"
16 #include "MCTargetDesc/AArch64AddressingModes.h"
17 #include "Utils/AArch64BaseInfo.h"
18 #include "llvm/ADT/ArrayRef.h"
19 #include "llvm/ADT/STLExtras.h"
20 #include "llvm/ADT/SmallVector.h"
21 #include "llvm/CodeGen/MachineBasicBlock.h"
22 #include "llvm/CodeGen/MachineFrameInfo.h"
23 #include "llvm/CodeGen/MachineFunction.h"
24 #include "llvm/CodeGen/MachineInstr.h"
25 #include "llvm/CodeGen/MachineInstrBuilder.h"
26 #include "llvm/CodeGen/MachineMemOperand.h"
27 #include "llvm/CodeGen/MachineOperand.h"
28 #include "llvm/CodeGen/MachineRegisterInfo.h"
29 #include "llvm/CodeGen/MachineModuleInfo.h"
30 #include "llvm/CodeGen/StackMaps.h"
31 #include "llvm/CodeGen/TargetRegisterInfo.h"
32 #include "llvm/CodeGen/TargetSubtargetInfo.h"
33 #include "llvm/IR/DebugInfoMetadata.h"
34 #include "llvm/IR/DebugLoc.h"
35 #include "llvm/IR/GlobalValue.h"
36 #include "llvm/MC/MCAsmInfo.h"
37 #include "llvm/MC/MCInst.h"
38 #include "llvm/MC/MCInstrDesc.h"
39 #include "llvm/Support/Casting.h"
40 #include "llvm/Support/CodeGen.h"
41 #include "llvm/Support/CommandLine.h"
42 #include "llvm/Support/Compiler.h"
43 #include "llvm/Support/ErrorHandling.h"
44 #include "llvm/Support/MathExtras.h"
45 #include "llvm/Target/TargetMachine.h"
46 #include "llvm/Target/TargetOptions.h"
47 #include <cassert>
48 #include <cstdint>
49 #include <iterator>
50 #include <utility>
51 
52 using namespace llvm;
53 
54 #define GET_INSTRINFO_CTOR_DTOR
55 #include "AArch64GenInstrInfo.inc"
56 
57 static cl::opt<unsigned> TBZDisplacementBits(
58     "aarch64-tbz-offset-bits", cl::Hidden, cl::init(14),
59     cl::desc("Restrict range of TB[N]Z instructions (DEBUG)"));
60 
61 static cl::opt<unsigned> CBZDisplacementBits(
62     "aarch64-cbz-offset-bits", cl::Hidden, cl::init(19),
63     cl::desc("Restrict range of CB[N]Z instructions (DEBUG)"));
64 
65 static cl::opt<unsigned>
66     BCCDisplacementBits("aarch64-bcc-offset-bits", cl::Hidden, cl::init(19),
67                         cl::desc("Restrict range of Bcc instructions (DEBUG)"));
68 
69 AArch64InstrInfo::AArch64InstrInfo(const AArch64Subtarget &STI)
70     : AArch64GenInstrInfo(AArch64::ADJCALLSTACKDOWN, AArch64::ADJCALLSTACKUP,
71                           AArch64::CATCHRET),
72       RI(STI.getTargetTriple()), Subtarget(STI) {}
73 
74 /// GetInstSize - Return the number of bytes of code the specified
75 /// instruction may be.  This returns the maximum number of bytes.
76 unsigned AArch64InstrInfo::getInstSizeInBytes(const MachineInstr &MI) const {
77   const MachineBasicBlock &MBB = *MI.getParent();
78   const MachineFunction *MF = MBB.getParent();
79   const MCAsmInfo *MAI = MF->getTarget().getMCAsmInfo();
80 
81   {
82     auto Op = MI.getOpcode();
83     if (Op == AArch64::INLINEASM || Op == AArch64::INLINEASM_BR)
84       return getInlineAsmLength(MI.getOperand(0).getSymbolName(), *MAI);
85   }
86 
87   // Meta-instructions emit no code.
88   if (MI.isMetaInstruction())
89     return 0;
90 
91   // FIXME: We currently only handle pseudoinstructions that don't get expanded
92   //        before the assembly printer.
93   unsigned NumBytes = 0;
94   const MCInstrDesc &Desc = MI.getDesc();
95   switch (Desc.getOpcode()) {
96   default:
97     // Anything not explicitly designated otherwise is a normal 4-byte insn.
98     NumBytes = 4;
99     break;
100   case TargetOpcode::STACKMAP:
101     // The upper bound for a stackmap intrinsic is the full length of its shadow
102     NumBytes = StackMapOpers(&MI).getNumPatchBytes();
103     assert(NumBytes % 4 == 0 && "Invalid number of NOP bytes requested!");
104     break;
105   case TargetOpcode::PATCHPOINT:
106     // The size of the patchpoint intrinsic is the number of bytes requested
107     NumBytes = PatchPointOpers(&MI).getNumPatchBytes();
108     assert(NumBytes % 4 == 0 && "Invalid number of NOP bytes requested!");
109     break;
110   case AArch64::TLSDESC_CALLSEQ:
111     // This gets lowered to an instruction sequence which takes 16 bytes
112     NumBytes = 16;
113     break;
114   case AArch64::JumpTableDest32:
115   case AArch64::JumpTableDest16:
116   case AArch64::JumpTableDest8:
117     NumBytes = 12;
118     break;
119   case AArch64::SPACE:
120     NumBytes = MI.getOperand(1).getImm();
121     break;
122   case TargetOpcode::BUNDLE:
123     NumBytes = getInstBundleLength(MI);
124     break;
125   }
126 
127   return NumBytes;
128 }
129 
130 unsigned AArch64InstrInfo::getInstBundleLength(const MachineInstr &MI) const {
131   unsigned Size = 0;
132   MachineBasicBlock::const_instr_iterator I = MI.getIterator();
133   MachineBasicBlock::const_instr_iterator E = MI.getParent()->instr_end();
134   while (++I != E && I->isInsideBundle()) {
135     assert(!I->isBundle() && "No nested bundle!");
136     Size += getInstSizeInBytes(*I);
137   }
138   return Size;
139 }
140 
141 static void parseCondBranch(MachineInstr *LastInst, MachineBasicBlock *&Target,
142                             SmallVectorImpl<MachineOperand> &Cond) {
143   // Block ends with fall-through condbranch.
144   switch (LastInst->getOpcode()) {
145   default:
146     llvm_unreachable("Unknown branch instruction?");
147   case AArch64::Bcc:
148     Target = LastInst->getOperand(1).getMBB();
149     Cond.push_back(LastInst->getOperand(0));
150     break;
151   case AArch64::CBZW:
152   case AArch64::CBZX:
153   case AArch64::CBNZW:
154   case AArch64::CBNZX:
155     Target = LastInst->getOperand(1).getMBB();
156     Cond.push_back(MachineOperand::CreateImm(-1));
157     Cond.push_back(MachineOperand::CreateImm(LastInst->getOpcode()));
158     Cond.push_back(LastInst->getOperand(0));
159     break;
160   case AArch64::TBZW:
161   case AArch64::TBZX:
162   case AArch64::TBNZW:
163   case AArch64::TBNZX:
164     Target = LastInst->getOperand(2).getMBB();
165     Cond.push_back(MachineOperand::CreateImm(-1));
166     Cond.push_back(MachineOperand::CreateImm(LastInst->getOpcode()));
167     Cond.push_back(LastInst->getOperand(0));
168     Cond.push_back(LastInst->getOperand(1));
169   }
170 }
171 
172 static unsigned getBranchDisplacementBits(unsigned Opc) {
173   switch (Opc) {
174   default:
175     llvm_unreachable("unexpected opcode!");
176   case AArch64::B:
177     return 64;
178   case AArch64::TBNZW:
179   case AArch64::TBZW:
180   case AArch64::TBNZX:
181   case AArch64::TBZX:
182     return TBZDisplacementBits;
183   case AArch64::CBNZW:
184   case AArch64::CBZW:
185   case AArch64::CBNZX:
186   case AArch64::CBZX:
187     return CBZDisplacementBits;
188   case AArch64::Bcc:
189     return BCCDisplacementBits;
190   }
191 }
192 
193 bool AArch64InstrInfo::isBranchOffsetInRange(unsigned BranchOp,
194                                              int64_t BrOffset) const {
195   unsigned Bits = getBranchDisplacementBits(BranchOp);
196   assert(Bits >= 3 && "max branch displacement must be enough to jump"
197                       "over conditional branch expansion");
198   return isIntN(Bits, BrOffset / 4);
199 }
200 
201 MachineBasicBlock *
202 AArch64InstrInfo::getBranchDestBlock(const MachineInstr &MI) const {
203   switch (MI.getOpcode()) {
204   default:
205     llvm_unreachable("unexpected opcode!");
206   case AArch64::B:
207     return MI.getOperand(0).getMBB();
208   case AArch64::TBZW:
209   case AArch64::TBNZW:
210   case AArch64::TBZX:
211   case AArch64::TBNZX:
212     return MI.getOperand(2).getMBB();
213   case AArch64::CBZW:
214   case AArch64::CBNZW:
215   case AArch64::CBZX:
216   case AArch64::CBNZX:
217   case AArch64::Bcc:
218     return MI.getOperand(1).getMBB();
219   }
220 }
221 
222 // Branch analysis.
223 bool AArch64InstrInfo::analyzeBranch(MachineBasicBlock &MBB,
224                                      MachineBasicBlock *&TBB,
225                                      MachineBasicBlock *&FBB,
226                                      SmallVectorImpl<MachineOperand> &Cond,
227                                      bool AllowModify) const {
228   // If the block has no terminators, it just falls into the block after it.
229   MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr();
230   if (I == MBB.end())
231     return false;
232 
233   if (!isUnpredicatedTerminator(*I))
234     return false;
235 
236   // Get the last instruction in the block.
237   MachineInstr *LastInst = &*I;
238 
239   // If there is only one terminator instruction, process it.
240   unsigned LastOpc = LastInst->getOpcode();
241   if (I == MBB.begin() || !isUnpredicatedTerminator(*--I)) {
242     if (isUncondBranchOpcode(LastOpc)) {
243       TBB = LastInst->getOperand(0).getMBB();
244       return false;
245     }
246     if (isCondBranchOpcode(LastOpc)) {
247       // Block ends with fall-through condbranch.
248       parseCondBranch(LastInst, TBB, Cond);
249       return false;
250     }
251     return true; // Can't handle indirect branch.
252   }
253 
254   // Get the instruction before it if it is a terminator.
255   MachineInstr *SecondLastInst = &*I;
256   unsigned SecondLastOpc = SecondLastInst->getOpcode();
257 
258   // If AllowModify is true and the block ends with two or more unconditional
259   // branches, delete all but the first unconditional branch.
260   if (AllowModify && isUncondBranchOpcode(LastOpc)) {
261     while (isUncondBranchOpcode(SecondLastOpc)) {
262       LastInst->eraseFromParent();
263       LastInst = SecondLastInst;
264       LastOpc = LastInst->getOpcode();
265       if (I == MBB.begin() || !isUnpredicatedTerminator(*--I)) {
266         // Return now the only terminator is an unconditional branch.
267         TBB = LastInst->getOperand(0).getMBB();
268         return false;
269       } else {
270         SecondLastInst = &*I;
271         SecondLastOpc = SecondLastInst->getOpcode();
272       }
273     }
274   }
275 
276   // If there are three terminators, we don't know what sort of block this is.
277   if (SecondLastInst && I != MBB.begin() && isUnpredicatedTerminator(*--I))
278     return true;
279 
280   // If the block ends with a B and a Bcc, handle it.
281   if (isCondBranchOpcode(SecondLastOpc) && isUncondBranchOpcode(LastOpc)) {
282     parseCondBranch(SecondLastInst, TBB, Cond);
283     FBB = LastInst->getOperand(0).getMBB();
284     return false;
285   }
286 
287   // If the block ends with two unconditional branches, handle it.  The second
288   // one is not executed, so remove it.
289   if (isUncondBranchOpcode(SecondLastOpc) && isUncondBranchOpcode(LastOpc)) {
290     TBB = SecondLastInst->getOperand(0).getMBB();
291     I = LastInst;
292     if (AllowModify)
293       I->eraseFromParent();
294     return false;
295   }
296 
297   // ...likewise if it ends with an indirect branch followed by an unconditional
298   // branch.
299   if (isIndirectBranchOpcode(SecondLastOpc) && isUncondBranchOpcode(LastOpc)) {
300     I = LastInst;
301     if (AllowModify)
302       I->eraseFromParent();
303     return true;
304   }
305 
306   // Otherwise, can't handle this.
307   return true;
308 }
309 
310 bool AArch64InstrInfo::reverseBranchCondition(
311     SmallVectorImpl<MachineOperand> &Cond) const {
312   if (Cond[0].getImm() != -1) {
313     // Regular Bcc
314     AArch64CC::CondCode CC = (AArch64CC::CondCode)(int)Cond[0].getImm();
315     Cond[0].setImm(AArch64CC::getInvertedCondCode(CC));
316   } else {
317     // Folded compare-and-branch
318     switch (Cond[1].getImm()) {
319     default:
320       llvm_unreachable("Unknown conditional branch!");
321     case AArch64::CBZW:
322       Cond[1].setImm(AArch64::CBNZW);
323       break;
324     case AArch64::CBNZW:
325       Cond[1].setImm(AArch64::CBZW);
326       break;
327     case AArch64::CBZX:
328       Cond[1].setImm(AArch64::CBNZX);
329       break;
330     case AArch64::CBNZX:
331       Cond[1].setImm(AArch64::CBZX);
332       break;
333     case AArch64::TBZW:
334       Cond[1].setImm(AArch64::TBNZW);
335       break;
336     case AArch64::TBNZW:
337       Cond[1].setImm(AArch64::TBZW);
338       break;
339     case AArch64::TBZX:
340       Cond[1].setImm(AArch64::TBNZX);
341       break;
342     case AArch64::TBNZX:
343       Cond[1].setImm(AArch64::TBZX);
344       break;
345     }
346   }
347 
348   return false;
349 }
350 
351 unsigned AArch64InstrInfo::removeBranch(MachineBasicBlock &MBB,
352                                         int *BytesRemoved) const {
353   MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr();
354   if (I == MBB.end())
355     return 0;
356 
357   if (!isUncondBranchOpcode(I->getOpcode()) &&
358       !isCondBranchOpcode(I->getOpcode()))
359     return 0;
360 
361   // Remove the branch.
362   I->eraseFromParent();
363 
364   I = MBB.end();
365 
366   if (I == MBB.begin()) {
367     if (BytesRemoved)
368       *BytesRemoved = 4;
369     return 1;
370   }
371   --I;
372   if (!isCondBranchOpcode(I->getOpcode())) {
373     if (BytesRemoved)
374       *BytesRemoved = 4;
375     return 1;
376   }
377 
378   // Remove the branch.
379   I->eraseFromParent();
380   if (BytesRemoved)
381     *BytesRemoved = 8;
382 
383   return 2;
384 }
385 
386 void AArch64InstrInfo::instantiateCondBranch(
387     MachineBasicBlock &MBB, const DebugLoc &DL, MachineBasicBlock *TBB,
388     ArrayRef<MachineOperand> Cond) const {
389   if (Cond[0].getImm() != -1) {
390     // Regular Bcc
391     BuildMI(&MBB, DL, get(AArch64::Bcc)).addImm(Cond[0].getImm()).addMBB(TBB);
392   } else {
393     // Folded compare-and-branch
394     // Note that we use addOperand instead of addReg to keep the flags.
395     const MachineInstrBuilder MIB =
396         BuildMI(&MBB, DL, get(Cond[1].getImm())).add(Cond[2]);
397     if (Cond.size() > 3)
398       MIB.addImm(Cond[3].getImm());
399     MIB.addMBB(TBB);
400   }
401 }
402 
403 unsigned AArch64InstrInfo::insertBranch(
404     MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB,
405     ArrayRef<MachineOperand> Cond, const DebugLoc &DL, int *BytesAdded) const {
406   // Shouldn't be a fall through.
407   assert(TBB && "insertBranch must not be told to insert a fallthrough");
408 
409   if (!FBB) {
410     if (Cond.empty()) // Unconditional branch?
411       BuildMI(&MBB, DL, get(AArch64::B)).addMBB(TBB);
412     else
413       instantiateCondBranch(MBB, DL, TBB, Cond);
414 
415     if (BytesAdded)
416       *BytesAdded = 4;
417 
418     return 1;
419   }
420 
421   // Two-way conditional branch.
422   instantiateCondBranch(MBB, DL, TBB, Cond);
423   BuildMI(&MBB, DL, get(AArch64::B)).addMBB(FBB);
424 
425   if (BytesAdded)
426     *BytesAdded = 8;
427 
428   return 2;
429 }
430 
431 // Find the original register that VReg is copied from.
432 static unsigned removeCopies(const MachineRegisterInfo &MRI, unsigned VReg) {
433   while (Register::isVirtualRegister(VReg)) {
434     const MachineInstr *DefMI = MRI.getVRegDef(VReg);
435     if (!DefMI->isFullCopy())
436       return VReg;
437     VReg = DefMI->getOperand(1).getReg();
438   }
439   return VReg;
440 }
441 
442 // Determine if VReg is defined by an instruction that can be folded into a
443 // csel instruction. If so, return the folded opcode, and the replacement
444 // register.
445 static unsigned canFoldIntoCSel(const MachineRegisterInfo &MRI, unsigned VReg,
446                                 unsigned *NewVReg = nullptr) {
447   VReg = removeCopies(MRI, VReg);
448   if (!Register::isVirtualRegister(VReg))
449     return 0;
450 
451   bool Is64Bit = AArch64::GPR64allRegClass.hasSubClassEq(MRI.getRegClass(VReg));
452   const MachineInstr *DefMI = MRI.getVRegDef(VReg);
453   unsigned Opc = 0;
454   unsigned SrcOpNum = 0;
455   switch (DefMI->getOpcode()) {
456   case AArch64::ADDSXri:
457   case AArch64::ADDSWri:
458     // if NZCV is used, do not fold.
459     if (DefMI->findRegisterDefOperandIdx(AArch64::NZCV, true) == -1)
460       return 0;
461     // fall-through to ADDXri and ADDWri.
462     LLVM_FALLTHROUGH;
463   case AArch64::ADDXri:
464   case AArch64::ADDWri:
465     // add x, 1 -> csinc.
466     if (!DefMI->getOperand(2).isImm() || DefMI->getOperand(2).getImm() != 1 ||
467         DefMI->getOperand(3).getImm() != 0)
468       return 0;
469     SrcOpNum = 1;
470     Opc = Is64Bit ? AArch64::CSINCXr : AArch64::CSINCWr;
471     break;
472 
473   case AArch64::ORNXrr:
474   case AArch64::ORNWrr: {
475     // not x -> csinv, represented as orn dst, xzr, src.
476     unsigned ZReg = removeCopies(MRI, DefMI->getOperand(1).getReg());
477     if (ZReg != AArch64::XZR && ZReg != AArch64::WZR)
478       return 0;
479     SrcOpNum = 2;
480     Opc = Is64Bit ? AArch64::CSINVXr : AArch64::CSINVWr;
481     break;
482   }
483 
484   case AArch64::SUBSXrr:
485   case AArch64::SUBSWrr:
486     // if NZCV is used, do not fold.
487     if (DefMI->findRegisterDefOperandIdx(AArch64::NZCV, true) == -1)
488       return 0;
489     // fall-through to SUBXrr and SUBWrr.
490     LLVM_FALLTHROUGH;
491   case AArch64::SUBXrr:
492   case AArch64::SUBWrr: {
493     // neg x -> csneg, represented as sub dst, xzr, src.
494     unsigned ZReg = removeCopies(MRI, DefMI->getOperand(1).getReg());
495     if (ZReg != AArch64::XZR && ZReg != AArch64::WZR)
496       return 0;
497     SrcOpNum = 2;
498     Opc = Is64Bit ? AArch64::CSNEGXr : AArch64::CSNEGWr;
499     break;
500   }
501   default:
502     return 0;
503   }
504   assert(Opc && SrcOpNum && "Missing parameters");
505 
506   if (NewVReg)
507     *NewVReg = DefMI->getOperand(SrcOpNum).getReg();
508   return Opc;
509 }
510 
511 bool AArch64InstrInfo::canInsertSelect(const MachineBasicBlock &MBB,
512                                        ArrayRef<MachineOperand> Cond,
513                                        Register DstReg, Register TrueReg,
514                                        Register FalseReg, int &CondCycles,
515                                        int &TrueCycles,
516                                        int &FalseCycles) const {
517   // Check register classes.
518   const MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
519   const TargetRegisterClass *RC =
520       RI.getCommonSubClass(MRI.getRegClass(TrueReg), MRI.getRegClass(FalseReg));
521   if (!RC)
522     return false;
523 
524   // Also need to check the dest regclass, in case we're trying to optimize
525   // something like:
526   // %1(gpr) = PHI %2(fpr), bb1, %(fpr), bb2
527   if (!RI.getCommonSubClass(RC, MRI.getRegClass(DstReg)))
528     return false;
529 
530   // Expanding cbz/tbz requires an extra cycle of latency on the condition.
531   unsigned ExtraCondLat = Cond.size() != 1;
532 
533   // GPRs are handled by csel.
534   // FIXME: Fold in x+1, -x, and ~x when applicable.
535   if (AArch64::GPR64allRegClass.hasSubClassEq(RC) ||
536       AArch64::GPR32allRegClass.hasSubClassEq(RC)) {
537     // Single-cycle csel, csinc, csinv, and csneg.
538     CondCycles = 1 + ExtraCondLat;
539     TrueCycles = FalseCycles = 1;
540     if (canFoldIntoCSel(MRI, TrueReg))
541       TrueCycles = 0;
542     else if (canFoldIntoCSel(MRI, FalseReg))
543       FalseCycles = 0;
544     return true;
545   }
546 
547   // Scalar floating point is handled by fcsel.
548   // FIXME: Form fabs, fmin, and fmax when applicable.
549   if (AArch64::FPR64RegClass.hasSubClassEq(RC) ||
550       AArch64::FPR32RegClass.hasSubClassEq(RC)) {
551     CondCycles = 5 + ExtraCondLat;
552     TrueCycles = FalseCycles = 2;
553     return true;
554   }
555 
556   // Can't do vectors.
557   return false;
558 }
559 
560 void AArch64InstrInfo::insertSelect(MachineBasicBlock &MBB,
561                                     MachineBasicBlock::iterator I,
562                                     const DebugLoc &DL, Register DstReg,
563                                     ArrayRef<MachineOperand> Cond,
564                                     Register TrueReg, Register FalseReg) const {
565   MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
566 
567   // Parse the condition code, see parseCondBranch() above.
568   AArch64CC::CondCode CC;
569   switch (Cond.size()) {
570   default:
571     llvm_unreachable("Unknown condition opcode in Cond");
572   case 1: // b.cc
573     CC = AArch64CC::CondCode(Cond[0].getImm());
574     break;
575   case 3: { // cbz/cbnz
576     // We must insert a compare against 0.
577     bool Is64Bit;
578     switch (Cond[1].getImm()) {
579     default:
580       llvm_unreachable("Unknown branch opcode in Cond");
581     case AArch64::CBZW:
582       Is64Bit = false;
583       CC = AArch64CC::EQ;
584       break;
585     case AArch64::CBZX:
586       Is64Bit = true;
587       CC = AArch64CC::EQ;
588       break;
589     case AArch64::CBNZW:
590       Is64Bit = false;
591       CC = AArch64CC::NE;
592       break;
593     case AArch64::CBNZX:
594       Is64Bit = true;
595       CC = AArch64CC::NE;
596       break;
597     }
598     Register SrcReg = Cond[2].getReg();
599     if (Is64Bit) {
600       // cmp reg, #0 is actually subs xzr, reg, #0.
601       MRI.constrainRegClass(SrcReg, &AArch64::GPR64spRegClass);
602       BuildMI(MBB, I, DL, get(AArch64::SUBSXri), AArch64::XZR)
603           .addReg(SrcReg)
604           .addImm(0)
605           .addImm(0);
606     } else {
607       MRI.constrainRegClass(SrcReg, &AArch64::GPR32spRegClass);
608       BuildMI(MBB, I, DL, get(AArch64::SUBSWri), AArch64::WZR)
609           .addReg(SrcReg)
610           .addImm(0)
611           .addImm(0);
612     }
613     break;
614   }
615   case 4: { // tbz/tbnz
616     // We must insert a tst instruction.
617     switch (Cond[1].getImm()) {
618     default:
619       llvm_unreachable("Unknown branch opcode in Cond");
620     case AArch64::TBZW:
621     case AArch64::TBZX:
622       CC = AArch64CC::EQ;
623       break;
624     case AArch64::TBNZW:
625     case AArch64::TBNZX:
626       CC = AArch64CC::NE;
627       break;
628     }
629     // cmp reg, #foo is actually ands xzr, reg, #1<<foo.
630     if (Cond[1].getImm() == AArch64::TBZW || Cond[1].getImm() == AArch64::TBNZW)
631       BuildMI(MBB, I, DL, get(AArch64::ANDSWri), AArch64::WZR)
632           .addReg(Cond[2].getReg())
633           .addImm(
634               AArch64_AM::encodeLogicalImmediate(1ull << Cond[3].getImm(), 32));
635     else
636       BuildMI(MBB, I, DL, get(AArch64::ANDSXri), AArch64::XZR)
637           .addReg(Cond[2].getReg())
638           .addImm(
639               AArch64_AM::encodeLogicalImmediate(1ull << Cond[3].getImm(), 64));
640     break;
641   }
642   }
643 
644   unsigned Opc = 0;
645   const TargetRegisterClass *RC = nullptr;
646   bool TryFold = false;
647   if (MRI.constrainRegClass(DstReg, &AArch64::GPR64RegClass)) {
648     RC = &AArch64::GPR64RegClass;
649     Opc = AArch64::CSELXr;
650     TryFold = true;
651   } else if (MRI.constrainRegClass(DstReg, &AArch64::GPR32RegClass)) {
652     RC = &AArch64::GPR32RegClass;
653     Opc = AArch64::CSELWr;
654     TryFold = true;
655   } else if (MRI.constrainRegClass(DstReg, &AArch64::FPR64RegClass)) {
656     RC = &AArch64::FPR64RegClass;
657     Opc = AArch64::FCSELDrrr;
658   } else if (MRI.constrainRegClass(DstReg, &AArch64::FPR32RegClass)) {
659     RC = &AArch64::FPR32RegClass;
660     Opc = AArch64::FCSELSrrr;
661   }
662   assert(RC && "Unsupported regclass");
663 
664   // Try folding simple instructions into the csel.
665   if (TryFold) {
666     unsigned NewVReg = 0;
667     unsigned FoldedOpc = canFoldIntoCSel(MRI, TrueReg, &NewVReg);
668     if (FoldedOpc) {
669       // The folded opcodes csinc, csinc and csneg apply the operation to
670       // FalseReg, so we need to invert the condition.
671       CC = AArch64CC::getInvertedCondCode(CC);
672       TrueReg = FalseReg;
673     } else
674       FoldedOpc = canFoldIntoCSel(MRI, FalseReg, &NewVReg);
675 
676     // Fold the operation. Leave any dead instructions for DCE to clean up.
677     if (FoldedOpc) {
678       FalseReg = NewVReg;
679       Opc = FoldedOpc;
680       // The extends the live range of NewVReg.
681       MRI.clearKillFlags(NewVReg);
682     }
683   }
684 
685   // Pull all virtual register into the appropriate class.
686   MRI.constrainRegClass(TrueReg, RC);
687   MRI.constrainRegClass(FalseReg, RC);
688 
689   // Insert the csel.
690   BuildMI(MBB, I, DL, get(Opc), DstReg)
691       .addReg(TrueReg)
692       .addReg(FalseReg)
693       .addImm(CC);
694 }
695 
696 /// Returns true if a MOVi32imm or MOVi64imm can be expanded to an  ORRxx.
697 static bool canBeExpandedToORR(const MachineInstr &MI, unsigned BitSize) {
698   uint64_t Imm = MI.getOperand(1).getImm();
699   uint64_t UImm = Imm << (64 - BitSize) >> (64 - BitSize);
700   uint64_t Encoding;
701   return AArch64_AM::processLogicalImmediate(UImm, BitSize, Encoding);
702 }
703 
704 // FIXME: this implementation should be micro-architecture dependent, so a
705 // micro-architecture target hook should be introduced here in future.
706 bool AArch64InstrInfo::isAsCheapAsAMove(const MachineInstr &MI) const {
707   if (!Subtarget.hasCustomCheapAsMoveHandling())
708     return MI.isAsCheapAsAMove();
709 
710   const unsigned Opcode = MI.getOpcode();
711 
712   // Firstly, check cases gated by features.
713 
714   if (Subtarget.hasZeroCycleZeroingFP()) {
715     if (Opcode == AArch64::FMOVH0 ||
716         Opcode == AArch64::FMOVS0 ||
717         Opcode == AArch64::FMOVD0)
718       return true;
719   }
720 
721   if (Subtarget.hasZeroCycleZeroingGP()) {
722     if (Opcode == TargetOpcode::COPY &&
723         (MI.getOperand(1).getReg() == AArch64::WZR ||
724          MI.getOperand(1).getReg() == AArch64::XZR))
725       return true;
726   }
727 
728   // Secondly, check cases specific to sub-targets.
729 
730   if (Subtarget.hasExynosCheapAsMoveHandling()) {
731     if (isExynosCheapAsMove(MI))
732       return true;
733 
734     return MI.isAsCheapAsAMove();
735   }
736 
737   // Finally, check generic cases.
738 
739   switch (Opcode) {
740   default:
741     return false;
742 
743   // add/sub on register without shift
744   case AArch64::ADDWri:
745   case AArch64::ADDXri:
746   case AArch64::SUBWri:
747   case AArch64::SUBXri:
748     return (MI.getOperand(3).getImm() == 0);
749 
750   // logical ops on immediate
751   case AArch64::ANDWri:
752   case AArch64::ANDXri:
753   case AArch64::EORWri:
754   case AArch64::EORXri:
755   case AArch64::ORRWri:
756   case AArch64::ORRXri:
757     return true;
758 
759   // logical ops on register without shift
760   case AArch64::ANDWrr:
761   case AArch64::ANDXrr:
762   case AArch64::BICWrr:
763   case AArch64::BICXrr:
764   case AArch64::EONWrr:
765   case AArch64::EONXrr:
766   case AArch64::EORWrr:
767   case AArch64::EORXrr:
768   case AArch64::ORNWrr:
769   case AArch64::ORNXrr:
770   case AArch64::ORRWrr:
771   case AArch64::ORRXrr:
772     return true;
773 
774   // If MOVi32imm or MOVi64imm can be expanded into ORRWri or
775   // ORRXri, it is as cheap as MOV
776   case AArch64::MOVi32imm:
777     return canBeExpandedToORR(MI, 32);
778   case AArch64::MOVi64imm:
779     return canBeExpandedToORR(MI, 64);
780   }
781 
782   llvm_unreachable("Unknown opcode to check as cheap as a move!");
783 }
784 
785 bool AArch64InstrInfo::isFalkorShiftExtFast(const MachineInstr &MI) {
786   switch (MI.getOpcode()) {
787   default:
788     return false;
789 
790   case AArch64::ADDWrs:
791   case AArch64::ADDXrs:
792   case AArch64::ADDSWrs:
793   case AArch64::ADDSXrs: {
794     unsigned Imm = MI.getOperand(3).getImm();
795     unsigned ShiftVal = AArch64_AM::getShiftValue(Imm);
796     if (ShiftVal == 0)
797       return true;
798     return AArch64_AM::getShiftType(Imm) == AArch64_AM::LSL && ShiftVal <= 5;
799   }
800 
801   case AArch64::ADDWrx:
802   case AArch64::ADDXrx:
803   case AArch64::ADDXrx64:
804   case AArch64::ADDSWrx:
805   case AArch64::ADDSXrx:
806   case AArch64::ADDSXrx64: {
807     unsigned Imm = MI.getOperand(3).getImm();
808     switch (AArch64_AM::getArithExtendType(Imm)) {
809     default:
810       return false;
811     case AArch64_AM::UXTB:
812     case AArch64_AM::UXTH:
813     case AArch64_AM::UXTW:
814     case AArch64_AM::UXTX:
815       return AArch64_AM::getArithShiftValue(Imm) <= 4;
816     }
817   }
818 
819   case AArch64::SUBWrs:
820   case AArch64::SUBSWrs: {
821     unsigned Imm = MI.getOperand(3).getImm();
822     unsigned ShiftVal = AArch64_AM::getShiftValue(Imm);
823     return ShiftVal == 0 ||
824            (AArch64_AM::getShiftType(Imm) == AArch64_AM::ASR && ShiftVal == 31);
825   }
826 
827   case AArch64::SUBXrs:
828   case AArch64::SUBSXrs: {
829     unsigned Imm = MI.getOperand(3).getImm();
830     unsigned ShiftVal = AArch64_AM::getShiftValue(Imm);
831     return ShiftVal == 0 ||
832            (AArch64_AM::getShiftType(Imm) == AArch64_AM::ASR && ShiftVal == 63);
833   }
834 
835   case AArch64::SUBWrx:
836   case AArch64::SUBXrx:
837   case AArch64::SUBXrx64:
838   case AArch64::SUBSWrx:
839   case AArch64::SUBSXrx:
840   case AArch64::SUBSXrx64: {
841     unsigned Imm = MI.getOperand(3).getImm();
842     switch (AArch64_AM::getArithExtendType(Imm)) {
843     default:
844       return false;
845     case AArch64_AM::UXTB:
846     case AArch64_AM::UXTH:
847     case AArch64_AM::UXTW:
848     case AArch64_AM::UXTX:
849       return AArch64_AM::getArithShiftValue(Imm) == 0;
850     }
851   }
852 
853   case AArch64::LDRBBroW:
854   case AArch64::LDRBBroX:
855   case AArch64::LDRBroW:
856   case AArch64::LDRBroX:
857   case AArch64::LDRDroW:
858   case AArch64::LDRDroX:
859   case AArch64::LDRHHroW:
860   case AArch64::LDRHHroX:
861   case AArch64::LDRHroW:
862   case AArch64::LDRHroX:
863   case AArch64::LDRQroW:
864   case AArch64::LDRQroX:
865   case AArch64::LDRSBWroW:
866   case AArch64::LDRSBWroX:
867   case AArch64::LDRSBXroW:
868   case AArch64::LDRSBXroX:
869   case AArch64::LDRSHWroW:
870   case AArch64::LDRSHWroX:
871   case AArch64::LDRSHXroW:
872   case AArch64::LDRSHXroX:
873   case AArch64::LDRSWroW:
874   case AArch64::LDRSWroX:
875   case AArch64::LDRSroW:
876   case AArch64::LDRSroX:
877   case AArch64::LDRWroW:
878   case AArch64::LDRWroX:
879   case AArch64::LDRXroW:
880   case AArch64::LDRXroX:
881   case AArch64::PRFMroW:
882   case AArch64::PRFMroX:
883   case AArch64::STRBBroW:
884   case AArch64::STRBBroX:
885   case AArch64::STRBroW:
886   case AArch64::STRBroX:
887   case AArch64::STRDroW:
888   case AArch64::STRDroX:
889   case AArch64::STRHHroW:
890   case AArch64::STRHHroX:
891   case AArch64::STRHroW:
892   case AArch64::STRHroX:
893   case AArch64::STRQroW:
894   case AArch64::STRQroX:
895   case AArch64::STRSroW:
896   case AArch64::STRSroX:
897   case AArch64::STRWroW:
898   case AArch64::STRWroX:
899   case AArch64::STRXroW:
900   case AArch64::STRXroX: {
901     unsigned IsSigned = MI.getOperand(3).getImm();
902     return !IsSigned;
903   }
904   }
905 }
906 
907 bool AArch64InstrInfo::isSEHInstruction(const MachineInstr &MI) {
908   unsigned Opc = MI.getOpcode();
909   switch (Opc) {
910     default:
911       return false;
912     case AArch64::SEH_StackAlloc:
913     case AArch64::SEH_SaveFPLR:
914     case AArch64::SEH_SaveFPLR_X:
915     case AArch64::SEH_SaveReg:
916     case AArch64::SEH_SaveReg_X:
917     case AArch64::SEH_SaveRegP:
918     case AArch64::SEH_SaveRegP_X:
919     case AArch64::SEH_SaveFReg:
920     case AArch64::SEH_SaveFReg_X:
921     case AArch64::SEH_SaveFRegP:
922     case AArch64::SEH_SaveFRegP_X:
923     case AArch64::SEH_SetFP:
924     case AArch64::SEH_AddFP:
925     case AArch64::SEH_Nop:
926     case AArch64::SEH_PrologEnd:
927     case AArch64::SEH_EpilogStart:
928     case AArch64::SEH_EpilogEnd:
929       return true;
930   }
931 }
932 
933 bool AArch64InstrInfo::isCoalescableExtInstr(const MachineInstr &MI,
934                                              Register &SrcReg, Register &DstReg,
935                                              unsigned &SubIdx) const {
936   switch (MI.getOpcode()) {
937   default:
938     return false;
939   case AArch64::SBFMXri: // aka sxtw
940   case AArch64::UBFMXri: // aka uxtw
941     // Check for the 32 -> 64 bit extension case, these instructions can do
942     // much more.
943     if (MI.getOperand(2).getImm() != 0 || MI.getOperand(3).getImm() != 31)
944       return false;
945     // This is a signed or unsigned 32 -> 64 bit extension.
946     SrcReg = MI.getOperand(1).getReg();
947     DstReg = MI.getOperand(0).getReg();
948     SubIdx = AArch64::sub_32;
949     return true;
950   }
951 }
952 
953 bool AArch64InstrInfo::areMemAccessesTriviallyDisjoint(
954     const MachineInstr &MIa, const MachineInstr &MIb) const {
955   const TargetRegisterInfo *TRI = &getRegisterInfo();
956   const MachineOperand *BaseOpA = nullptr, *BaseOpB = nullptr;
957   int64_t OffsetA = 0, OffsetB = 0;
958   unsigned WidthA = 0, WidthB = 0;
959   bool OffsetAIsScalable = false, OffsetBIsScalable = false;
960 
961   assert(MIa.mayLoadOrStore() && "MIa must be a load or store.");
962   assert(MIb.mayLoadOrStore() && "MIb must be a load or store.");
963 
964   if (MIa.hasUnmodeledSideEffects() || MIb.hasUnmodeledSideEffects() ||
965       MIa.hasOrderedMemoryRef() || MIb.hasOrderedMemoryRef())
966     return false;
967 
968   // Retrieve the base, offset from the base and width. Width
969   // is the size of memory that is being loaded/stored (e.g. 1, 2, 4, 8).  If
970   // base are identical, and the offset of a lower memory access +
971   // the width doesn't overlap the offset of a higher memory access,
972   // then the memory accesses are different.
973   // If OffsetAIsScalable and OffsetBIsScalable are both true, they
974   // are assumed to have the same scale (vscale).
975   if (getMemOperandWithOffsetWidth(MIa, BaseOpA, OffsetA, OffsetAIsScalable,
976                                    WidthA, TRI) &&
977       getMemOperandWithOffsetWidth(MIb, BaseOpB, OffsetB, OffsetBIsScalable,
978                                    WidthB, TRI)) {
979     if (BaseOpA->isIdenticalTo(*BaseOpB) &&
980         OffsetAIsScalable == OffsetBIsScalable) {
981       int LowOffset = OffsetA < OffsetB ? OffsetA : OffsetB;
982       int HighOffset = OffsetA < OffsetB ? OffsetB : OffsetA;
983       int LowWidth = (LowOffset == OffsetA) ? WidthA : WidthB;
984       if (LowOffset + LowWidth <= HighOffset)
985         return true;
986     }
987   }
988   return false;
989 }
990 
991 bool AArch64InstrInfo::isSchedulingBoundary(const MachineInstr &MI,
992                                             const MachineBasicBlock *MBB,
993                                             const MachineFunction &MF) const {
994   if (TargetInstrInfo::isSchedulingBoundary(MI, MBB, MF))
995     return true;
996   switch (MI.getOpcode()) {
997   case AArch64::HINT:
998     // CSDB hints are scheduling barriers.
999     if (MI.getOperand(0).getImm() == 0x14)
1000       return true;
1001     break;
1002   case AArch64::DSB:
1003   case AArch64::ISB:
1004     // DSB and ISB also are scheduling barriers.
1005     return true;
1006   default:;
1007   }
1008   return isSEHInstruction(MI);
1009 }
1010 
1011 /// analyzeCompare - For a comparison instruction, return the source registers
1012 /// in SrcReg and SrcReg2, and the value it compares against in CmpValue.
1013 /// Return true if the comparison instruction can be analyzed.
1014 bool AArch64InstrInfo::analyzeCompare(const MachineInstr &MI, Register &SrcReg,
1015                                       Register &SrcReg2, int &CmpMask,
1016                                       int &CmpValue) const {
1017   // The first operand can be a frame index where we'd normally expect a
1018   // register.
1019   assert(MI.getNumOperands() >= 2 && "All AArch64 cmps should have 2 operands");
1020   if (!MI.getOperand(1).isReg())
1021     return false;
1022 
1023   switch (MI.getOpcode()) {
1024   default:
1025     break;
1026   case AArch64::SUBSWrr:
1027   case AArch64::SUBSWrs:
1028   case AArch64::SUBSWrx:
1029   case AArch64::SUBSXrr:
1030   case AArch64::SUBSXrs:
1031   case AArch64::SUBSXrx:
1032   case AArch64::ADDSWrr:
1033   case AArch64::ADDSWrs:
1034   case AArch64::ADDSWrx:
1035   case AArch64::ADDSXrr:
1036   case AArch64::ADDSXrs:
1037   case AArch64::ADDSXrx:
1038     // Replace SUBSWrr with SUBWrr if NZCV is not used.
1039     SrcReg = MI.getOperand(1).getReg();
1040     SrcReg2 = MI.getOperand(2).getReg();
1041     CmpMask = ~0;
1042     CmpValue = 0;
1043     return true;
1044   case AArch64::SUBSWri:
1045   case AArch64::ADDSWri:
1046   case AArch64::SUBSXri:
1047   case AArch64::ADDSXri:
1048     SrcReg = MI.getOperand(1).getReg();
1049     SrcReg2 = 0;
1050     CmpMask = ~0;
1051     // FIXME: In order to convert CmpValue to 0 or 1
1052     CmpValue = MI.getOperand(2).getImm() != 0;
1053     return true;
1054   case AArch64::ANDSWri:
1055   case AArch64::ANDSXri:
1056     // ANDS does not use the same encoding scheme as the others xxxS
1057     // instructions.
1058     SrcReg = MI.getOperand(1).getReg();
1059     SrcReg2 = 0;
1060     CmpMask = ~0;
1061     // FIXME:The return val type of decodeLogicalImmediate is uint64_t,
1062     // while the type of CmpValue is int. When converting uint64_t to int,
1063     // the high 32 bits of uint64_t will be lost.
1064     // In fact it causes a bug in spec2006-483.xalancbmk
1065     // CmpValue is only used to compare with zero in OptimizeCompareInstr
1066     CmpValue = AArch64_AM::decodeLogicalImmediate(
1067                    MI.getOperand(2).getImm(),
1068                    MI.getOpcode() == AArch64::ANDSWri ? 32 : 64) != 0;
1069     return true;
1070   }
1071 
1072   return false;
1073 }
1074 
1075 static bool UpdateOperandRegClass(MachineInstr &Instr) {
1076   MachineBasicBlock *MBB = Instr.getParent();
1077   assert(MBB && "Can't get MachineBasicBlock here");
1078   MachineFunction *MF = MBB->getParent();
1079   assert(MF && "Can't get MachineFunction here");
1080   const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
1081   const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();
1082   MachineRegisterInfo *MRI = &MF->getRegInfo();
1083 
1084   for (unsigned OpIdx = 0, EndIdx = Instr.getNumOperands(); OpIdx < EndIdx;
1085        ++OpIdx) {
1086     MachineOperand &MO = Instr.getOperand(OpIdx);
1087     const TargetRegisterClass *OpRegCstraints =
1088         Instr.getRegClassConstraint(OpIdx, TII, TRI);
1089 
1090     // If there's no constraint, there's nothing to do.
1091     if (!OpRegCstraints)
1092       continue;
1093     // If the operand is a frame index, there's nothing to do here.
1094     // A frame index operand will resolve correctly during PEI.
1095     if (MO.isFI())
1096       continue;
1097 
1098     assert(MO.isReg() &&
1099            "Operand has register constraints without being a register!");
1100 
1101     Register Reg = MO.getReg();
1102     if (Register::isPhysicalRegister(Reg)) {
1103       if (!OpRegCstraints->contains(Reg))
1104         return false;
1105     } else if (!OpRegCstraints->hasSubClassEq(MRI->getRegClass(Reg)) &&
1106                !MRI->constrainRegClass(Reg, OpRegCstraints))
1107       return false;
1108   }
1109 
1110   return true;
1111 }
1112 
1113 /// Return the opcode that does not set flags when possible - otherwise
1114 /// return the original opcode. The caller is responsible to do the actual
1115 /// substitution and legality checking.
1116 static unsigned convertToNonFlagSettingOpc(const MachineInstr &MI) {
1117   // Don't convert all compare instructions, because for some the zero register
1118   // encoding becomes the sp register.
1119   bool MIDefinesZeroReg = false;
1120   if (MI.definesRegister(AArch64::WZR) || MI.definesRegister(AArch64::XZR))
1121     MIDefinesZeroReg = true;
1122 
1123   switch (MI.getOpcode()) {
1124   default:
1125     return MI.getOpcode();
1126   case AArch64::ADDSWrr:
1127     return AArch64::ADDWrr;
1128   case AArch64::ADDSWri:
1129     return MIDefinesZeroReg ? AArch64::ADDSWri : AArch64::ADDWri;
1130   case AArch64::ADDSWrs:
1131     return MIDefinesZeroReg ? AArch64::ADDSWrs : AArch64::ADDWrs;
1132   case AArch64::ADDSWrx:
1133     return AArch64::ADDWrx;
1134   case AArch64::ADDSXrr:
1135     return AArch64::ADDXrr;
1136   case AArch64::ADDSXri:
1137     return MIDefinesZeroReg ? AArch64::ADDSXri : AArch64::ADDXri;
1138   case AArch64::ADDSXrs:
1139     return MIDefinesZeroReg ? AArch64::ADDSXrs : AArch64::ADDXrs;
1140   case AArch64::ADDSXrx:
1141     return AArch64::ADDXrx;
1142   case AArch64::SUBSWrr:
1143     return AArch64::SUBWrr;
1144   case AArch64::SUBSWri:
1145     return MIDefinesZeroReg ? AArch64::SUBSWri : AArch64::SUBWri;
1146   case AArch64::SUBSWrs:
1147     return MIDefinesZeroReg ? AArch64::SUBSWrs : AArch64::SUBWrs;
1148   case AArch64::SUBSWrx:
1149     return AArch64::SUBWrx;
1150   case AArch64::SUBSXrr:
1151     return AArch64::SUBXrr;
1152   case AArch64::SUBSXri:
1153     return MIDefinesZeroReg ? AArch64::SUBSXri : AArch64::SUBXri;
1154   case AArch64::SUBSXrs:
1155     return MIDefinesZeroReg ? AArch64::SUBSXrs : AArch64::SUBXrs;
1156   case AArch64::SUBSXrx:
1157     return AArch64::SUBXrx;
1158   }
1159 }
1160 
/// Bit mask selecting which kinds of access to look for. AK_Write and AK_Read
/// occupy distinct bits; AK_All is the combination of both.
enum AccessKind {
  AK_Write = 0x01,
  AK_Read = 0x10,
  AK_All = AK_Write | AK_Read
};
1162 
1163 /// True when condition flags are accessed (either by writing or reading)
1164 /// on the instruction trace starting at From and ending at To.
1165 ///
1166 /// Note: If From and To are from different blocks it's assumed CC are accessed
1167 ///       on the path.
1168 static bool areCFlagsAccessedBetweenInstrs(
1169     MachineBasicBlock::iterator From, MachineBasicBlock::iterator To,
1170     const TargetRegisterInfo *TRI, const AccessKind AccessToCheck = AK_All) {
1171   // Early exit if To is at the beginning of the BB.
1172   if (To == To->getParent()->begin())
1173     return true;
1174 
1175   // Check whether the instructions are in the same basic block
1176   // If not, assume the condition flags might get modified somewhere.
1177   if (To->getParent() != From->getParent())
1178     return true;
1179 
1180   // From must be above To.
1181   assert(std::find_if(++To.getReverse(), To->getParent()->rend(),
1182                       [From](MachineInstr &MI) {
1183                         return MI.getIterator() == From;
1184                       }) != To->getParent()->rend());
1185 
1186   // We iterate backward starting \p To until we hit \p From.
1187   for (--To; To != From; --To) {
1188     const MachineInstr &Instr = *To;
1189 
1190     if (((AccessToCheck & AK_Write) &&
1191          Instr.modifiesRegister(AArch64::NZCV, TRI)) ||
1192         ((AccessToCheck & AK_Read) && Instr.readsRegister(AArch64::NZCV, TRI)))
1193       return true;
1194   }
1195   return false;
1196 }
1197 
1198 /// Try to optimize a compare instruction. A compare instruction is an
1199 /// instruction which produces AArch64::NZCV. It can be truly compare
1200 /// instruction
1201 /// when there are no uses of its destination register.
1202 ///
1203 /// The following steps are tried in order:
1204 /// 1. Convert CmpInstr into an unconditional version.
1205 /// 2. Remove CmpInstr if above there is an instruction producing a needed
1206 ///    condition code or an instruction which can be converted into such an
1207 ///    instruction.
1208 ///    Only comparison with zero is supported.
1209 bool AArch64InstrInfo::optimizeCompareInstr(
1210     MachineInstr &CmpInstr, Register SrcReg, Register SrcReg2, int CmpMask,
1211     int CmpValue, const MachineRegisterInfo *MRI) const {
1212   assert(CmpInstr.getParent());
1213   assert(MRI);
1214 
1215   // Replace SUBSWrr with SUBWrr if NZCV is not used.
1216   int DeadNZCVIdx = CmpInstr.findRegisterDefOperandIdx(AArch64::NZCV, true);
1217   if (DeadNZCVIdx != -1) {
1218     if (CmpInstr.definesRegister(AArch64::WZR) ||
1219         CmpInstr.definesRegister(AArch64::XZR)) {
1220       CmpInstr.eraseFromParent();
1221       return true;
1222     }
1223     unsigned Opc = CmpInstr.getOpcode();
1224     unsigned NewOpc = convertToNonFlagSettingOpc(CmpInstr);
1225     if (NewOpc == Opc)
1226       return false;
1227     const MCInstrDesc &MCID = get(NewOpc);
1228     CmpInstr.setDesc(MCID);
1229     CmpInstr.RemoveOperand(DeadNZCVIdx);
1230     bool succeeded = UpdateOperandRegClass(CmpInstr);
1231     (void)succeeded;
1232     assert(succeeded && "Some operands reg class are incompatible!");
1233     return true;
1234   }
1235 
1236   // Continue only if we have a "ri" where immediate is zero.
1237   // FIXME:CmpValue has already been converted to 0 or 1 in analyzeCompare
1238   // function.
1239   assert((CmpValue == 0 || CmpValue == 1) && "CmpValue must be 0 or 1!");
1240   if (CmpValue != 0 || SrcReg2 != 0)
1241     return false;
1242 
1243   // CmpInstr is a Compare instruction if destination register is not used.
1244   if (!MRI->use_nodbg_empty(CmpInstr.getOperand(0).getReg()))
1245     return false;
1246 
1247   return substituteCmpToZero(CmpInstr, SrcReg, MRI);
1248 }
1249 
1250 /// Get opcode of S version of Instr.
1251 /// If Instr is S version its opcode is returned.
1252 /// AArch64::INSTRUCTION_LIST_END is returned if Instr does not have S version
1253 /// or we are not interested in it.
1254 static unsigned sForm(MachineInstr &Instr) {
1255   switch (Instr.getOpcode()) {
1256   default:
1257     return AArch64::INSTRUCTION_LIST_END;
1258 
1259   case AArch64::ADDSWrr:
1260   case AArch64::ADDSWri:
1261   case AArch64::ADDSXrr:
1262   case AArch64::ADDSXri:
1263   case AArch64::SUBSWrr:
1264   case AArch64::SUBSWri:
1265   case AArch64::SUBSXrr:
1266   case AArch64::SUBSXri:
1267     return Instr.getOpcode();
1268 
1269   case AArch64::ADDWrr:
1270     return AArch64::ADDSWrr;
1271   case AArch64::ADDWri:
1272     return AArch64::ADDSWri;
1273   case AArch64::ADDXrr:
1274     return AArch64::ADDSXrr;
1275   case AArch64::ADDXri:
1276     return AArch64::ADDSXri;
1277   case AArch64::ADCWr:
1278     return AArch64::ADCSWr;
1279   case AArch64::ADCXr:
1280     return AArch64::ADCSXr;
1281   case AArch64::SUBWrr:
1282     return AArch64::SUBSWrr;
1283   case AArch64::SUBWri:
1284     return AArch64::SUBSWri;
1285   case AArch64::SUBXrr:
1286     return AArch64::SUBSXrr;
1287   case AArch64::SUBXri:
1288     return AArch64::SUBSXri;
1289   case AArch64::SBCWr:
1290     return AArch64::SBCSWr;
1291   case AArch64::SBCXr:
1292     return AArch64::SBCSXr;
1293   case AArch64::ANDWri:
1294     return AArch64::ANDSWri;
1295   case AArch64::ANDXri:
1296     return AArch64::ANDSXri;
1297   }
1298 }
1299 
1300 /// Check if AArch64::NZCV should be alive in successors of MBB.
1301 static bool areCFlagsAliveInSuccessors(MachineBasicBlock *MBB) {
1302   for (auto *BB : MBB->successors())
1303     if (BB->isLiveIn(AArch64::NZCV))
1304       return true;
1305   return false;
1306 }
1307 
namespace {

/// One boolean per condition flag (N, Z, C, V); all start out false.
struct UsedNZCV {
  bool N = false;
  bool Z = false;
  bool C = false;
  bool V = false;

  UsedNZCV() = default;

  /// Merge \p UsedFlags into this object: a flag is set afterwards if it was
  /// set in either operand. Returns *this.
  UsedNZCV &operator|=(const UsedNZCV &UsedFlags) {
    N = N || UsedFlags.N;
    Z = Z || UsedFlags.Z;
    C = C || UsedFlags.C;
    V = V || UsedFlags.V;
    return *this;
  }
};

} // end anonymous namespace
1328 
1329 /// Find a condition code used by the instruction.
1330 /// Returns AArch64CC::Invalid if either the instruction does not use condition
1331 /// codes or we don't optimize CmpInstr in the presence of such instructions.
1332 static AArch64CC::CondCode findCondCodeUsedByInstr(const MachineInstr &Instr) {
1333   switch (Instr.getOpcode()) {
1334   default:
1335     return AArch64CC::Invalid;
1336 
1337   case AArch64::Bcc: {
1338     int Idx = Instr.findRegisterUseOperandIdx(AArch64::NZCV);
1339     assert(Idx >= 2);
1340     return static_cast<AArch64CC::CondCode>(Instr.getOperand(Idx - 2).getImm());
1341   }
1342 
1343   case AArch64::CSINVWr:
1344   case AArch64::CSINVXr:
1345   case AArch64::CSINCWr:
1346   case AArch64::CSINCXr:
1347   case AArch64::CSELWr:
1348   case AArch64::CSELXr:
1349   case AArch64::CSNEGWr:
1350   case AArch64::CSNEGXr:
1351   case AArch64::FCSELSrrr:
1352   case AArch64::FCSELDrrr: {
1353     int Idx = Instr.findRegisterUseOperandIdx(AArch64::NZCV);
1354     assert(Idx >= 1);
1355     return static_cast<AArch64CC::CondCode>(Instr.getOperand(Idx - 1).getImm());
1356   }
1357   }
1358 }
1359 
1360 static UsedNZCV getUsedNZCV(AArch64CC::CondCode CC) {
1361   assert(CC != AArch64CC::Invalid);
1362   UsedNZCV UsedFlags;
1363   switch (CC) {
1364   default:
1365     break;
1366 
1367   case AArch64CC::EQ: // Z set
1368   case AArch64CC::NE: // Z clear
1369     UsedFlags.Z = true;
1370     break;
1371 
1372   case AArch64CC::HI: // Z clear and C set
1373   case AArch64CC::LS: // Z set   or  C clear
1374     UsedFlags.Z = true;
1375     LLVM_FALLTHROUGH;
1376   case AArch64CC::HS: // C set
1377   case AArch64CC::LO: // C clear
1378     UsedFlags.C = true;
1379     break;
1380 
1381   case AArch64CC::MI: // N set
1382   case AArch64CC::PL: // N clear
1383     UsedFlags.N = true;
1384     break;
1385 
1386   case AArch64CC::VS: // V set
1387   case AArch64CC::VC: // V clear
1388     UsedFlags.V = true;
1389     break;
1390 
1391   case AArch64CC::GT: // Z clear, N and V the same
1392   case AArch64CC::LE: // Z set,   N and V differ
1393     UsedFlags.Z = true;
1394     LLVM_FALLTHROUGH;
1395   case AArch64CC::GE: // N and V the same
1396   case AArch64CC::LT: // N and V differ
1397     UsedFlags.N = true;
1398     UsedFlags.V = true;
1399     break;
1400   }
1401   return UsedFlags;
1402 }
1403 
1404 static bool isADDSRegImm(unsigned Opcode) {
1405   return Opcode == AArch64::ADDSWri || Opcode == AArch64::ADDSXri;
1406 }
1407 
1408 static bool isSUBSRegImm(unsigned Opcode) {
1409   return Opcode == AArch64::SUBSWri || Opcode == AArch64::SUBSXri;
1410 }
1411 
1412 /// Check if CmpInstr can be substituted by MI.
1413 ///
1414 /// CmpInstr can be substituted:
1415 /// - CmpInstr is either 'ADDS %vreg, 0' or 'SUBS %vreg, 0'
1416 /// - and, MI and CmpInstr are from the same MachineBB
1417 /// - and, condition flags are not alive in successors of the CmpInstr parent
1418 /// - and, if MI opcode is the S form there must be no defs of flags between
1419 ///        MI and CmpInstr
1420 ///        or if MI opcode is not the S form there must be neither defs of flags
1421 ///        nor uses of flags between MI and CmpInstr.
1422 /// - and  C/V flags are not used after CmpInstr
1423 static bool canInstrSubstituteCmpInstr(MachineInstr *MI, MachineInstr *CmpInstr,
1424                                        const TargetRegisterInfo *TRI) {
1425   assert(MI);
1426   assert(sForm(*MI) != AArch64::INSTRUCTION_LIST_END);
1427   assert(CmpInstr);
1428 
1429   const unsigned CmpOpcode = CmpInstr->getOpcode();
1430   if (!isADDSRegImm(CmpOpcode) && !isSUBSRegImm(CmpOpcode))
1431     return false;
1432 
1433   if (MI->getParent() != CmpInstr->getParent())
1434     return false;
1435 
1436   if (areCFlagsAliveInSuccessors(CmpInstr->getParent()))
1437     return false;
1438 
1439   AccessKind AccessToCheck = AK_Write;
1440   if (sForm(*MI) != MI->getOpcode())
1441     AccessToCheck = AK_All;
1442   if (areCFlagsAccessedBetweenInstrs(MI, CmpInstr, TRI, AccessToCheck))
1443     return false;
1444 
1445   UsedNZCV NZCVUsedAfterCmp;
1446   for (auto I = std::next(CmpInstr->getIterator()),
1447             E = CmpInstr->getParent()->instr_end();
1448        I != E; ++I) {
1449     const MachineInstr &Instr = *I;
1450     if (Instr.readsRegister(AArch64::NZCV, TRI)) {
1451       AArch64CC::CondCode CC = findCondCodeUsedByInstr(Instr);
1452       if (CC == AArch64CC::Invalid) // Unsupported conditional instruction
1453         return false;
1454       NZCVUsedAfterCmp |= getUsedNZCV(CC);
1455     }
1456 
1457     if (Instr.modifiesRegister(AArch64::NZCV, TRI))
1458       break;
1459   }
1460 
1461   return !NZCVUsedAfterCmp.C && !NZCVUsedAfterCmp.V;
1462 }
1463 
1464 /// Substitute an instruction comparing to zero with another instruction
1465 /// which produces needed condition flags.
1466 ///
1467 /// Return true on success.
1468 bool AArch64InstrInfo::substituteCmpToZero(
1469     MachineInstr &CmpInstr, unsigned SrcReg,
1470     const MachineRegisterInfo *MRI) const {
1471   assert(MRI);
1472   // Get the unique definition of SrcReg.
1473   MachineInstr *MI = MRI->getUniqueVRegDef(SrcReg);
1474   if (!MI)
1475     return false;
1476 
1477   const TargetRegisterInfo *TRI = &getRegisterInfo();
1478 
1479   unsigned NewOpc = sForm(*MI);
1480   if (NewOpc == AArch64::INSTRUCTION_LIST_END)
1481     return false;
1482 
1483   if (!canInstrSubstituteCmpInstr(MI, &CmpInstr, TRI))
1484     return false;
1485 
1486   // Update the instruction to set NZCV.
1487   MI->setDesc(get(NewOpc));
1488   CmpInstr.eraseFromParent();
1489   bool succeeded = UpdateOperandRegClass(*MI);
1490   (void)succeeded;
1491   assert(succeeded && "Some operands reg class are incompatible!");
1492   MI->addRegisterDefined(AArch64::NZCV, TRI);
1493   return true;
1494 }
1495 
/// Expand the pseudo instruction \p MI into real instructions.
///
/// Only TargetOpcode::LOAD_STACK_GUARD and AArch64::CATCHRET are handled;
/// false is returned for every other opcode and nothing is changed.
///
/// - CATCHRET: an ADRP/ADDXri pair that writes X0 from the target block
///   operand is inserted in front of the run of FrameDestroy-flagged
///   instructions that precedes \p MI. \p MI itself is not erased here.
/// - LOAD_STACK_GUARD: a sequence is emitted in front of \p MI that
///   materializes the address of the global value found in the first memory
///   operand and, on most paths, loads through it into operand 0's register.
///   The sequence depends on how the global is classified and on the code
///   model. \p MI is erased afterwards.
///
/// Returns true whenever \p MI was one of the two handled opcodes.
bool AArch64InstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
  if (MI.getOpcode() != TargetOpcode::LOAD_STACK_GUARD &&
      MI.getOpcode() != AArch64::CATCHRET)
    return false;

  MachineBasicBlock &MBB = *MI.getParent();
  auto &Subtarget = MBB.getParent()->getSubtarget<AArch64Subtarget>();
  auto TRI = Subtarget.getRegisterInfo();
  DebugLoc DL = MI.getDebugLoc();

  if (MI.getOpcode() == AArch64::CATCHRET) {
    // Skip to the first instruction before the epilog.
    const TargetInstrInfo *TII =
      MBB.getParent()->getSubtarget().getInstrInfo();
    MachineBasicBlock *TargetMBB = MI.getOperand(0).getMBB();
    auto MBBI = MachineBasicBlock::iterator(MI);
    // NOTE(review): std::prev(MBBI) presumes MI is not the first instruction
    // of its block - confirm with callers.
    MachineBasicBlock::iterator FirstEpilogSEH = std::prev(MBBI);
    // Walk backwards over the instructions flagged FrameDestroy, stopping at
    // the start of the block.
    while (FirstEpilogSEH->getFlag(MachineInstr::FrameDestroy) &&
           FirstEpilogSEH != MBB.begin())
      FirstEpilogSEH = std::prev(FirstEpilogSEH);
    // Unless the walk ran into the start of the block, step forward again so
    // the insertion point is the last instruction visited before stopping.
    if (FirstEpilogSEH != MBB.begin())
      FirstEpilogSEH = std::next(FirstEpilogSEH);
    // Build the address of the target block in X0 with an ADRP + ADD pair.
    BuildMI(MBB, FirstEpilogSEH, DL, TII->get(AArch64::ADRP))
        .addReg(AArch64::X0, RegState::Define)
        .addMBB(TargetMBB);
    BuildMI(MBB, FirstEpilogSEH, DL, TII->get(AArch64::ADDXri))
        .addReg(AArch64::X0, RegState::Define)
        .addReg(AArch64::X0)
        .addMBB(TargetMBB)
        .addImm(0);
    return true;
  }

  // From here on MI is LOAD_STACK_GUARD. The global to load is taken from the
  // first memory operand of MI.
  Register Reg = MI.getOperand(0).getReg();
  const GlobalValue *GV =
      cast<GlobalValue>((*MI.memoperands_begin())->getValue());
  const TargetMachine &TM = MBB.getParent()->getTarget();
  unsigned OpFlags = Subtarget.ClassifyGlobalReference(GV, TM);
  const unsigned char MO_NC = AArch64II::MO_NC;

  if ((OpFlags & AArch64II::MO_GOT) != 0) {
    // GOT reference: LOADgot puts an address into Reg, then the value is
    // loaded through it.
    BuildMI(MBB, MI, DL, get(AArch64::LOADgot), Reg)
        .addGlobalAddress(GV, 0, OpFlags);
    if (Subtarget.isTargetILP32()) {
      // ILP32: do a 32-bit load (LDRWui) that names the sub_32 sub-register
      // as its dead explicit def and the full register as an implicit def.
      unsigned Reg32 = TRI->getSubReg(Reg, AArch64::sub_32);
      BuildMI(MBB, MI, DL, get(AArch64::LDRWui))
          .addDef(Reg32, RegState::Dead)
          .addUse(Reg, RegState::Kill)
          .addImm(0)
          .addMemOperand(*MI.memoperands_begin())
          .addDef(Reg, RegState::Implicit);
    } else {
      BuildMI(MBB, MI, DL, get(AArch64::LDRXui), Reg)
          .addReg(Reg, RegState::Kill)
          .addImm(0)
          .addMemOperand(*MI.memoperands_begin());
    }
  } else if (TM.getCodeModel() == CodeModel::Large) {
    // Large code model: build the address with one MOVZ and three MOVK
    // (shift amounts 0, 16, 32 and 48, using the MO_G0..MO_G3 flags), then
    // load through it.
    assert(!Subtarget.isTargetILP32() && "how can large exist in ILP32?");
    BuildMI(MBB, MI, DL, get(AArch64::MOVZXi), Reg)
        .addGlobalAddress(GV, 0, AArch64II::MO_G0 | MO_NC)
        .addImm(0);
    BuildMI(MBB, MI, DL, get(AArch64::MOVKXi), Reg)
        .addReg(Reg, RegState::Kill)
        .addGlobalAddress(GV, 0, AArch64II::MO_G1 | MO_NC)
        .addImm(16);
    BuildMI(MBB, MI, DL, get(AArch64::MOVKXi), Reg)
        .addReg(Reg, RegState::Kill)
        .addGlobalAddress(GV, 0, AArch64II::MO_G2 | MO_NC)
        .addImm(32);
    BuildMI(MBB, MI, DL, get(AArch64::MOVKXi), Reg)
        .addReg(Reg, RegState::Kill)
        .addGlobalAddress(GV, 0, AArch64II::MO_G3)
        .addImm(48);
    BuildMI(MBB, MI, DL, get(AArch64::LDRXui), Reg)
        .addReg(Reg, RegState::Kill)
        .addImm(0)
        .addMemOperand(*MI.memoperands_begin());
  } else if (TM.getCodeModel() == CodeModel::Tiny) {
    // Tiny code model: a single ADR.
    // NOTE(review): unlike the other paths, no load follows the ADR here -
    // confirm that this is intended.
    BuildMI(MBB, MI, DL, get(AArch64::ADR), Reg)
        .addGlobalAddress(GV, 0, OpFlags);
  } else {
    // Remaining case: ADRP with the MO_PAGE flag, followed by a load whose
    // offset operand is the global tagged MO_PAGEOFF | MO_NC.
    BuildMI(MBB, MI, DL, get(AArch64::ADRP), Reg)
        .addGlobalAddress(GV, 0, OpFlags | AArch64II::MO_PAGE);
    unsigned char LoFlags = OpFlags | AArch64II::MO_PAGEOFF | MO_NC;
    if (Subtarget.isTargetILP32()) {
      // Same 32-bit load shape as in the GOT path above.
      unsigned Reg32 = TRI->getSubReg(Reg, AArch64::sub_32);
      BuildMI(MBB, MI, DL, get(AArch64::LDRWui))
          .addDef(Reg32, RegState::Dead)
          .addUse(Reg, RegState::Kill)
          .addGlobalAddress(GV, 0, LoFlags)
          .addMemOperand(*MI.memoperands_begin())
          .addDef(Reg, RegState::Implicit);
    } else {
      BuildMI(MBB, MI, DL, get(AArch64::LDRXui), Reg)
          .addReg(Reg, RegState::Kill)
          .addGlobalAddress(GV, 0, LoFlags)
          .addMemOperand(*MI.memoperands_begin());
    }
  }

  // The pseudo has been replaced by the sequence built above.
  MBB.erase(MI);

  return true;
}
1601 
1602 // Return true if this instruction simply sets its single destination register
1603 // to zero. This is equivalent to a register rename of the zero-register.
1604 bool AArch64InstrInfo::isGPRZero(const MachineInstr &MI) {
1605   switch (MI.getOpcode()) {
1606   default:
1607     break;
1608   case AArch64::MOVZWi:
1609   case AArch64::MOVZXi: // movz Rd, #0 (LSL #0)
1610     if (MI.getOperand(1).isImm() && MI.getOperand(1).getImm() == 0) {
1611       assert(MI.getDesc().getNumOperands() == 3 &&
1612              MI.getOperand(2).getImm() == 0 && "invalid MOVZi operands");
1613       return true;
1614     }
1615     break;
1616   case AArch64::ANDWri: // and Rd, Rzr, #imm
1617     return MI.getOperand(1).getReg() == AArch64::WZR;
1618   case AArch64::ANDXri:
1619     return MI.getOperand(1).getReg() == AArch64::XZR;
1620   case TargetOpcode::COPY:
1621     return MI.getOperand(1).getReg() == AArch64::WZR;
1622   }
1623   return false;
1624 }
1625 
1626 // Return true if this instruction simply renames a general register without
1627 // modifying bits.
1628 bool AArch64InstrInfo::isGPRCopy(const MachineInstr &MI) {
1629   switch (MI.getOpcode()) {
1630   default:
1631     break;
1632   case TargetOpcode::COPY: {
1633     // GPR32 copies will by lowered to ORRXrs
1634     Register DstReg = MI.getOperand(0).getReg();
1635     return (AArch64::GPR32RegClass.contains(DstReg) ||
1636             AArch64::GPR64RegClass.contains(DstReg));
1637   }
1638   case AArch64::ORRXrs: // orr Xd, Xzr, Xm (LSL #0)
1639     if (MI.getOperand(1).getReg() == AArch64::XZR) {
1640       assert(MI.getDesc().getNumOperands() == 4 &&
1641              MI.getOperand(3).getImm() == 0 && "invalid ORRrs operands");
1642       return true;
1643     }
1644     break;
1645   case AArch64::ADDXri: // add Xd, Xn, #0 (LSL #0)
1646     if (MI.getOperand(2).getImm() == 0) {
1647       assert(MI.getDesc().getNumOperands() == 4 &&
1648              MI.getOperand(3).getImm() == 0 && "invalid ADDXri operands");
1649       return true;
1650     }
1651     break;
1652   }
1653   return false;
1654 }
1655 
1656 // Return true if this instruction simply renames a general register without
1657 // modifying bits.
1658 bool AArch64InstrInfo::isFPRCopy(const MachineInstr &MI) {
1659   switch (MI.getOpcode()) {
1660   default:
1661     break;
1662   case TargetOpcode::COPY: {
1663     // FPR64 copies will by lowered to ORR.16b
1664     Register DstReg = MI.getOperand(0).getReg();
1665     return (AArch64::FPR64RegClass.contains(DstReg) ||
1666             AArch64::FPR128RegClass.contains(DstReg));
1667   }
1668   case AArch64::ORRv16i8:
1669     if (MI.getOperand(1).getReg() == MI.getOperand(2).getReg()) {
1670       assert(MI.getDesc().getNumOperands() == 3 && MI.getOperand(0).isReg() &&
1671              "invalid ORRv16i8 operands");
1672       return true;
1673     }
1674     break;
1675   }
1676   return false;
1677 }
1678 
1679 unsigned AArch64InstrInfo::isLoadFromStackSlot(const MachineInstr &MI,
1680                                                int &FrameIndex) const {
1681   switch (MI.getOpcode()) {
1682   default:
1683     break;
1684   case AArch64::LDRWui:
1685   case AArch64::LDRXui:
1686   case AArch64::LDRBui:
1687   case AArch64::LDRHui:
1688   case AArch64::LDRSui:
1689   case AArch64::LDRDui:
1690   case AArch64::LDRQui:
1691     if (MI.getOperand(0).getSubReg() == 0 && MI.getOperand(1).isFI() &&
1692         MI.getOperand(2).isImm() && MI.getOperand(2).getImm() == 0) {
1693       FrameIndex = MI.getOperand(1).getIndex();
1694       return MI.getOperand(0).getReg();
1695     }
1696     break;
1697   }
1698 
1699   return 0;
1700 }
1701 
1702 unsigned AArch64InstrInfo::isStoreToStackSlot(const MachineInstr &MI,
1703                                               int &FrameIndex) const {
1704   switch (MI.getOpcode()) {
1705   default:
1706     break;
1707   case AArch64::STRWui:
1708   case AArch64::STRXui:
1709   case AArch64::STRBui:
1710   case AArch64::STRHui:
1711   case AArch64::STRSui:
1712   case AArch64::STRDui:
1713   case AArch64::STRQui:
1714   case AArch64::LDR_PXI:
1715   case AArch64::STR_PXI:
1716     if (MI.getOperand(0).getSubReg() == 0 && MI.getOperand(1).isFI() &&
1717         MI.getOperand(2).isImm() && MI.getOperand(2).getImm() == 0) {
1718       FrameIndex = MI.getOperand(1).getIndex();
1719       return MI.getOperand(0).getReg();
1720     }
1721     break;
1722   }
1723   return 0;
1724 }
1725 
1726 /// Check all MachineMemOperands for a hint to suppress pairing.
1727 bool AArch64InstrInfo::isLdStPairSuppressed(const MachineInstr &MI) {
1728   return llvm::any_of(MI.memoperands(), [](MachineMemOperand *MMO) {
1729     return MMO->getFlags() & MOSuppressPair;
1730   });
1731 }
1732 
1733 /// Set a flag on the first MachineMemOperand to suppress pairing.
1734 void AArch64InstrInfo::suppressLdStPair(MachineInstr &MI) {
1735   if (MI.memoperands_empty())
1736     return;
1737   (*MI.memoperands_begin())->setFlags(MOSuppressPair);
1738 }
1739 
1740 /// Check all MachineMemOperands for a hint that the load/store is strided.
1741 bool AArch64InstrInfo::isStridedAccess(const MachineInstr &MI) {
1742   return llvm::any_of(MI.memoperands(), [](MachineMemOperand *MMO) {
1743     return MMO->getFlags() & MOStridedAccess;
1744   });
1745 }
1746 
1747 bool AArch64InstrInfo::isUnscaledLdSt(unsigned Opc) {
1748   switch (Opc) {
1749   default:
1750     return false;
1751   case AArch64::STURSi:
1752   case AArch64::STURDi:
1753   case AArch64::STURQi:
1754   case AArch64::STURBBi:
1755   case AArch64::STURHHi:
1756   case AArch64::STURWi:
1757   case AArch64::STURXi:
1758   case AArch64::LDURSi:
1759   case AArch64::LDURDi:
1760   case AArch64::LDURQi:
1761   case AArch64::LDURWi:
1762   case AArch64::LDURXi:
1763   case AArch64::LDURSWi:
1764   case AArch64::LDURHHi:
1765   case AArch64::LDURBBi:
1766   case AArch64::LDURSBWi:
1767   case AArch64::LDURSHWi:
1768     return true;
1769   }
1770 }
1771 
1772 Optional<unsigned> AArch64InstrInfo::getUnscaledLdSt(unsigned Opc) {
1773   switch (Opc) {
1774   default: return {};
1775   case AArch64::PRFMui: return AArch64::PRFUMi;
1776   case AArch64::LDRXui: return AArch64::LDURXi;
1777   case AArch64::LDRWui: return AArch64::LDURWi;
1778   case AArch64::LDRBui: return AArch64::LDURBi;
1779   case AArch64::LDRHui: return AArch64::LDURHi;
1780   case AArch64::LDRSui: return AArch64::LDURSi;
1781   case AArch64::LDRDui: return AArch64::LDURDi;
1782   case AArch64::LDRQui: return AArch64::LDURQi;
1783   case AArch64::LDRBBui: return AArch64::LDURBBi;
1784   case AArch64::LDRHHui: return AArch64::LDURHHi;
1785   case AArch64::LDRSBXui: return AArch64::LDURSBXi;
1786   case AArch64::LDRSBWui: return AArch64::LDURSBWi;
1787   case AArch64::LDRSHXui: return AArch64::LDURSHXi;
1788   case AArch64::LDRSHWui: return AArch64::LDURSHWi;
1789   case AArch64::LDRSWui: return AArch64::LDURSWi;
1790   case AArch64::STRXui: return AArch64::STURXi;
1791   case AArch64::STRWui: return AArch64::STURWi;
1792   case AArch64::STRBui: return AArch64::STURBi;
1793   case AArch64::STRHui: return AArch64::STURHi;
1794   case AArch64::STRSui: return AArch64::STURSi;
1795   case AArch64::STRDui: return AArch64::STURDi;
1796   case AArch64::STRQui: return AArch64::STURQi;
1797   case AArch64::STRBBui: return AArch64::STURBBi;
1798   case AArch64::STRHHui: return AArch64::STURHHi;
1799   }
1800 }
1801 
1802 unsigned AArch64InstrInfo::getLoadStoreImmIdx(unsigned Opc) {
1803   switch (Opc) {
1804   default:
1805     return 2;
1806   case AArch64::LDPXi:
1807   case AArch64::LDPDi:
1808   case AArch64::STPXi:
1809   case AArch64::STPDi:
1810   case AArch64::LDNPXi:
1811   case AArch64::LDNPDi:
1812   case AArch64::STNPXi:
1813   case AArch64::STNPDi:
1814   case AArch64::LDPQi:
1815   case AArch64::STPQi:
1816   case AArch64::LDNPQi:
1817   case AArch64::STNPQi:
1818   case AArch64::LDPWi:
1819   case AArch64::LDPSi:
1820   case AArch64::STPWi:
1821   case AArch64::STPSi:
1822   case AArch64::LDNPWi:
1823   case AArch64::LDNPSi:
1824   case AArch64::STNPWi:
1825   case AArch64::STNPSi:
1826   case AArch64::LDG:
1827   case AArch64::STGPi:
1828   case AArch64::LD1B_IMM:
1829   case AArch64::LD1H_IMM:
1830   case AArch64::LD1W_IMM:
1831   case AArch64::LD1D_IMM:
1832   case AArch64::ST1B_IMM:
1833   case AArch64::ST1H_IMM:
1834   case AArch64::ST1W_IMM:
1835   case AArch64::ST1D_IMM:
1836     return 3;
1837   case AArch64::ADDG:
1838   case AArch64::STGOffset:
1839   case AArch64::LDR_PXI:
1840   case AArch64::STR_PXI:
1841     return 2;
1842   }
1843 }
1844 
1845 bool AArch64InstrInfo::isPairableLdStInst(const MachineInstr &MI) {
1846   switch (MI.getOpcode()) {
1847   default:
1848     return false;
1849   // Scaled instructions.
1850   case AArch64::STRSui:
1851   case AArch64::STRDui:
1852   case AArch64::STRQui:
1853   case AArch64::STRXui:
1854   case AArch64::STRWui:
1855   case AArch64::LDRSui:
1856   case AArch64::LDRDui:
1857   case AArch64::LDRQui:
1858   case AArch64::LDRXui:
1859   case AArch64::LDRWui:
1860   case AArch64::LDRSWui:
1861   // Unscaled instructions.
1862   case AArch64::STURSi:
1863   case AArch64::STURDi:
1864   case AArch64::STURQi:
1865   case AArch64::STURWi:
1866   case AArch64::STURXi:
1867   case AArch64::LDURSi:
1868   case AArch64::LDURDi:
1869   case AArch64::LDURQi:
1870   case AArch64::LDURWi:
1871   case AArch64::LDURXi:
1872   case AArch64::LDURSWi:
1873     return true;
1874   }
1875 }
1876 
1877 unsigned AArch64InstrInfo::convertToFlagSettingOpc(unsigned Opc,
1878                                                    bool &Is64Bit) {
1879   switch (Opc) {
1880   default:
1881     llvm_unreachable("Opcode has no flag setting equivalent!");
1882   // 32-bit cases:
1883   case AArch64::ADDWri:
1884     Is64Bit = false;
1885     return AArch64::ADDSWri;
1886   case AArch64::ADDWrr:
1887     Is64Bit = false;
1888     return AArch64::ADDSWrr;
1889   case AArch64::ADDWrs:
1890     Is64Bit = false;
1891     return AArch64::ADDSWrs;
1892   case AArch64::ADDWrx:
1893     Is64Bit = false;
1894     return AArch64::ADDSWrx;
1895   case AArch64::ANDWri:
1896     Is64Bit = false;
1897     return AArch64::ANDSWri;
1898   case AArch64::ANDWrr:
1899     Is64Bit = false;
1900     return AArch64::ANDSWrr;
1901   case AArch64::ANDWrs:
1902     Is64Bit = false;
1903     return AArch64::ANDSWrs;
1904   case AArch64::BICWrr:
1905     Is64Bit = false;
1906     return AArch64::BICSWrr;
1907   case AArch64::BICWrs:
1908     Is64Bit = false;
1909     return AArch64::BICSWrs;
1910   case AArch64::SUBWri:
1911     Is64Bit = false;
1912     return AArch64::SUBSWri;
1913   case AArch64::SUBWrr:
1914     Is64Bit = false;
1915     return AArch64::SUBSWrr;
1916   case AArch64::SUBWrs:
1917     Is64Bit = false;
1918     return AArch64::SUBSWrs;
1919   case AArch64::SUBWrx:
1920     Is64Bit = false;
1921     return AArch64::SUBSWrx;
1922   // 64-bit cases:
1923   case AArch64::ADDXri:
1924     Is64Bit = true;
1925     return AArch64::ADDSXri;
1926   case AArch64::ADDXrr:
1927     Is64Bit = true;
1928     return AArch64::ADDSXrr;
1929   case AArch64::ADDXrs:
1930     Is64Bit = true;
1931     return AArch64::ADDSXrs;
1932   case AArch64::ADDXrx:
1933     Is64Bit = true;
1934     return AArch64::ADDSXrx;
1935   case AArch64::ANDXri:
1936     Is64Bit = true;
1937     return AArch64::ANDSXri;
1938   case AArch64::ANDXrr:
1939     Is64Bit = true;
1940     return AArch64::ANDSXrr;
1941   case AArch64::ANDXrs:
1942     Is64Bit = true;
1943     return AArch64::ANDSXrs;
1944   case AArch64::BICXrr:
1945     Is64Bit = true;
1946     return AArch64::BICSXrr;
1947   case AArch64::BICXrs:
1948     Is64Bit = true;
1949     return AArch64::BICSXrs;
1950   case AArch64::SUBXri:
1951     Is64Bit = true;
1952     return AArch64::SUBSXri;
1953   case AArch64::SUBXrr:
1954     Is64Bit = true;
1955     return AArch64::SUBSXrr;
1956   case AArch64::SUBXrs:
1957     Is64Bit = true;
1958     return AArch64::SUBSXrs;
1959   case AArch64::SUBXrx:
1960     Is64Bit = true;
1961     return AArch64::SUBSXrx;
1962   }
1963 }
1964 
1965 // Is this a candidate for ld/st merging or pairing?  For example, we don't
1966 // touch volatiles or load/stores that have a hint to avoid pair formation.
1967 bool AArch64InstrInfo::isCandidateToMergeOrPair(const MachineInstr &MI) const {
1968   // If this is a volatile load/store, don't mess with it.
1969   if (MI.hasOrderedMemoryRef())
1970     return false;
1971 
1972   // Make sure this is a reg/fi+imm (as opposed to an address reloc).
1973   assert((MI.getOperand(1).isReg() || MI.getOperand(1).isFI()) &&
1974          "Expected a reg or frame index operand.");
1975   if (!MI.getOperand(2).isImm())
1976     return false;
1977 
1978   // Can't merge/pair if the instruction modifies the base register.
1979   // e.g., ldr x0, [x0]
1980   // This case will never occur with an FI base.
1981   if (MI.getOperand(1).isReg()) {
1982     Register BaseReg = MI.getOperand(1).getReg();
1983     const TargetRegisterInfo *TRI = &getRegisterInfo();
1984     if (MI.modifiesRegister(BaseReg, TRI))
1985       return false;
1986   }
1987 
1988   // Check if this load/store has a hint to avoid pair formation.
1989   // MachineMemOperands hints are set by the AArch64StorePairSuppress pass.
1990   if (isLdStPairSuppressed(MI))
1991     return false;
1992 
1993   // Do not pair any callee-save store/reload instructions in the
1994   // prologue/epilogue if the CFI information encoded the operations as separate
1995   // instructions, as that will cause the size of the actual prologue to mismatch
1996   // with the prologue size recorded in the Windows CFI.
1997   const MCAsmInfo *MAI = MI.getMF()->getTarget().getMCAsmInfo();
1998   bool NeedsWinCFI = MAI->usesWindowsCFI() &&
1999                      MI.getMF()->getFunction().needsUnwindTableEntry();
2000   if (NeedsWinCFI && (MI.getFlag(MachineInstr::FrameSetup) ||
2001                       MI.getFlag(MachineInstr::FrameDestroy)))
2002     return false;
2003 
2004   // On some CPUs quad load/store pairs are slower than two single load/stores.
2005   if (Subtarget.isPaired128Slow()) {
2006     switch (MI.getOpcode()) {
2007     default:
2008       break;
2009     case AArch64::LDURQi:
2010     case AArch64::STURQi:
2011     case AArch64::LDRQui:
2012     case AArch64::STRQui:
2013       return false;
2014     }
2015   }
2016 
2017   return true;
2018 }
2019 
2020 bool AArch64InstrInfo::getMemOperandsWithOffset(
2021     const MachineInstr &LdSt, SmallVectorImpl<const MachineOperand *> &BaseOps,
2022     int64_t &Offset, bool &OffsetIsScalable, const TargetRegisterInfo *TRI)
2023     const {
2024   if (!LdSt.mayLoadOrStore())
2025     return false;
2026 
2027   const MachineOperand *BaseOp;
2028   unsigned Width;
2029   if (!getMemOperandWithOffsetWidth(LdSt, BaseOp, Offset, OffsetIsScalable,
2030                                     Width, TRI))
2031     return false;
2032   BaseOps.push_back(BaseOp);
2033   return true;
2034 }
2035 
2036 bool AArch64InstrInfo::getMemOperandWithOffsetWidth(
2037     const MachineInstr &LdSt, const MachineOperand *&BaseOp, int64_t &Offset,
2038     bool &OffsetIsScalable, unsigned &Width,
2039     const TargetRegisterInfo *TRI) const {
2040   assert(LdSt.mayLoadOrStore() && "Expected a memory operation.");
2041   // Handle only loads/stores with base register followed by immediate offset.
2042   if (LdSt.getNumExplicitOperands() == 3) {
2043     // Non-paired instruction (e.g., ldr x1, [x0, #8]).
2044     if ((!LdSt.getOperand(1).isReg() && !LdSt.getOperand(1).isFI()) ||
2045         !LdSt.getOperand(2).isImm())
2046       return false;
2047   } else if (LdSt.getNumExplicitOperands() == 4) {
2048     // Paired instruction (e.g., ldp x1, x2, [x0, #8]).
2049     if (!LdSt.getOperand(1).isReg() ||
2050         (!LdSt.getOperand(2).isReg() && !LdSt.getOperand(2).isFI()) ||
2051         !LdSt.getOperand(3).isImm())
2052       return false;
2053   } else
2054     return false;
2055 
2056   // Get the scaling factor for the instruction and set the width for the
2057   // instruction.
2058   TypeSize Scale(0U, false);
2059   int64_t Dummy1, Dummy2;
2060 
2061   // If this returns false, then it's an instruction we don't want to handle.
2062   if (!getMemOpInfo(LdSt.getOpcode(), Scale, Width, Dummy1, Dummy2))
2063     return false;
2064 
2065   // Compute the offset. Offset is calculated as the immediate operand
2066   // multiplied by the scaling factor. Unscaled instructions have scaling factor
2067   // set to 1.
2068   if (LdSt.getNumExplicitOperands() == 3) {
2069     BaseOp = &LdSt.getOperand(1);
2070     Offset = LdSt.getOperand(2).getImm() * Scale.getKnownMinSize();
2071   } else {
2072     assert(LdSt.getNumExplicitOperands() == 4 && "invalid number of operands");
2073     BaseOp = &LdSt.getOperand(2);
2074     Offset = LdSt.getOperand(3).getImm() * Scale.getKnownMinSize();
2075   }
2076   OffsetIsScalable = Scale.isScalable();
2077 
2078   if (!BaseOp->isReg() && !BaseOp->isFI())
2079     return false;
2080 
2081   return true;
2082 }
2083 
2084 MachineOperand &
2085 AArch64InstrInfo::getMemOpBaseRegImmOfsOffsetOperand(MachineInstr &LdSt) const {
2086   assert(LdSt.mayLoadOrStore() && "Expected a memory operation.");
2087   MachineOperand &OfsOp = LdSt.getOperand(LdSt.getNumExplicitOperands() - 1);
2088   assert(OfsOp.isImm() && "Offset operand wasn't immediate.");
2089   return OfsOp;
2090 }
2091 
2092 bool AArch64InstrInfo::getMemOpInfo(unsigned Opcode, TypeSize &Scale,
2093                                     unsigned &Width, int64_t &MinOffset,
2094                                     int64_t &MaxOffset) {
2095   const unsigned SVEMaxBytesPerVector = AArch64::SVEMaxBitsPerVector / 8;
2096   switch (Opcode) {
2097   // Not a memory operation or something we want to handle.
2098   default:
2099     Scale = TypeSize::Fixed(0);
2100     Width = 0;
2101     MinOffset = MaxOffset = 0;
2102     return false;
2103   case AArch64::STRWpost:
2104   case AArch64::LDRWpost:
2105     Width = 32;
2106     Scale = TypeSize::Fixed(4);
2107     MinOffset = -256;
2108     MaxOffset = 255;
2109     break;
2110   case AArch64::LDURQi:
2111   case AArch64::STURQi:
2112     Width = 16;
2113     Scale = TypeSize::Fixed(1);
2114     MinOffset = -256;
2115     MaxOffset = 255;
2116     break;
2117   case AArch64::PRFUMi:
2118   case AArch64::LDURXi:
2119   case AArch64::LDURDi:
2120   case AArch64::STURXi:
2121   case AArch64::STURDi:
2122     Width = 8;
2123     Scale = TypeSize::Fixed(1);
2124     MinOffset = -256;
2125     MaxOffset = 255;
2126     break;
2127   case AArch64::LDURWi:
2128   case AArch64::LDURSi:
2129   case AArch64::LDURSWi:
2130   case AArch64::STURWi:
2131   case AArch64::STURSi:
2132     Width = 4;
2133     Scale = TypeSize::Fixed(1);
2134     MinOffset = -256;
2135     MaxOffset = 255;
2136     break;
2137   case AArch64::LDURHi:
2138   case AArch64::LDURHHi:
2139   case AArch64::LDURSHXi:
2140   case AArch64::LDURSHWi:
2141   case AArch64::STURHi:
2142   case AArch64::STURHHi:
2143     Width = 2;
2144     Scale = TypeSize::Fixed(1);
2145     MinOffset = -256;
2146     MaxOffset = 255;
2147     break;
2148   case AArch64::LDURBi:
2149   case AArch64::LDURBBi:
2150   case AArch64::LDURSBXi:
2151   case AArch64::LDURSBWi:
2152   case AArch64::STURBi:
2153   case AArch64::STURBBi:
2154     Width = 1;
2155     Scale = TypeSize::Fixed(1);
2156     MinOffset = -256;
2157     MaxOffset = 255;
2158     break;
2159   case AArch64::LDPQi:
2160   case AArch64::LDNPQi:
2161   case AArch64::STPQi:
2162   case AArch64::STNPQi:
2163     Scale = TypeSize::Fixed(16);
2164     Width = 32;
2165     MinOffset = -64;
2166     MaxOffset = 63;
2167     break;
2168   case AArch64::LDRQui:
2169   case AArch64::STRQui:
2170     Scale = TypeSize::Fixed(16);
2171     Width = 16;
2172     MinOffset = 0;
2173     MaxOffset = 4095;
2174     break;
2175   case AArch64::LDPXi:
2176   case AArch64::LDPDi:
2177   case AArch64::LDNPXi:
2178   case AArch64::LDNPDi:
2179   case AArch64::STPXi:
2180   case AArch64::STPDi:
2181   case AArch64::STNPXi:
2182   case AArch64::STNPDi:
2183     Scale = TypeSize::Fixed(8);
2184     Width = 16;
2185     MinOffset = -64;
2186     MaxOffset = 63;
2187     break;
2188   case AArch64::PRFMui:
2189   case AArch64::LDRXui:
2190   case AArch64::LDRDui:
2191   case AArch64::STRXui:
2192   case AArch64::STRDui:
2193     Scale = TypeSize::Fixed(8);
2194     Width = 8;
2195     MinOffset = 0;
2196     MaxOffset = 4095;
2197     break;
2198   case AArch64::LDPWi:
2199   case AArch64::LDPSi:
2200   case AArch64::LDNPWi:
2201   case AArch64::LDNPSi:
2202   case AArch64::STPWi:
2203   case AArch64::STPSi:
2204   case AArch64::STNPWi:
2205   case AArch64::STNPSi:
2206     Scale = TypeSize::Fixed(4);
2207     Width = 8;
2208     MinOffset = -64;
2209     MaxOffset = 63;
2210     break;
2211   case AArch64::LDRWui:
2212   case AArch64::LDRSui:
2213   case AArch64::LDRSWui:
2214   case AArch64::STRWui:
2215   case AArch64::STRSui:
2216     Scale = TypeSize::Fixed(4);
2217     Width = 4;
2218     MinOffset = 0;
2219     MaxOffset = 4095;
2220     break;
2221   case AArch64::LDRHui:
2222   case AArch64::LDRHHui:
2223   case AArch64::LDRSHWui:
2224   case AArch64::LDRSHXui:
2225   case AArch64::STRHui:
2226   case AArch64::STRHHui:
2227     Scale = TypeSize::Fixed(2);
2228     Width = 2;
2229     MinOffset = 0;
2230     MaxOffset = 4095;
2231     break;
2232   case AArch64::LDRBui:
2233   case AArch64::LDRBBui:
2234   case AArch64::LDRSBWui:
2235   case AArch64::LDRSBXui:
2236   case AArch64::STRBui:
2237   case AArch64::STRBBui:
2238     Scale = TypeSize::Fixed(1);
2239     Width = 1;
2240     MinOffset = 0;
2241     MaxOffset = 4095;
2242     break;
2243   case AArch64::ADDG:
2244     Scale = TypeSize::Fixed(16);
2245     Width = 0;
2246     MinOffset = 0;
2247     MaxOffset = 63;
2248     break;
2249   case AArch64::TAGPstack:
2250     Scale = TypeSize::Fixed(16);
2251     Width = 0;
2252     // TAGP with a negative offset turns into SUBP, which has a maximum offset
2253     // of 63 (not 64!).
2254     MinOffset = -63;
2255     MaxOffset = 63;
2256     break;
2257   case AArch64::LDG:
2258   case AArch64::STGOffset:
2259   case AArch64::STZGOffset:
2260     Scale = TypeSize::Fixed(16);
2261     Width = 16;
2262     MinOffset = -256;
2263     MaxOffset = 255;
2264     break;
2265   case AArch64::LDR_PXI:
2266   case AArch64::STR_PXI:
2267     Scale = TypeSize::Scalable(2);
2268     Width = SVEMaxBytesPerVector / 8;
2269     MinOffset = -256;
2270     MaxOffset = 255;
2271     break;
2272   case AArch64::LDR_ZXI:
2273   case AArch64::STR_ZXI:
2274     Scale = TypeSize::Scalable(16);
2275     Width = SVEMaxBytesPerVector;
2276     MinOffset = -256;
2277     MaxOffset = 255;
2278     break;
2279   case AArch64::LD1B_IMM:
2280   case AArch64::LD1H_IMM:
2281   case AArch64::LD1W_IMM:
2282   case AArch64::LD1D_IMM:
2283   case AArch64::ST1B_IMM:
2284   case AArch64::ST1H_IMM:
2285   case AArch64::ST1W_IMM:
2286   case AArch64::ST1D_IMM:
2287     // A full vectors worth of data
2288     // Width = mbytes * elements
2289     Scale = TypeSize::Scalable(16);
2290     Width = SVEMaxBytesPerVector;
2291     MinOffset = -8;
2292     MaxOffset = 7;
2293     break;
2294   case AArch64::ST2GOffset:
2295   case AArch64::STZ2GOffset:
2296     Scale = TypeSize::Fixed(16);
2297     Width = 32;
2298     MinOffset = -256;
2299     MaxOffset = 255;
2300     break;
2301   case AArch64::STGPi:
2302     Scale = TypeSize::Fixed(16);
2303     Width = 16;
2304     MinOffset = -64;
2305     MaxOffset = 63;
2306     break;
2307   }
2308 
2309   return true;
2310 }
2311 
2312 // Scaling factor for unscaled load or store.
2313 int AArch64InstrInfo::getMemScale(unsigned Opc) {
2314   switch (Opc) {
2315   default:
2316     llvm_unreachable("Opcode has unknown scale!");
2317   case AArch64::LDRBBui:
2318   case AArch64::LDURBBi:
2319   case AArch64::LDRSBWui:
2320   case AArch64::LDURSBWi:
2321   case AArch64::STRBBui:
2322   case AArch64::STURBBi:
2323     return 1;
2324   case AArch64::LDRHHui:
2325   case AArch64::LDURHHi:
2326   case AArch64::LDRSHWui:
2327   case AArch64::LDURSHWi:
2328   case AArch64::STRHHui:
2329   case AArch64::STURHHi:
2330     return 2;
2331   case AArch64::LDRSui:
2332   case AArch64::LDURSi:
2333   case AArch64::LDRSWui:
2334   case AArch64::LDURSWi:
2335   case AArch64::LDRWui:
2336   case AArch64::LDURWi:
2337   case AArch64::STRSui:
2338   case AArch64::STURSi:
2339   case AArch64::STRWui:
2340   case AArch64::STURWi:
2341   case AArch64::LDPSi:
2342   case AArch64::LDPSWi:
2343   case AArch64::LDPWi:
2344   case AArch64::STPSi:
2345   case AArch64::STPWi:
2346     return 4;
2347   case AArch64::LDRDui:
2348   case AArch64::LDURDi:
2349   case AArch64::LDRXui:
2350   case AArch64::LDURXi:
2351   case AArch64::STRDui:
2352   case AArch64::STURDi:
2353   case AArch64::STRXui:
2354   case AArch64::STURXi:
2355   case AArch64::LDPDi:
2356   case AArch64::LDPXi:
2357   case AArch64::STPDi:
2358   case AArch64::STPXi:
2359     return 8;
2360   case AArch64::LDRQui:
2361   case AArch64::LDURQi:
2362   case AArch64::STRQui:
2363   case AArch64::STURQi:
2364   case AArch64::LDPQi:
2365   case AArch64::STPQi:
2366   case AArch64::STGOffset:
2367   case AArch64::STZGOffset:
2368   case AArch64::ST2GOffset:
2369   case AArch64::STZ2GOffset:
2370   case AArch64::STGPi:
2371     return 16;
2372   }
2373 }
2374 
2375 // Scale the unscaled offsets.  Returns false if the unscaled offset can't be
2376 // scaled.
2377 static bool scaleOffset(unsigned Opc, int64_t &Offset) {
2378   int Scale = AArch64InstrInfo::getMemScale(Opc);
2379 
2380   // If the byte-offset isn't a multiple of the stride, we can't scale this
2381   // offset.
2382   if (Offset % Scale != 0)
2383     return false;
2384 
2385   // Convert the byte-offset used by unscaled into an "element" offset used
2386   // by the scaled pair load/store instructions.
2387   Offset /= Scale;
2388   return true;
2389 }
2390 
2391 static bool canPairLdStOpc(unsigned FirstOpc, unsigned SecondOpc) {
2392   if (FirstOpc == SecondOpc)
2393     return true;
2394   // We can also pair sign-ext and zero-ext instructions.
2395   switch (FirstOpc) {
2396   default:
2397     return false;
2398   case AArch64::LDRWui:
2399   case AArch64::LDURWi:
2400     return SecondOpc == AArch64::LDRSWui || SecondOpc == AArch64::LDURSWi;
2401   case AArch64::LDRSWui:
2402   case AArch64::LDURSWi:
2403     return SecondOpc == AArch64::LDRWui || SecondOpc == AArch64::LDURWi;
2404   }
2405   // These instructions can't be paired based on their opcodes.
2406   return false;
2407 }
2408 
2409 static bool shouldClusterFI(const MachineFrameInfo &MFI, int FI1,
2410                             int64_t Offset1, unsigned Opcode1, int FI2,
2411                             int64_t Offset2, unsigned Opcode2) {
2412   // Accesses through fixed stack object frame indices may access a different
2413   // fixed stack slot. Check that the object offsets + offsets match.
2414   if (MFI.isFixedObjectIndex(FI1) && MFI.isFixedObjectIndex(FI2)) {
2415     int64_t ObjectOffset1 = MFI.getObjectOffset(FI1);
2416     int64_t ObjectOffset2 = MFI.getObjectOffset(FI2);
2417     assert(ObjectOffset1 <= ObjectOffset2 && "Object offsets are not ordered.");
2418     // Convert to scaled object offsets.
2419     int Scale1 = AArch64InstrInfo::getMemScale(Opcode1);
2420     if (ObjectOffset1 % Scale1 != 0)
2421       return false;
2422     ObjectOffset1 /= Scale1;
2423     int Scale2 = AArch64InstrInfo::getMemScale(Opcode2);
2424     if (ObjectOffset2 % Scale2 != 0)
2425       return false;
2426     ObjectOffset2 /= Scale2;
2427     ObjectOffset1 += Offset1;
2428     ObjectOffset2 += Offset2;
2429     return ObjectOffset1 + 1 == ObjectOffset2;
2430   }
2431 
2432   return FI1 == FI2;
2433 }
2434 
2435 /// Detect opportunities for ldp/stp formation.
2436 ///
2437 /// Only called for LdSt for which getMemOperandWithOffset returns true.
2438 bool AArch64InstrInfo::shouldClusterMemOps(
2439     ArrayRef<const MachineOperand *> BaseOps1,
2440     ArrayRef<const MachineOperand *> BaseOps2, unsigned NumLoads) const {
2441   assert(BaseOps1.size() == 1 && BaseOps2.size() == 1);
2442   const MachineOperand &BaseOp1 = *BaseOps1.front();
2443   const MachineOperand &BaseOp2 = *BaseOps2.front();
2444   const MachineInstr &FirstLdSt = *BaseOp1.getParent();
2445   const MachineInstr &SecondLdSt = *BaseOp2.getParent();
2446   if (BaseOp1.getType() != BaseOp2.getType())
2447     return false;
2448 
2449   assert((BaseOp1.isReg() || BaseOp1.isFI()) &&
2450          "Only base registers and frame indices are supported.");
2451 
2452   // Check for both base regs and base FI.
2453   if (BaseOp1.isReg() && BaseOp1.getReg() != BaseOp2.getReg())
2454     return false;
2455 
2456   // Only cluster up to a single pair.
2457   if (NumLoads > 2)
2458     return false;
2459 
2460   if (!isPairableLdStInst(FirstLdSt) || !isPairableLdStInst(SecondLdSt))
2461     return false;
2462 
2463   // Can we pair these instructions based on their opcodes?
2464   unsigned FirstOpc = FirstLdSt.getOpcode();
2465   unsigned SecondOpc = SecondLdSt.getOpcode();
2466   if (!canPairLdStOpc(FirstOpc, SecondOpc))
2467     return false;
2468 
2469   // Can't merge volatiles or load/stores that have a hint to avoid pair
2470   // formation, for example.
2471   if (!isCandidateToMergeOrPair(FirstLdSt) ||
2472       !isCandidateToMergeOrPair(SecondLdSt))
2473     return false;
2474 
2475   // isCandidateToMergeOrPair guarantees that operand 2 is an immediate.
2476   int64_t Offset1 = FirstLdSt.getOperand(2).getImm();
2477   if (isUnscaledLdSt(FirstOpc) && !scaleOffset(FirstOpc, Offset1))
2478     return false;
2479 
2480   int64_t Offset2 = SecondLdSt.getOperand(2).getImm();
2481   if (isUnscaledLdSt(SecondOpc) && !scaleOffset(SecondOpc, Offset2))
2482     return false;
2483 
2484   // Pairwise instructions have a 7-bit signed offset field.
2485   if (Offset1 > 63 || Offset1 < -64)
2486     return false;
2487 
2488   // The caller should already have ordered First/SecondLdSt by offset.
2489   // Note: except for non-equal frame index bases
2490   if (BaseOp1.isFI()) {
2491     assert((!BaseOp1.isIdenticalTo(BaseOp2) || Offset1 <= Offset2) &&
2492            "Caller should have ordered offsets.");
2493 
2494     const MachineFrameInfo &MFI =
2495         FirstLdSt.getParent()->getParent()->getFrameInfo();
2496     return shouldClusterFI(MFI, BaseOp1.getIndex(), Offset1, FirstOpc,
2497                            BaseOp2.getIndex(), Offset2, SecondOpc);
2498   }
2499 
2500   assert(Offset1 <= Offset2 && "Caller should have ordered offsets.");
2501 
2502   return Offset1 + 1 == Offset2;
2503 }
2504 
2505 static const MachineInstrBuilder &AddSubReg(const MachineInstrBuilder &MIB,
2506                                             unsigned Reg, unsigned SubIdx,
2507                                             unsigned State,
2508                                             const TargetRegisterInfo *TRI) {
2509   if (!SubIdx)
2510     return MIB.addReg(Reg, State);
2511 
2512   if (Register::isPhysicalRegister(Reg))
2513     return MIB.addReg(TRI->getSubReg(Reg, SubIdx), State);
2514   return MIB.addReg(Reg, State, SubIdx);
2515 }
2516 
// Returns true when the distance from SrcReg up to DestReg, taken modulo 32,
// is smaller than NumRegs.
static bool forwardCopyWillClobberTuple(unsigned DestReg, unsigned SrcReg,
                                        unsigned NumRegs) {
  // The subtraction is unsigned, so reducing it modulo 32 yields the positive
  // remainder directly.
  const unsigned Distance = (DestReg - SrcReg) % 32u;
  return Distance < NumRegs;
}
2523 
2524 void AArch64InstrInfo::copyPhysRegTuple(MachineBasicBlock &MBB,
2525                                         MachineBasicBlock::iterator I,
2526                                         const DebugLoc &DL, MCRegister DestReg,
2527                                         MCRegister SrcReg, bool KillSrc,
2528                                         unsigned Opcode,
2529                                         ArrayRef<unsigned> Indices) const {
2530   assert(Subtarget.hasNEON() && "Unexpected register copy without NEON");
2531   const TargetRegisterInfo *TRI = &getRegisterInfo();
2532   uint16_t DestEncoding = TRI->getEncodingValue(DestReg);
2533   uint16_t SrcEncoding = TRI->getEncodingValue(SrcReg);
2534   unsigned NumRegs = Indices.size();
2535 
2536   int SubReg = 0, End = NumRegs, Incr = 1;
2537   if (forwardCopyWillClobberTuple(DestEncoding, SrcEncoding, NumRegs)) {
2538     SubReg = NumRegs - 1;
2539     End = -1;
2540     Incr = -1;
2541   }
2542 
2543   for (; SubReg != End; SubReg += Incr) {
2544     const MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(Opcode));
2545     AddSubReg(MIB, DestReg, Indices[SubReg], RegState::Define, TRI);
2546     AddSubReg(MIB, SrcReg, Indices[SubReg], 0, TRI);
2547     AddSubReg(MIB, SrcReg, Indices[SubReg], getKillRegState(KillSrc), TRI);
2548   }
2549 }
2550 
2551 void AArch64InstrInfo::copyGPRRegTuple(MachineBasicBlock &MBB,
2552                                        MachineBasicBlock::iterator I,
2553                                        DebugLoc DL, unsigned DestReg,
2554                                        unsigned SrcReg, bool KillSrc,
2555                                        unsigned Opcode, unsigned ZeroReg,
2556                                        llvm::ArrayRef<unsigned> Indices) const {
2557   const TargetRegisterInfo *TRI = &getRegisterInfo();
2558   unsigned NumRegs = Indices.size();
2559 
2560 #ifndef NDEBUG
2561   uint16_t DestEncoding = TRI->getEncodingValue(DestReg);
2562   uint16_t SrcEncoding = TRI->getEncodingValue(SrcReg);
2563   assert(DestEncoding % NumRegs == 0 && SrcEncoding % NumRegs == 0 &&
2564          "GPR reg sequences should not be able to overlap");
2565 #endif
2566 
2567   for (unsigned SubReg = 0; SubReg != NumRegs; ++SubReg) {
2568     const MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(Opcode));
2569     AddSubReg(MIB, DestReg, Indices[SubReg], RegState::Define, TRI);
2570     MIB.addReg(ZeroReg);
2571     AddSubReg(MIB, SrcReg, Indices[SubReg], getKillRegState(KillSrc), TRI);
2572     MIB.addImm(0);
2573   }
2574 }
2575 
2576 void AArch64InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
2577                                    MachineBasicBlock::iterator I,
2578                                    const DebugLoc &DL, MCRegister DestReg,
2579                                    MCRegister SrcReg, bool KillSrc) const {
2580   if (AArch64::GPR32spRegClass.contains(DestReg) &&
2581       (AArch64::GPR32spRegClass.contains(SrcReg) || SrcReg == AArch64::WZR)) {
2582     const TargetRegisterInfo *TRI = &getRegisterInfo();
2583 
2584     if (DestReg == AArch64::WSP || SrcReg == AArch64::WSP) {
2585       // If either operand is WSP, expand to ADD #0.
2586       if (Subtarget.hasZeroCycleRegMove()) {
2587         // Cyclone recognizes "ADD Xd, Xn, #0" as a zero-cycle register move.
2588         MCRegister DestRegX = TRI->getMatchingSuperReg(
2589             DestReg, AArch64::sub_32, &AArch64::GPR64spRegClass);
2590         MCRegister SrcRegX = TRI->getMatchingSuperReg(
2591             SrcReg, AArch64::sub_32, &AArch64::GPR64spRegClass);
2592         // This instruction is reading and writing X registers.  This may upset
2593         // the register scavenger and machine verifier, so we need to indicate
2594         // that we are reading an undefined value from SrcRegX, but a proper
2595         // value from SrcReg.
2596         BuildMI(MBB, I, DL, get(AArch64::ADDXri), DestRegX)
2597             .addReg(SrcRegX, RegState::Undef)
2598             .addImm(0)
2599             .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0))
2600             .addReg(SrcReg, RegState::Implicit | getKillRegState(KillSrc));
2601       } else {
2602         BuildMI(MBB, I, DL, get(AArch64::ADDWri), DestReg)
2603             .addReg(SrcReg, getKillRegState(KillSrc))
2604             .addImm(0)
2605             .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0));
2606       }
2607     } else if (SrcReg == AArch64::WZR && Subtarget.hasZeroCycleZeroingGP()) {
2608       BuildMI(MBB, I, DL, get(AArch64::MOVZWi), DestReg)
2609           .addImm(0)
2610           .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0));
2611     } else {
2612       if (Subtarget.hasZeroCycleRegMove()) {
2613         // Cyclone recognizes "ORR Xd, XZR, Xm" as a zero-cycle register move.
2614         MCRegister DestRegX = TRI->getMatchingSuperReg(
2615             DestReg, AArch64::sub_32, &AArch64::GPR64spRegClass);
2616         MCRegister SrcRegX = TRI->getMatchingSuperReg(
2617             SrcReg, AArch64::sub_32, &AArch64::GPR64spRegClass);
2618         // This instruction is reading and writing X registers.  This may upset
2619         // the register scavenger and machine verifier, so we need to indicate
2620         // that we are reading an undefined value from SrcRegX, but a proper
2621         // value from SrcReg.
2622         BuildMI(MBB, I, DL, get(AArch64::ORRXrr), DestRegX)
2623             .addReg(AArch64::XZR)
2624             .addReg(SrcRegX, RegState::Undef)
2625             .addReg(SrcReg, RegState::Implicit | getKillRegState(KillSrc));
2626       } else {
2627         // Otherwise, expand to ORR WZR.
2628         BuildMI(MBB, I, DL, get(AArch64::ORRWrr), DestReg)
2629             .addReg(AArch64::WZR)
2630             .addReg(SrcReg, getKillRegState(KillSrc));
2631       }
2632     }
2633     return;
2634   }
2635 
2636   // Copy a Predicate register by ORRing with itself.
2637   if (AArch64::PPRRegClass.contains(DestReg) &&
2638       AArch64::PPRRegClass.contains(SrcReg)) {
2639     assert(Subtarget.hasSVE() && "Unexpected SVE register.");
2640     BuildMI(MBB, I, DL, get(AArch64::ORR_PPzPP), DestReg)
2641       .addReg(SrcReg) // Pg
2642       .addReg(SrcReg)
2643       .addReg(SrcReg, getKillRegState(KillSrc));
2644     return;
2645   }
2646 
2647   // Copy a Z register by ORRing with itself.
2648   if (AArch64::ZPRRegClass.contains(DestReg) &&
2649       AArch64::ZPRRegClass.contains(SrcReg)) {
2650     assert(Subtarget.hasSVE() && "Unexpected SVE register.");
2651     BuildMI(MBB, I, DL, get(AArch64::ORR_ZZZ), DestReg)
2652       .addReg(SrcReg)
2653       .addReg(SrcReg, getKillRegState(KillSrc));
2654     return;
2655   }
2656 
2657   if (AArch64::GPR64spRegClass.contains(DestReg) &&
2658       (AArch64::GPR64spRegClass.contains(SrcReg) || SrcReg == AArch64::XZR)) {
2659     if (DestReg == AArch64::SP || SrcReg == AArch64::SP) {
2660       // If either operand is SP, expand to ADD #0.
2661       BuildMI(MBB, I, DL, get(AArch64::ADDXri), DestReg)
2662           .addReg(SrcReg, getKillRegState(KillSrc))
2663           .addImm(0)
2664           .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0));
2665     } else if (SrcReg == AArch64::XZR && Subtarget.hasZeroCycleZeroingGP()) {
2666       BuildMI(MBB, I, DL, get(AArch64::MOVZXi), DestReg)
2667           .addImm(0)
2668           .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0));
2669     } else {
2670       // Otherwise, expand to ORR XZR.
2671       BuildMI(MBB, I, DL, get(AArch64::ORRXrr), DestReg)
2672           .addReg(AArch64::XZR)
2673           .addReg(SrcReg, getKillRegState(KillSrc));
2674     }
2675     return;
2676   }
2677 
2678   // Copy a DDDD register quad by copying the individual sub-registers.
2679   if (AArch64::DDDDRegClass.contains(DestReg) &&
2680       AArch64::DDDDRegClass.contains(SrcReg)) {
2681     static const unsigned Indices[] = {AArch64::dsub0, AArch64::dsub1,
2682                                        AArch64::dsub2, AArch64::dsub3};
2683     copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv8i8,
2684                      Indices);
2685     return;
2686   }
2687 
2688   // Copy a DDD register triple by copying the individual sub-registers.
2689   if (AArch64::DDDRegClass.contains(DestReg) &&
2690       AArch64::DDDRegClass.contains(SrcReg)) {
2691     static const unsigned Indices[] = {AArch64::dsub0, AArch64::dsub1,
2692                                        AArch64::dsub2};
2693     copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv8i8,
2694                      Indices);
2695     return;
2696   }
2697 
2698   // Copy a DD register pair by copying the individual sub-registers.
2699   if (AArch64::DDRegClass.contains(DestReg) &&
2700       AArch64::DDRegClass.contains(SrcReg)) {
2701     static const unsigned Indices[] = {AArch64::dsub0, AArch64::dsub1};
2702     copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv8i8,
2703                      Indices);
2704     return;
2705   }
2706 
2707   // Copy a QQQQ register quad by copying the individual sub-registers.
2708   if (AArch64::QQQQRegClass.contains(DestReg) &&
2709       AArch64::QQQQRegClass.contains(SrcReg)) {
2710     static const unsigned Indices[] = {AArch64::qsub0, AArch64::qsub1,
2711                                        AArch64::qsub2, AArch64::qsub3};
2712     copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv16i8,
2713                      Indices);
2714     return;
2715   }
2716 
2717   // Copy a QQQ register triple by copying the individual sub-registers.
2718   if (AArch64::QQQRegClass.contains(DestReg) &&
2719       AArch64::QQQRegClass.contains(SrcReg)) {
2720     static const unsigned Indices[] = {AArch64::qsub0, AArch64::qsub1,
2721                                        AArch64::qsub2};
2722     copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv16i8,
2723                      Indices);
2724     return;
2725   }
2726 
2727   // Copy a QQ register pair by copying the individual sub-registers.
2728   if (AArch64::QQRegClass.contains(DestReg) &&
2729       AArch64::QQRegClass.contains(SrcReg)) {
2730     static const unsigned Indices[] = {AArch64::qsub0, AArch64::qsub1};
2731     copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv16i8,
2732                      Indices);
2733     return;
2734   }
2735 
2736   if (AArch64::XSeqPairsClassRegClass.contains(DestReg) &&
2737       AArch64::XSeqPairsClassRegClass.contains(SrcReg)) {
2738     static const unsigned Indices[] = {AArch64::sube64, AArch64::subo64};
2739     copyGPRRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRXrs,
2740                     AArch64::XZR, Indices);
2741     return;
2742   }
2743 
2744   if (AArch64::WSeqPairsClassRegClass.contains(DestReg) &&
2745       AArch64::WSeqPairsClassRegClass.contains(SrcReg)) {
2746     static const unsigned Indices[] = {AArch64::sube32, AArch64::subo32};
2747     copyGPRRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRWrs,
2748                     AArch64::WZR, Indices);
2749     return;
2750   }
2751 
2752   if (AArch64::FPR128RegClass.contains(DestReg) &&
2753       AArch64::FPR128RegClass.contains(SrcReg)) {
2754     if (Subtarget.hasNEON()) {
2755       BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestReg)
2756           .addReg(SrcReg)
2757           .addReg(SrcReg, getKillRegState(KillSrc));
2758     } else {
2759       BuildMI(MBB, I, DL, get(AArch64::STRQpre))
2760           .addReg(AArch64::SP, RegState::Define)
2761           .addReg(SrcReg, getKillRegState(KillSrc))
2762           .addReg(AArch64::SP)
2763           .addImm(-16);
2764       BuildMI(MBB, I, DL, get(AArch64::LDRQpre))
2765           .addReg(AArch64::SP, RegState::Define)
2766           .addReg(DestReg, RegState::Define)
2767           .addReg(AArch64::SP)
2768           .addImm(16);
2769     }
2770     return;
2771   }
2772 
2773   if (AArch64::FPR64RegClass.contains(DestReg) &&
2774       AArch64::FPR64RegClass.contains(SrcReg)) {
2775     if (Subtarget.hasNEON()) {
2776       DestReg = RI.getMatchingSuperReg(DestReg, AArch64::dsub,
2777                                        &AArch64::FPR128RegClass);
2778       SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::dsub,
2779                                       &AArch64::FPR128RegClass);
2780       BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestReg)
2781           .addReg(SrcReg)
2782           .addReg(SrcReg, getKillRegState(KillSrc));
2783     } else {
2784       BuildMI(MBB, I, DL, get(AArch64::FMOVDr), DestReg)
2785           .addReg(SrcReg, getKillRegState(KillSrc));
2786     }
2787     return;
2788   }
2789 
2790   if (AArch64::FPR32RegClass.contains(DestReg) &&
2791       AArch64::FPR32RegClass.contains(SrcReg)) {
2792     if (Subtarget.hasNEON()) {
2793       DestReg = RI.getMatchingSuperReg(DestReg, AArch64::ssub,
2794                                        &AArch64::FPR128RegClass);
2795       SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::ssub,
2796                                       &AArch64::FPR128RegClass);
2797       BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestReg)
2798           .addReg(SrcReg)
2799           .addReg(SrcReg, getKillRegState(KillSrc));
2800     } else {
2801       BuildMI(MBB, I, DL, get(AArch64::FMOVSr), DestReg)
2802           .addReg(SrcReg, getKillRegState(KillSrc));
2803     }
2804     return;
2805   }
2806 
2807   if (AArch64::FPR16RegClass.contains(DestReg) &&
2808       AArch64::FPR16RegClass.contains(SrcReg)) {
2809     if (Subtarget.hasNEON()) {
2810       DestReg = RI.getMatchingSuperReg(DestReg, AArch64::hsub,
2811                                        &AArch64::FPR128RegClass);
2812       SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::hsub,
2813                                       &AArch64::FPR128RegClass);
2814       BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestReg)
2815           .addReg(SrcReg)
2816           .addReg(SrcReg, getKillRegState(KillSrc));
2817     } else {
2818       DestReg = RI.getMatchingSuperReg(DestReg, AArch64::hsub,
2819                                        &AArch64::FPR32RegClass);
2820       SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::hsub,
2821                                       &AArch64::FPR32RegClass);
2822       BuildMI(MBB, I, DL, get(AArch64::FMOVSr), DestReg)
2823           .addReg(SrcReg, getKillRegState(KillSrc));
2824     }
2825     return;
2826   }
2827 
2828   if (AArch64::FPR8RegClass.contains(DestReg) &&
2829       AArch64::FPR8RegClass.contains(SrcReg)) {
2830     if (Subtarget.hasNEON()) {
2831       DestReg = RI.getMatchingSuperReg(DestReg, AArch64::bsub,
2832                                        &AArch64::FPR128RegClass);
2833       SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::bsub,
2834                                       &AArch64::FPR128RegClass);
2835       BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestReg)
2836           .addReg(SrcReg)
2837           .addReg(SrcReg, getKillRegState(KillSrc));
2838     } else {
2839       DestReg = RI.getMatchingSuperReg(DestReg, AArch64::bsub,
2840                                        &AArch64::FPR32RegClass);
2841       SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::bsub,
2842                                       &AArch64::FPR32RegClass);
2843       BuildMI(MBB, I, DL, get(AArch64::FMOVSr), DestReg)
2844           .addReg(SrcReg, getKillRegState(KillSrc));
2845     }
2846     return;
2847   }
2848 
2849   // Copies between GPR64 and FPR64.
2850   if (AArch64::FPR64RegClass.contains(DestReg) &&
2851       AArch64::GPR64RegClass.contains(SrcReg)) {
2852     BuildMI(MBB, I, DL, get(AArch64::FMOVXDr), DestReg)
2853         .addReg(SrcReg, getKillRegState(KillSrc));
2854     return;
2855   }
2856   if (AArch64::GPR64RegClass.contains(DestReg) &&
2857       AArch64::FPR64RegClass.contains(SrcReg)) {
2858     BuildMI(MBB, I, DL, get(AArch64::FMOVDXr), DestReg)
2859         .addReg(SrcReg, getKillRegState(KillSrc));
2860     return;
2861   }
2862   // Copies between GPR32 and FPR32.
2863   if (AArch64::FPR32RegClass.contains(DestReg) &&
2864       AArch64::GPR32RegClass.contains(SrcReg)) {
2865     BuildMI(MBB, I, DL, get(AArch64::FMOVWSr), DestReg)
2866         .addReg(SrcReg, getKillRegState(KillSrc));
2867     return;
2868   }
2869   if (AArch64::GPR32RegClass.contains(DestReg) &&
2870       AArch64::FPR32RegClass.contains(SrcReg)) {
2871     BuildMI(MBB, I, DL, get(AArch64::FMOVSWr), DestReg)
2872         .addReg(SrcReg, getKillRegState(KillSrc));
2873     return;
2874   }
2875 
2876   if (DestReg == AArch64::NZCV) {
2877     assert(AArch64::GPR64RegClass.contains(SrcReg) && "Invalid NZCV copy");
2878     BuildMI(MBB, I, DL, get(AArch64::MSR))
2879         .addImm(AArch64SysReg::NZCV)
2880         .addReg(SrcReg, getKillRegState(KillSrc))
2881         .addReg(AArch64::NZCV, RegState::Implicit | RegState::Define);
2882     return;
2883   }
2884 
2885   if (SrcReg == AArch64::NZCV) {
2886     assert(AArch64::GPR64RegClass.contains(DestReg) && "Invalid NZCV copy");
2887     BuildMI(MBB, I, DL, get(AArch64::MRS), DestReg)
2888         .addImm(AArch64SysReg::NZCV)
2889         .addReg(AArch64::NZCV, RegState::Implicit | getKillRegState(KillSrc));
2890     return;
2891   }
2892 
2893   llvm_unreachable("unimplemented reg-to-reg copy");
2894 }
2895 
2896 static void storeRegPairToStackSlot(const TargetRegisterInfo &TRI,
2897                                     MachineBasicBlock &MBB,
2898                                     MachineBasicBlock::iterator InsertBefore,
2899                                     const MCInstrDesc &MCID,
2900                                     Register SrcReg, bool IsKill,
2901                                     unsigned SubIdx0, unsigned SubIdx1, int FI,
2902                                     MachineMemOperand *MMO) {
2903   Register SrcReg0 = SrcReg;
2904   Register SrcReg1 = SrcReg;
2905   if (Register::isPhysicalRegister(SrcReg)) {
2906     SrcReg0 = TRI.getSubReg(SrcReg, SubIdx0);
2907     SubIdx0 = 0;
2908     SrcReg1 = TRI.getSubReg(SrcReg, SubIdx1);
2909     SubIdx1 = 0;
2910   }
2911   BuildMI(MBB, InsertBefore, DebugLoc(), MCID)
2912       .addReg(SrcReg0, getKillRegState(IsKill), SubIdx0)
2913       .addReg(SrcReg1, getKillRegState(IsKill), SubIdx1)
2914       .addFrameIndex(FI)
2915       .addImm(0)
2916       .addMemOperand(MMO);
2917 }
2918 
2919 void AArch64InstrInfo::storeRegToStackSlot(
2920     MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, Register SrcReg,
2921     bool isKill, int FI, const TargetRegisterClass *RC,
2922     const TargetRegisterInfo *TRI) const {
2923   MachineFunction &MF = *MBB.getParent();
2924   MachineFrameInfo &MFI = MF.getFrameInfo();
2925 
2926   MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(MF, FI);
2927   MachineMemOperand *MMO =
2928       MF.getMachineMemOperand(PtrInfo, MachineMemOperand::MOStore,
2929                               MFI.getObjectSize(FI), MFI.getObjectAlign(FI));
2930   unsigned Opc = 0;
2931   bool Offset = true;
2932   switch (TRI->getSpillSize(*RC)) {
2933   case 1:
2934     if (AArch64::FPR8RegClass.hasSubClassEq(RC))
2935       Opc = AArch64::STRBui;
2936     break;
2937   case 2:
2938     if (AArch64::FPR16RegClass.hasSubClassEq(RC))
2939       Opc = AArch64::STRHui;
2940     break;
2941   case 4:
2942     if (AArch64::GPR32allRegClass.hasSubClassEq(RC)) {
2943       Opc = AArch64::STRWui;
2944       if (Register::isVirtualRegister(SrcReg))
2945         MF.getRegInfo().constrainRegClass(SrcReg, &AArch64::GPR32RegClass);
2946       else
2947         assert(SrcReg != AArch64::WSP);
2948     } else if (AArch64::FPR32RegClass.hasSubClassEq(RC))
2949       Opc = AArch64::STRSui;
2950     break;
2951   case 8:
2952     if (AArch64::GPR64allRegClass.hasSubClassEq(RC)) {
2953       Opc = AArch64::STRXui;
2954       if (Register::isVirtualRegister(SrcReg))
2955         MF.getRegInfo().constrainRegClass(SrcReg, &AArch64::GPR64RegClass);
2956       else
2957         assert(SrcReg != AArch64::SP);
2958     } else if (AArch64::FPR64RegClass.hasSubClassEq(RC)) {
2959       Opc = AArch64::STRDui;
2960     } else if (AArch64::WSeqPairsClassRegClass.hasSubClassEq(RC)) {
2961       storeRegPairToStackSlot(getRegisterInfo(), MBB, MBBI,
2962                               get(AArch64::STPWi), SrcReg, isKill,
2963                               AArch64::sube32, AArch64::subo32, FI, MMO);
2964       return;
2965     }
2966     break;
2967   case 16:
2968     if (AArch64::FPR128RegClass.hasSubClassEq(RC))
2969       Opc = AArch64::STRQui;
2970     else if (AArch64::DDRegClass.hasSubClassEq(RC)) {
2971       assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
2972       Opc = AArch64::ST1Twov1d;
2973       Offset = false;
2974     } else if (AArch64::XSeqPairsClassRegClass.hasSubClassEq(RC)) {
2975       storeRegPairToStackSlot(getRegisterInfo(), MBB, MBBI,
2976                               get(AArch64::STPXi), SrcReg, isKill,
2977                               AArch64::sube64, AArch64::subo64, FI, MMO);
2978       return;
2979     }
2980     break;
2981   case 24:
2982     if (AArch64::DDDRegClass.hasSubClassEq(RC)) {
2983       assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
2984       Opc = AArch64::ST1Threev1d;
2985       Offset = false;
2986     }
2987     break;
2988   case 32:
2989     if (AArch64::DDDDRegClass.hasSubClassEq(RC)) {
2990       assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
2991       Opc = AArch64::ST1Fourv1d;
2992       Offset = false;
2993     } else if (AArch64::QQRegClass.hasSubClassEq(RC)) {
2994       assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
2995       Opc = AArch64::ST1Twov2d;
2996       Offset = false;
2997     }
2998     break;
2999   case 48:
3000     if (AArch64::QQQRegClass.hasSubClassEq(RC)) {
3001       assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
3002       Opc = AArch64::ST1Threev2d;
3003       Offset = false;
3004     }
3005     break;
3006   case 64:
3007     if (AArch64::QQQQRegClass.hasSubClassEq(RC)) {
3008       assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
3009       Opc = AArch64::ST1Fourv2d;
3010       Offset = false;
3011     }
3012     break;
3013   }
3014   unsigned StackID = TargetStackID::Default;
3015   if (AArch64::PPRRegClass.hasSubClassEq(RC)) {
3016     assert(Subtarget.hasSVE() && "Unexpected register store without SVE");
3017     Opc = AArch64::STR_PXI;
3018     StackID = TargetStackID::SVEVector;
3019   } else if (AArch64::ZPRRegClass.hasSubClassEq(RC)) {
3020     assert(Subtarget.hasSVE() && "Unexpected register store without SVE");
3021     Opc = AArch64::STR_ZXI;
3022     StackID = TargetStackID::SVEVector;
3023   }
3024   assert(Opc && "Unknown register class");
3025   MFI.setStackID(FI, StackID);
3026 
3027   const MachineInstrBuilder MI = BuildMI(MBB, MBBI, DebugLoc(), get(Opc))
3028                                      .addReg(SrcReg, getKillRegState(isKill))
3029                                      .addFrameIndex(FI);
3030 
3031   if (Offset)
3032     MI.addImm(0);
3033   MI.addMemOperand(MMO);
3034 }
3035 
3036 static void loadRegPairFromStackSlot(const TargetRegisterInfo &TRI,
3037                                      MachineBasicBlock &MBB,
3038                                      MachineBasicBlock::iterator InsertBefore,
3039                                      const MCInstrDesc &MCID,
3040                                      Register DestReg, unsigned SubIdx0,
3041                                      unsigned SubIdx1, int FI,
3042                                      MachineMemOperand *MMO) {
3043   Register DestReg0 = DestReg;
3044   Register DestReg1 = DestReg;
3045   bool IsUndef = true;
3046   if (Register::isPhysicalRegister(DestReg)) {
3047     DestReg0 = TRI.getSubReg(DestReg, SubIdx0);
3048     SubIdx0 = 0;
3049     DestReg1 = TRI.getSubReg(DestReg, SubIdx1);
3050     SubIdx1 = 0;
3051     IsUndef = false;
3052   }
3053   BuildMI(MBB, InsertBefore, DebugLoc(), MCID)
3054       .addReg(DestReg0, RegState::Define | getUndefRegState(IsUndef), SubIdx0)
3055       .addReg(DestReg1, RegState::Define | getUndefRegState(IsUndef), SubIdx1)
3056       .addFrameIndex(FI)
3057       .addImm(0)
3058       .addMemOperand(MMO);
3059 }
3060 
3061 void AArch64InstrInfo::loadRegFromStackSlot(
3062     MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, Register DestReg,
3063     int FI, const TargetRegisterClass *RC,
3064     const TargetRegisterInfo *TRI) const {
3065   MachineFunction &MF = *MBB.getParent();
3066   MachineFrameInfo &MFI = MF.getFrameInfo();
3067   MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(MF, FI);
3068   MachineMemOperand *MMO =
3069       MF.getMachineMemOperand(PtrInfo, MachineMemOperand::MOLoad,
3070                               MFI.getObjectSize(FI), MFI.getObjectAlign(FI));
3071 
3072   unsigned Opc = 0;
3073   bool Offset = true;
3074   switch (TRI->getSpillSize(*RC)) {
3075   case 1:
3076     if (AArch64::FPR8RegClass.hasSubClassEq(RC))
3077       Opc = AArch64::LDRBui;
3078     break;
3079   case 2:
3080     if (AArch64::FPR16RegClass.hasSubClassEq(RC))
3081       Opc = AArch64::LDRHui;
3082     break;
3083   case 4:
3084     if (AArch64::GPR32allRegClass.hasSubClassEq(RC)) {
3085       Opc = AArch64::LDRWui;
3086       if (Register::isVirtualRegister(DestReg))
3087         MF.getRegInfo().constrainRegClass(DestReg, &AArch64::GPR32RegClass);
3088       else
3089         assert(DestReg != AArch64::WSP);
3090     } else if (AArch64::FPR32RegClass.hasSubClassEq(RC))
3091       Opc = AArch64::LDRSui;
3092     break;
3093   case 8:
3094     if (AArch64::GPR64allRegClass.hasSubClassEq(RC)) {
3095       Opc = AArch64::LDRXui;
3096       if (Register::isVirtualRegister(DestReg))
3097         MF.getRegInfo().constrainRegClass(DestReg, &AArch64::GPR64RegClass);
3098       else
3099         assert(DestReg != AArch64::SP);
3100     } else if (AArch64::FPR64RegClass.hasSubClassEq(RC)) {
3101       Opc = AArch64::LDRDui;
3102     } else if (AArch64::WSeqPairsClassRegClass.hasSubClassEq(RC)) {
3103       loadRegPairFromStackSlot(getRegisterInfo(), MBB, MBBI,
3104                                get(AArch64::LDPWi), DestReg, AArch64::sube32,
3105                                AArch64::subo32, FI, MMO);
3106       return;
3107     }
3108     break;
3109   case 16:
3110     if (AArch64::FPR128RegClass.hasSubClassEq(RC))
3111       Opc = AArch64::LDRQui;
3112     else if (AArch64::DDRegClass.hasSubClassEq(RC)) {
3113       assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
3114       Opc = AArch64::LD1Twov1d;
3115       Offset = false;
3116     } else if (AArch64::XSeqPairsClassRegClass.hasSubClassEq(RC)) {
3117       loadRegPairFromStackSlot(getRegisterInfo(), MBB, MBBI,
3118                                get(AArch64::LDPXi), DestReg, AArch64::sube64,
3119                                AArch64::subo64, FI, MMO);
3120       return;
3121     }
3122     break;
3123   case 24:
3124     if (AArch64::DDDRegClass.hasSubClassEq(RC)) {
3125       assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
3126       Opc = AArch64::LD1Threev1d;
3127       Offset = false;
3128     }
3129     break;
3130   case 32:
3131     if (AArch64::DDDDRegClass.hasSubClassEq(RC)) {
3132       assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
3133       Opc = AArch64::LD1Fourv1d;
3134       Offset = false;
3135     } else if (AArch64::QQRegClass.hasSubClassEq(RC)) {
3136       assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
3137       Opc = AArch64::LD1Twov2d;
3138       Offset = false;
3139     }
3140     break;
3141   case 48:
3142     if (AArch64::QQQRegClass.hasSubClassEq(RC)) {
3143       assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
3144       Opc = AArch64::LD1Threev2d;
3145       Offset = false;
3146     }
3147     break;
3148   case 64:
3149     if (AArch64::QQQQRegClass.hasSubClassEq(RC)) {
3150       assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
3151       Opc = AArch64::LD1Fourv2d;
3152       Offset = false;
3153     }
3154     break;
3155   }
3156 
3157   unsigned StackID = TargetStackID::Default;
3158   if (AArch64::PPRRegClass.hasSubClassEq(RC)) {
3159     assert(Subtarget.hasSVE() && "Unexpected register load without SVE");
3160     Opc = AArch64::LDR_PXI;
3161     StackID = TargetStackID::SVEVector;
3162   } else if (AArch64::ZPRRegClass.hasSubClassEq(RC)) {
3163     assert(Subtarget.hasSVE() && "Unexpected register load without SVE");
3164     Opc = AArch64::LDR_ZXI;
3165     StackID = TargetStackID::SVEVector;
3166   }
3167   assert(Opc && "Unknown register class");
3168   MFI.setStackID(FI, StackID);
3169 
3170   const MachineInstrBuilder MI = BuildMI(MBB, MBBI, DebugLoc(), get(Opc))
3171                                      .addReg(DestReg, getDefRegState(true))
3172                                      .addFrameIndex(FI);
3173   if (Offset)
3174     MI.addImm(0);
3175   MI.addMemOperand(MMO);
3176 }
3177 
3178 // Helper function to emit a frame offset adjustment from a given
3179 // pointer (SrcReg), stored into DestReg. This function is explicit
3180 // in that it requires the opcode.
3181 static void emitFrameOffsetAdj(MachineBasicBlock &MBB,
3182                                MachineBasicBlock::iterator MBBI,
3183                                const DebugLoc &DL, unsigned DestReg,
3184                                unsigned SrcReg, int64_t Offset, unsigned Opc,
3185                                const TargetInstrInfo *TII,
3186                                MachineInstr::MIFlag Flag, bool NeedsWinCFI,
3187                                bool *HasWinCFI) {
3188   int Sign = 1;
3189   unsigned MaxEncoding, ShiftSize;
3190   switch (Opc) {
3191   case AArch64::ADDXri:
3192   case AArch64::ADDSXri:
3193   case AArch64::SUBXri:
3194   case AArch64::SUBSXri:
3195     MaxEncoding = 0xfff;
3196     ShiftSize = 12;
3197     break;
3198   case AArch64::ADDVL_XXI:
3199   case AArch64::ADDPL_XXI:
3200     MaxEncoding = 31;
3201     ShiftSize = 0;
3202     if (Offset < 0) {
3203       MaxEncoding = 32;
3204       Sign = -1;
3205       Offset = -Offset;
3206     }
3207     break;
3208   default:
3209     llvm_unreachable("Unsupported opcode");
3210   }
3211 
3212   // FIXME: If the offset won't fit in 24-bits, compute the offset into a
3213   // scratch register.  If DestReg is a virtual register, use it as the
3214   // scratch register; otherwise, create a new virtual register (to be
3215   // replaced by the scavenger at the end of PEI).  That case can be optimized
3216   // slightly if DestReg is SP which is always 16-byte aligned, so the scratch
3217   // register can be loaded with offset%8 and the add/sub can use an extending
3218   // instruction with LSL#3.
3219   // Currently the function handles any offsets but generates a poor sequence
3220   // of code.
3221   //  assert(Offset < (1 << 24) && "unimplemented reg plus immediate");
3222 
3223   const unsigned MaxEncodableValue = MaxEncoding << ShiftSize;
3224   do {
3225     uint64_t ThisVal = std::min<uint64_t>(Offset, MaxEncodableValue);
3226     unsigned LocalShiftSize = 0;
3227     if (ThisVal > MaxEncoding) {
3228       ThisVal = ThisVal >> ShiftSize;
3229       LocalShiftSize = ShiftSize;
3230     }
3231     assert((ThisVal >> ShiftSize) <= MaxEncoding &&
3232            "Encoding cannot handle value that big");
3233     auto MBI = BuildMI(MBB, MBBI, DL, TII->get(Opc), DestReg)
3234                    .addReg(SrcReg)
3235                    .addImm(Sign * (int)ThisVal);
3236     if (ShiftSize)
3237       MBI = MBI.addImm(
3238           AArch64_AM::getShifterImm(AArch64_AM::LSL, LocalShiftSize));
3239     MBI = MBI.setMIFlag(Flag);
3240 
3241     if (NeedsWinCFI) {
3242       assert(Sign == 1 && "SEH directives should always have a positive sign");
3243       int Imm = (int)(ThisVal << LocalShiftSize);
3244       if ((DestReg == AArch64::FP && SrcReg == AArch64::SP) ||
3245           (SrcReg == AArch64::FP && DestReg == AArch64::SP)) {
3246         if (HasWinCFI)
3247           *HasWinCFI = true;
3248         if (Imm == 0)
3249           BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_SetFP)).setMIFlag(Flag);
3250         else
3251           BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_AddFP))
3252               .addImm(Imm)
3253               .setMIFlag(Flag);
3254         assert((Offset - Imm) == 0 && "Expected remaining offset to be zero to "
3255                                       "emit a single SEH directive");
3256       } else if (DestReg == AArch64::SP) {
3257         if (HasWinCFI)
3258           *HasWinCFI = true;
3259         assert(SrcReg == AArch64::SP && "Unexpected SrcReg for SEH_StackAlloc");
3260         BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_StackAlloc))
3261             .addImm(Imm)
3262             .setMIFlag(Flag);
3263       }
3264       if (HasWinCFI)
3265         *HasWinCFI = true;
3266     }
3267 
3268     SrcReg = DestReg;
3269     Offset -= ThisVal << LocalShiftSize;
3270   } while (Offset);
3271 }
3272 
3273 void llvm::emitFrameOffset(MachineBasicBlock &MBB,
3274                            MachineBasicBlock::iterator MBBI, const DebugLoc &DL,
3275                            unsigned DestReg, unsigned SrcReg,
3276                            StackOffset Offset, const TargetInstrInfo *TII,
3277                            MachineInstr::MIFlag Flag, bool SetNZCV,
3278                            bool NeedsWinCFI, bool *HasWinCFI) {
3279   int64_t Bytes, NumPredicateVectors, NumDataVectors;
3280   Offset.getForFrameOffset(Bytes, NumPredicateVectors, NumDataVectors);
3281 
3282   // First emit non-scalable frame offsets, or a simple 'mov'.
3283   if (Bytes || (!Offset && SrcReg != DestReg)) {
3284     assert((DestReg != AArch64::SP || Bytes % 16 == 0) &&
3285            "SP increment/decrement not 16-byte aligned");
3286     unsigned Opc = SetNZCV ? AArch64::ADDSXri : AArch64::ADDXri;
3287     if (Bytes < 0) {
3288       Bytes = -Bytes;
3289       Opc = SetNZCV ? AArch64::SUBSXri : AArch64::SUBXri;
3290     }
3291     emitFrameOffsetAdj(MBB, MBBI, DL, DestReg, SrcReg, Bytes, Opc, TII, Flag,
3292                        NeedsWinCFI, HasWinCFI);
3293     SrcReg = DestReg;
3294   }
3295 
3296   assert(!(SetNZCV && (NumPredicateVectors || NumDataVectors)) &&
3297          "SetNZCV not supported with SVE vectors");
3298   assert(!(NeedsWinCFI && (NumPredicateVectors || NumDataVectors)) &&
3299          "WinCFI not supported with SVE vectors");
3300 
3301   if (NumDataVectors) {
3302     emitFrameOffsetAdj(MBB, MBBI, DL, DestReg, SrcReg, NumDataVectors,
3303                        AArch64::ADDVL_XXI, TII, Flag, NeedsWinCFI, nullptr);
3304     SrcReg = DestReg;
3305   }
3306 
3307   if (NumPredicateVectors) {
3308     assert(DestReg != AArch64::SP && "Unaligned access to SP");
3309     emitFrameOffsetAdj(MBB, MBBI, DL, DestReg, SrcReg, NumPredicateVectors,
3310                        AArch64::ADDPL_XXI, TII, Flag, NeedsWinCFI, nullptr);
3311   }
3312 }
3313 
3314 MachineInstr *AArch64InstrInfo::foldMemoryOperandImpl(
3315     MachineFunction &MF, MachineInstr &MI, ArrayRef<unsigned> Ops,
3316     MachineBasicBlock::iterator InsertPt, int FrameIndex,
3317     LiveIntervals *LIS, VirtRegMap *VRM) const {
3318   // This is a bit of a hack. Consider this instruction:
3319   //
3320   //   %0 = COPY %sp; GPR64all:%0
3321   //
3322   // We explicitly chose GPR64all for the virtual register so such a copy might
3323   // be eliminated by RegisterCoalescer. However, that may not be possible, and
3324   // %0 may even spill. We can't spill %sp, and since it is in the GPR64all
3325   // register class, TargetInstrInfo::foldMemoryOperand() is going to try.
3326   //
3327   // To prevent that, we are going to constrain the %0 register class here.
3328   //
3329   // <rdar://problem/11522048>
3330   //
3331   if (MI.isFullCopy()) {
3332     Register DstReg = MI.getOperand(0).getReg();
3333     Register SrcReg = MI.getOperand(1).getReg();
3334     if (SrcReg == AArch64::SP && Register::isVirtualRegister(DstReg)) {
3335       MF.getRegInfo().constrainRegClass(DstReg, &AArch64::GPR64RegClass);
3336       return nullptr;
3337     }
3338     if (DstReg == AArch64::SP && Register::isVirtualRegister(SrcReg)) {
3339       MF.getRegInfo().constrainRegClass(SrcReg, &AArch64::GPR64RegClass);
3340       return nullptr;
3341     }
3342   }
3343 
3344   // Handle the case where a copy is being spilled or filled but the source
3345   // and destination register class don't match.  For example:
3346   //
3347   //   %0 = COPY %xzr; GPR64common:%0
3348   //
3349   // In this case we can still safely fold away the COPY and generate the
3350   // following spill code:
3351   //
3352   //   STRXui %xzr, %stack.0
3353   //
3354   // This also eliminates spilled cross register class COPYs (e.g. between x and
3355   // d regs) of the same size.  For example:
3356   //
3357   //   %0 = COPY %1; GPR64:%0, FPR64:%1
3358   //
3359   // will be filled as
3360   //
3361   //   LDRDui %0, fi<#0>
3362   //
3363   // instead of
3364   //
3365   //   LDRXui %Temp, fi<#0>
3366   //   %0 = FMOV %Temp
3367   //
3368   if (MI.isCopy() && Ops.size() == 1 &&
3369       // Make sure we're only folding the explicit COPY defs/uses.
3370       (Ops[0] == 0 || Ops[0] == 1)) {
3371     bool IsSpill = Ops[0] == 0;
3372     bool IsFill = !IsSpill;
3373     const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo();
3374     const MachineRegisterInfo &MRI = MF.getRegInfo();
3375     MachineBasicBlock &MBB = *MI.getParent();
3376     const MachineOperand &DstMO = MI.getOperand(0);
3377     const MachineOperand &SrcMO = MI.getOperand(1);
3378     Register DstReg = DstMO.getReg();
3379     Register SrcReg = SrcMO.getReg();
3380     // This is slightly expensive to compute for physical regs since
3381     // getMinimalPhysRegClass is slow.
3382     auto getRegClass = [&](unsigned Reg) {
3383       return Register::isVirtualRegister(Reg) ? MRI.getRegClass(Reg)
3384                                               : TRI.getMinimalPhysRegClass(Reg);
3385     };
3386 
3387     if (DstMO.getSubReg() == 0 && SrcMO.getSubReg() == 0) {
3388       assert(TRI.getRegSizeInBits(*getRegClass(DstReg)) ==
3389                  TRI.getRegSizeInBits(*getRegClass(SrcReg)) &&
3390              "Mismatched register size in non subreg COPY");
3391       if (IsSpill)
3392         storeRegToStackSlot(MBB, InsertPt, SrcReg, SrcMO.isKill(), FrameIndex,
3393                             getRegClass(SrcReg), &TRI);
3394       else
3395         loadRegFromStackSlot(MBB, InsertPt, DstReg, FrameIndex,
3396                              getRegClass(DstReg), &TRI);
3397       return &*--InsertPt;
3398     }
3399 
3400     // Handle cases like spilling def of:
3401     //
3402     //   %0:sub_32<def,read-undef> = COPY %wzr; GPR64common:%0
3403     //
3404     // where the physical register source can be widened and stored to the full
3405     // virtual reg destination stack slot, in this case producing:
3406     //
3407     //   STRXui %xzr, %stack.0
3408     //
3409     if (IsSpill && DstMO.isUndef() && Register::isPhysicalRegister(SrcReg)) {
3410       assert(SrcMO.getSubReg() == 0 &&
3411              "Unexpected subreg on physical register");
3412       const TargetRegisterClass *SpillRC;
3413       unsigned SpillSubreg;
3414       switch (DstMO.getSubReg()) {
3415       default:
3416         SpillRC = nullptr;
3417         break;
3418       case AArch64::sub_32:
3419       case AArch64::ssub:
3420         if (AArch64::GPR32RegClass.contains(SrcReg)) {
3421           SpillRC = &AArch64::GPR64RegClass;
3422           SpillSubreg = AArch64::sub_32;
3423         } else if (AArch64::FPR32RegClass.contains(SrcReg)) {
3424           SpillRC = &AArch64::FPR64RegClass;
3425           SpillSubreg = AArch64::ssub;
3426         } else
3427           SpillRC = nullptr;
3428         break;
3429       case AArch64::dsub:
3430         if (AArch64::FPR64RegClass.contains(SrcReg)) {
3431           SpillRC = &AArch64::FPR128RegClass;
3432           SpillSubreg = AArch64::dsub;
3433         } else
3434           SpillRC = nullptr;
3435         break;
3436       }
3437 
3438       if (SpillRC)
3439         if (unsigned WidenedSrcReg =
3440                 TRI.getMatchingSuperReg(SrcReg, SpillSubreg, SpillRC)) {
3441           storeRegToStackSlot(MBB, InsertPt, WidenedSrcReg, SrcMO.isKill(),
3442                               FrameIndex, SpillRC, &TRI);
3443           return &*--InsertPt;
3444         }
3445     }
3446 
3447     // Handle cases like filling use of:
3448     //
3449     //   %0:sub_32<def,read-undef> = COPY %1; GPR64:%0, GPR32:%1
3450     //
3451     // where we can load the full virtual reg source stack slot, into the subreg
3452     // destination, in this case producing:
3453     //
3454     //   LDRWui %0:sub_32<def,read-undef>, %stack.0
3455     //
3456     if (IsFill && SrcMO.getSubReg() == 0 && DstMO.isUndef()) {
3457       const TargetRegisterClass *FillRC;
3458       switch (DstMO.getSubReg()) {
3459       default:
3460         FillRC = nullptr;
3461         break;
3462       case AArch64::sub_32:
3463         FillRC = &AArch64::GPR32RegClass;
3464         break;
3465       case AArch64::ssub:
3466         FillRC = &AArch64::FPR32RegClass;
3467         break;
3468       case AArch64::dsub:
3469         FillRC = &AArch64::FPR64RegClass;
3470         break;
3471       }
3472 
3473       if (FillRC) {
3474         assert(TRI.getRegSizeInBits(*getRegClass(SrcReg)) ==
3475                    TRI.getRegSizeInBits(*FillRC) &&
3476                "Mismatched regclass size on folded subreg COPY");
3477         loadRegFromStackSlot(MBB, InsertPt, DstReg, FrameIndex, FillRC, &TRI);
3478         MachineInstr &LoadMI = *--InsertPt;
3479         MachineOperand &LoadDst = LoadMI.getOperand(0);
3480         assert(LoadDst.getSubReg() == 0 && "unexpected subreg on fill load");
3481         LoadDst.setSubReg(DstMO.getSubReg());
3482         LoadDst.setIsUndef();
3483         return &LoadMI;
3484       }
3485     }
3486   }
3487 
3488   // Cannot fold.
3489   return nullptr;
3490 }
3491 
3492 int llvm::isAArch64FrameOffsetLegal(const MachineInstr &MI,
3493                                     StackOffset &SOffset,
3494                                     bool *OutUseUnscaledOp,
3495                                     unsigned *OutUnscaledOp,
3496                                     int64_t *EmittableOffset) {
3497   // Set output values in case of early exit.
3498   if (EmittableOffset)
3499     *EmittableOffset = 0;
3500   if (OutUseUnscaledOp)
3501     *OutUseUnscaledOp = false;
3502   if (OutUnscaledOp)
3503     *OutUnscaledOp = 0;
3504 
3505   // Exit early for structured vector spills/fills as they can't take an
3506   // immediate offset.
3507   switch (MI.getOpcode()) {
3508   default:
3509     break;
3510   case AArch64::LD1Twov2d:
3511   case AArch64::LD1Threev2d:
3512   case AArch64::LD1Fourv2d:
3513   case AArch64::LD1Twov1d:
3514   case AArch64::LD1Threev1d:
3515   case AArch64::LD1Fourv1d:
3516   case AArch64::ST1Twov2d:
3517   case AArch64::ST1Threev2d:
3518   case AArch64::ST1Fourv2d:
3519   case AArch64::ST1Twov1d:
3520   case AArch64::ST1Threev1d:
3521   case AArch64::ST1Fourv1d:
3522   case AArch64::IRG:
3523   case AArch64::IRGstack:
3524   case AArch64::STGloop:
3525   case AArch64::STZGloop:
3526     return AArch64FrameOffsetCannotUpdate;
3527   }
3528 
3529   // Get the min/max offset and the scale.
3530   TypeSize ScaleValue(0U, false);
3531   unsigned Width;
3532   int64_t MinOff, MaxOff;
3533   if (!AArch64InstrInfo::getMemOpInfo(MI.getOpcode(), ScaleValue, Width, MinOff,
3534                                       MaxOff))
3535     llvm_unreachable("unhandled opcode in isAArch64FrameOffsetLegal");
3536 
3537   // Construct the complete offset.
3538   bool IsMulVL = ScaleValue.isScalable();
3539   unsigned Scale = ScaleValue.getKnownMinSize();
3540   int64_t Offset = IsMulVL ? SOffset.getScalableBytes() : SOffset.getBytes();
3541 
3542   const MachineOperand &ImmOpnd =
3543       MI.getOperand(AArch64InstrInfo::getLoadStoreImmIdx(MI.getOpcode()));
3544   Offset += ImmOpnd.getImm() * Scale;
3545 
3546   // If the offset doesn't match the scale, we rewrite the instruction to
3547   // use the unscaled instruction instead. Likewise, if we have a negative
3548   // offset and there is an unscaled op to use.
3549   Optional<unsigned> UnscaledOp =
3550       AArch64InstrInfo::getUnscaledLdSt(MI.getOpcode());
3551   bool useUnscaledOp = UnscaledOp && (Offset % Scale || Offset < 0);
3552   if (useUnscaledOp &&
3553       !AArch64InstrInfo::getMemOpInfo(*UnscaledOp, ScaleValue, Width, MinOff,
3554                                       MaxOff))
3555     llvm_unreachable("unhandled opcode in isAArch64FrameOffsetLegal");
3556 
3557   Scale = ScaleValue.getKnownMinSize();
3558   assert(IsMulVL == ScaleValue.isScalable() &&
3559          "Unscaled opcode has different value for scalable");
3560 
3561   int64_t Remainder = Offset % Scale;
3562   assert(!(Remainder && useUnscaledOp) &&
3563          "Cannot have remainder when using unscaled op");
3564 
3565   assert(MinOff < MaxOff && "Unexpected Min/Max offsets");
3566   int64_t NewOffset = Offset / Scale;
3567   if (MinOff <= NewOffset && NewOffset <= MaxOff)
3568     Offset = Remainder;
3569   else {
3570     NewOffset = NewOffset < 0 ? MinOff : MaxOff;
3571     Offset = Offset - NewOffset * Scale + Remainder;
3572   }
3573 
3574   if (EmittableOffset)
3575     *EmittableOffset = NewOffset;
3576   if (OutUseUnscaledOp)
3577     *OutUseUnscaledOp = useUnscaledOp;
3578   if (OutUnscaledOp && UnscaledOp)
3579     *OutUnscaledOp = *UnscaledOp;
3580 
3581   if (IsMulVL)
3582     SOffset = StackOffset(Offset, MVT::nxv1i8) +
3583               StackOffset(SOffset.getBytes(), MVT::i8);
3584   else
3585     SOffset = StackOffset(Offset, MVT::i8) +
3586               StackOffset(SOffset.getScalableBytes(), MVT::nxv1i8);
3587   return AArch64FrameOffsetCanUpdate |
3588          (SOffset ? 0 : AArch64FrameOffsetIsLegal);
3589 }
3590 
3591 bool llvm::rewriteAArch64FrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
3592                                     unsigned FrameReg, StackOffset &Offset,
3593                                     const AArch64InstrInfo *TII) {
3594   unsigned Opcode = MI.getOpcode();
3595   unsigned ImmIdx = FrameRegIdx + 1;
3596 
3597   if (Opcode == AArch64::ADDSXri || Opcode == AArch64::ADDXri) {
3598     Offset += StackOffset(MI.getOperand(ImmIdx).getImm(), MVT::i8);
3599     emitFrameOffset(*MI.getParent(), MI, MI.getDebugLoc(),
3600                     MI.getOperand(0).getReg(), FrameReg, Offset, TII,
3601                     MachineInstr::NoFlags, (Opcode == AArch64::ADDSXri));
3602     MI.eraseFromParent();
3603     Offset = StackOffset();
3604     return true;
3605   }
3606 
3607   int64_t NewOffset;
3608   unsigned UnscaledOp;
3609   bool UseUnscaledOp;
3610   int Status = isAArch64FrameOffsetLegal(MI, Offset, &UseUnscaledOp,
3611                                          &UnscaledOp, &NewOffset);
3612   if (Status & AArch64FrameOffsetCanUpdate) {
3613     if (Status & AArch64FrameOffsetIsLegal)
3614       // Replace the FrameIndex with FrameReg.
3615       MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false);
3616     if (UseUnscaledOp)
3617       MI.setDesc(TII->get(UnscaledOp));
3618 
3619     MI.getOperand(ImmIdx).ChangeToImmediate(NewOffset);
3620     return !Offset;
3621   }
3622 
3623   return false;
3624 }
3625 
3626 void AArch64InstrInfo::getNoop(MCInst &NopInst) const {
3627   NopInst.setOpcode(AArch64::HINT);
3628   NopInst.addOperand(MCOperand::createImm(0));
3629 }
3630 
3631 // AArch64 supports MachineCombiner.
3632 bool AArch64InstrInfo::useMachineCombiner() const { return true; }
3633 
3634 // True when Opc sets flag
3635 static bool isCombineInstrSettingFlag(unsigned Opc) {
3636   switch (Opc) {
3637   case AArch64::ADDSWrr:
3638   case AArch64::ADDSWri:
3639   case AArch64::ADDSXrr:
3640   case AArch64::ADDSXri:
3641   case AArch64::SUBSWrr:
3642   case AArch64::SUBSXrr:
3643   // Note: MSUB Wd,Wn,Wm,Wi -> Wd = Wi - WnxWm, not Wd=WnxWm - Wi.
3644   case AArch64::SUBSWri:
3645   case AArch64::SUBSXri:
3646     return true;
3647   default:
3648     break;
3649   }
3650   return false;
3651 }
3652 
3653 // 32b Opcodes that can be combined with a MUL
3654 static bool isCombineInstrCandidate32(unsigned Opc) {
3655   switch (Opc) {
3656   case AArch64::ADDWrr:
3657   case AArch64::ADDWri:
3658   case AArch64::SUBWrr:
3659   case AArch64::ADDSWrr:
3660   case AArch64::ADDSWri:
3661   case AArch64::SUBSWrr:
3662   // Note: MSUB Wd,Wn,Wm,Wi -> Wd = Wi - WnxWm, not Wd=WnxWm - Wi.
3663   case AArch64::SUBWri:
3664   case AArch64::SUBSWri:
3665     return true;
3666   default:
3667     break;
3668   }
3669   return false;
3670 }
3671 
3672 // 64b Opcodes that can be combined with a MUL
3673 static bool isCombineInstrCandidate64(unsigned Opc) {
3674   switch (Opc) {
3675   case AArch64::ADDXrr:
3676   case AArch64::ADDXri:
3677   case AArch64::SUBXrr:
3678   case AArch64::ADDSXrr:
3679   case AArch64::ADDSXri:
3680   case AArch64::SUBSXrr:
3681   // Note: MSUB Wd,Wn,Wm,Wi -> Wd = Wi - WnxWm, not Wd=WnxWm - Wi.
3682   case AArch64::SUBXri:
3683   case AArch64::SUBSXri:
3684   case AArch64::ADDv8i8:
3685   case AArch64::ADDv16i8:
3686   case AArch64::ADDv4i16:
3687   case AArch64::ADDv8i16:
3688   case AArch64::ADDv2i32:
3689   case AArch64::ADDv4i32:
3690   case AArch64::SUBv8i8:
3691   case AArch64::SUBv16i8:
3692   case AArch64::SUBv4i16:
3693   case AArch64::SUBv8i16:
3694   case AArch64::SUBv2i32:
3695   case AArch64::SUBv4i32:
3696     return true;
3697   default:
3698     break;
3699   }
3700   return false;
3701 }
3702 
3703 // FP Opcodes that can be combined with a FMUL
3704 static bool isCombineInstrCandidateFP(const MachineInstr &Inst) {
3705   switch (Inst.getOpcode()) {
3706   default:
3707     break;
3708   case AArch64::FADDHrr:
3709   case AArch64::FADDSrr:
3710   case AArch64::FADDDrr:
3711   case AArch64::FADDv4f16:
3712   case AArch64::FADDv8f16:
3713   case AArch64::FADDv2f32:
3714   case AArch64::FADDv2f64:
3715   case AArch64::FADDv4f32:
3716   case AArch64::FSUBHrr:
3717   case AArch64::FSUBSrr:
3718   case AArch64::FSUBDrr:
3719   case AArch64::FSUBv4f16:
3720   case AArch64::FSUBv8f16:
3721   case AArch64::FSUBv2f32:
3722   case AArch64::FSUBv2f64:
3723   case AArch64::FSUBv4f32:
3724     TargetOptions Options = Inst.getParent()->getParent()->getTarget().Options;
3725     return (Options.UnsafeFPMath ||
3726             Options.AllowFPOpFusion == FPOpFusion::Fast);
3727   }
3728   return false;
3729 }
3730 
3731 // Opcodes that can be combined with a MUL
3732 static bool isCombineInstrCandidate(unsigned Opc) {
3733   return (isCombineInstrCandidate32(Opc) || isCombineInstrCandidate64(Opc));
3734 }
3735 
3736 //
3737 // Utility routine that checks if \param MO is defined by an
3738 // \param CombineOpc instruction in the basic block \param MBB
3739 static bool canCombine(MachineBasicBlock &MBB, MachineOperand &MO,
3740                        unsigned CombineOpc, unsigned ZeroReg = 0,
3741                        bool CheckZeroReg = false) {
3742   MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
3743   MachineInstr *MI = nullptr;
3744 
3745   if (MO.isReg() && Register::isVirtualRegister(MO.getReg()))
3746     MI = MRI.getUniqueVRegDef(MO.getReg());
3747   // And it needs to be in the trace (otherwise, it won't have a depth).
3748   if (!MI || MI->getParent() != &MBB || (unsigned)MI->getOpcode() != CombineOpc)
3749     return false;
3750   // Must only used by the user we combine with.
3751   if (!MRI.hasOneNonDBGUse(MI->getOperand(0).getReg()))
3752     return false;
3753 
3754   if (CheckZeroReg) {
3755     assert(MI->getNumOperands() >= 4 && MI->getOperand(0).isReg() &&
3756            MI->getOperand(1).isReg() && MI->getOperand(2).isReg() &&
3757            MI->getOperand(3).isReg() && "MAdd/MSub must have a least 4 regs");
3758     // The third input reg must be zero.
3759     if (MI->getOperand(3).getReg() != ZeroReg)
3760       return false;
3761   }
3762 
3763   return true;
3764 }
3765 
3766 //
3767 // Is \param MO defined by an integer multiply and can be combined?
3768 static bool canCombineWithMUL(MachineBasicBlock &MBB, MachineOperand &MO,
3769                               unsigned MulOpc, unsigned ZeroReg) {
3770   return canCombine(MBB, MO, MulOpc, ZeroReg, true);
3771 }
3772 
3773 //
3774 // Is \param MO defined by a floating-point multiply and can be combined?
3775 static bool canCombineWithFMUL(MachineBasicBlock &MBB, MachineOperand &MO,
3776                                unsigned MulOpc) {
3777   return canCombine(MBB, MO, MulOpc);
3778 }
3779 
3780 // TODO: There are many more machine instruction opcodes to match:
3781 //       1. Other data types (integer, vectors)
3782 //       2. Other math / logic operations (xor, or)
3783 //       3. Other forms of the same operation (intrinsics and other variants)
3784 bool AArch64InstrInfo::isAssociativeAndCommutative(
3785     const MachineInstr &Inst) const {
3786   switch (Inst.getOpcode()) {
3787   case AArch64::FADDDrr:
3788   case AArch64::FADDSrr:
3789   case AArch64::FADDv2f32:
3790   case AArch64::FADDv2f64:
3791   case AArch64::FADDv4f32:
3792   case AArch64::FMULDrr:
3793   case AArch64::FMULSrr:
3794   case AArch64::FMULX32:
3795   case AArch64::FMULX64:
3796   case AArch64::FMULXv2f32:
3797   case AArch64::FMULXv2f64:
3798   case AArch64::FMULXv4f32:
3799   case AArch64::FMULv2f32:
3800   case AArch64::FMULv2f64:
3801   case AArch64::FMULv4f32:
3802     return Inst.getParent()->getParent()->getTarget().Options.UnsafeFPMath;
3803   default:
3804     return false;
3805   }
3806 }
3807 
3808 /// Find instructions that can be turned into madd.
3809 static bool getMaddPatterns(MachineInstr &Root,
3810                             SmallVectorImpl<MachineCombinerPattern> &Patterns) {
3811   unsigned Opc = Root.getOpcode();
3812   MachineBasicBlock &MBB = *Root.getParent();
3813   bool Found = false;
3814 
3815   if (!isCombineInstrCandidate(Opc))
3816     return false;
3817   if (isCombineInstrSettingFlag(Opc)) {
3818     int Cmp_NZCV = Root.findRegisterDefOperandIdx(AArch64::NZCV, true);
3819     // When NZCV is live bail out.
3820     if (Cmp_NZCV == -1)
3821       return false;
3822     unsigned NewOpc = convertToNonFlagSettingOpc(Root);
3823     // When opcode can't change bail out.
3824     // CHECKME: do we miss any cases for opcode conversion?
3825     if (NewOpc == Opc)
3826       return false;
3827     Opc = NewOpc;
3828   }
3829 
3830   auto setFound = [&](int Opcode, int Operand, unsigned ZeroReg,
3831                       MachineCombinerPattern Pattern) {
3832     if (canCombineWithMUL(MBB, Root.getOperand(Operand), Opcode, ZeroReg)) {
3833       Patterns.push_back(Pattern);
3834       Found = true;
3835     }
3836   };
3837 
3838   auto setVFound = [&](int Opcode, int Operand, MachineCombinerPattern Pattern) {
3839     if (canCombine(MBB, Root.getOperand(Operand), Opcode)) {
3840       Patterns.push_back(Pattern);
3841       Found = true;
3842     }
3843   };
3844 
3845   typedef MachineCombinerPattern MCP;
3846 
3847   switch (Opc) {
3848   default:
3849     break;
3850   case AArch64::ADDWrr:
3851     assert(Root.getOperand(1).isReg() && Root.getOperand(2).isReg() &&
3852            "ADDWrr does not have register operands");
3853     setFound(AArch64::MADDWrrr, 1, AArch64::WZR, MCP::MULADDW_OP1);
3854     setFound(AArch64::MADDWrrr, 2, AArch64::WZR, MCP::MULADDW_OP2);
3855     break;
3856   case AArch64::ADDXrr:
3857     setFound(AArch64::MADDXrrr, 1, AArch64::XZR, MCP::MULADDX_OP1);
3858     setFound(AArch64::MADDXrrr, 2, AArch64::XZR, MCP::MULADDX_OP2);
3859     break;
3860   case AArch64::SUBWrr:
3861     setFound(AArch64::MADDWrrr, 1, AArch64::WZR, MCP::MULSUBW_OP1);
3862     setFound(AArch64::MADDWrrr, 2, AArch64::WZR, MCP::MULSUBW_OP2);
3863     break;
3864   case AArch64::SUBXrr:
3865     setFound(AArch64::MADDXrrr, 1, AArch64::XZR, MCP::MULSUBX_OP1);
3866     setFound(AArch64::MADDXrrr, 2, AArch64::XZR, MCP::MULSUBX_OP2);
3867     break;
3868   case AArch64::ADDWri:
3869     setFound(AArch64::MADDWrrr, 1, AArch64::WZR, MCP::MULADDWI_OP1);
3870     break;
3871   case AArch64::ADDXri:
3872     setFound(AArch64::MADDXrrr, 1, AArch64::XZR, MCP::MULADDXI_OP1);
3873     break;
3874   case AArch64::SUBWri:
3875     setFound(AArch64::MADDWrrr, 1, AArch64::WZR, MCP::MULSUBWI_OP1);
3876     break;
3877   case AArch64::SUBXri:
3878     setFound(AArch64::MADDXrrr, 1, AArch64::XZR, MCP::MULSUBXI_OP1);
3879     break;
3880   case AArch64::ADDv8i8:
3881     setVFound(AArch64::MULv8i8, 1, MCP::MULADDv8i8_OP1);
3882     setVFound(AArch64::MULv8i8, 2, MCP::MULADDv8i8_OP2);
3883     break;
3884   case AArch64::ADDv16i8:
3885     setVFound(AArch64::MULv16i8, 1, MCP::MULADDv16i8_OP1);
3886     setVFound(AArch64::MULv16i8, 2, MCP::MULADDv16i8_OP2);
3887     break;
3888   case AArch64::ADDv4i16:
3889     setVFound(AArch64::MULv4i16, 1, MCP::MULADDv4i16_OP1);
3890     setVFound(AArch64::MULv4i16, 2, MCP::MULADDv4i16_OP2);
3891     setVFound(AArch64::MULv4i16_indexed, 1, MCP::MULADDv4i16_indexed_OP1);
3892     setVFound(AArch64::MULv4i16_indexed, 2, MCP::MULADDv4i16_indexed_OP2);
3893     break;
3894   case AArch64::ADDv8i16:
3895     setVFound(AArch64::MULv8i16, 1, MCP::MULADDv8i16_OP1);
3896     setVFound(AArch64::MULv8i16, 2, MCP::MULADDv8i16_OP2);
3897     setVFound(AArch64::MULv8i16_indexed, 1, MCP::MULADDv8i16_indexed_OP1);
3898     setVFound(AArch64::MULv8i16_indexed, 2, MCP::MULADDv8i16_indexed_OP2);
3899     break;
3900   case AArch64::ADDv2i32:
3901     setVFound(AArch64::MULv2i32, 1, MCP::MULADDv2i32_OP1);
3902     setVFound(AArch64::MULv2i32, 2, MCP::MULADDv2i32_OP2);
3903     setVFound(AArch64::MULv2i32_indexed, 1, MCP::MULADDv2i32_indexed_OP1);
3904     setVFound(AArch64::MULv2i32_indexed, 2, MCP::MULADDv2i32_indexed_OP2);
3905     break;
3906   case AArch64::ADDv4i32:
3907     setVFound(AArch64::MULv4i32, 1, MCP::MULADDv4i32_OP1);
3908     setVFound(AArch64::MULv4i32, 2, MCP::MULADDv4i32_OP2);
3909     setVFound(AArch64::MULv4i32_indexed, 1, MCP::MULADDv4i32_indexed_OP1);
3910     setVFound(AArch64::MULv4i32_indexed, 2, MCP::MULADDv4i32_indexed_OP2);
3911     break;
3912   case AArch64::SUBv8i8:
3913     setVFound(AArch64::MULv8i8, 1, MCP::MULSUBv8i8_OP1);
3914     setVFound(AArch64::MULv8i8, 2, MCP::MULSUBv8i8_OP2);
3915     break;
3916   case AArch64::SUBv16i8:
3917     setVFound(AArch64::MULv16i8, 1, MCP::MULSUBv16i8_OP1);
3918     setVFound(AArch64::MULv16i8, 2, MCP::MULSUBv16i8_OP2);
3919     break;
3920   case AArch64::SUBv4i16:
3921     setVFound(AArch64::MULv4i16, 1, MCP::MULSUBv4i16_OP1);
3922     setVFound(AArch64::MULv4i16, 2, MCP::MULSUBv4i16_OP2);
3923     setVFound(AArch64::MULv4i16_indexed, 1, MCP::MULSUBv4i16_indexed_OP1);
3924     setVFound(AArch64::MULv4i16_indexed, 2, MCP::MULSUBv4i16_indexed_OP2);
3925     break;
3926   case AArch64::SUBv8i16:
3927     setVFound(AArch64::MULv8i16, 1, MCP::MULSUBv8i16_OP1);
3928     setVFound(AArch64::MULv8i16, 2, MCP::MULSUBv8i16_OP2);
3929     setVFound(AArch64::MULv8i16_indexed, 1, MCP::MULSUBv8i16_indexed_OP1);
3930     setVFound(AArch64::MULv8i16_indexed, 2, MCP::MULSUBv8i16_indexed_OP2);
3931     break;
3932   case AArch64::SUBv2i32:
3933     setVFound(AArch64::MULv2i32, 1, MCP::MULSUBv2i32_OP1);
3934     setVFound(AArch64::MULv2i32, 2, MCP::MULSUBv2i32_OP2);
3935     setVFound(AArch64::MULv2i32_indexed, 1, MCP::MULSUBv2i32_indexed_OP1);
3936     setVFound(AArch64::MULv2i32_indexed, 2, MCP::MULSUBv2i32_indexed_OP2);
3937     break;
3938   case AArch64::SUBv4i32:
3939     setVFound(AArch64::MULv4i32, 1, MCP::MULSUBv4i32_OP1);
3940     setVFound(AArch64::MULv4i32, 2, MCP::MULSUBv4i32_OP2);
3941     setVFound(AArch64::MULv4i32_indexed, 1, MCP::MULSUBv4i32_indexed_OP1);
3942     setVFound(AArch64::MULv4i32_indexed, 2, MCP::MULSUBv4i32_indexed_OP2);
3943     break;
3944   }
3945   return Found;
3946 }
3947 /// Floating-Point Support
3948 
3949 /// Find instructions that can be turned into madd.
3950 static bool getFMAPatterns(MachineInstr &Root,
3951                            SmallVectorImpl<MachineCombinerPattern> &Patterns) {
3952 
3953   if (!isCombineInstrCandidateFP(Root))
3954     return false;
3955 
3956   MachineBasicBlock &MBB = *Root.getParent();
3957   bool Found = false;
3958 
3959   auto Match = [&](int Opcode, int Operand,
3960                    MachineCombinerPattern Pattern) -> bool {
3961     if (canCombineWithFMUL(MBB, Root.getOperand(Operand), Opcode)) {
3962       Patterns.push_back(Pattern);
3963       return true;
3964     }
3965     return false;
3966   };
3967 
3968   typedef MachineCombinerPattern MCP;
3969 
3970   switch (Root.getOpcode()) {
3971   default:
3972     assert(false && "Unsupported FP instruction in combiner\n");
3973     break;
3974   case AArch64::FADDHrr:
3975     assert(Root.getOperand(1).isReg() && Root.getOperand(2).isReg() &&
3976            "FADDHrr does not have register operands");
3977 
3978     Found  = Match(AArch64::FMULHrr, 1, MCP::FMULADDH_OP1);
3979     Found |= Match(AArch64::FMULHrr, 2, MCP::FMULADDH_OP2);
3980     break;
3981   case AArch64::FADDSrr:
3982     assert(Root.getOperand(1).isReg() && Root.getOperand(2).isReg() &&
3983            "FADDSrr does not have register operands");
3984 
3985     Found |= Match(AArch64::FMULSrr, 1, MCP::FMULADDS_OP1) ||
3986              Match(AArch64::FMULv1i32_indexed, 1, MCP::FMLAv1i32_indexed_OP1);
3987 
3988     Found |= Match(AArch64::FMULSrr, 2, MCP::FMULADDS_OP2) ||
3989              Match(AArch64::FMULv1i32_indexed, 2, MCP::FMLAv1i32_indexed_OP2);
3990     break;
3991   case AArch64::FADDDrr:
3992     Found |= Match(AArch64::FMULDrr, 1, MCP::FMULADDD_OP1) ||
3993              Match(AArch64::FMULv1i64_indexed, 1, MCP::FMLAv1i64_indexed_OP1);
3994 
3995     Found |= Match(AArch64::FMULDrr, 2, MCP::FMULADDD_OP2) ||
3996              Match(AArch64::FMULv1i64_indexed, 2, MCP::FMLAv1i64_indexed_OP2);
3997     break;
3998   case AArch64::FADDv4f16:
3999     Found |= Match(AArch64::FMULv4i16_indexed, 1, MCP::FMLAv4i16_indexed_OP1) ||
4000              Match(AArch64::FMULv4f16, 1, MCP::FMLAv4f16_OP1);
4001 
4002     Found |= Match(AArch64::FMULv4i16_indexed, 2, MCP::FMLAv4i16_indexed_OP2) ||
4003              Match(AArch64::FMULv4f16, 2, MCP::FMLAv4f16_OP2);
4004     break;
4005   case AArch64::FADDv8f16:
4006     Found |= Match(AArch64::FMULv8i16_indexed, 1, MCP::FMLAv8i16_indexed_OP1) ||
4007              Match(AArch64::FMULv8f16, 1, MCP::FMLAv8f16_OP1);
4008 
4009     Found |= Match(AArch64::FMULv8i16_indexed, 2, MCP::FMLAv8i16_indexed_OP2) ||
4010              Match(AArch64::FMULv8f16, 2, MCP::FMLAv8f16_OP2);
4011     break;
4012   case AArch64::FADDv2f32:
4013     Found |= Match(AArch64::FMULv2i32_indexed, 1, MCP::FMLAv2i32_indexed_OP1) ||
4014              Match(AArch64::FMULv2f32, 1, MCP::FMLAv2f32_OP1);
4015 
4016     Found |= Match(AArch64::FMULv2i32_indexed, 2, MCP::FMLAv2i32_indexed_OP2) ||
4017              Match(AArch64::FMULv2f32, 2, MCP::FMLAv2f32_OP2);
4018     break;
4019   case AArch64::FADDv2f64:
4020     Found |= Match(AArch64::FMULv2i64_indexed, 1, MCP::FMLAv2i64_indexed_OP1) ||
4021              Match(AArch64::FMULv2f64, 1, MCP::FMLAv2f64_OP1);
4022 
4023     Found |= Match(AArch64::FMULv2i64_indexed, 2, MCP::FMLAv2i64_indexed_OP2) ||
4024              Match(AArch64::FMULv2f64, 2, MCP::FMLAv2f64_OP2);
4025     break;
4026   case AArch64::FADDv4f32:
4027     Found |= Match(AArch64::FMULv4i32_indexed, 1, MCP::FMLAv4i32_indexed_OP1) ||
4028              Match(AArch64::FMULv4f32, 1, MCP::FMLAv4f32_OP1);
4029 
4030     Found |= Match(AArch64::FMULv4i32_indexed, 2, MCP::FMLAv4i32_indexed_OP2) ||
4031              Match(AArch64::FMULv4f32, 2, MCP::FMLAv4f32_OP2);
4032     break;
4033   case AArch64::FSUBHrr:
4034     Found  = Match(AArch64::FMULHrr, 1, MCP::FMULSUBH_OP1);
4035     Found |= Match(AArch64::FMULHrr, 2, MCP::FMULSUBH_OP2);
4036     Found |= Match(AArch64::FNMULHrr, 1, MCP::FNMULSUBH_OP1);
4037     break;
4038   case AArch64::FSUBSrr:
4039     Found = Match(AArch64::FMULSrr, 1, MCP::FMULSUBS_OP1);
4040 
4041     Found |= Match(AArch64::FMULSrr, 2, MCP::FMULSUBS_OP2) ||
4042              Match(AArch64::FMULv1i32_indexed, 2, MCP::FMLSv1i32_indexed_OP2);
4043 
4044     Found |= Match(AArch64::FNMULSrr, 1, MCP::FNMULSUBS_OP1);
4045     break;
4046   case AArch64::FSUBDrr:
4047     Found = Match(AArch64::FMULDrr, 1, MCP::FMULSUBD_OP1);
4048 
4049     Found |= Match(AArch64::FMULDrr, 2, MCP::FMULSUBD_OP2) ||
4050              Match(AArch64::FMULv1i64_indexed, 2, MCP::FMLSv1i64_indexed_OP2);
4051 
4052     Found |= Match(AArch64::FNMULDrr, 1, MCP::FNMULSUBD_OP1);
4053     break;
4054   case AArch64::FSUBv4f16:
4055     Found |= Match(AArch64::FMULv4i16_indexed, 2, MCP::FMLSv4i16_indexed_OP2) ||
4056              Match(AArch64::FMULv4f16, 2, MCP::FMLSv4f16_OP2);
4057 
4058     Found |= Match(AArch64::FMULv4i16_indexed, 1, MCP::FMLSv4i16_indexed_OP1) ||
4059              Match(AArch64::FMULv4f16, 1, MCP::FMLSv4f16_OP1);
4060     break;
4061   case AArch64::FSUBv8f16:
4062     Found |= Match(AArch64::FMULv8i16_indexed, 2, MCP::FMLSv8i16_indexed_OP2) ||
4063              Match(AArch64::FMULv8f16, 2, MCP::FMLSv8f16_OP2);
4064 
4065     Found |= Match(AArch64::FMULv8i16_indexed, 1, MCP::FMLSv8i16_indexed_OP1) ||
4066              Match(AArch64::FMULv8f16, 1, MCP::FMLSv8f16_OP1);
4067     break;
4068   case AArch64::FSUBv2f32:
4069     Found |= Match(AArch64::FMULv2i32_indexed, 2, MCP::FMLSv2i32_indexed_OP2) ||
4070              Match(AArch64::FMULv2f32, 2, MCP::FMLSv2f32_OP2);
4071 
4072     Found |= Match(AArch64::FMULv2i32_indexed, 1, MCP::FMLSv2i32_indexed_OP1) ||
4073              Match(AArch64::FMULv2f32, 1, MCP::FMLSv2f32_OP1);
4074     break;
4075   case AArch64::FSUBv2f64:
4076     Found |= Match(AArch64::FMULv2i64_indexed, 2, MCP::FMLSv2i64_indexed_OP2) ||
4077              Match(AArch64::FMULv2f64, 2, MCP::FMLSv2f64_OP2);
4078 
4079     Found |= Match(AArch64::FMULv2i64_indexed, 1, MCP::FMLSv2i64_indexed_OP1) ||
4080              Match(AArch64::FMULv2f64, 1, MCP::FMLSv2f64_OP1);
4081     break;
4082   case AArch64::FSUBv4f32:
4083     Found |= Match(AArch64::FMULv4i32_indexed, 2, MCP::FMLSv4i32_indexed_OP2) ||
4084              Match(AArch64::FMULv4f32, 2, MCP::FMLSv4f32_OP2);
4085 
4086     Found |= Match(AArch64::FMULv4i32_indexed, 1, MCP::FMLSv4i32_indexed_OP1) ||
4087              Match(AArch64::FMULv4f32, 1, MCP::FMLSv4f32_OP1);
4088     break;
4089   }
4090   return Found;
4091 }
4092 
4093 /// Return true when a code sequence can improve throughput. It
4094 /// should be called only for instructions in loops.
4095 /// \param Pattern - combiner pattern
4096 bool AArch64InstrInfo::isThroughputPattern(
4097     MachineCombinerPattern Pattern) const {
4098   switch (Pattern) {
4099   default:
4100     break;
4101   case MachineCombinerPattern::FMULADDH_OP1:
4102   case MachineCombinerPattern::FMULADDH_OP2:
4103   case MachineCombinerPattern::FMULSUBH_OP1:
4104   case MachineCombinerPattern::FMULSUBH_OP2:
4105   case MachineCombinerPattern::FMULADDS_OP1:
4106   case MachineCombinerPattern::FMULADDS_OP2:
4107   case MachineCombinerPattern::FMULSUBS_OP1:
4108   case MachineCombinerPattern::FMULSUBS_OP2:
4109   case MachineCombinerPattern::FMULADDD_OP1:
4110   case MachineCombinerPattern::FMULADDD_OP2:
4111   case MachineCombinerPattern::FMULSUBD_OP1:
4112   case MachineCombinerPattern::FMULSUBD_OP2:
4113   case MachineCombinerPattern::FNMULSUBH_OP1:
4114   case MachineCombinerPattern::FNMULSUBS_OP1:
4115   case MachineCombinerPattern::FNMULSUBD_OP1:
4116   case MachineCombinerPattern::FMLAv4i16_indexed_OP1:
4117   case MachineCombinerPattern::FMLAv4i16_indexed_OP2:
4118   case MachineCombinerPattern::FMLAv8i16_indexed_OP1:
4119   case MachineCombinerPattern::FMLAv8i16_indexed_OP2:
4120   case MachineCombinerPattern::FMLAv1i32_indexed_OP1:
4121   case MachineCombinerPattern::FMLAv1i32_indexed_OP2:
4122   case MachineCombinerPattern::FMLAv1i64_indexed_OP1:
4123   case MachineCombinerPattern::FMLAv1i64_indexed_OP2:
4124   case MachineCombinerPattern::FMLAv4f16_OP2:
4125   case MachineCombinerPattern::FMLAv4f16_OP1:
4126   case MachineCombinerPattern::FMLAv8f16_OP1:
4127   case MachineCombinerPattern::FMLAv8f16_OP2:
4128   case MachineCombinerPattern::FMLAv2f32_OP2:
4129   case MachineCombinerPattern::FMLAv2f32_OP1:
4130   case MachineCombinerPattern::FMLAv2f64_OP1:
4131   case MachineCombinerPattern::FMLAv2f64_OP2:
4132   case MachineCombinerPattern::FMLAv2i32_indexed_OP1:
4133   case MachineCombinerPattern::FMLAv2i32_indexed_OP2:
4134   case MachineCombinerPattern::FMLAv2i64_indexed_OP1:
4135   case MachineCombinerPattern::FMLAv2i64_indexed_OP2:
4136   case MachineCombinerPattern::FMLAv4f32_OP1:
4137   case MachineCombinerPattern::FMLAv4f32_OP2:
4138   case MachineCombinerPattern::FMLAv4i32_indexed_OP1:
4139   case MachineCombinerPattern::FMLAv4i32_indexed_OP2:
4140   case MachineCombinerPattern::FMLSv4i16_indexed_OP1:
4141   case MachineCombinerPattern::FMLSv4i16_indexed_OP2:
4142   case MachineCombinerPattern::FMLSv8i16_indexed_OP1:
4143   case MachineCombinerPattern::FMLSv8i16_indexed_OP2:
4144   case MachineCombinerPattern::FMLSv1i32_indexed_OP2:
4145   case MachineCombinerPattern::FMLSv1i64_indexed_OP2:
4146   case MachineCombinerPattern::FMLSv2i32_indexed_OP2:
4147   case MachineCombinerPattern::FMLSv2i64_indexed_OP2:
4148   case MachineCombinerPattern::FMLSv4f16_OP1:
4149   case MachineCombinerPattern::FMLSv4f16_OP2:
4150   case MachineCombinerPattern::FMLSv8f16_OP1:
4151   case MachineCombinerPattern::FMLSv8f16_OP2:
4152   case MachineCombinerPattern::FMLSv2f32_OP2:
4153   case MachineCombinerPattern::FMLSv2f64_OP2:
4154   case MachineCombinerPattern::FMLSv4i32_indexed_OP2:
4155   case MachineCombinerPattern::FMLSv4f32_OP2:
4156   case MachineCombinerPattern::MULADDv8i8_OP1:
4157   case MachineCombinerPattern::MULADDv8i8_OP2:
4158   case MachineCombinerPattern::MULADDv16i8_OP1:
4159   case MachineCombinerPattern::MULADDv16i8_OP2:
4160   case MachineCombinerPattern::MULADDv4i16_OP1:
4161   case MachineCombinerPattern::MULADDv4i16_OP2:
4162   case MachineCombinerPattern::MULADDv8i16_OP1:
4163   case MachineCombinerPattern::MULADDv8i16_OP2:
4164   case MachineCombinerPattern::MULADDv2i32_OP1:
4165   case MachineCombinerPattern::MULADDv2i32_OP2:
4166   case MachineCombinerPattern::MULADDv4i32_OP1:
4167   case MachineCombinerPattern::MULADDv4i32_OP2:
4168   case MachineCombinerPattern::MULSUBv8i8_OP1:
4169   case MachineCombinerPattern::MULSUBv8i8_OP2:
4170   case MachineCombinerPattern::MULSUBv16i8_OP1:
4171   case MachineCombinerPattern::MULSUBv16i8_OP2:
4172   case MachineCombinerPattern::MULSUBv4i16_OP1:
4173   case MachineCombinerPattern::MULSUBv4i16_OP2:
4174   case MachineCombinerPattern::MULSUBv8i16_OP1:
4175   case MachineCombinerPattern::MULSUBv8i16_OP2:
4176   case MachineCombinerPattern::MULSUBv2i32_OP1:
4177   case MachineCombinerPattern::MULSUBv2i32_OP2:
4178   case MachineCombinerPattern::MULSUBv4i32_OP1:
4179   case MachineCombinerPattern::MULSUBv4i32_OP2:
4180   case MachineCombinerPattern::MULADDv4i16_indexed_OP1:
4181   case MachineCombinerPattern::MULADDv4i16_indexed_OP2:
4182   case MachineCombinerPattern::MULADDv8i16_indexed_OP1:
4183   case MachineCombinerPattern::MULADDv8i16_indexed_OP2:
4184   case MachineCombinerPattern::MULADDv2i32_indexed_OP1:
4185   case MachineCombinerPattern::MULADDv2i32_indexed_OP2:
4186   case MachineCombinerPattern::MULADDv4i32_indexed_OP1:
4187   case MachineCombinerPattern::MULADDv4i32_indexed_OP2:
4188   case MachineCombinerPattern::MULSUBv4i16_indexed_OP1:
4189   case MachineCombinerPattern::MULSUBv4i16_indexed_OP2:
4190   case MachineCombinerPattern::MULSUBv8i16_indexed_OP1:
4191   case MachineCombinerPattern::MULSUBv8i16_indexed_OP2:
4192   case MachineCombinerPattern::MULSUBv2i32_indexed_OP1:
4193   case MachineCombinerPattern::MULSUBv2i32_indexed_OP2:
4194   case MachineCombinerPattern::MULSUBv4i32_indexed_OP1:
4195   case MachineCombinerPattern::MULSUBv4i32_indexed_OP2:
4196     return true;
4197   } // end switch (Pattern)
4198   return false;
4199 }
4200 /// Return true when there is potentially a faster code sequence for an
4201 /// instruction chain ending in \p Root. All potential patterns are listed in
4202 /// the \p Pattern vector. Pattern should be sorted in priority order since the
4203 /// pattern evaluator stops checking as soon as it finds a faster sequence.
4204 
4205 bool AArch64InstrInfo::getMachineCombinerPatterns(
4206     MachineInstr &Root,
4207     SmallVectorImpl<MachineCombinerPattern> &Patterns) const {
4208   // Integer patterns
4209   if (getMaddPatterns(Root, Patterns))
4210     return true;
4211   // Floating point patterns
4212   if (getFMAPatterns(Root, Patterns))
4213     return true;
4214 
4215   return TargetInstrInfo::getMachineCombinerPatterns(Root, Patterns);
4216 }
4217 
/// Operand layout genFusedMultiply uses when it builds the fused instruction.
enum class FMAInstKind {
  Default,    ///< Operands: multiply source 0, multiply source 1, addend.
  Indexed,    ///< Operands: addend, multiply source 0, multiply source 1,
              ///< then the immediate copied from operand 3 of the multiply.
  Accumulator ///< Operands: addend, multiply source 0, multiply source 1.
};
4219 /// genFusedMultiply - Generate fused multiply instructions.
4220 /// This function supports both integer and floating point instructions.
4221 /// A typical example:
4222 ///  F|MUL I=A,B,0
4223 ///  F|ADD R,I,C
4224 ///  ==> F|MADD R,A,B,C
4225 /// \param MF Containing MachineFunction
4226 /// \param MRI Register information
4227 /// \param TII Target information
4228 /// \param Root is the F|ADD instruction
4229 /// \param [out] InsInstrs is a vector of machine instructions and will
4230 /// contain the generated madd instruction
4231 /// \param IdxMulOpd is index of operand in Root that is the result of
4232 /// the F|MUL. In the example above IdxMulOpd is 1.
4233 /// \param MaddOpc the opcode fo the f|madd instruction
4234 /// \param RC Register class of operands
4235 /// \param kind of fma instruction (addressing mode) to be generated
4236 /// \param ReplacedAddend is the result register from the instruction
4237 /// replacing the non-combined operand, if any.
4238 static MachineInstr *
4239 genFusedMultiply(MachineFunction &MF, MachineRegisterInfo &MRI,
4240                  const TargetInstrInfo *TII, MachineInstr &Root,
4241                  SmallVectorImpl<MachineInstr *> &InsInstrs, unsigned IdxMulOpd,
4242                  unsigned MaddOpc, const TargetRegisterClass *RC,
4243                  FMAInstKind kind = FMAInstKind::Default,
4244                  const Register *ReplacedAddend = nullptr) {
4245   assert(IdxMulOpd == 1 || IdxMulOpd == 2);
4246 
4247   unsigned IdxOtherOpd = IdxMulOpd == 1 ? 2 : 1;
4248   MachineInstr *MUL = MRI.getUniqueVRegDef(Root.getOperand(IdxMulOpd).getReg());
4249   Register ResultReg = Root.getOperand(0).getReg();
4250   Register SrcReg0 = MUL->getOperand(1).getReg();
4251   bool Src0IsKill = MUL->getOperand(1).isKill();
4252   Register SrcReg1 = MUL->getOperand(2).getReg();
4253   bool Src1IsKill = MUL->getOperand(2).isKill();
4254 
4255   unsigned SrcReg2;
4256   bool Src2IsKill;
4257   if (ReplacedAddend) {
4258     // If we just generated a new addend, we must be it's only use.
4259     SrcReg2 = *ReplacedAddend;
4260     Src2IsKill = true;
4261   } else {
4262     SrcReg2 = Root.getOperand(IdxOtherOpd).getReg();
4263     Src2IsKill = Root.getOperand(IdxOtherOpd).isKill();
4264   }
4265 
4266   if (Register::isVirtualRegister(ResultReg))
4267     MRI.constrainRegClass(ResultReg, RC);
4268   if (Register::isVirtualRegister(SrcReg0))
4269     MRI.constrainRegClass(SrcReg0, RC);
4270   if (Register::isVirtualRegister(SrcReg1))
4271     MRI.constrainRegClass(SrcReg1, RC);
4272   if (Register::isVirtualRegister(SrcReg2))
4273     MRI.constrainRegClass(SrcReg2, RC);
4274 
4275   MachineInstrBuilder MIB;
4276   if (kind == FMAInstKind::Default)
4277     MIB = BuildMI(MF, Root.getDebugLoc(), TII->get(MaddOpc), ResultReg)
4278               .addReg(SrcReg0, getKillRegState(Src0IsKill))
4279               .addReg(SrcReg1, getKillRegState(Src1IsKill))
4280               .addReg(SrcReg2, getKillRegState(Src2IsKill));
4281   else if (kind == FMAInstKind::Indexed)
4282     MIB = BuildMI(MF, Root.getDebugLoc(), TII->get(MaddOpc), ResultReg)
4283               .addReg(SrcReg2, getKillRegState(Src2IsKill))
4284               .addReg(SrcReg0, getKillRegState(Src0IsKill))
4285               .addReg(SrcReg1, getKillRegState(Src1IsKill))
4286               .addImm(MUL->getOperand(3).getImm());
4287   else if (kind == FMAInstKind::Accumulator)
4288     MIB = BuildMI(MF, Root.getDebugLoc(), TII->get(MaddOpc), ResultReg)
4289               .addReg(SrcReg2, getKillRegState(Src2IsKill))
4290               .addReg(SrcReg0, getKillRegState(Src0IsKill))
4291               .addReg(SrcReg1, getKillRegState(Src1IsKill));
4292   else
4293     assert(false && "Invalid FMA instruction kind \n");
4294   // Insert the MADD (MADD, FMA, FMS, FMLA, FMSL)
4295   InsInstrs.push_back(MIB);
4296   return MUL;
4297 }
4298 
4299 /// genFusedMultiplyAcc - Helper to generate fused multiply accumulate
4300 /// instructions.
4301 ///
4302 /// \see genFusedMultiply
4303 static MachineInstr *genFusedMultiplyAcc(
4304     MachineFunction &MF, MachineRegisterInfo &MRI, const TargetInstrInfo *TII,
4305     MachineInstr &Root, SmallVectorImpl<MachineInstr *> &InsInstrs,
4306     unsigned IdxMulOpd, unsigned MaddOpc, const TargetRegisterClass *RC) {
4307   return genFusedMultiply(MF, MRI, TII, Root, InsInstrs, IdxMulOpd, MaddOpc, RC,
4308                           FMAInstKind::Accumulator);
4309 }
4310 
4311 /// genNeg - Helper to generate an intermediate negation of the second operand
4312 /// of Root
4313 static Register genNeg(MachineFunction &MF, MachineRegisterInfo &MRI,
4314                        const TargetInstrInfo *TII, MachineInstr &Root,
4315                        SmallVectorImpl<MachineInstr *> &InsInstrs,
4316                        DenseMap<unsigned, unsigned> &InstrIdxForVirtReg,
4317                        unsigned MnegOpc, const TargetRegisterClass *RC) {
4318   Register NewVR = MRI.createVirtualRegister(RC);
4319   MachineInstrBuilder MIB =
4320       BuildMI(MF, Root.getDebugLoc(), TII->get(MnegOpc), NewVR)
4321           .add(Root.getOperand(2));
4322   InsInstrs.push_back(MIB);
4323 
4324   assert(InstrIdxForVirtReg.empty());
4325   InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
4326 
4327   return NewVR;
4328 }
4329 
4330 /// genFusedMultiplyAccNeg - Helper to generate fused multiply accumulate
4331 /// instructions with an additional negation of the accumulator
4332 static MachineInstr *genFusedMultiplyAccNeg(
4333     MachineFunction &MF, MachineRegisterInfo &MRI, const TargetInstrInfo *TII,
4334     MachineInstr &Root, SmallVectorImpl<MachineInstr *> &InsInstrs,
4335     DenseMap<unsigned, unsigned> &InstrIdxForVirtReg, unsigned IdxMulOpd,
4336     unsigned MaddOpc, unsigned MnegOpc, const TargetRegisterClass *RC) {
4337   assert(IdxMulOpd == 1);
4338 
4339   Register NewVR =
4340       genNeg(MF, MRI, TII, Root, InsInstrs, InstrIdxForVirtReg, MnegOpc, RC);
4341   return genFusedMultiply(MF, MRI, TII, Root, InsInstrs, IdxMulOpd, MaddOpc, RC,
4342                           FMAInstKind::Accumulator, &NewVR);
4343 }
4344 
4345 /// genFusedMultiplyIdx - Helper to generate fused multiply accumulate
4346 /// instructions.
4347 ///
4348 /// \see genFusedMultiply
4349 static MachineInstr *genFusedMultiplyIdx(
4350     MachineFunction &MF, MachineRegisterInfo &MRI, const TargetInstrInfo *TII,
4351     MachineInstr &Root, SmallVectorImpl<MachineInstr *> &InsInstrs,
4352     unsigned IdxMulOpd, unsigned MaddOpc, const TargetRegisterClass *RC) {
4353   return genFusedMultiply(MF, MRI, TII, Root, InsInstrs, IdxMulOpd, MaddOpc, RC,
4354                           FMAInstKind::Indexed);
4355 }
4356 
4357 /// genFusedMultiplyAccNeg - Helper to generate fused multiply accumulate
4358 /// instructions with an additional negation of the accumulator
4359 static MachineInstr *genFusedMultiplyIdxNeg(
4360     MachineFunction &MF, MachineRegisterInfo &MRI, const TargetInstrInfo *TII,
4361     MachineInstr &Root, SmallVectorImpl<MachineInstr *> &InsInstrs,
4362     DenseMap<unsigned, unsigned> &InstrIdxForVirtReg, unsigned IdxMulOpd,
4363     unsigned MaddOpc, unsigned MnegOpc, const TargetRegisterClass *RC) {
4364   assert(IdxMulOpd == 1);
4365 
4366   Register NewVR =
4367       genNeg(MF, MRI, TII, Root, InsInstrs, InstrIdxForVirtReg, MnegOpc, RC);
4368 
4369   return genFusedMultiply(MF, MRI, TII, Root, InsInstrs, IdxMulOpd, MaddOpc, RC,
4370                           FMAInstKind::Indexed, &NewVR);
4371 }
4372 
4373 /// genMaddR - Generate madd instruction and combine mul and add using
4374 /// an extra virtual register
4375 /// Example - an ADD intermediate needs to be stored in a register:
4376 ///   MUL I=A,B,0
4377 ///   ADD R,I,Imm
4378 ///   ==> ORR  V, ZR, Imm
4379 ///   ==> MADD R,A,B,V
4380 /// \param MF Containing MachineFunction
4381 /// \param MRI Register information
4382 /// \param TII Target information
4383 /// \param Root is the ADD instruction
4384 /// \param [out] InsInstrs is a vector of machine instructions and will
4385 /// contain the generated madd instruction
4386 /// \param IdxMulOpd is index of operand in Root that is the result of
4387 /// the MUL. In the example above IdxMulOpd is 1.
4388 /// \param MaddOpc the opcode fo the madd instruction
4389 /// \param VR is a virtual register that holds the value of an ADD operand
4390 /// (V in the example above).
4391 /// \param RC Register class of operands
4392 static MachineInstr *genMaddR(MachineFunction &MF, MachineRegisterInfo &MRI,
4393                               const TargetInstrInfo *TII, MachineInstr &Root,
4394                               SmallVectorImpl<MachineInstr *> &InsInstrs,
4395                               unsigned IdxMulOpd, unsigned MaddOpc, unsigned VR,
4396                               const TargetRegisterClass *RC) {
4397   assert(IdxMulOpd == 1 || IdxMulOpd == 2);
4398 
4399   MachineInstr *MUL = MRI.getUniqueVRegDef(Root.getOperand(IdxMulOpd).getReg());
4400   Register ResultReg = Root.getOperand(0).getReg();
4401   Register SrcReg0 = MUL->getOperand(1).getReg();
4402   bool Src0IsKill = MUL->getOperand(1).isKill();
4403   Register SrcReg1 = MUL->getOperand(2).getReg();
4404   bool Src1IsKill = MUL->getOperand(2).isKill();
4405 
4406   if (Register::isVirtualRegister(ResultReg))
4407     MRI.constrainRegClass(ResultReg, RC);
4408   if (Register::isVirtualRegister(SrcReg0))
4409     MRI.constrainRegClass(SrcReg0, RC);
4410   if (Register::isVirtualRegister(SrcReg1))
4411     MRI.constrainRegClass(SrcReg1, RC);
4412   if (Register::isVirtualRegister(VR))
4413     MRI.constrainRegClass(VR, RC);
4414 
4415   MachineInstrBuilder MIB =
4416       BuildMI(MF, Root.getDebugLoc(), TII->get(MaddOpc), ResultReg)
4417           .addReg(SrcReg0, getKillRegState(Src0IsKill))
4418           .addReg(SrcReg1, getKillRegState(Src1IsKill))
4419           .addReg(VR);
4420   // Insert the MADD
4421   InsInstrs.push_back(MIB);
4422   return MUL;
4423 }
4424 
4425 /// When getMachineCombinerPatterns() finds potential patterns,
4426 /// this function generates the instructions that could replace the
4427 /// original code sequence
4428 void AArch64InstrInfo::genAlternativeCodeSequence(
4429     MachineInstr &Root, MachineCombinerPattern Pattern,
4430     SmallVectorImpl<MachineInstr *> &InsInstrs,
4431     SmallVectorImpl<MachineInstr *> &DelInstrs,
4432     DenseMap<unsigned, unsigned> &InstrIdxForVirtReg) const {
4433   MachineBasicBlock &MBB = *Root.getParent();
4434   MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
4435   MachineFunction &MF = *MBB.getParent();
4436   const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
4437 
4438   MachineInstr *MUL;
4439   const TargetRegisterClass *RC;
4440   unsigned Opc;
4441   switch (Pattern) {
4442   default:
4443     // Reassociate instructions.
4444     TargetInstrInfo::genAlternativeCodeSequence(Root, Pattern, InsInstrs,
4445                                                 DelInstrs, InstrIdxForVirtReg);
4446     return;
4447   case MachineCombinerPattern::MULADDW_OP1:
4448   case MachineCombinerPattern::MULADDX_OP1:
4449     // MUL I=A,B,0
4450     // ADD R,I,C
4451     // ==> MADD R,A,B,C
4452     // --- Create(MADD);
4453     if (Pattern == MachineCombinerPattern::MULADDW_OP1) {
4454       Opc = AArch64::MADDWrrr;
4455       RC = &AArch64::GPR32RegClass;
4456     } else {
4457       Opc = AArch64::MADDXrrr;
4458       RC = &AArch64::GPR64RegClass;
4459     }
4460     MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
4461     break;
4462   case MachineCombinerPattern::MULADDW_OP2:
4463   case MachineCombinerPattern::MULADDX_OP2:
4464     // MUL I=A,B,0
4465     // ADD R,C,I
4466     // ==> MADD R,A,B,C
4467     // --- Create(MADD);
4468     if (Pattern == MachineCombinerPattern::MULADDW_OP2) {
4469       Opc = AArch64::MADDWrrr;
4470       RC = &AArch64::GPR32RegClass;
4471     } else {
4472       Opc = AArch64::MADDXrrr;
4473       RC = &AArch64::GPR64RegClass;
4474     }
4475     MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
4476     break;
4477   case MachineCombinerPattern::MULADDWI_OP1:
4478   case MachineCombinerPattern::MULADDXI_OP1: {
4479     // MUL I=A,B,0
4480     // ADD R,I,Imm
4481     // ==> ORR  V, ZR, Imm
4482     // ==> MADD R,A,B,V
4483     // --- Create(MADD);
4484     const TargetRegisterClass *OrrRC;
4485     unsigned BitSize, OrrOpc, ZeroReg;
4486     if (Pattern == MachineCombinerPattern::MULADDWI_OP1) {
4487       OrrOpc = AArch64::ORRWri;
4488       OrrRC = &AArch64::GPR32spRegClass;
4489       BitSize = 32;
4490       ZeroReg = AArch64::WZR;
4491       Opc = AArch64::MADDWrrr;
4492       RC = &AArch64::GPR32RegClass;
4493     } else {
4494       OrrOpc = AArch64::ORRXri;
4495       OrrRC = &AArch64::GPR64spRegClass;
4496       BitSize = 64;
4497       ZeroReg = AArch64::XZR;
4498       Opc = AArch64::MADDXrrr;
4499       RC = &AArch64::GPR64RegClass;
4500     }
4501     Register NewVR = MRI.createVirtualRegister(OrrRC);
4502     uint64_t Imm = Root.getOperand(2).getImm();
4503 
4504     if (Root.getOperand(3).isImm()) {
4505       unsigned Val = Root.getOperand(3).getImm();
4506       Imm = Imm << Val;
4507     }
4508     uint64_t UImm = SignExtend64(Imm, BitSize);
4509     uint64_t Encoding;
4510     if (AArch64_AM::processLogicalImmediate(UImm, BitSize, Encoding)) {
4511       MachineInstrBuilder MIB1 =
4512           BuildMI(MF, Root.getDebugLoc(), TII->get(OrrOpc), NewVR)
4513               .addReg(ZeroReg)
4514               .addImm(Encoding);
4515       InsInstrs.push_back(MIB1);
4516       InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
4517       MUL = genMaddR(MF, MRI, TII, Root, InsInstrs, 1, Opc, NewVR, RC);
4518     }
4519     break;
4520   }
4521   case MachineCombinerPattern::MULSUBW_OP1:
4522   case MachineCombinerPattern::MULSUBX_OP1: {
4523     // MUL I=A,B,0
4524     // SUB R,I, C
4525     // ==> SUB  V, 0, C
4526     // ==> MADD R,A,B,V // = -C + A*B
4527     // --- Create(MADD);
4528     const TargetRegisterClass *SubRC;
4529     unsigned SubOpc, ZeroReg;
4530     if (Pattern == MachineCombinerPattern::MULSUBW_OP1) {
4531       SubOpc = AArch64::SUBWrr;
4532       SubRC = &AArch64::GPR32spRegClass;
4533       ZeroReg = AArch64::WZR;
4534       Opc = AArch64::MADDWrrr;
4535       RC = &AArch64::GPR32RegClass;
4536     } else {
4537       SubOpc = AArch64::SUBXrr;
4538       SubRC = &AArch64::GPR64spRegClass;
4539       ZeroReg = AArch64::XZR;
4540       Opc = AArch64::MADDXrrr;
4541       RC = &AArch64::GPR64RegClass;
4542     }
4543     Register NewVR = MRI.createVirtualRegister(SubRC);
4544     // SUB NewVR, 0, C
4545     MachineInstrBuilder MIB1 =
4546         BuildMI(MF, Root.getDebugLoc(), TII->get(SubOpc), NewVR)
4547             .addReg(ZeroReg)
4548             .add(Root.getOperand(2));
4549     InsInstrs.push_back(MIB1);
4550     InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
4551     MUL = genMaddR(MF, MRI, TII, Root, InsInstrs, 1, Opc, NewVR, RC);
4552     break;
4553   }
4554   case MachineCombinerPattern::MULSUBW_OP2:
4555   case MachineCombinerPattern::MULSUBX_OP2:
4556     // MUL I=A,B,0
4557     // SUB R,C,I
4558     // ==> MSUB R,A,B,C (computes C - A*B)
4559     // --- Create(MSUB);
4560     if (Pattern == MachineCombinerPattern::MULSUBW_OP2) {
4561       Opc = AArch64::MSUBWrrr;
4562       RC = &AArch64::GPR32RegClass;
4563     } else {
4564       Opc = AArch64::MSUBXrrr;
4565       RC = &AArch64::GPR64RegClass;
4566     }
4567     MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
4568     break;
4569   case MachineCombinerPattern::MULSUBWI_OP1:
4570   case MachineCombinerPattern::MULSUBXI_OP1: {
4571     // MUL I=A,B,0
4572     // SUB R,I, Imm
4573     // ==> ORR  V, ZR, -Imm
4574     // ==> MADD R,A,B,V // = -Imm + A*B
4575     // --- Create(MADD);
4576     const TargetRegisterClass *OrrRC;
4577     unsigned BitSize, OrrOpc, ZeroReg;
4578     if (Pattern == MachineCombinerPattern::MULSUBWI_OP1) {
4579       OrrOpc = AArch64::ORRWri;
4580       OrrRC = &AArch64::GPR32spRegClass;
4581       BitSize = 32;
4582       ZeroReg = AArch64::WZR;
4583       Opc = AArch64::MADDWrrr;
4584       RC = &AArch64::GPR32RegClass;
4585     } else {
4586       OrrOpc = AArch64::ORRXri;
4587       OrrRC = &AArch64::GPR64spRegClass;
4588       BitSize = 64;
4589       ZeroReg = AArch64::XZR;
4590       Opc = AArch64::MADDXrrr;
4591       RC = &AArch64::GPR64RegClass;
4592     }
4593     Register NewVR = MRI.createVirtualRegister(OrrRC);
4594     uint64_t Imm = Root.getOperand(2).getImm();
4595     if (Root.getOperand(3).isImm()) {
4596       unsigned Val = Root.getOperand(3).getImm();
4597       Imm = Imm << Val;
4598     }
4599     uint64_t UImm = SignExtend64(-Imm, BitSize);
4600     uint64_t Encoding;
4601     if (AArch64_AM::processLogicalImmediate(UImm, BitSize, Encoding)) {
4602       MachineInstrBuilder MIB1 =
4603           BuildMI(MF, Root.getDebugLoc(), TII->get(OrrOpc), NewVR)
4604               .addReg(ZeroReg)
4605               .addImm(Encoding);
4606       InsInstrs.push_back(MIB1);
4607       InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
4608       MUL = genMaddR(MF, MRI, TII, Root, InsInstrs, 1, Opc, NewVR, RC);
4609     }
4610     break;
4611   }
4612 
4613   case MachineCombinerPattern::MULADDv8i8_OP1:
4614     Opc = AArch64::MLAv8i8;
4615     RC = &AArch64::FPR64RegClass;
4616     MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
4617     break;
4618   case MachineCombinerPattern::MULADDv8i8_OP2:
4619     Opc = AArch64::MLAv8i8;
4620     RC = &AArch64::FPR64RegClass;
4621     MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
4622     break;
4623   case MachineCombinerPattern::MULADDv16i8_OP1:
4624     Opc = AArch64::MLAv16i8;
4625     RC = &AArch64::FPR128RegClass;
4626     MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
4627     break;
4628   case MachineCombinerPattern::MULADDv16i8_OP2:
4629     Opc = AArch64::MLAv16i8;
4630     RC = &AArch64::FPR128RegClass;
4631     MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
4632     break;
4633   case MachineCombinerPattern::MULADDv4i16_OP1:
4634     Opc = AArch64::MLAv4i16;
4635     RC = &AArch64::FPR64RegClass;
4636     MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
4637     break;
4638   case MachineCombinerPattern::MULADDv4i16_OP2:
4639     Opc = AArch64::MLAv4i16;
4640     RC = &AArch64::FPR64RegClass;
4641     MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
4642     break;
4643   case MachineCombinerPattern::MULADDv8i16_OP1:
4644     Opc = AArch64::MLAv8i16;
4645     RC = &AArch64::FPR128RegClass;
4646     MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
4647     break;
4648   case MachineCombinerPattern::MULADDv8i16_OP2:
4649     Opc = AArch64::MLAv8i16;
4650     RC = &AArch64::FPR128RegClass;
4651     MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
4652     break;
4653   case MachineCombinerPattern::MULADDv2i32_OP1:
4654     Opc = AArch64::MLAv2i32;
4655     RC = &AArch64::FPR64RegClass;
4656     MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
4657     break;
4658   case MachineCombinerPattern::MULADDv2i32_OP2:
4659     Opc = AArch64::MLAv2i32;
4660     RC = &AArch64::FPR64RegClass;
4661     MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
4662     break;
4663   case MachineCombinerPattern::MULADDv4i32_OP1:
4664     Opc = AArch64::MLAv4i32;
4665     RC = &AArch64::FPR128RegClass;
4666     MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
4667     break;
4668   case MachineCombinerPattern::MULADDv4i32_OP2:
4669     Opc = AArch64::MLAv4i32;
4670     RC = &AArch64::FPR128RegClass;
4671     MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
4672     break;
4673 
4674   case MachineCombinerPattern::MULSUBv8i8_OP1:
4675     Opc = AArch64::MLAv8i8;
4676     RC = &AArch64::FPR64RegClass;
4677     MUL = genFusedMultiplyAccNeg(MF, MRI, TII, Root, InsInstrs,
4678                                  InstrIdxForVirtReg, 1, Opc, AArch64::NEGv8i8,
4679                                  RC);
4680     break;
4681   case MachineCombinerPattern::MULSUBv8i8_OP2:
4682     Opc = AArch64::MLSv8i8;
4683     RC = &AArch64::FPR64RegClass;
4684     MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
4685     break;
4686   case MachineCombinerPattern::MULSUBv16i8_OP1:
4687     Opc = AArch64::MLAv16i8;
4688     RC = &AArch64::FPR128RegClass;
4689     MUL = genFusedMultiplyAccNeg(MF, MRI, TII, Root, InsInstrs,
4690                                  InstrIdxForVirtReg, 1, Opc, AArch64::NEGv16i8,
4691                                  RC);
4692     break;
4693   case MachineCombinerPattern::MULSUBv16i8_OP2:
4694     Opc = AArch64::MLSv16i8;
4695     RC = &AArch64::FPR128RegClass;
4696     MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
4697     break;
4698   case MachineCombinerPattern::MULSUBv4i16_OP1:
4699     Opc = AArch64::MLAv4i16;
4700     RC = &AArch64::FPR64RegClass;
4701     MUL = genFusedMultiplyAccNeg(MF, MRI, TII, Root, InsInstrs,
4702                                  InstrIdxForVirtReg, 1, Opc, AArch64::NEGv4i16,
4703                                  RC);
4704     break;
4705   case MachineCombinerPattern::MULSUBv4i16_OP2:
4706     Opc = AArch64::MLSv4i16;
4707     RC = &AArch64::FPR64RegClass;
4708     MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
4709     break;
4710   case MachineCombinerPattern::MULSUBv8i16_OP1:
4711     Opc = AArch64::MLAv8i16;
4712     RC = &AArch64::FPR128RegClass;
4713     MUL = genFusedMultiplyAccNeg(MF, MRI, TII, Root, InsInstrs,
4714                                  InstrIdxForVirtReg, 1, Opc, AArch64::NEGv8i16,
4715                                  RC);
4716     break;
4717   case MachineCombinerPattern::MULSUBv8i16_OP2:
4718     Opc = AArch64::MLSv8i16;
4719     RC = &AArch64::FPR128RegClass;
4720     MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
4721     break;
4722   case MachineCombinerPattern::MULSUBv2i32_OP1:
4723     Opc = AArch64::MLAv2i32;
4724     RC = &AArch64::FPR64RegClass;
4725     MUL = genFusedMultiplyAccNeg(MF, MRI, TII, Root, InsInstrs,
4726                                  InstrIdxForVirtReg, 1, Opc, AArch64::NEGv2i32,
4727                                  RC);
4728     break;
4729   case MachineCombinerPattern::MULSUBv2i32_OP2:
4730     Opc = AArch64::MLSv2i32;
4731     RC = &AArch64::FPR64RegClass;
4732     MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
4733     break;
4734   case MachineCombinerPattern::MULSUBv4i32_OP1:
4735     Opc = AArch64::MLAv4i32;
4736     RC = &AArch64::FPR128RegClass;
4737     MUL = genFusedMultiplyAccNeg(MF, MRI, TII, Root, InsInstrs,
4738                                  InstrIdxForVirtReg, 1, Opc, AArch64::NEGv4i32,
4739                                  RC);
4740     break;
4741   case MachineCombinerPattern::MULSUBv4i32_OP2:
4742     Opc = AArch64::MLSv4i32;
4743     RC = &AArch64::FPR128RegClass;
4744     MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
4745     break;
4746 
4747   case MachineCombinerPattern::MULADDv4i16_indexed_OP1:
4748     Opc = AArch64::MLAv4i16_indexed;
4749     RC = &AArch64::FPR64RegClass;
4750     MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
4751     break;
4752   case MachineCombinerPattern::MULADDv4i16_indexed_OP2:
4753     Opc = AArch64::MLAv4i16_indexed;
4754     RC = &AArch64::FPR64RegClass;
4755     MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
4756     break;
4757   case MachineCombinerPattern::MULADDv8i16_indexed_OP1:
4758     Opc = AArch64::MLAv8i16_indexed;
4759     RC = &AArch64::FPR128RegClass;
4760     MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
4761     break;
4762   case MachineCombinerPattern::MULADDv8i16_indexed_OP2:
4763     Opc = AArch64::MLAv8i16_indexed;
4764     RC = &AArch64::FPR128RegClass;
4765     MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
4766     break;
4767   case MachineCombinerPattern::MULADDv2i32_indexed_OP1:
4768     Opc = AArch64::MLAv2i32_indexed;
4769     RC = &AArch64::FPR64RegClass;
4770     MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
4771     break;
4772   case MachineCombinerPattern::MULADDv2i32_indexed_OP2:
4773     Opc = AArch64::MLAv2i32_indexed;
4774     RC = &AArch64::FPR64RegClass;
4775     MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
4776     break;
4777   case MachineCombinerPattern::MULADDv4i32_indexed_OP1:
4778     Opc = AArch64::MLAv4i32_indexed;
4779     RC = &AArch64::FPR128RegClass;
4780     MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
4781     break;
4782   case MachineCombinerPattern::MULADDv4i32_indexed_OP2:
4783     Opc = AArch64::MLAv4i32_indexed;
4784     RC = &AArch64::FPR128RegClass;
4785     MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
4786     break;
4787 
4788   case MachineCombinerPattern::MULSUBv4i16_indexed_OP1:
4789     Opc = AArch64::MLAv4i16_indexed;
4790     RC = &AArch64::FPR64RegClass;
4791     MUL = genFusedMultiplyIdxNeg(MF, MRI, TII, Root, InsInstrs,
4792                                  InstrIdxForVirtReg, 1, Opc, AArch64::NEGv4i16,
4793                                  RC);
4794     break;
4795   case MachineCombinerPattern::MULSUBv4i16_indexed_OP2:
4796     Opc = AArch64::MLSv4i16_indexed;
4797     RC = &AArch64::FPR64RegClass;
4798     MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
4799     break;
4800   case MachineCombinerPattern::MULSUBv8i16_indexed_OP1:
4801     Opc = AArch64::MLAv8i16_indexed;
4802     RC = &AArch64::FPR128RegClass;
4803     MUL = genFusedMultiplyIdxNeg(MF, MRI, TII, Root, InsInstrs,
4804                                  InstrIdxForVirtReg, 1, Opc, AArch64::NEGv8i16,
4805                                  RC);
4806     break;
4807   case MachineCombinerPattern::MULSUBv8i16_indexed_OP2:
4808     Opc = AArch64::MLSv8i16_indexed;
4809     RC = &AArch64::FPR128RegClass;
4810     MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
4811     break;
4812   case MachineCombinerPattern::MULSUBv2i32_indexed_OP1:
4813     Opc = AArch64::MLAv2i32_indexed;
4814     RC = &AArch64::FPR64RegClass;
4815     MUL = genFusedMultiplyIdxNeg(MF, MRI, TII, Root, InsInstrs,
4816                                  InstrIdxForVirtReg, 1, Opc, AArch64::NEGv2i32,
4817                                  RC);
4818     break;
4819   case MachineCombinerPattern::MULSUBv2i32_indexed_OP2:
4820     Opc = AArch64::MLSv2i32_indexed;
4821     RC = &AArch64::FPR64RegClass;
4822     MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
4823     break;
4824   case MachineCombinerPattern::MULSUBv4i32_indexed_OP1:
4825     Opc = AArch64::MLAv4i32_indexed;
4826     RC = &AArch64::FPR128RegClass;
4827     MUL = genFusedMultiplyIdxNeg(MF, MRI, TII, Root, InsInstrs,
4828                                  InstrIdxForVirtReg, 1, Opc, AArch64::NEGv4i32,
4829                                  RC);
4830     break;
4831   case MachineCombinerPattern::MULSUBv4i32_indexed_OP2:
4832     Opc = AArch64::MLSv4i32_indexed;
4833     RC = &AArch64::FPR128RegClass;
4834     MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
4835     break;
4836 
4837   // Floating Point Support
4838   case MachineCombinerPattern::FMULADDH_OP1:
4839     Opc = AArch64::FMADDHrrr;
4840     RC = &AArch64::FPR16RegClass;
4841     MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
4842     break;
4843   case MachineCombinerPattern::FMULADDS_OP1:
4844     Opc = AArch64::FMADDSrrr;
4845     RC = &AArch64::FPR32RegClass;
4846     MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
4847     break;
4848   case MachineCombinerPattern::FMULADDD_OP1:
4849     Opc = AArch64::FMADDDrrr;
4850     RC = &AArch64::FPR64RegClass;
4851     MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
4852     break;
4853 
4854   case MachineCombinerPattern::FMULADDH_OP2:
4855     Opc = AArch64::FMADDHrrr;
4856     RC = &AArch64::FPR16RegClass;
4857     MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
4858     break;
4859   case MachineCombinerPattern::FMULADDS_OP2:
4860     Opc = AArch64::FMADDSrrr;
4861     RC = &AArch64::FPR32RegClass;
4862     MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
4863     break;
4864   case MachineCombinerPattern::FMULADDD_OP2:
4865     Opc = AArch64::FMADDDrrr;
4866     RC = &AArch64::FPR64RegClass;
4867     MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
4868     break;
4869 
4870   case MachineCombinerPattern::FMLAv1i32_indexed_OP1:
4871     Opc = AArch64::FMLAv1i32_indexed;
4872     RC = &AArch64::FPR32RegClass;
4873     MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
4874                            FMAInstKind::Indexed);
4875     break;
4876   case MachineCombinerPattern::FMLAv1i32_indexed_OP2:
4877     Opc = AArch64::FMLAv1i32_indexed;
4878     RC = &AArch64::FPR32RegClass;
4879     MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
4880                            FMAInstKind::Indexed);
4881     break;
4882 
4883   case MachineCombinerPattern::FMLAv1i64_indexed_OP1:
4884     Opc = AArch64::FMLAv1i64_indexed;
4885     RC = &AArch64::FPR64RegClass;
4886     MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
4887                            FMAInstKind::Indexed);
4888     break;
4889   case MachineCombinerPattern::FMLAv1i64_indexed_OP2:
4890     Opc = AArch64::FMLAv1i64_indexed;
4891     RC = &AArch64::FPR64RegClass;
4892     MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
4893                            FMAInstKind::Indexed);
4894     break;
4895 
4896   case MachineCombinerPattern::FMLAv4i16_indexed_OP1:
4897     RC = &AArch64::FPR64RegClass;
4898     Opc = AArch64::FMLAv4i16_indexed;
4899     MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
4900                            FMAInstKind::Indexed);
4901     break;
4902   case MachineCombinerPattern::FMLAv4f16_OP1:
4903     RC = &AArch64::FPR64RegClass;
4904     Opc = AArch64::FMLAv4f16;
4905     MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
4906                            FMAInstKind::Accumulator);
4907     break;
4908   case MachineCombinerPattern::FMLAv4i16_indexed_OP2:
4909     RC = &AArch64::FPR64RegClass;
4910     Opc = AArch64::FMLAv4i16_indexed;
4911     MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
4912                            FMAInstKind::Indexed);
4913     break;
4914   case MachineCombinerPattern::FMLAv4f16_OP2:
4915     RC = &AArch64::FPR64RegClass;
4916     Opc = AArch64::FMLAv4f16;
4917     MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
4918                            FMAInstKind::Accumulator);
4919     break;
4920 
4921   case MachineCombinerPattern::FMLAv2i32_indexed_OP1:
4922   case MachineCombinerPattern::FMLAv2f32_OP1:
4923     RC = &AArch64::FPR64RegClass;
4924     if (Pattern == MachineCombinerPattern::FMLAv2i32_indexed_OP1) {
4925       Opc = AArch64::FMLAv2i32_indexed;
4926       MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
4927                              FMAInstKind::Indexed);
4928     } else {
4929       Opc = AArch64::FMLAv2f32;
4930       MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
4931                              FMAInstKind::Accumulator);
4932     }
4933     break;
4934   case MachineCombinerPattern::FMLAv2i32_indexed_OP2:
4935   case MachineCombinerPattern::FMLAv2f32_OP2:
4936     RC = &AArch64::FPR64RegClass;
4937     if (Pattern == MachineCombinerPattern::FMLAv2i32_indexed_OP2) {
4938       Opc = AArch64::FMLAv2i32_indexed;
4939       MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
4940                              FMAInstKind::Indexed);
4941     } else {
4942       Opc = AArch64::FMLAv2f32;
4943       MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
4944                              FMAInstKind::Accumulator);
4945     }
4946     break;
4947 
4948   case MachineCombinerPattern::FMLAv8i16_indexed_OP1:
4949     RC = &AArch64::FPR128RegClass;
4950     Opc = AArch64::FMLAv8i16_indexed;
4951     MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
4952                            FMAInstKind::Indexed);
4953     break;
4954   case MachineCombinerPattern::FMLAv8f16_OP1:
4955     RC = &AArch64::FPR128RegClass;
4956     Opc = AArch64::FMLAv8f16;
4957     MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
4958                            FMAInstKind::Accumulator);
4959     break;
4960   case MachineCombinerPattern::FMLAv8i16_indexed_OP2:
4961     RC = &AArch64::FPR128RegClass;
4962     Opc = AArch64::FMLAv8i16_indexed;
4963     MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
4964                            FMAInstKind::Indexed);
4965     break;
4966   case MachineCombinerPattern::FMLAv8f16_OP2:
4967     RC = &AArch64::FPR128RegClass;
4968     Opc = AArch64::FMLAv8f16;
4969     MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
4970                            FMAInstKind::Accumulator);
4971     break;
4972 
4973   case MachineCombinerPattern::FMLAv2i64_indexed_OP1:
4974   case MachineCombinerPattern::FMLAv2f64_OP1:
4975     RC = &AArch64::FPR128RegClass;
4976     if (Pattern == MachineCombinerPattern::FMLAv2i64_indexed_OP1) {
4977       Opc = AArch64::FMLAv2i64_indexed;
4978       MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
4979                              FMAInstKind::Indexed);
4980     } else {
4981       Opc = AArch64::FMLAv2f64;
4982       MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
4983                              FMAInstKind::Accumulator);
4984     }
4985     break;
4986   case MachineCombinerPattern::FMLAv2i64_indexed_OP2:
4987   case MachineCombinerPattern::FMLAv2f64_OP2:
4988     RC = &AArch64::FPR128RegClass;
4989     if (Pattern == MachineCombinerPattern::FMLAv2i64_indexed_OP2) {
4990       Opc = AArch64::FMLAv2i64_indexed;
4991       MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
4992                              FMAInstKind::Indexed);
4993     } else {
4994       Opc = AArch64::FMLAv2f64;
4995       MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
4996                              FMAInstKind::Accumulator);
4997     }
4998     break;
4999 
5000   case MachineCombinerPattern::FMLAv4i32_indexed_OP1:
5001   case MachineCombinerPattern::FMLAv4f32_OP1:
5002     RC = &AArch64::FPR128RegClass;
5003     if (Pattern == MachineCombinerPattern::FMLAv4i32_indexed_OP1) {
5004       Opc = AArch64::FMLAv4i32_indexed;
5005       MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
5006                              FMAInstKind::Indexed);
5007     } else {
5008       Opc = AArch64::FMLAv4f32;
5009       MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
5010                              FMAInstKind::Accumulator);
5011     }
5012     break;
5013 
5014   case MachineCombinerPattern::FMLAv4i32_indexed_OP2:
5015   case MachineCombinerPattern::FMLAv4f32_OP2:
5016     RC = &AArch64::FPR128RegClass;
5017     if (Pattern == MachineCombinerPattern::FMLAv4i32_indexed_OP2) {
5018       Opc = AArch64::FMLAv4i32_indexed;
5019       MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
5020                              FMAInstKind::Indexed);
5021     } else {
5022       Opc = AArch64::FMLAv4f32;
5023       MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
5024                              FMAInstKind::Accumulator);
5025     }
5026     break;
5027 
5028   case MachineCombinerPattern::FMULSUBH_OP1:
5029     Opc = AArch64::FNMSUBHrrr;
5030     RC = &AArch64::FPR16RegClass;
5031     MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
5032     break;
5033   case MachineCombinerPattern::FMULSUBS_OP1:
5034     Opc = AArch64::FNMSUBSrrr;
5035     RC = &AArch64::FPR32RegClass;
5036     MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
5037     break;
5038   case MachineCombinerPattern::FMULSUBD_OP1:
5039     Opc = AArch64::FNMSUBDrrr;
5040     RC = &AArch64::FPR64RegClass;
5041     MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
5042     break;
5043 
5044   case MachineCombinerPattern::FNMULSUBH_OP1:
5045     Opc = AArch64::FNMADDHrrr;
5046     RC = &AArch64::FPR16RegClass;
5047     MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
5048     break;
5049   case MachineCombinerPattern::FNMULSUBS_OP1:
5050     Opc = AArch64::FNMADDSrrr;
5051     RC = &AArch64::FPR32RegClass;
5052     MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
5053     break;
5054   case MachineCombinerPattern::FNMULSUBD_OP1:
5055     Opc = AArch64::FNMADDDrrr;
5056     RC = &AArch64::FPR64RegClass;
5057     MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
5058     break;
5059 
5060   case MachineCombinerPattern::FMULSUBH_OP2:
5061     Opc = AArch64::FMSUBHrrr;
5062     RC = &AArch64::FPR16RegClass;
5063     MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
5064     break;
5065   case MachineCombinerPattern::FMULSUBS_OP2:
5066     Opc = AArch64::FMSUBSrrr;
5067     RC = &AArch64::FPR32RegClass;
5068     MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
5069     break;
5070   case MachineCombinerPattern::FMULSUBD_OP2:
5071     Opc = AArch64::FMSUBDrrr;
5072     RC = &AArch64::FPR64RegClass;
5073     MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
5074     break;
5075 
5076   case MachineCombinerPattern::FMLSv1i32_indexed_OP2:
5077     Opc = AArch64::FMLSv1i32_indexed;
5078     RC = &AArch64::FPR32RegClass;
5079     MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
5080                            FMAInstKind::Indexed);
5081     break;
5082 
5083   case MachineCombinerPattern::FMLSv1i64_indexed_OP2:
5084     Opc = AArch64::FMLSv1i64_indexed;
5085     RC = &AArch64::FPR64RegClass;
5086     MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
5087                            FMAInstKind::Indexed);
5088     break;
5089 
5090   case MachineCombinerPattern::FMLSv4f16_OP1:
5091   case MachineCombinerPattern::FMLSv4i16_indexed_OP1: {
5092     RC = &AArch64::FPR64RegClass;
5093     Register NewVR = MRI.createVirtualRegister(RC);
5094     MachineInstrBuilder MIB1 =
5095         BuildMI(MF, Root.getDebugLoc(), TII->get(AArch64::FNEGv4f16), NewVR)
5096             .add(Root.getOperand(2));
5097     InsInstrs.push_back(MIB1);
5098     InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
5099     if (Pattern == MachineCombinerPattern::FMLSv4f16_OP1) {
5100       Opc = AArch64::FMLAv4f16;
5101       MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
5102                              FMAInstKind::Accumulator, &NewVR);
5103     } else {
5104       Opc = AArch64::FMLAv4i16_indexed;
5105       MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
5106                              FMAInstKind::Indexed, &NewVR);
5107     }
5108     break;
5109   }
5110   case MachineCombinerPattern::FMLSv4f16_OP2:
5111     RC = &AArch64::FPR64RegClass;
5112     Opc = AArch64::FMLSv4f16;
5113     MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
5114                            FMAInstKind::Accumulator);
5115     break;
5116   case MachineCombinerPattern::FMLSv4i16_indexed_OP2:
5117     RC = &AArch64::FPR64RegClass;
5118     Opc = AArch64::FMLSv4i16_indexed;
5119     MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
5120                            FMAInstKind::Indexed);
5121     break;
5122 
5123   case MachineCombinerPattern::FMLSv2f32_OP2:
5124   case MachineCombinerPattern::FMLSv2i32_indexed_OP2:
5125     RC = &AArch64::FPR64RegClass;
5126     if (Pattern == MachineCombinerPattern::FMLSv2i32_indexed_OP2) {
5127       Opc = AArch64::FMLSv2i32_indexed;
5128       MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
5129                              FMAInstKind::Indexed);
5130     } else {
5131       Opc = AArch64::FMLSv2f32;
5132       MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
5133                              FMAInstKind::Accumulator);
5134     }
5135     break;
5136 
5137   case MachineCombinerPattern::FMLSv8f16_OP1:
5138   case MachineCombinerPattern::FMLSv8i16_indexed_OP1: {
5139     RC = &AArch64::FPR128RegClass;
5140     Register NewVR = MRI.createVirtualRegister(RC);
5141     MachineInstrBuilder MIB1 =
5142         BuildMI(MF, Root.getDebugLoc(), TII->get(AArch64::FNEGv8f16), NewVR)
5143             .add(Root.getOperand(2));
5144     InsInstrs.push_back(MIB1);
5145     InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
5146     if (Pattern == MachineCombinerPattern::FMLSv8f16_OP1) {
5147       Opc = AArch64::FMLAv8f16;
5148       MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
5149                              FMAInstKind::Accumulator, &NewVR);
5150     } else {
5151       Opc = AArch64::FMLAv8i16_indexed;
5152       MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
5153                              FMAInstKind::Indexed, &NewVR);
5154     }
5155     break;
5156   }
5157   case MachineCombinerPattern::FMLSv8f16_OP2:
5158     RC = &AArch64::FPR128RegClass;
5159     Opc = AArch64::FMLSv8f16;
5160     MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
5161                            FMAInstKind::Accumulator);
5162     break;
5163   case MachineCombinerPattern::FMLSv8i16_indexed_OP2:
5164     RC = &AArch64::FPR128RegClass;
5165     Opc = AArch64::FMLSv8i16_indexed;
5166     MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
5167                            FMAInstKind::Indexed);
5168     break;
5169 
5170   case MachineCombinerPattern::FMLSv2f64_OP2:
5171   case MachineCombinerPattern::FMLSv2i64_indexed_OP2:
5172     RC = &AArch64::FPR128RegClass;
5173     if (Pattern == MachineCombinerPattern::FMLSv2i64_indexed_OP2) {
5174       Opc = AArch64::FMLSv2i64_indexed;
5175       MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
5176                              FMAInstKind::Indexed);
5177     } else {
5178       Opc = AArch64::FMLSv2f64;
5179       MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
5180                              FMAInstKind::Accumulator);
5181     }
5182     break;
5183 
5184   case MachineCombinerPattern::FMLSv4f32_OP2:
5185   case MachineCombinerPattern::FMLSv4i32_indexed_OP2:
5186     RC = &AArch64::FPR128RegClass;
5187     if (Pattern == MachineCombinerPattern::FMLSv4i32_indexed_OP2) {
5188       Opc = AArch64::FMLSv4i32_indexed;
5189       MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
5190                              FMAInstKind::Indexed);
5191     } else {
5192       Opc = AArch64::FMLSv4f32;
5193       MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
5194                              FMAInstKind::Accumulator);
5195     }
5196     break;
5197   case MachineCombinerPattern::FMLSv2f32_OP1:
5198   case MachineCombinerPattern::FMLSv2i32_indexed_OP1: {
5199     RC = &AArch64::FPR64RegClass;
5200     Register NewVR = MRI.createVirtualRegister(RC);
5201     MachineInstrBuilder MIB1 =
5202         BuildMI(MF, Root.getDebugLoc(), TII->get(AArch64::FNEGv2f32), NewVR)
5203             .add(Root.getOperand(2));
5204     InsInstrs.push_back(MIB1);
5205     InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
5206     if (Pattern == MachineCombinerPattern::FMLSv2i32_indexed_OP1) {
5207       Opc = AArch64::FMLAv2i32_indexed;
5208       MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
5209                              FMAInstKind::Indexed, &NewVR);
5210     } else {
5211       Opc = AArch64::FMLAv2f32;
5212       MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
5213                              FMAInstKind::Accumulator, &NewVR);
5214     }
5215     break;
5216   }
5217   case MachineCombinerPattern::FMLSv4f32_OP1:
5218   case MachineCombinerPattern::FMLSv4i32_indexed_OP1: {
5219     RC = &AArch64::FPR128RegClass;
5220     Register NewVR = MRI.createVirtualRegister(RC);
5221     MachineInstrBuilder MIB1 =
5222         BuildMI(MF, Root.getDebugLoc(), TII->get(AArch64::FNEGv4f32), NewVR)
5223             .add(Root.getOperand(2));
5224     InsInstrs.push_back(MIB1);
5225     InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
5226     if (Pattern == MachineCombinerPattern::FMLSv4i32_indexed_OP1) {
5227       Opc = AArch64::FMLAv4i32_indexed;
5228       MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
5229                              FMAInstKind::Indexed, &NewVR);
5230     } else {
5231       Opc = AArch64::FMLAv4f32;
5232       MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
5233                              FMAInstKind::Accumulator, &NewVR);
5234     }
5235     break;
5236   }
5237   case MachineCombinerPattern::FMLSv2f64_OP1:
5238   case MachineCombinerPattern::FMLSv2i64_indexed_OP1: {
5239     RC = &AArch64::FPR128RegClass;
5240     Register NewVR = MRI.createVirtualRegister(RC);
5241     MachineInstrBuilder MIB1 =
5242         BuildMI(MF, Root.getDebugLoc(), TII->get(AArch64::FNEGv2f64), NewVR)
5243             .add(Root.getOperand(2));
5244     InsInstrs.push_back(MIB1);
5245     InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
5246     if (Pattern == MachineCombinerPattern::FMLSv2i64_indexed_OP1) {
5247       Opc = AArch64::FMLAv2i64_indexed;
5248       MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
5249                              FMAInstKind::Indexed, &NewVR);
5250     } else {
5251       Opc = AArch64::FMLAv2f64;
5252       MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
5253                              FMAInstKind::Accumulator, &NewVR);
5254     }
5255     break;
5256   }
5257   } // end switch (Pattern)
5258   // Record MUL and ADD/SUB for deletion
5259   DelInstrs.push_back(MUL);
5260   DelInstrs.push_back(&Root);
5261 }
5262 
/// Replace a csinc-branch sequence by a simple conditional branch
5264 ///
5265 /// Examples:
5266 /// 1. \code
5267 ///   csinc  w9, wzr, wzr, <condition code>
5268 ///   tbnz   w9, #0, 0x44
5269 ///    \endcode
5270 /// to
5271 ///    \code
5272 ///   b.<inverted condition code>
5273 ///    \endcode
5274 ///
5275 /// 2. \code
5276 ///   csinc w9, wzr, wzr, <condition code>
5277 ///   tbz   w9, #0, 0x44
5278 ///    \endcode
5279 /// to
5280 ///    \code
5281 ///   b.<condition code>
5282 ///    \endcode
5283 ///
5284 /// Replace compare and branch sequence by TBZ/TBNZ instruction when the
5285 /// compare's constant operand is power of 2.
5286 ///
5287 /// Examples:
5288 ///    \code
5289 ///   and  w8, w8, #0x400
5290 ///   cbnz w8, L1
5291 ///    \endcode
5292 /// to
5293 ///    \code
5294 ///   tbnz w8, #10, L1
5295 ///    \endcode
5296 ///
5297 /// \param  MI Conditional Branch
5298 /// \return True when the simple conditional branch is generated
5299 ///
5300 bool AArch64InstrInfo::optimizeCondBranch(MachineInstr &MI) const {
5301   bool IsNegativeBranch = false;
5302   bool IsTestAndBranch = false;
5303   unsigned TargetBBInMI = 0;
5304   switch (MI.getOpcode()) {
5305   default:
5306     llvm_unreachable("Unknown branch instruction?");
5307   case AArch64::Bcc:
5308     return false;
5309   case AArch64::CBZW:
5310   case AArch64::CBZX:
5311     TargetBBInMI = 1;
5312     break;
5313   case AArch64::CBNZW:
5314   case AArch64::CBNZX:
5315     TargetBBInMI = 1;
5316     IsNegativeBranch = true;
5317     break;
5318   case AArch64::TBZW:
5319   case AArch64::TBZX:
5320     TargetBBInMI = 2;
5321     IsTestAndBranch = true;
5322     break;
5323   case AArch64::TBNZW:
5324   case AArch64::TBNZX:
5325     TargetBBInMI = 2;
5326     IsNegativeBranch = true;
5327     IsTestAndBranch = true;
5328     break;
5329   }
5330   // So we increment a zero register and test for bits other
5331   // than bit 0? Conservatively bail out in case the verifier
5332   // missed this case.
5333   if (IsTestAndBranch && MI.getOperand(1).getImm())
5334     return false;
5335 
5336   // Find Definition.
5337   assert(MI.getParent() && "Incomplete machine instruciton\n");
5338   MachineBasicBlock *MBB = MI.getParent();
5339   MachineFunction *MF = MBB->getParent();
5340   MachineRegisterInfo *MRI = &MF->getRegInfo();
5341   Register VReg = MI.getOperand(0).getReg();
5342   if (!Register::isVirtualRegister(VReg))
5343     return false;
5344 
5345   MachineInstr *DefMI = MRI->getVRegDef(VReg);
5346 
5347   // Look through COPY instructions to find definition.
5348   while (DefMI->isCopy()) {
5349     Register CopyVReg = DefMI->getOperand(1).getReg();
5350     if (!MRI->hasOneNonDBGUse(CopyVReg))
5351       return false;
5352     if (!MRI->hasOneDef(CopyVReg))
5353       return false;
5354     DefMI = MRI->getVRegDef(CopyVReg);
5355   }
5356 
5357   switch (DefMI->getOpcode()) {
5358   default:
5359     return false;
5360   // Fold AND into a TBZ/TBNZ if constant operand is power of 2.
5361   case AArch64::ANDWri:
5362   case AArch64::ANDXri: {
5363     if (IsTestAndBranch)
5364       return false;
5365     if (DefMI->getParent() != MBB)
5366       return false;
5367     if (!MRI->hasOneNonDBGUse(VReg))
5368       return false;
5369 
5370     bool Is32Bit = (DefMI->getOpcode() == AArch64::ANDWri);
5371     uint64_t Mask = AArch64_AM::decodeLogicalImmediate(
5372         DefMI->getOperand(2).getImm(), Is32Bit ? 32 : 64);
5373     if (!isPowerOf2_64(Mask))
5374       return false;
5375 
5376     MachineOperand &MO = DefMI->getOperand(1);
5377     Register NewReg = MO.getReg();
5378     if (!Register::isVirtualRegister(NewReg))
5379       return false;
5380 
5381     assert(!MRI->def_empty(NewReg) && "Register must be defined.");
5382 
5383     MachineBasicBlock &RefToMBB = *MBB;
5384     MachineBasicBlock *TBB = MI.getOperand(1).getMBB();
5385     DebugLoc DL = MI.getDebugLoc();
5386     unsigned Imm = Log2_64(Mask);
5387     unsigned Opc = (Imm < 32)
5388                        ? (IsNegativeBranch ? AArch64::TBNZW : AArch64::TBZW)
5389                        : (IsNegativeBranch ? AArch64::TBNZX : AArch64::TBZX);
5390     MachineInstr *NewMI = BuildMI(RefToMBB, MI, DL, get(Opc))
5391                               .addReg(NewReg)
5392                               .addImm(Imm)
5393                               .addMBB(TBB);
5394     // Register lives on to the CBZ now.
5395     MO.setIsKill(false);
5396 
5397     // For immediate smaller than 32, we need to use the 32-bit
5398     // variant (W) in all cases. Indeed the 64-bit variant does not
5399     // allow to encode them.
5400     // Therefore, if the input register is 64-bit, we need to take the
5401     // 32-bit sub-part.
5402     if (!Is32Bit && Imm < 32)
5403       NewMI->getOperand(0).setSubReg(AArch64::sub_32);
5404     MI.eraseFromParent();
5405     return true;
5406   }
5407   // Look for CSINC
5408   case AArch64::CSINCWr:
5409   case AArch64::CSINCXr: {
5410     if (!(DefMI->getOperand(1).getReg() == AArch64::WZR &&
5411           DefMI->getOperand(2).getReg() == AArch64::WZR) &&
5412         !(DefMI->getOperand(1).getReg() == AArch64::XZR &&
5413           DefMI->getOperand(2).getReg() == AArch64::XZR))
5414       return false;
5415 
5416     if (DefMI->findRegisterDefOperandIdx(AArch64::NZCV, true) != -1)
5417       return false;
5418 
5419     AArch64CC::CondCode CC = (AArch64CC::CondCode)DefMI->getOperand(3).getImm();
5420     // Convert only when the condition code is not modified between
5421     // the CSINC and the branch. The CC may be used by other
5422     // instructions in between.
5423     if (areCFlagsAccessedBetweenInstrs(DefMI, MI, &getRegisterInfo(), AK_Write))
5424       return false;
5425     MachineBasicBlock &RefToMBB = *MBB;
5426     MachineBasicBlock *TBB = MI.getOperand(TargetBBInMI).getMBB();
5427     DebugLoc DL = MI.getDebugLoc();
5428     if (IsNegativeBranch)
5429       CC = AArch64CC::getInvertedCondCode(CC);
5430     BuildMI(RefToMBB, MI, DL, get(AArch64::Bcc)).addImm(CC).addMBB(TBB);
5431     MI.eraseFromParent();
5432     return true;
5433   }
5434   }
5435 }
5436 
5437 std::pair<unsigned, unsigned>
5438 AArch64InstrInfo::decomposeMachineOperandsTargetFlags(unsigned TF) const {
5439   const unsigned Mask = AArch64II::MO_FRAGMENT;
5440   return std::make_pair(TF & Mask, TF & ~Mask);
5441 }
5442 
5443 ArrayRef<std::pair<unsigned, const char *>>
5444 AArch64InstrInfo::getSerializableDirectMachineOperandTargetFlags() const {
5445   using namespace AArch64II;
5446 
5447   static const std::pair<unsigned, const char *> TargetFlags[] = {
5448       {MO_PAGE, "aarch64-page"}, {MO_PAGEOFF, "aarch64-pageoff"},
5449       {MO_G3, "aarch64-g3"},     {MO_G2, "aarch64-g2"},
5450       {MO_G1, "aarch64-g1"},     {MO_G0, "aarch64-g0"},
5451       {MO_HI12, "aarch64-hi12"}};
5452   return makeArrayRef(TargetFlags);
5453 }
5454 
5455 ArrayRef<std::pair<unsigned, const char *>>
5456 AArch64InstrInfo::getSerializableBitmaskMachineOperandTargetFlags() const {
5457   using namespace AArch64II;
5458 
5459   static const std::pair<unsigned, const char *> TargetFlags[] = {
5460       {MO_COFFSTUB, "aarch64-coffstub"},
5461       {MO_GOT, "aarch64-got"},
5462       {MO_NC, "aarch64-nc"},
5463       {MO_S, "aarch64-s"},
5464       {MO_TLS, "aarch64-tls"},
5465       {MO_DLLIMPORT, "aarch64-dllimport"},
5466       {MO_PREL, "aarch64-prel"},
5467       {MO_TAGGED, "aarch64-tagged"}};
5468   return makeArrayRef(TargetFlags);
5469 }
5470 
5471 ArrayRef<std::pair<MachineMemOperand::Flags, const char *>>
5472 AArch64InstrInfo::getSerializableMachineMemOperandTargetFlags() const {
5473   static const std::pair<MachineMemOperand::Flags, const char *> TargetFlags[] =
5474       {{MOSuppressPair, "aarch64-suppress-pair"},
5475        {MOStridedAccess, "aarch64-strided-access"}};
5476   return makeArrayRef(TargetFlags);
5477 }
5478 
5479 /// Constants defining how certain sequences should be outlined.
5480 /// This encompasses how an outlined function should be called, and what kind of
5481 /// frame should be emitted for that outlined function.
5482 ///
5483 /// \p MachineOutlinerDefault implies that the function should be called with
5484 /// a save and restore of LR to the stack.
5485 ///
5486 /// That is,
5487 ///
5488 /// I1     Save LR                    OUTLINED_FUNCTION:
5489 /// I2 --> BL OUTLINED_FUNCTION       I1
5490 /// I3     Restore LR                 I2
5491 ///                                   I3
5492 ///                                   RET
5493 ///
5494 /// * Call construction overhead: 3 (save + BL + restore)
5495 /// * Frame construction overhead: 1 (ret)
5496 /// * Requires stack fixups? Yes
5497 ///
5498 /// \p MachineOutlinerTailCall implies that the function is being created from
5499 /// a sequence of instructions ending in a return.
5500 ///
5501 /// That is,
5502 ///
5503 /// I1                             OUTLINED_FUNCTION:
5504 /// I2 --> B OUTLINED_FUNCTION     I1
5505 /// RET                            I2
5506 ///                                RET
5507 ///
5508 /// * Call construction overhead: 1 (B)
5509 /// * Frame construction overhead: 0 (Return included in sequence)
5510 /// * Requires stack fixups? No
5511 ///
5512 /// \p MachineOutlinerNoLRSave implies that the function should be called using
5513 /// a BL instruction, but doesn't require LR to be saved and restored. This
5514 /// happens when LR is known to be dead.
5515 ///
5516 /// That is,
5517 ///
5518 /// I1                                OUTLINED_FUNCTION:
5519 /// I2 --> BL OUTLINED_FUNCTION       I1
5520 /// I3                                I2
5521 ///                                   I3
5522 ///                                   RET
5523 ///
5524 /// * Call construction overhead: 1 (BL)
5525 /// * Frame construction overhead: 1 (RET)
5526 /// * Requires stack fixups? No
5527 ///
5528 /// \p MachineOutlinerThunk implies that the function is being created from
5529 /// a sequence of instructions ending in a call. The outlined function is
5530 /// called with a BL instruction, and the outlined function tail-calls the
5531 /// original call destination.
5532 ///
5533 /// That is,
5534 ///
5535 /// I1                                OUTLINED_FUNCTION:
5536 /// I2 --> BL OUTLINED_FUNCTION       I1
5537 /// BL f                              I2
5538 ///                                   B f
5539 /// * Call construction overhead: 1 (BL)
5540 /// * Frame construction overhead: 0
5541 /// * Requires stack fixups? No
5542 ///
5543 /// \p MachineOutlinerRegSave implies that the function should be called with a
5544 /// save and restore of LR to an available register. This allows us to avoid
5545 /// stack fixups. Note that this outlining variant is compatible with the
5546 /// NoLRSave case.
5547 ///
5548 /// That is,
5549 ///
5550 /// I1     Save LR                    OUTLINED_FUNCTION:
5551 /// I2 --> BL OUTLINED_FUNCTION       I1
5552 /// I3     Restore LR                 I2
5553 ///                                   I3
5554 ///                                   RET
5555 ///
5556 /// * Call construction overhead: 3 (save + BL + restore)
5557 /// * Frame construction overhead: 1 (ret)
5558 /// * Requires stack fixups? No
enum MachineOutlinerClass {
  MachineOutlinerDefault = 0,  ///< Emit a save, restore, call, and return.
  MachineOutlinerTailCall = 1, ///< Only emit a branch.
  MachineOutlinerNoLRSave = 2, ///< Emit a call and return.
  MachineOutlinerThunk = 3,    ///< Emit a call and tail-call.
  MachineOutlinerRegSave = 4   ///< Same as default, but save to a register.
};
5566 
/// Flag bits; each enumerator is a distinct single bit so values can be
/// OR-ed together. Bit 0 is not assigned here.
enum MachineOutlinerMBBFlags {
  LRUnavailableSomewhere = 1 << 1,
  HasCalls = 1 << 2,
  UnsafeRegsDead = 1 << 3
};
5572 
5573 unsigned
5574 AArch64InstrInfo::findRegisterToSaveLRTo(const outliner::Candidate &C) const {
5575   assert(C.LRUWasSet && "LRU wasn't set?");
5576   MachineFunction *MF = C.getMF();
5577   const AArch64RegisterInfo *ARI = static_cast<const AArch64RegisterInfo *>(
5578       MF->getSubtarget().getRegisterInfo());
5579 
5580   // Check if there is an available register across the sequence that we can
5581   // use.
5582   for (unsigned Reg : AArch64::GPR64RegClass) {
5583     if (!ARI->isReservedReg(*MF, Reg) &&
5584         Reg != AArch64::LR &&  // LR is not reserved, but don't use it.
5585         Reg != AArch64::X16 && // X16 is not guaranteed to be preserved.
5586         Reg != AArch64::X17 && // Ditto for X17.
5587         C.LRU.available(Reg) && C.UsedInSequence.available(Reg))
5588       return Reg;
5589   }
5590 
5591   // No suitable register. Return 0.
5592   return 0u;
5593 }
5594 
5595 static bool
5596 outliningCandidatesSigningScopeConsensus(const outliner::Candidate &a,
5597                                          const outliner::Candidate &b) {
5598   const Function &Fa = a.getMF()->getFunction();
5599   const Function &Fb = b.getMF()->getFunction();
5600 
5601   // If none of the functions have the "sign-return-address" attribute their
5602   // signing behaviour is equal
5603   if (!Fa.hasFnAttribute("sign-return-address") &&
5604       !Fb.hasFnAttribute("sign-return-address")) {
5605     return true;
5606   }
5607 
5608   // If both functions have the "sign-return-address" attribute their signing
5609   // behaviour is equal, if the values of the attributes are equal
5610   if (Fa.hasFnAttribute("sign-return-address") &&
5611       Fb.hasFnAttribute("sign-return-address")) {
5612     StringRef ScopeA =
5613         Fa.getFnAttribute("sign-return-address").getValueAsString();
5614     StringRef ScopeB =
5615         Fb.getFnAttribute("sign-return-address").getValueAsString();
5616     return ScopeA.equals(ScopeB);
5617   }
5618 
5619   // If function B doesn't have the "sign-return-address" attribute but A does,
5620   // the functions' signing behaviour is equal if A's value for
5621   // "sign-return-address" is "none" and vice versa.
5622   if (Fa.hasFnAttribute("sign-return-address")) {
5623     StringRef ScopeA =
5624         Fa.getFnAttribute("sign-return-address").getValueAsString();
5625     return ScopeA.equals("none");
5626   }
5627 
5628   if (Fb.hasFnAttribute("sign-return-address")) {
5629     StringRef ScopeB =
5630         Fb.getFnAttribute("sign-return-address").getValueAsString();
5631     return ScopeB.equals("none");
5632   }
5633 
5634   llvm_unreachable("Unkown combination of sign-return-address attributes");
5635 }
5636 
5637 static bool
5638 outliningCandidatesSigningKeyConsensus(const outliner::Candidate &a,
5639                                        const outliner::Candidate &b) {
5640   const Function &Fa = a.getMF()->getFunction();
5641   const Function &Fb = b.getMF()->getFunction();
5642 
5643   // If none of the functions have the "sign-return-address-key" attribute
5644   // their keys are equal
5645   if (!Fa.hasFnAttribute("sign-return-address-key") &&
5646       !Fb.hasFnAttribute("sign-return-address-key")) {
5647     return true;
5648   }
5649 
5650   // If both functions have the "sign-return-address-key" attribute their
5651   // keys are equal if the values of "sign-return-address-key" are equal
5652   if (Fa.hasFnAttribute("sign-return-address-key") &&
5653       Fb.hasFnAttribute("sign-return-address-key")) {
5654     StringRef KeyA =
5655         Fa.getFnAttribute("sign-return-address-key").getValueAsString();
5656     StringRef KeyB =
5657         Fb.getFnAttribute("sign-return-address-key").getValueAsString();
5658     return KeyA.equals(KeyB);
5659   }
5660 
5661   // If B doesn't have the "sign-return-address-key" attribute, both keys are
5662   // equal, if function a has the default key (a_key)
5663   if (Fa.hasFnAttribute("sign-return-address-key")) {
5664     StringRef KeyA =
5665         Fa.getFnAttribute("sign-return-address-key").getValueAsString();
5666     return KeyA.equals_lower("a_key");
5667   }
5668 
5669   if (Fb.hasFnAttribute("sign-return-address-key")) {
5670     StringRef KeyB =
5671         Fb.getFnAttribute("sign-return-address-key").getValueAsString();
5672     return KeyB.equals_lower("a_key");
5673   }
5674 
5675   llvm_unreachable("Unkown combination of sign-return-address-key attributes");
5676 }
5677 
5678 static bool outliningCandidatesV8_3OpsConsensus(const outliner::Candidate &a,
5679                                                 const outliner::Candidate &b) {
5680   const AArch64Subtarget &SubtargetA =
5681       a.getMF()->getSubtarget<AArch64Subtarget>();
5682   const AArch64Subtarget &SubtargetB =
5683       b.getMF()->getSubtarget<AArch64Subtarget>();
5684   return SubtargetA.hasV8_3aOps() == SubtargetB.hasV8_3aOps();
5685 }
5686 
5687 outliner::OutlinedFunction AArch64InstrInfo::getOutliningCandidateInfo(
5688     std::vector<outliner::Candidate> &RepeatedSequenceLocs) const {
5689   outliner::Candidate &FirstCand = RepeatedSequenceLocs[0];
5690   unsigned SequenceSize =
5691       std::accumulate(FirstCand.front(), std::next(FirstCand.back()), 0,
5692                       [this](unsigned Sum, const MachineInstr &MI) {
5693                         return Sum + getInstSizeInBytes(MI);
5694                       });
5695   unsigned NumBytesToCreateFrame = 0;
5696 
5697   // We only allow outlining for functions having exactly matching return
5698   // address signing attributes, i.e., all share the same value for the
5699   // attribute "sign-return-address" and all share the same type of key they
5700   // are signed with.
5701   // Additionally we require all functions to simultaniously either support
5702   // v8.3a features or not. Otherwise an outlined function could get signed
5703   // using dedicated v8.3 instructions and a call from a function that doesn't
5704   // support v8.3 instructions would therefore be invalid.
5705   if (std::adjacent_find(
5706           RepeatedSequenceLocs.begin(), RepeatedSequenceLocs.end(),
5707           [](const outliner::Candidate &a, const outliner::Candidate &b) {
5708             // Return true if a and b are non-equal w.r.t. return address
5709             // signing or support of v8.3a features
5710             if (outliningCandidatesSigningScopeConsensus(a, b) &&
5711                 outliningCandidatesSigningKeyConsensus(a, b) &&
5712                 outliningCandidatesV8_3OpsConsensus(a, b)) {
5713               return false;
5714             }
5715             return true;
5716           }) != RepeatedSequenceLocs.end()) {
5717     return outliner::OutlinedFunction();
5718   }
5719 
5720   // Since at this point all candidates agree on their return address signing
5721   // picking just one is fine. If the candidate functions potentially sign their
5722   // return addresses, the outlined function should do the same. Note that in
5723   // the case of "sign-return-address"="non-leaf" this is an assumption: It is
5724   // not certainly true that the outlined function will have to sign its return
5725   // address but this decision is made later, when the decision to outline
5726   // has already been made.
5727   // The same holds for the number of additional instructions we need: On
5728   // v8.3a RET can be replaced by RETAA/RETAB and no AUT instruction is
5729   // necessary. However, at this point we don't know if the outlined function
5730   // will have a RET instruction so we assume the worst.
5731   const Function &FCF = FirstCand.getMF()->getFunction();
5732   const TargetRegisterInfo &TRI = getRegisterInfo();
5733   if (FCF.hasFnAttribute("sign-return-address")) {
5734     // One PAC and one AUT instructions
5735     NumBytesToCreateFrame += 8;
5736 
5737     // We have to check if sp modifying instructions would get outlined.
5738     // If so we only allow outlining if sp is unchanged overall, so matching
5739     // sub and add instructions are okay to outline, all other sp modifications
5740     // are not
5741     auto hasIllegalSPModification = [&TRI](outliner::Candidate &C) {
5742       int SPValue = 0;
5743       MachineBasicBlock::iterator MBBI = C.front();
5744       for (;;) {
5745         if (MBBI->modifiesRegister(AArch64::SP, &TRI)) {
5746           switch (MBBI->getOpcode()) {
5747           case AArch64::ADDXri:
5748           case AArch64::ADDWri:
5749             assert(MBBI->getNumOperands() == 4 && "Wrong number of operands");
5750             assert(MBBI->getOperand(2).isImm() &&
5751                    "Expected operand to be immediate");
5752             assert(MBBI->getOperand(1).isReg() &&
5753                    "Expected operand to be a register");
5754             // Check if the add just increments sp. If so, we search for
5755             // matching sub instructions that decrement sp. If not, the
5756             // modification is illegal
5757             if (MBBI->getOperand(1).getReg() == AArch64::SP)
5758               SPValue += MBBI->getOperand(2).getImm();
5759             else
5760               return true;
5761             break;
5762           case AArch64::SUBXri:
5763           case AArch64::SUBWri:
5764             assert(MBBI->getNumOperands() == 4 && "Wrong number of operands");
5765             assert(MBBI->getOperand(2).isImm() &&
5766                    "Expected operand to be immediate");
5767             assert(MBBI->getOperand(1).isReg() &&
5768                    "Expected operand to be a register");
5769             // Check if the sub just decrements sp. If so, we search for
5770             // matching add instructions that increment sp. If not, the
5771             // modification is illegal
5772             if (MBBI->getOperand(1).getReg() == AArch64::SP)
5773               SPValue -= MBBI->getOperand(2).getImm();
5774             else
5775               return true;
5776             break;
5777           default:
5778             return true;
5779           }
5780         }
5781         if (MBBI == C.back())
5782           break;
5783         ++MBBI;
5784       }
5785       if (SPValue)
5786         return true;
5787       return false;
5788     };
5789     // Remove candidates with illegal stack modifying instructions
5790     RepeatedSequenceLocs.erase(std::remove_if(RepeatedSequenceLocs.begin(),
5791                                               RepeatedSequenceLocs.end(),
5792                                               hasIllegalSPModification),
5793                                RepeatedSequenceLocs.end());
5794 
5795     // If the sequence doesn't have enough candidates left, then we're done.
5796     if (RepeatedSequenceLocs.size() < 2)
5797       return outliner::OutlinedFunction();
5798   }
5799 
5800   // Properties about candidate MBBs that hold for all of them.
5801   unsigned FlagsSetInAll = 0xF;
5802 
5803   // Compute liveness information for each candidate, and set FlagsSetInAll.
5804   std::for_each(RepeatedSequenceLocs.begin(), RepeatedSequenceLocs.end(),
5805                 [&FlagsSetInAll](outliner::Candidate &C) {
5806                   FlagsSetInAll &= C.Flags;
5807                 });
5808 
5809   // According to the AArch64 Procedure Call Standard, the following are
5810   // undefined on entry/exit from a function call:
5811   //
5812   // * Registers x16, x17, (and thus w16, w17)
5813   // * Condition codes (and thus the NZCV register)
5814   //
5815   // Because if this, we can't outline any sequence of instructions where
5816   // one
5817   // of these registers is live into/across it. Thus, we need to delete
5818   // those
5819   // candidates.
5820   auto CantGuaranteeValueAcrossCall = [&TRI](outliner::Candidate &C) {
5821     // If the unsafe registers in this block are all dead, then we don't need
5822     // to compute liveness here.
5823     if (C.Flags & UnsafeRegsDead)
5824       return false;
5825     C.initLRU(TRI);
5826     LiveRegUnits LRU = C.LRU;
5827     return (!LRU.available(AArch64::W16) || !LRU.available(AArch64::W17) ||
5828             !LRU.available(AArch64::NZCV));
5829   };
5830 
5831   // Are there any candidates where those registers are live?
5832   if (!(FlagsSetInAll & UnsafeRegsDead)) {
5833     // Erase every candidate that violates the restrictions above. (It could be
5834     // true that we have viable candidates, so it's not worth bailing out in
5835     // the case that, say, 1 out of 20 candidates violate the restructions.)
5836     RepeatedSequenceLocs.erase(std::remove_if(RepeatedSequenceLocs.begin(),
5837                                               RepeatedSequenceLocs.end(),
5838                                               CantGuaranteeValueAcrossCall),
5839                                RepeatedSequenceLocs.end());
5840 
5841     // If the sequence doesn't have enough candidates left, then we're done.
5842     if (RepeatedSequenceLocs.size() < 2)
5843       return outliner::OutlinedFunction();
5844   }
5845 
5846   // At this point, we have only "safe" candidates to outline. Figure out
5847   // frame + call instruction information.
5848 
5849   unsigned LastInstrOpcode = RepeatedSequenceLocs[0].back()->getOpcode();
5850 
5851   // Helper lambda which sets call information for every candidate.
5852   auto SetCandidateCallInfo =
5853       [&RepeatedSequenceLocs](unsigned CallID, unsigned NumBytesForCall) {
5854         for (outliner::Candidate &C : RepeatedSequenceLocs)
5855           C.setCallInfo(CallID, NumBytesForCall);
5856       };
5857 
5858   unsigned FrameID = MachineOutlinerDefault;
5859   NumBytesToCreateFrame += 4;
5860 
5861   bool HasBTI = any_of(RepeatedSequenceLocs, [](outliner::Candidate &C) {
5862     return C.getMF()->getFunction().hasFnAttribute("branch-target-enforcement");
5863   });
5864 
5865   // Returns true if an instructions is safe to fix up, false otherwise.
5866   auto IsSafeToFixup = [this, &TRI](MachineInstr &MI) {
5867     if (MI.isCall())
5868       return true;
5869 
5870     if (!MI.modifiesRegister(AArch64::SP, &TRI) &&
5871         !MI.readsRegister(AArch64::SP, &TRI))
5872       return true;
5873 
5874     // Any modification of SP will break our code to save/restore LR.
5875     // FIXME: We could handle some instructions which add a constant
5876     // offset to SP, with a bit more work.
5877     if (MI.modifiesRegister(AArch64::SP, &TRI))
5878       return false;
5879 
5880     // At this point, we have a stack instruction that we might need to
5881     // fix up. We'll handle it if it's a load or store.
5882     if (MI.mayLoadOrStore()) {
5883       const MachineOperand *Base; // Filled with the base operand of MI.
5884       int64_t Offset;             // Filled with the offset of MI.
5885       bool OffsetIsScalable;
5886 
5887       // Does it allow us to offset the base operand and is the base the
5888       // register SP?
5889       if (!getMemOperandWithOffset(MI, Base, Offset, OffsetIsScalable, &TRI) ||
5890           !Base->isReg() || Base->getReg() != AArch64::SP)
5891         return false;
5892 
5893       // Fixe-up code below assumes bytes.
5894       if (OffsetIsScalable)
5895         return false;
5896 
5897       // Find the minimum/maximum offset for this instruction and check
5898       // if fixing it up would be in range.
5899       int64_t MinOffset,
5900           MaxOffset;  // Unscaled offsets for the instruction.
5901       TypeSize Scale(0U, false); // The scale to multiply the offsets by.
5902       unsigned DummyWidth;
5903       getMemOpInfo(MI.getOpcode(), Scale, DummyWidth, MinOffset, MaxOffset);
5904 
5905       Offset += 16; // Update the offset to what it would be if we outlined.
5906       if (Offset < MinOffset * (int64_t)Scale.getFixedSize() ||
5907           Offset > MaxOffset * (int64_t)Scale.getFixedSize())
5908         return false;
5909 
5910       // It's in range, so we can outline it.
5911       return true;
5912     }
5913 
5914     // FIXME: Add handling for instructions like "add x0, sp, #8".
5915 
5916     // We can't fix it up, so don't outline it.
5917     return false;
5918   };
5919 
5920   // True if it's possible to fix up each stack instruction in this sequence.
5921   // Important for frames/call variants that modify the stack.
5922   bool AllStackInstrsSafe = std::all_of(
5923       FirstCand.front(), std::next(FirstCand.back()), IsSafeToFixup);
5924 
5925   // If the last instruction in any candidate is a terminator, then we should
5926   // tail call all of the candidates.
5927   if (RepeatedSequenceLocs[0].back()->isTerminator()) {
5928     FrameID = MachineOutlinerTailCall;
5929     NumBytesToCreateFrame = 0;
5930     SetCandidateCallInfo(MachineOutlinerTailCall, 4);
5931   }
5932 
5933   else if (LastInstrOpcode == AArch64::BL ||
5934            (LastInstrOpcode == AArch64::BLR && !HasBTI)) {
5935     // FIXME: Do we need to check if the code after this uses the value of LR?
5936     FrameID = MachineOutlinerThunk;
5937     NumBytesToCreateFrame = 0;
5938     SetCandidateCallInfo(MachineOutlinerThunk, 4);
5939   }
5940 
5941   else {
5942     // We need to decide how to emit calls + frames. We can always emit the same
5943     // frame if we don't need to save to the stack. If we have to save to the
5944     // stack, then we need a different frame.
5945     unsigned NumBytesNoStackCalls = 0;
5946     std::vector<outliner::Candidate> CandidatesWithoutStackFixups;
5947 
5948     // Check if we have to save LR.
5949     for (outliner::Candidate &C : RepeatedSequenceLocs) {
5950       C.initLRU(TRI);
5951 
5952       // If we have a noreturn caller, then we're going to be conservative and
5953       // say that we have to save LR. If we don't have a ret at the end of the
5954       // block, then we can't reason about liveness accurately.
5955       //
5956       // FIXME: We can probably do better than always disabling this in
5957       // noreturn functions by fixing up the liveness info.
5958       bool IsNoReturn =
5959           C.getMF()->getFunction().hasFnAttribute(Attribute::NoReturn);
5960 
5961       // Is LR available? If so, we don't need a save.
5962       if (C.LRU.available(AArch64::LR) && !IsNoReturn) {
5963         NumBytesNoStackCalls += 4;
5964         C.setCallInfo(MachineOutlinerNoLRSave, 4);
5965         CandidatesWithoutStackFixups.push_back(C);
5966       }
5967 
5968       // Is an unused register available? If so, we won't modify the stack, so
5969       // we can outline with the same frame type as those that don't save LR.
5970       else if (findRegisterToSaveLRTo(C)) {
5971         NumBytesNoStackCalls += 12;
5972         C.setCallInfo(MachineOutlinerRegSave, 12);
5973         CandidatesWithoutStackFixups.push_back(C);
5974       }
5975 
5976       // Is SP used in the sequence at all? If not, we don't have to modify
5977       // the stack, so we are guaranteed to get the same frame.
5978       else if (C.UsedInSequence.available(AArch64::SP)) {
5979         NumBytesNoStackCalls += 12;
5980         C.setCallInfo(MachineOutlinerDefault, 12);
5981         CandidatesWithoutStackFixups.push_back(C);
5982       }
5983 
5984       // If we outline this, we need to modify the stack. Pretend we don't
5985       // outline this by saving all of its bytes.
5986       else {
5987         NumBytesNoStackCalls += SequenceSize;
5988       }
5989     }
5990 
5991     // If there are no places where we have to save LR, then note that we
5992     // don't have to update the stack. Otherwise, give every candidate the
5993     // default call type, as long as it's safe to do so.
5994     if (!AllStackInstrsSafe ||
5995         NumBytesNoStackCalls <= RepeatedSequenceLocs.size() * 12) {
5996       RepeatedSequenceLocs = CandidatesWithoutStackFixups;
5997       FrameID = MachineOutlinerNoLRSave;
5998     } else {
5999       SetCandidateCallInfo(MachineOutlinerDefault, 12);
6000     }
6001 
6002     // If we dropped all of the candidates, bail out here.
6003     if (RepeatedSequenceLocs.size() < 2) {
6004       RepeatedSequenceLocs.clear();
6005       return outliner::OutlinedFunction();
6006     }
6007   }
6008 
6009   // Does every candidate's MBB contain a call? If so, then we might have a call
6010   // in the range.
6011   if (FlagsSetInAll & MachineOutlinerMBBFlags::HasCalls) {
6012     // Check if the range contains a call. These require a save + restore of the
6013     // link register.
6014     bool ModStackToSaveLR = false;
6015     if (std::any_of(FirstCand.front(), FirstCand.back(),
6016                     [](const MachineInstr &MI) { return MI.isCall(); }))
6017       ModStackToSaveLR = true;
6018 
6019     // Handle the last instruction separately. If this is a tail call, then the
6020     // last instruction is a call. We don't want to save + restore in this case.
6021     // However, it could be possible that the last instruction is a call without
6022     // it being valid to tail call this sequence. We should consider this as
6023     // well.
6024     else if (FrameID != MachineOutlinerThunk &&
6025              FrameID != MachineOutlinerTailCall && FirstCand.back()->isCall())
6026       ModStackToSaveLR = true;
6027 
6028     if (ModStackToSaveLR) {
6029       // We can't fix up the stack. Bail out.
6030       if (!AllStackInstrsSafe) {
6031         RepeatedSequenceLocs.clear();
6032         return outliner::OutlinedFunction();
6033       }
6034 
6035       // Save + restore LR.
6036       NumBytesToCreateFrame += 8;
6037     }
6038   }
6039 
6040   return outliner::OutlinedFunction(RepeatedSequenceLocs, SequenceSize,
6041                                     NumBytesToCreateFrame, FrameID);
6042 }
6043 
6044 bool AArch64InstrInfo::isFunctionSafeToOutlineFrom(
6045     MachineFunction &MF, bool OutlineFromLinkOnceODRs) const {
6046   const Function &F = MF.getFunction();
6047 
6048   // Can F be deduplicated by the linker? If it can, don't outline from it.
6049   if (!OutlineFromLinkOnceODRs && F.hasLinkOnceODRLinkage())
6050     return false;
6051 
6052   // Don't outline from functions with section markings; the program could
6053   // expect that all the code is in the named section.
6054   // FIXME: Allow outlining from multiple functions with the same section
6055   // marking.
6056   if (F.hasSection())
6057     return false;
6058 
6059   // Outlining from functions with redzones is unsafe since the outliner may
6060   // modify the stack. Check if hasRedZone is true or unknown; if yes, don't
6061   // outline from it.
6062   AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
6063   if (!AFI || AFI->hasRedZone().getValueOr(true))
6064     return false;
6065 
6066   // It's safe to outline from MF.
6067   return true;
6068 }
6069 
6070 bool AArch64InstrInfo::isMBBSafeToOutlineFrom(MachineBasicBlock &MBB,
6071                                               unsigned &Flags) const {
6072   // Check if LR is available through all of the MBB. If it's not, then set
6073   // a flag.
6074   assert(MBB.getParent()->getRegInfo().tracksLiveness() &&
6075          "Suitable Machine Function for outlining must track liveness");
6076   LiveRegUnits LRU(getRegisterInfo());
6077 
6078   std::for_each(MBB.rbegin(), MBB.rend(),
6079                 [&LRU](MachineInstr &MI) { LRU.accumulate(MI); });
6080 
6081   // Check if each of the unsafe registers are available...
6082   bool W16AvailableInBlock = LRU.available(AArch64::W16);
6083   bool W17AvailableInBlock = LRU.available(AArch64::W17);
6084   bool NZCVAvailableInBlock = LRU.available(AArch64::NZCV);
6085 
6086   // If all of these are dead (and not live out), we know we don't have to check
6087   // them later.
6088   if (W16AvailableInBlock && W17AvailableInBlock && NZCVAvailableInBlock)
6089     Flags |= MachineOutlinerMBBFlags::UnsafeRegsDead;
6090 
6091   // Now, add the live outs to the set.
6092   LRU.addLiveOuts(MBB);
6093 
6094   // If any of these registers is available in the MBB, but also a live out of
6095   // the block, then we know outlining is unsafe.
6096   if (W16AvailableInBlock && !LRU.available(AArch64::W16))
6097     return false;
6098   if (W17AvailableInBlock && !LRU.available(AArch64::W17))
6099     return false;
6100   if (NZCVAvailableInBlock && !LRU.available(AArch64::NZCV))
6101     return false;
6102 
6103   // Check if there's a call inside this MachineBasicBlock. If there is, then
6104   // set a flag.
6105   if (any_of(MBB, [](MachineInstr &MI) { return MI.isCall(); }))
6106     Flags |= MachineOutlinerMBBFlags::HasCalls;
6107 
6108   MachineFunction *MF = MBB.getParent();
6109 
6110   // In the event that we outline, we may have to save LR. If there is an
6111   // available register in the MBB, then we'll always save LR there. Check if
6112   // this is true.
6113   bool CanSaveLR = false;
6114   const AArch64RegisterInfo *ARI = static_cast<const AArch64RegisterInfo *>(
6115       MF->getSubtarget().getRegisterInfo());
6116 
6117   // Check if there is an available register across the sequence that we can
6118   // use.
6119   for (unsigned Reg : AArch64::GPR64RegClass) {
6120     if (!ARI->isReservedReg(*MF, Reg) && Reg != AArch64::LR &&
6121         Reg != AArch64::X16 && Reg != AArch64::X17 && LRU.available(Reg)) {
6122       CanSaveLR = true;
6123       break;
6124     }
6125   }
6126 
6127   // Check if we have a register we can save LR to, and if LR was used
6128   // somewhere. If both of those things are true, then we need to evaluate the
6129   // safety of outlining stack instructions later.
6130   if (!CanSaveLR && !LRU.available(AArch64::LR))
6131     Flags |= MachineOutlinerMBBFlags::LRUnavailableSomewhere;
6132 
6133   return true;
6134 }
6135 
6136 outliner::InstrType
6137 AArch64InstrInfo::getOutliningType(MachineBasicBlock::iterator &MIT,
6138                                    unsigned Flags) const {
6139   MachineInstr &MI = *MIT;
6140   MachineBasicBlock *MBB = MI.getParent();
6141   MachineFunction *MF = MBB->getParent();
6142   AArch64FunctionInfo *FuncInfo = MF->getInfo<AArch64FunctionInfo>();
6143 
6144   // Don't outline anything used for return address signing. The outlined
6145   // function will get signed later if needed
6146   switch (MI.getOpcode()) {
6147   case AArch64::PACIASP:
6148   case AArch64::PACIBSP:
6149   case AArch64::AUTIASP:
6150   case AArch64::AUTIBSP:
6151   case AArch64::RETAA:
6152   case AArch64::RETAB:
6153   case AArch64::EMITBKEY:
6154     return outliner::InstrType::Illegal;
6155   }
6156 
6157   // Don't outline LOHs.
6158   if (FuncInfo->getLOHRelated().count(&MI))
6159     return outliner::InstrType::Illegal;
6160 
6161   // We can only outline these if we will tail call the outlined function, or
6162   // fix up the CFI offsets. For the sake of safety, don't outline CFI
6163   // instructions.
6164   //
6165   // FIXME: If the proper fixups are implemented, this should be possible.
6166   if (MI.isCFIInstruction())
6167     return outliner::InstrType::Illegal;
6168 
6169   // Don't allow debug values to impact outlining type.
6170   if (MI.isDebugInstr() || MI.isIndirectDebugValue())
6171     return outliner::InstrType::Invisible;
6172 
6173   // At this point, KILL instructions don't really tell us much so we can go
6174   // ahead and skip over them.
6175   if (MI.isKill())
6176     return outliner::InstrType::Invisible;
6177 
6178   // Is this a terminator for a basic block?
6179   if (MI.isTerminator()) {
6180 
6181     // Is this the end of a function?
6182     if (MI.getParent()->succ_empty())
6183       return outliner::InstrType::Legal;
6184 
6185     // It's not, so don't outline it.
6186     return outliner::InstrType::Illegal;
6187   }
6188 
6189   // Make sure none of the operands are un-outlinable.
6190   for (const MachineOperand &MOP : MI.operands()) {
6191     if (MOP.isCPI() || MOP.isJTI() || MOP.isCFIIndex() || MOP.isFI() ||
6192         MOP.isTargetIndex())
6193       return outliner::InstrType::Illegal;
6194 
6195     // If it uses LR or W30 explicitly, then don't touch it.
6196     if (MOP.isReg() && !MOP.isImplicit() &&
6197         (MOP.getReg() == AArch64::LR || MOP.getReg() == AArch64::W30))
6198       return outliner::InstrType::Illegal;
6199   }
6200 
6201   // Special cases for instructions that can always be outlined, but will fail
6202   // the later tests. e.g, ADRPs, which are PC-relative use LR, but can always
6203   // be outlined because they don't require a *specific* value to be in LR.
6204   if (MI.getOpcode() == AArch64::ADRP)
6205     return outliner::InstrType::Legal;
6206 
6207   // If MI is a call we might be able to outline it. We don't want to outline
6208   // any calls that rely on the position of items on the stack. When we outline
6209   // something containing a call, we have to emit a save and restore of LR in
6210   // the outlined function. Currently, this always happens by saving LR to the
6211   // stack. Thus, if we outline, say, half the parameters for a function call
6212   // plus the call, then we'll break the callee's expectations for the layout
6213   // of the stack.
6214   //
6215   // FIXME: Allow calls to functions which construct a stack frame, as long
6216   // as they don't access arguments on the stack.
6217   // FIXME: Figure out some way to analyze functions defined in other modules.
6218   // We should be able to compute the memory usage based on the IR calling
6219   // convention, even if we can't see the definition.
6220   if (MI.isCall()) {
6221     // Get the function associated with the call. Look at each operand and find
6222     // the one that represents the callee and get its name.
6223     const Function *Callee = nullptr;
6224     for (const MachineOperand &MOP : MI.operands()) {
6225       if (MOP.isGlobal()) {
6226         Callee = dyn_cast<Function>(MOP.getGlobal());
6227         break;
6228       }
6229     }
6230 
6231     // Never outline calls to mcount.  There isn't any rule that would require
6232     // this, but the Linux kernel's "ftrace" feature depends on it.
6233     if (Callee && Callee->getName() == "\01_mcount")
6234       return outliner::InstrType::Illegal;
6235 
6236     // If we don't know anything about the callee, assume it depends on the
6237     // stack layout of the caller. In that case, it's only legal to outline
6238     // as a tail-call.  Whitelist the call instructions we know about so we
6239     // don't get unexpected results with call pseudo-instructions.
6240     auto UnknownCallOutlineType = outliner::InstrType::Illegal;
6241     if (MI.getOpcode() == AArch64::BLR || MI.getOpcode() == AArch64::BL)
6242       UnknownCallOutlineType = outliner::InstrType::LegalTerminator;
6243 
6244     if (!Callee)
6245       return UnknownCallOutlineType;
6246 
6247     // We have a function we have information about. Check it if it's something
6248     // can safely outline.
6249     MachineFunction *CalleeMF = MF->getMMI().getMachineFunction(*Callee);
6250 
6251     // We don't know what's going on with the callee at all. Don't touch it.
6252     if (!CalleeMF)
6253       return UnknownCallOutlineType;
6254 
6255     // Check if we know anything about the callee saves on the function. If we
6256     // don't, then don't touch it, since that implies that we haven't
6257     // computed anything about its stack frame yet.
6258     MachineFrameInfo &MFI = CalleeMF->getFrameInfo();
6259     if (!MFI.isCalleeSavedInfoValid() || MFI.getStackSize() > 0 ||
6260         MFI.getNumObjects() > 0)
6261       return UnknownCallOutlineType;
6262 
6263     // At this point, we can say that CalleeMF ought to not pass anything on the
6264     // stack. Therefore, we can outline it.
6265     return outliner::InstrType::Legal;
6266   }
6267 
6268   // Don't outline positions.
6269   if (MI.isPosition())
6270     return outliner::InstrType::Illegal;
6271 
6272   // Don't touch the link register or W30.
6273   if (MI.readsRegister(AArch64::W30, &getRegisterInfo()) ||
6274       MI.modifiesRegister(AArch64::W30, &getRegisterInfo()))
6275     return outliner::InstrType::Illegal;
6276 
6277   // Don't outline BTI instructions, because that will prevent the outlining
6278   // site from being indirectly callable.
6279   if (MI.getOpcode() == AArch64::HINT) {
6280     int64_t Imm = MI.getOperand(0).getImm();
6281     if (Imm == 32 || Imm == 34 || Imm == 36 || Imm == 38)
6282       return outliner::InstrType::Illegal;
6283   }
6284 
6285   return outliner::InstrType::Legal;
6286 }
6287 
6288 void AArch64InstrInfo::fixupPostOutline(MachineBasicBlock &MBB) const {
6289   for (MachineInstr &MI : MBB) {
6290     const MachineOperand *Base;
6291     unsigned Width;
6292     int64_t Offset;
6293     bool OffsetIsScalable;
6294 
6295     // Is this a load or store with an immediate offset with SP as the base?
6296     if (!MI.mayLoadOrStore() ||
6297         !getMemOperandWithOffsetWidth(MI, Base, Offset, OffsetIsScalable, Width,
6298                                       &RI) ||
6299         (Base->isReg() && Base->getReg() != AArch64::SP))
6300       continue;
6301 
6302     // It is, so we have to fix it up.
6303     TypeSize Scale(0U, false);
6304     int64_t Dummy1, Dummy2;
6305 
6306     MachineOperand &StackOffsetOperand = getMemOpBaseRegImmOfsOffsetOperand(MI);
6307     assert(StackOffsetOperand.isImm() && "Stack offset wasn't immediate!");
6308     getMemOpInfo(MI.getOpcode(), Scale, Width, Dummy1, Dummy2);
6309     assert(Scale != 0 && "Unexpected opcode!");
6310     assert(!OffsetIsScalable && "Expected offset to be a byte offset");
6311 
6312     // We've pushed the return address to the stack, so add 16 to the offset.
6313     // This is safe, since we already checked if it would overflow when we
6314     // checked if this instruction was legal to outline.
6315     int64_t NewImm = (Offset + 16) / (int64_t)Scale.getFixedSize();
6316     StackOffsetOperand.setImm(NewImm);
6317   }
6318 }
6319 
6320 static void signOutlinedFunction(MachineFunction &MF, MachineBasicBlock &MBB,
6321                                  bool ShouldSignReturnAddr,
6322                                  bool ShouldSignReturnAddrWithAKey) {
6323   if (ShouldSignReturnAddr) {
6324     MachineBasicBlock::iterator MBBPAC = MBB.begin();
6325     MachineBasicBlock::iterator MBBAUT = MBB.getFirstTerminator();
6326     const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
6327     const TargetInstrInfo *TII = Subtarget.getInstrInfo();
6328     DebugLoc DL;
6329 
6330     if (MBBAUT != MBB.end())
6331       DL = MBBAUT->getDebugLoc();
6332 
6333     // At the very beginning of the basic block we insert the following
6334     // depending on the key type
6335     //
6336     // a_key:                   b_key:
6337     //    PACIASP                   EMITBKEY
6338     //    CFI_INSTRUCTION           PACIBSP
6339     //                              CFI_INSTRUCTION
6340     if (ShouldSignReturnAddrWithAKey) {
6341       BuildMI(MBB, MBBPAC, DebugLoc(), TII->get(AArch64::PACIASP))
6342           .setMIFlag(MachineInstr::FrameSetup);
6343     } else {
6344       BuildMI(MBB, MBBPAC, DebugLoc(), TII->get(AArch64::EMITBKEY))
6345           .setMIFlag(MachineInstr::FrameSetup);
6346       BuildMI(MBB, MBBPAC, DebugLoc(), TII->get(AArch64::PACIBSP))
6347           .setMIFlag(MachineInstr::FrameSetup);
6348     }
6349     unsigned CFIIndex =
6350         MF.addFrameInst(MCCFIInstruction::createNegateRAState(nullptr));
6351     BuildMI(MBB, MBBPAC, DebugLoc(), TII->get(AArch64::CFI_INSTRUCTION))
6352         .addCFIIndex(CFIIndex)
6353         .setMIFlags(MachineInstr::FrameSetup);
6354 
6355     // If v8.3a features are available we can replace a RET instruction by
6356     // RETAA or RETAB and omit the AUT instructions
6357     if (Subtarget.hasV8_3aOps() && MBBAUT != MBB.end() &&
6358         MBBAUT->getOpcode() == AArch64::RET) {
6359       BuildMI(MBB, MBBAUT, DL,
6360               TII->get(ShouldSignReturnAddrWithAKey ? AArch64::RETAA
6361                                                     : AArch64::RETAB))
6362           .copyImplicitOps(*MBBAUT);
6363       MBB.erase(MBBAUT);
6364     } else {
6365       BuildMI(MBB, MBBAUT, DL,
6366               TII->get(ShouldSignReturnAddrWithAKey ? AArch64::AUTIASP
6367                                                     : AArch64::AUTIBSP))
6368           .setMIFlag(MachineInstr::FrameDestroy);
6369     }
6370   }
6371 }
6372 
6373 void AArch64InstrInfo::buildOutlinedFrame(
6374     MachineBasicBlock &MBB, MachineFunction &MF,
6375     const outliner::OutlinedFunction &OF) const {
6376   // For thunk outlining, rewrite the last instruction from a call to a
6377   // tail-call.
6378   if (OF.FrameConstructionID == MachineOutlinerThunk) {
6379     MachineInstr *Call = &*--MBB.instr_end();
6380     unsigned TailOpcode;
6381     if (Call->getOpcode() == AArch64::BL) {
6382       TailOpcode = AArch64::TCRETURNdi;
6383     } else {
6384       assert(Call->getOpcode() == AArch64::BLR);
6385       TailOpcode = AArch64::TCRETURNriALL;
6386     }
6387     MachineInstr *TC = BuildMI(MF, DebugLoc(), get(TailOpcode))
6388                            .add(Call->getOperand(0))
6389                            .addImm(0);
6390     MBB.insert(MBB.end(), TC);
6391     Call->eraseFromParent();
6392   }
6393 
6394   bool IsLeafFunction = true;
6395 
6396   // Is there a call in the outlined range?
6397   auto IsNonTailCall = [](const MachineInstr &MI) {
6398     return MI.isCall() && !MI.isReturn();
6399   };
6400 
6401   if (std::any_of(MBB.instr_begin(), MBB.instr_end(), IsNonTailCall)) {
6402     // Fix up the instructions in the range, since we're going to modify the
6403     // stack.
6404     assert(OF.FrameConstructionID != MachineOutlinerDefault &&
6405            "Can only fix up stack references once");
6406     fixupPostOutline(MBB);
6407 
6408     IsLeafFunction = false;
6409 
6410     // LR has to be a live in so that we can save it.
6411     MBB.addLiveIn(AArch64::LR);
6412 
6413     MachineBasicBlock::iterator It = MBB.begin();
6414     MachineBasicBlock::iterator Et = MBB.end();
6415 
6416     if (OF.FrameConstructionID == MachineOutlinerTailCall ||
6417         OF.FrameConstructionID == MachineOutlinerThunk)
6418       Et = std::prev(MBB.end());
6419 
6420     // Insert a save before the outlined region
6421     MachineInstr *STRXpre = BuildMI(MF, DebugLoc(), get(AArch64::STRXpre))
6422                                 .addReg(AArch64::SP, RegState::Define)
6423                                 .addReg(AArch64::LR)
6424                                 .addReg(AArch64::SP)
6425                                 .addImm(-16);
6426     It = MBB.insert(It, STRXpre);
6427 
6428     const TargetSubtargetInfo &STI = MF.getSubtarget();
6429     const MCRegisterInfo *MRI = STI.getRegisterInfo();
6430     unsigned DwarfReg = MRI->getDwarfRegNum(AArch64::LR, true);
6431 
6432     // Add a CFI saying the stack was moved 16 B down.
6433     int64_t StackPosEntry =
6434         MF.addFrameInst(MCCFIInstruction::createDefCfaOffset(nullptr, 16));
6435     BuildMI(MBB, It, DebugLoc(), get(AArch64::CFI_INSTRUCTION))
6436         .addCFIIndex(StackPosEntry)
6437         .setMIFlags(MachineInstr::FrameSetup);
6438 
6439     // Add a CFI saying that the LR that we want to find is now 16 B higher than
6440     // before.
6441     int64_t LRPosEntry =
6442         MF.addFrameInst(MCCFIInstruction::createOffset(nullptr, DwarfReg, 16));
6443     BuildMI(MBB, It, DebugLoc(), get(AArch64::CFI_INSTRUCTION))
6444         .addCFIIndex(LRPosEntry)
6445         .setMIFlags(MachineInstr::FrameSetup);
6446 
6447     // Insert a restore before the terminator for the function.
6448     MachineInstr *LDRXpost = BuildMI(MF, DebugLoc(), get(AArch64::LDRXpost))
6449                                  .addReg(AArch64::SP, RegState::Define)
6450                                  .addReg(AArch64::LR, RegState::Define)
6451                                  .addReg(AArch64::SP)
6452                                  .addImm(16);
6453     Et = MBB.insert(Et, LDRXpost);
6454   }
6455 
6456   // If a bunch of candidates reach this point they must agree on their return
6457   // address signing. It is therefore enough to just consider the signing
6458   // behaviour of one of them
6459   const Function &CF = OF.Candidates.front().getMF()->getFunction();
6460   bool ShouldSignReturnAddr = false;
6461   if (CF.hasFnAttribute("sign-return-address")) {
6462     StringRef Scope =
6463         CF.getFnAttribute("sign-return-address").getValueAsString();
6464     if (Scope.equals("all"))
6465       ShouldSignReturnAddr = true;
6466     else if (Scope.equals("non-leaf") && !IsLeafFunction)
6467       ShouldSignReturnAddr = true;
6468   }
6469 
6470   // a_key is the default
6471   bool ShouldSignReturnAddrWithAKey = true;
6472   if (CF.hasFnAttribute("sign-return-address-key")) {
6473     const StringRef Key =
6474         CF.getFnAttribute("sign-return-address-key").getValueAsString();
6475     // Key can either be a_key or b_key
6476     assert((Key.equals_lower("a_key") || Key.equals_lower("b_key")) &&
6477            "Return address signing key must be either a_key or b_key");
6478     ShouldSignReturnAddrWithAKey = Key.equals_lower("a_key");
6479   }
6480 
6481   // If this is a tail call outlined function, then there's already a return.
6482   if (OF.FrameConstructionID == MachineOutlinerTailCall ||
6483       OF.FrameConstructionID == MachineOutlinerThunk) {
6484     signOutlinedFunction(MF, MBB, ShouldSignReturnAddr,
6485                          ShouldSignReturnAddrWithAKey);
6486     return;
6487   }
6488 
6489   // It's not a tail call, so we have to insert the return ourselves.
6490   MachineInstr *ret = BuildMI(MF, DebugLoc(), get(AArch64::RET))
6491                           .addReg(AArch64::LR, RegState::Undef);
6492   MBB.insert(MBB.end(), ret);
6493 
6494   signOutlinedFunction(MF, MBB, ShouldSignReturnAddr,
6495                        ShouldSignReturnAddrWithAKey);
6496 
6497   // Did we have to modify the stack by saving the link register?
6498   if (OF.FrameConstructionID != MachineOutlinerDefault)
6499     return;
6500 
6501   // We modified the stack.
6502   // Walk over the basic block and fix up all the stack accesses.
6503   fixupPostOutline(MBB);
6504 }
6505 
6506 MachineBasicBlock::iterator AArch64InstrInfo::insertOutlinedCall(
6507     Module &M, MachineBasicBlock &MBB, MachineBasicBlock::iterator &It,
6508     MachineFunction &MF, const outliner::Candidate &C) const {
6509 
6510   // Are we tail calling?
6511   if (C.CallConstructionID == MachineOutlinerTailCall) {
6512     // If yes, then we can just branch to the label.
6513     It = MBB.insert(It, BuildMI(MF, DebugLoc(), get(AArch64::TCRETURNdi))
6514                             .addGlobalAddress(M.getNamedValue(MF.getName()))
6515                             .addImm(0));
6516     return It;
6517   }
6518 
6519   // Are we saving the link register?
6520   if (C.CallConstructionID == MachineOutlinerNoLRSave ||
6521       C.CallConstructionID == MachineOutlinerThunk) {
6522     // No, so just insert the call.
6523     It = MBB.insert(It, BuildMI(MF, DebugLoc(), get(AArch64::BL))
6524                             .addGlobalAddress(M.getNamedValue(MF.getName())));
6525     return It;
6526   }
6527 
6528   // We want to return the spot where we inserted the call.
6529   MachineBasicBlock::iterator CallPt;
6530 
6531   // Instructions for saving and restoring LR around the call instruction we're
6532   // going to insert.
6533   MachineInstr *Save;
6534   MachineInstr *Restore;
6535   // Can we save to a register?
6536   if (C.CallConstructionID == MachineOutlinerRegSave) {
6537     // FIXME: This logic should be sunk into a target-specific interface so that
6538     // we don't have to recompute the register.
6539     unsigned Reg = findRegisterToSaveLRTo(C);
6540     assert(Reg != 0 && "No callee-saved register available?");
6541 
6542     // Save and restore LR from that register.
6543     Save = BuildMI(MF, DebugLoc(), get(AArch64::ORRXrs), Reg)
6544                .addReg(AArch64::XZR)
6545                .addReg(AArch64::LR)
6546                .addImm(0);
6547     Restore = BuildMI(MF, DebugLoc(), get(AArch64::ORRXrs), AArch64::LR)
6548                 .addReg(AArch64::XZR)
6549                 .addReg(Reg)
6550                 .addImm(0);
6551   } else {
6552     // We have the default case. Save and restore from SP.
6553     Save = BuildMI(MF, DebugLoc(), get(AArch64::STRXpre))
6554                .addReg(AArch64::SP, RegState::Define)
6555                .addReg(AArch64::LR)
6556                .addReg(AArch64::SP)
6557                .addImm(-16);
6558     Restore = BuildMI(MF, DebugLoc(), get(AArch64::LDRXpost))
6559                   .addReg(AArch64::SP, RegState::Define)
6560                   .addReg(AArch64::LR, RegState::Define)
6561                   .addReg(AArch64::SP)
6562                   .addImm(16);
6563   }
6564 
6565   It = MBB.insert(It, Save);
6566   It++;
6567 
6568   // Insert the call.
6569   It = MBB.insert(It, BuildMI(MF, DebugLoc(), get(AArch64::BL))
6570                           .addGlobalAddress(M.getNamedValue(MF.getName())));
6571   CallPt = It;
6572   It++;
6573 
6574   It = MBB.insert(It, Restore);
6575   return CallPt;
6576 }
6577 
6578 bool AArch64InstrInfo::shouldOutlineFromFunctionByDefault(
6579   MachineFunction &MF) const {
6580   return MF.getFunction().hasMinSize();
6581 }
6582 
6583 Optional<DestSourcePair>
6584 AArch64InstrInfo::isCopyInstrImpl(const MachineInstr &MI) const {
6585 
6586   // AArch64::ORRWrs and AArch64::ORRXrs with WZR/XZR reg
6587   // and zero immediate operands used as an alias for mov instruction.
6588   if (MI.getOpcode() == AArch64::ORRWrs &&
6589       MI.getOperand(1).getReg() == AArch64::WZR &&
6590       MI.getOperand(3).getImm() == 0x0) {
6591     return DestSourcePair{MI.getOperand(0), MI.getOperand(2)};
6592   }
6593 
6594   if (MI.getOpcode() == AArch64::ORRXrs &&
6595       MI.getOperand(1).getReg() == AArch64::XZR &&
6596       MI.getOperand(3).getImm() == 0x0) {
6597     return DestSourcePair{MI.getOperand(0), MI.getOperand(2)};
6598   }
6599 
6600   return None;
6601 }
6602 
6603 Optional<RegImmPair> AArch64InstrInfo::isAddImmediate(const MachineInstr &MI,
6604                                                       Register Reg) const {
6605   int Sign = 1;
6606   int64_t Offset = 0;
6607 
6608   // TODO: Handle cases where Reg is a super- or sub-register of the
6609   // destination register.
6610   const MachineOperand &Op0 = MI.getOperand(0);
6611   if (!Op0.isReg() || Reg != Op0.getReg())
6612     return None;
6613 
6614   switch (MI.getOpcode()) {
6615   default:
6616     return None;
6617   case AArch64::SUBWri:
6618   case AArch64::SUBXri:
6619   case AArch64::SUBSWri:
6620   case AArch64::SUBSXri:
6621     Sign *= -1;
6622     LLVM_FALLTHROUGH;
6623   case AArch64::ADDSWri:
6624   case AArch64::ADDSXri:
6625   case AArch64::ADDWri:
6626   case AArch64::ADDXri: {
6627     // TODO: Third operand can be global address (usually some string).
6628     if (!MI.getOperand(0).isReg() || !MI.getOperand(1).isReg() ||
6629         !MI.getOperand(2).isImm())
6630       return None;
6631     Offset = MI.getOperand(2).getImm() * Sign;
6632     int Shift = MI.getOperand(3).getImm();
6633     assert((Shift == 0 || Shift == 12) && "Shift can be either 0 or 12");
6634     Offset = Offset << Shift;
6635   }
6636   }
6637   return RegImmPair{MI.getOperand(1).getReg(), Offset};
6638 }
6639 
6640 /// If the given ORR instruction is a copy, and \p DescribedReg overlaps with
6641 /// the destination register then, if possible, describe the value in terms of
6642 /// the source register.
6643 static Optional<ParamLoadedValue>
6644 describeORRLoadedValue(const MachineInstr &MI, Register DescribedReg,
6645                        const TargetInstrInfo *TII,
6646                        const TargetRegisterInfo *TRI) {
6647   auto DestSrc = TII->isCopyInstr(MI);
6648   if (!DestSrc)
6649     return None;
6650 
6651   Register DestReg = DestSrc->Destination->getReg();
6652   Register SrcReg = DestSrc->Source->getReg();
6653 
6654   auto Expr = DIExpression::get(MI.getMF()->getFunction().getContext(), {});
6655 
6656   // If the described register is the destination, just return the source.
6657   if (DestReg == DescribedReg)
6658     return ParamLoadedValue(MachineOperand::CreateReg(SrcReg, false), Expr);
6659 
6660   // ORRWrs zero-extends to 64-bits, so we need to consider such cases.
6661   if (MI.getOpcode() == AArch64::ORRWrs &&
6662       TRI->isSuperRegister(DestReg, DescribedReg))
6663     return ParamLoadedValue(MachineOperand::CreateReg(SrcReg, false), Expr);
6664 
6665   // We may need to describe the lower part of a ORRXrs move.
6666   if (MI.getOpcode() == AArch64::ORRXrs &&
6667       TRI->isSubRegister(DestReg, DescribedReg)) {
6668     Register SrcSubReg = TRI->getSubReg(SrcReg, AArch64::sub_32);
6669     return ParamLoadedValue(MachineOperand::CreateReg(SrcSubReg, false), Expr);
6670   }
6671 
6672   assert(!TRI->isSuperOrSubRegisterEq(DestReg, DescribedReg) &&
6673          "Unhandled ORR[XW]rs copy case");
6674 
6675   return None;
6676 }
6677 
6678 Optional<ParamLoadedValue>
6679 AArch64InstrInfo::describeLoadedValue(const MachineInstr &MI,
6680                                       Register Reg) const {
6681   const MachineFunction *MF = MI.getMF();
6682   const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();
6683   switch (MI.getOpcode()) {
6684   case AArch64::MOVZWi:
6685   case AArch64::MOVZXi: {
6686     // MOVZWi may be used for producing zero-extended 32-bit immediates in
6687     // 64-bit parameters, so we need to consider super-registers.
6688     if (!TRI->isSuperRegisterEq(MI.getOperand(0).getReg(), Reg))
6689       return None;
6690 
6691     if (!MI.getOperand(1).isImm())
6692       return None;
6693     int64_t Immediate = MI.getOperand(1).getImm();
6694     int Shift = MI.getOperand(2).getImm();
6695     return ParamLoadedValue(MachineOperand::CreateImm(Immediate << Shift),
6696                             nullptr);
6697   }
6698   case AArch64::ORRWrs:
6699   case AArch64::ORRXrs:
6700     return describeORRLoadedValue(MI, Reg, this, TRI);
6701   }
6702 
6703   return TargetInstrInfo::describeLoadedValue(MI, Reg);
6704 }
6705 
6706 uint64_t AArch64InstrInfo::getElementSizeForOpcode(unsigned Opc) const {
6707   return get(Opc).TSFlags & AArch64::ElementSizeMask;
6708 }
6709 
6710 #define GET_INSTRINFO_HELPERS
6711 #define GET_INSTRMAP_INFO
6712 #include "AArch64GenInstrInfo.inc"
6713