1 //===- AMDGPUAsmParser.cpp - Parse SI asm to MCInst instructions ----------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 
10 #include "AMDGPU.h"
11 #include "AMDKernelCodeT.h"
12 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
13 #include "MCTargetDesc/AMDGPUTargetStreamer.h"
14 #include "SIDefines.h"
15 #include "Utils/AMDGPUAsmUtils.h"
16 #include "Utils/AMDGPUBaseInfo.h"
17 #include "Utils/AMDKernelCodeTUtils.h"
18 #include "llvm/ADT/APFloat.h"
19 #include "llvm/ADT/APInt.h"
20 #include "llvm/ADT/ArrayRef.h"
21 #include "llvm/ADT/STLExtras.h"
22 #include "llvm/ADT/SmallBitVector.h"
23 #include "llvm/ADT/SmallString.h"
24 #include "llvm/ADT/StringRef.h"
25 #include "llvm/ADT/StringSwitch.h"
26 #include "llvm/ADT/Twine.h"
27 #include "llvm/BinaryFormat/ELF.h"
28 #include "llvm/CodeGen/MachineValueType.h"
29 #include "llvm/MC/MCAsmInfo.h"
30 #include "llvm/MC/MCContext.h"
31 #include "llvm/MC/MCExpr.h"
32 #include "llvm/MC/MCInst.h"
33 #include "llvm/MC/MCInstrDesc.h"
34 #include "llvm/MC/MCInstrInfo.h"
35 #include "llvm/MC/MCParser/MCAsmLexer.h"
36 #include "llvm/MC/MCParser/MCAsmParser.h"
37 #include "llvm/MC/MCParser/MCAsmParserExtension.h"
38 #include "llvm/MC/MCParser/MCParsedAsmOperand.h"
39 #include "llvm/MC/MCParser/MCTargetAsmParser.h"
40 #include "llvm/MC/MCRegisterInfo.h"
41 #include "llvm/MC/MCStreamer.h"
42 #include "llvm/MC/MCSubtargetInfo.h"
43 #include "llvm/MC/MCSymbol.h"
44 #include "llvm/Support/AMDGPUCodeObjectMetadata.h"
45 #include "llvm/Support/Casting.h"
46 #include "llvm/Support/Compiler.h"
47 #include "llvm/Support/ErrorHandling.h"
48 #include "llvm/Support/MathExtras.h"
49 #include "llvm/Support/SMLoc.h"
50 #include "llvm/Support/TargetRegistry.h"
51 #include "llvm/Support/raw_ostream.h"
52 #include <algorithm>
53 #include <cassert>
54 #include <cstdint>
55 #include <cstring>
56 #include <iterator>
57 #include <map>
58 #include <memory>
59 #include <string>
60 
61 using namespace llvm;
62 using namespace llvm::AMDGPU;
63 
64 namespace {
65 
66 class AMDGPUAsmParser;
67 
68 enum RegisterKind { IS_UNKNOWN, IS_VGPR, IS_SGPR, IS_TTMP, IS_SPECIAL };
69 
70 //===----------------------------------------------------------------------===//
71 // Operand
72 //===----------------------------------------------------------------------===//
73 
74 class AMDGPUOperand : public MCParsedAsmOperand {
75   enum KindTy {
76     Token,
77     Immediate,
78     Register,
79     Expression
80   } Kind;
81 
82   SMLoc StartLoc, EndLoc;
83   const AMDGPUAsmParser *AsmParser;
84 
85 public:
86   AMDGPUOperand(KindTy Kind_, const AMDGPUAsmParser *AsmParser_)
87     : MCParsedAsmOperand(), Kind(Kind_), AsmParser(AsmParser_) {}
88 
89   using Ptr = std::unique_ptr<AMDGPUOperand>;
90 
91   struct Modifiers {
92     bool Abs = false;
93     bool Neg = false;
94     bool Sext = false;
95 
96     bool hasFPModifiers() const { return Abs || Neg; }
97     bool hasIntModifiers() const { return Sext; }
98     bool hasModifiers() const { return hasFPModifiers() || hasIntModifiers(); }
99 
100     int64_t getFPModifiersOperand() const {
101       int64_t Operand = 0;
102       Operand |= Abs ? SISrcMods::ABS : 0;
103       Operand |= Neg ? SISrcMods::NEG : 0;
104       return Operand;
105     }
106 
107     int64_t getIntModifiersOperand() const {
108       int64_t Operand = 0;
109       Operand |= Sext ? SISrcMods::SEXT : 0;
110       return Operand;
111     }
112 
113     int64_t getModifiersOperand() const {
114       assert(!(hasFPModifiers() && hasIntModifiers())
115            && "fp and int modifiers should not be used simultaneously");
116       if (hasFPModifiers()) {
117         return getFPModifiersOperand();
118       } else if (hasIntModifiers()) {
119         return getIntModifiersOperand();
120       } else {
121         return 0;
122       }
123     }
124 
125     friend raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods);
126   };
127 
128   enum ImmTy {
129     ImmTyNone,
130     ImmTyGDS,
131     ImmTyOffen,
132     ImmTyIdxen,
133     ImmTyAddr64,
134     ImmTyOffset,
135     ImmTyOffset0,
136     ImmTyOffset1,
137     ImmTyGLC,
138     ImmTySLC,
139     ImmTyTFE,
140     ImmTyClampSI,
141     ImmTyOModSI,
142     ImmTyDppCtrl,
143     ImmTyDppRowMask,
144     ImmTyDppBankMask,
145     ImmTyDppBoundCtrl,
146     ImmTySdwaDstSel,
147     ImmTySdwaSrc0Sel,
148     ImmTySdwaSrc1Sel,
149     ImmTySdwaDstUnused,
150     ImmTyDMask,
151     ImmTyUNorm,
152     ImmTyDA,
153     ImmTyR128,
154     ImmTyLWE,
155     ImmTyExpTgt,
156     ImmTyExpCompr,
157     ImmTyExpVM,
158     ImmTyDFMT,
159     ImmTyNFMT,
160     ImmTyHwreg,
161     ImmTyOff,
162     ImmTySendMsg,
163     ImmTyInterpSlot,
164     ImmTyInterpAttr,
165     ImmTyAttrChan,
166     ImmTyOpSel,
167     ImmTyOpSelHi,
168     ImmTyNegLo,
169     ImmTyNegHi,
170     ImmTySwizzle,
171     ImmTyHigh
172   };
173 
174   struct TokOp {
175     const char *Data;
176     unsigned Length;
177   };
178 
179   struct ImmOp {
180     int64_t Val;
181     ImmTy Type;
182     bool IsFPImm;
183     Modifiers Mods;
184   };
185 
186   struct RegOp {
187     unsigned RegNo;
188     bool IsForcedVOP3;
189     Modifiers Mods;
190   };
191 
192   union {
193     TokOp Tok;
194     ImmOp Imm;
195     RegOp Reg;
196     const MCExpr *Expr;
197   };
198 
199   bool isToken() const override {
200     if (Kind == Token)
201       return true;
202 
203     if (Kind != Expression || !Expr)
204       return false;
205 
206     // When parsing operands, we can't always tell if something was meant to be
207     // a token, like 'gds', or an expression that references a global variable.
208     // In this case, we assume the string is an expression, and if we need to
209     // interpret is a token, then we treat the symbol name as the token.
210     return isa<MCSymbolRefExpr>(Expr);
211   }
212 
213   bool isImm() const override {
214     return Kind == Immediate;
215   }
216 
217   bool isInlinableImm(MVT type) const;
218   bool isLiteralImm(MVT type) const;
219 
220   bool isRegKind() const {
221     return Kind == Register;
222   }
223 
224   bool isReg() const override {
225     return isRegKind() && !hasModifiers();
226   }
227 
228   bool isRegOrImmWithInputMods(MVT type) const {
229     return isRegKind() || isInlinableImm(type);
230   }
231 
232   bool isRegOrImmWithInt16InputMods() const {
233     return isRegOrImmWithInputMods(MVT::i16);
234   }
235 
236   bool isRegOrImmWithInt32InputMods() const {
237     return isRegOrImmWithInputMods(MVT::i32);
238   }
239 
240   bool isRegOrImmWithInt64InputMods() const {
241     return isRegOrImmWithInputMods(MVT::i64);
242   }
243 
244   bool isRegOrImmWithFP16InputMods() const {
245     return isRegOrImmWithInputMods(MVT::f16);
246   }
247 
248   bool isRegOrImmWithFP32InputMods() const {
249     return isRegOrImmWithInputMods(MVT::f32);
250   }
251 
252   bool isRegOrImmWithFP64InputMods() const {
253     return isRegOrImmWithInputMods(MVT::f64);
254   }
255 
256   bool isVReg() const {
257     return isRegClass(AMDGPU::VGPR_32RegClassID) ||
258            isRegClass(AMDGPU::VReg_64RegClassID) ||
259            isRegClass(AMDGPU::VReg_96RegClassID) ||
260            isRegClass(AMDGPU::VReg_128RegClassID) ||
261            isRegClass(AMDGPU::VReg_256RegClassID) ||
262            isRegClass(AMDGPU::VReg_512RegClassID);
263   }
264 
265   bool isVReg32OrOff() const {
266     return isOff() || isRegClass(AMDGPU::VGPR_32RegClassID);
267   }
268 
269   bool isSDWARegKind() const;
270 
271   bool isImmTy(ImmTy ImmT) const {
272     return isImm() && Imm.Type == ImmT;
273   }
274 
275   bool isImmModifier() const {
276     return isImm() && Imm.Type != ImmTyNone;
277   }
278 
279   bool isClampSI() const { return isImmTy(ImmTyClampSI); }
280   bool isOModSI() const { return isImmTy(ImmTyOModSI); }
281   bool isDMask() const { return isImmTy(ImmTyDMask); }
282   bool isUNorm() const { return isImmTy(ImmTyUNorm); }
283   bool isDA() const { return isImmTy(ImmTyDA); }
284   bool isR128() const { return isImmTy(ImmTyUNorm); }
285   bool isLWE() const { return isImmTy(ImmTyLWE); }
286   bool isOff() const { return isImmTy(ImmTyOff); }
287   bool isExpTgt() const { return isImmTy(ImmTyExpTgt); }
288   bool isExpVM() const { return isImmTy(ImmTyExpVM); }
289   bool isExpCompr() const { return isImmTy(ImmTyExpCompr); }
290   bool isOffen() const { return isImmTy(ImmTyOffen); }
291   bool isIdxen() const { return isImmTy(ImmTyIdxen); }
292   bool isAddr64() const { return isImmTy(ImmTyAddr64); }
293   bool isOffset() const { return isImmTy(ImmTyOffset) && isUInt<16>(getImm()); }
294   bool isOffset0() const { return isImmTy(ImmTyOffset0) && isUInt<16>(getImm()); }
295   bool isOffset1() const { return isImmTy(ImmTyOffset1) && isUInt<8>(getImm()); }
296 
297   bool isOffsetU12() const { return isImmTy(ImmTyOffset) && isUInt<12>(getImm()); }
298   bool isOffsetS13() const { return isImmTy(ImmTyOffset) && isInt<13>(getImm()); }
299   bool isGDS() const { return isImmTy(ImmTyGDS); }
300   bool isGLC() const { return isImmTy(ImmTyGLC); }
301   bool isSLC() const { return isImmTy(ImmTySLC); }
302   bool isTFE() const { return isImmTy(ImmTyTFE); }
303   bool isDFMT() const { return isImmTy(ImmTyDFMT) && isUInt<8>(getImm()); }
304   bool isNFMT() const { return isImmTy(ImmTyNFMT) && isUInt<8>(getImm()); }
305   bool isBankMask() const { return isImmTy(ImmTyDppBankMask); }
306   bool isRowMask() const { return isImmTy(ImmTyDppRowMask); }
307   bool isBoundCtrl() const { return isImmTy(ImmTyDppBoundCtrl); }
308   bool isSDWADstSel() const { return isImmTy(ImmTySdwaDstSel); }
309   bool isSDWASrc0Sel() const { return isImmTy(ImmTySdwaSrc0Sel); }
310   bool isSDWASrc1Sel() const { return isImmTy(ImmTySdwaSrc1Sel); }
311   bool isSDWADstUnused() const { return isImmTy(ImmTySdwaDstUnused); }
312   bool isInterpSlot() const { return isImmTy(ImmTyInterpSlot); }
313   bool isInterpAttr() const { return isImmTy(ImmTyInterpAttr); }
314   bool isAttrChan() const { return isImmTy(ImmTyAttrChan); }
315   bool isOpSel() const { return isImmTy(ImmTyOpSel); }
316   bool isOpSelHi() const { return isImmTy(ImmTyOpSelHi); }
317   bool isNegLo() const { return isImmTy(ImmTyNegLo); }
318   bool isNegHi() const { return isImmTy(ImmTyNegHi); }
319   bool isHigh() const { return isImmTy(ImmTyHigh); }
320 
321   bool isMod() const {
322     return isClampSI() || isOModSI();
323   }
324 
325   bool isRegOrImm() const {
326     return isReg() || isImm();
327   }
328 
329   bool isRegClass(unsigned RCID) const;
330 
331   bool isRegOrInlineNoMods(unsigned RCID, MVT type) const {
332     return (isRegClass(RCID) || isInlinableImm(type)) && !hasModifiers();
333   }
334 
335   bool isSCSrcB16() const {
336     return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i16);
337   }
338 
339   bool isSCSrcV2B16() const {
340     return isSCSrcB16();
341   }
342 
343   bool isSCSrcB32() const {
344     return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i32);
345   }
346 
347   bool isSCSrcB64() const {
348     return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::i64);
349   }
350 
351   bool isSCSrcF16() const {
352     return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f16);
353   }
354 
355   bool isSCSrcV2F16() const {
356     return isSCSrcF16();
357   }
358 
359   bool isSCSrcF32() const {
360     return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f32);
361   }
362 
363   bool isSCSrcF64() const {
364     return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::f64);
365   }
366 
367   bool isSSrcB32() const {
368     return isSCSrcB32() || isLiteralImm(MVT::i32) || isExpr();
369   }
370 
371   bool isSSrcB16() const {
372     return isSCSrcB16() || isLiteralImm(MVT::i16);
373   }
374 
375   bool isSSrcV2B16() const {
376     llvm_unreachable("cannot happen");
377     return isSSrcB16();
378   }
379 
380   bool isSSrcB64() const {
381     // TODO: Find out how SALU supports extension of 32-bit literals to 64 bits.
382     // See isVSrc64().
383     return isSCSrcB64() || isLiteralImm(MVT::i64);
384   }
385 
386   bool isSSrcF32() const {
387     return isSCSrcB32() || isLiteralImm(MVT::f32) || isExpr();
388   }
389 
390   bool isSSrcF64() const {
391     return isSCSrcB64() || isLiteralImm(MVT::f64);
392   }
393 
394   bool isSSrcF16() const {
395     return isSCSrcB16() || isLiteralImm(MVT::f16);
396   }
397 
398   bool isSSrcV2F16() const {
399     llvm_unreachable("cannot happen");
400     return isSSrcF16();
401   }
402 
403   bool isVCSrcB32() const {
404     return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i32);
405   }
406 
407   bool isVCSrcB64() const {
408     return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::i64);
409   }
410 
411   bool isVCSrcB16() const {
412     return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i16);
413   }
414 
415   bool isVCSrcV2B16() const {
416     return isVCSrcB16();
417   }
418 
419   bool isVCSrcF32() const {
420     return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f32);
421   }
422 
423   bool isVCSrcF64() const {
424     return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::f64);
425   }
426 
427   bool isVCSrcF16() const {
428     return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f16);
429   }
430 
431   bool isVCSrcV2F16() const {
432     return isVCSrcF16();
433   }
434 
435   bool isVSrcB32() const {
436     return isVCSrcF32() || isLiteralImm(MVT::i32);
437   }
438 
439   bool isVSrcB64() const {
440     return isVCSrcF64() || isLiteralImm(MVT::i64);
441   }
442 
443   bool isVSrcB16() const {
444     return isVCSrcF16() || isLiteralImm(MVT::i16);
445   }
446 
447   bool isVSrcV2B16() const {
448     llvm_unreachable("cannot happen");
449     return isVSrcB16();
450   }
451 
452   bool isVSrcF32() const {
453     return isVCSrcF32() || isLiteralImm(MVT::f32);
454   }
455 
456   bool isVSrcF64() const {
457     return isVCSrcF64() || isLiteralImm(MVT::f64);
458   }
459 
460   bool isVSrcF16() const {
461     return isVCSrcF16() || isLiteralImm(MVT::f16);
462   }
463 
464   bool isVSrcV2F16() const {
465     llvm_unreachable("cannot happen");
466     return isVSrcF16();
467   }
468 
469   bool isKImmFP32() const {
470     return isLiteralImm(MVT::f32);
471   }
472 
473   bool isKImmFP16() const {
474     return isLiteralImm(MVT::f16);
475   }
476 
477   bool isMem() const override {
478     return false;
479   }
480 
481   bool isExpr() const {
482     return Kind == Expression;
483   }
484 
485   bool isSoppBrTarget() const {
486     return isExpr() || isImm();
487   }
488 
489   bool isSWaitCnt() const;
490   bool isHwreg() const;
491   bool isSendMsg() const;
492   bool isSwizzle() const;
493   bool isSMRDOffset8() const;
494   bool isSMRDOffset20() const;
495   bool isSMRDLiteralOffset() const;
496   bool isDPPCtrl() const;
497   bool isGPRIdxMode() const;
498   bool isS16Imm() const;
499   bool isU16Imm() const;
500 
501   StringRef getExpressionAsToken() const {
502     assert(isExpr());
503     const MCSymbolRefExpr *S = cast<MCSymbolRefExpr>(Expr);
504     return S->getSymbol().getName();
505   }
506 
507   StringRef getToken() const {
508     assert(isToken());
509 
510     if (Kind == Expression)
511       return getExpressionAsToken();
512 
513     return StringRef(Tok.Data, Tok.Length);
514   }
515 
516   int64_t getImm() const {
517     assert(isImm());
518     return Imm.Val;
519   }
520 
521   ImmTy getImmTy() const {
522     assert(isImm());
523     return Imm.Type;
524   }
525 
526   unsigned getReg() const override {
527     return Reg.RegNo;
528   }
529 
530   SMLoc getStartLoc() const override {
531     return StartLoc;
532   }
533 
534   SMLoc getEndLoc() const override {
535     return EndLoc;
536   }
537 
538   Modifiers getModifiers() const {
539     assert(isRegKind() || isImmTy(ImmTyNone));
540     return isRegKind() ? Reg.Mods : Imm.Mods;
541   }
542 
543   void setModifiers(Modifiers Mods) {
544     assert(isRegKind() || isImmTy(ImmTyNone));
545     if (isRegKind())
546       Reg.Mods = Mods;
547     else
548       Imm.Mods = Mods;
549   }
550 
551   bool hasModifiers() const {
552     return getModifiers().hasModifiers();
553   }
554 
555   bool hasFPModifiers() const {
556     return getModifiers().hasFPModifiers();
557   }
558 
559   bool hasIntModifiers() const {
560     return getModifiers().hasIntModifiers();
561   }
562 
563   uint64_t applyInputFPModifiers(uint64_t Val, unsigned Size) const;
564 
565   void addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers = true) const;
566 
567   void addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const;
568 
569   template <unsigned Bitwidth>
570   void addKImmFPOperands(MCInst &Inst, unsigned N) const;
571 
572   void addKImmFP16Operands(MCInst &Inst, unsigned N) const {
573     addKImmFPOperands<16>(Inst, N);
574   }
575 
576   void addKImmFP32Operands(MCInst &Inst, unsigned N) const {
577     addKImmFPOperands<32>(Inst, N);
578   }
579 
580   void addRegOperands(MCInst &Inst, unsigned N) const;
581 
582   void addRegOrImmOperands(MCInst &Inst, unsigned N) const {
583     if (isRegKind())
584       addRegOperands(Inst, N);
585     else if (isExpr())
586       Inst.addOperand(MCOperand::createExpr(Expr));
587     else
588       addImmOperands(Inst, N);
589   }
590 
591   void addRegOrImmWithInputModsOperands(MCInst &Inst, unsigned N) const {
592     Modifiers Mods = getModifiers();
593     Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
594     if (isRegKind()) {
595       addRegOperands(Inst, N);
596     } else {
597       addImmOperands(Inst, N, false);
598     }
599   }
600 
601   void addRegOrImmWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
602     assert(!hasIntModifiers());
603     addRegOrImmWithInputModsOperands(Inst, N);
604   }
605 
606   void addRegOrImmWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
607     assert(!hasFPModifiers());
608     addRegOrImmWithInputModsOperands(Inst, N);
609   }
610 
611   void addRegWithInputModsOperands(MCInst &Inst, unsigned N) const {
612     Modifiers Mods = getModifiers();
613     Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
614     assert(isRegKind());
615     addRegOperands(Inst, N);
616   }
617 
618   void addRegWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
619     assert(!hasIntModifiers());
620     addRegWithInputModsOperands(Inst, N);
621   }
622 
623   void addRegWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
624     assert(!hasFPModifiers());
625     addRegWithInputModsOperands(Inst, N);
626   }
627 
628   void addSoppBrTargetOperands(MCInst &Inst, unsigned N) const {
629     if (isImm())
630       addImmOperands(Inst, N);
631     else {
632       assert(isExpr());
633       Inst.addOperand(MCOperand::createExpr(Expr));
634     }
635   }
636 
637   static void printImmTy(raw_ostream& OS, ImmTy Type) {
638     switch (Type) {
639     case ImmTyNone: OS << "None"; break;
640     case ImmTyGDS: OS << "GDS"; break;
641     case ImmTyOffen: OS << "Offen"; break;
642     case ImmTyIdxen: OS << "Idxen"; break;
643     case ImmTyAddr64: OS << "Addr64"; break;
644     case ImmTyOffset: OS << "Offset"; break;
645     case ImmTyOffset0: OS << "Offset0"; break;
646     case ImmTyOffset1: OS << "Offset1"; break;
647     case ImmTyGLC: OS << "GLC"; break;
648     case ImmTySLC: OS << "SLC"; break;
649     case ImmTyTFE: OS << "TFE"; break;
650     case ImmTyDFMT: OS << "DFMT"; break;
651     case ImmTyNFMT: OS << "NFMT"; break;
652     case ImmTyClampSI: OS << "ClampSI"; break;
653     case ImmTyOModSI: OS << "OModSI"; break;
654     case ImmTyDppCtrl: OS << "DppCtrl"; break;
655     case ImmTyDppRowMask: OS << "DppRowMask"; break;
656     case ImmTyDppBankMask: OS << "DppBankMask"; break;
657     case ImmTyDppBoundCtrl: OS << "DppBoundCtrl"; break;
658     case ImmTySdwaDstSel: OS << "SdwaDstSel"; break;
659     case ImmTySdwaSrc0Sel: OS << "SdwaSrc0Sel"; break;
660     case ImmTySdwaSrc1Sel: OS << "SdwaSrc1Sel"; break;
661     case ImmTySdwaDstUnused: OS << "SdwaDstUnused"; break;
662     case ImmTyDMask: OS << "DMask"; break;
663     case ImmTyUNorm: OS << "UNorm"; break;
664     case ImmTyDA: OS << "DA"; break;
665     case ImmTyR128: OS << "R128"; break;
666     case ImmTyLWE: OS << "LWE"; break;
667     case ImmTyOff: OS << "Off"; break;
668     case ImmTyExpTgt: OS << "ExpTgt"; break;
669     case ImmTyExpCompr: OS << "ExpCompr"; break;
670     case ImmTyExpVM: OS << "ExpVM"; break;
671     case ImmTyHwreg: OS << "Hwreg"; break;
672     case ImmTySendMsg: OS << "SendMsg"; break;
673     case ImmTyInterpSlot: OS << "InterpSlot"; break;
674     case ImmTyInterpAttr: OS << "InterpAttr"; break;
675     case ImmTyAttrChan: OS << "AttrChan"; break;
676     case ImmTyOpSel: OS << "OpSel"; break;
677     case ImmTyOpSelHi: OS << "OpSelHi"; break;
678     case ImmTyNegLo: OS << "NegLo"; break;
679     case ImmTyNegHi: OS << "NegHi"; break;
680     case ImmTySwizzle: OS << "Swizzle"; break;
681     case ImmTyHigh: OS << "High"; break;
682     }
683   }
684 
685   void print(raw_ostream &OS) const override {
686     switch (Kind) {
687     case Register:
688       OS << "<register " << getReg() << " mods: " << Reg.Mods << '>';
689       break;
690     case Immediate:
691       OS << '<' << getImm();
692       if (getImmTy() != ImmTyNone) {
693         OS << " type: "; printImmTy(OS, getImmTy());
694       }
695       OS << " mods: " << Imm.Mods << '>';
696       break;
697     case Token:
698       OS << '\'' << getToken() << '\'';
699       break;
700     case Expression:
701       OS << "<expr " << *Expr << '>';
702       break;
703     }
704   }
705 
706   static AMDGPUOperand::Ptr CreateImm(const AMDGPUAsmParser *AsmParser,
707                                       int64_t Val, SMLoc Loc,
708                                       ImmTy Type = ImmTyNone,
709                                       bool IsFPImm = false) {
710     auto Op = llvm::make_unique<AMDGPUOperand>(Immediate, AsmParser);
711     Op->Imm.Val = Val;
712     Op->Imm.IsFPImm = IsFPImm;
713     Op->Imm.Type = Type;
714     Op->Imm.Mods = Modifiers();
715     Op->StartLoc = Loc;
716     Op->EndLoc = Loc;
717     return Op;
718   }
719 
720   static AMDGPUOperand::Ptr CreateToken(const AMDGPUAsmParser *AsmParser,
721                                         StringRef Str, SMLoc Loc,
722                                         bool HasExplicitEncodingSize = true) {
723     auto Res = llvm::make_unique<AMDGPUOperand>(Token, AsmParser);
724     Res->Tok.Data = Str.data();
725     Res->Tok.Length = Str.size();
726     Res->StartLoc = Loc;
727     Res->EndLoc = Loc;
728     return Res;
729   }
730 
731   static AMDGPUOperand::Ptr CreateReg(const AMDGPUAsmParser *AsmParser,
732                                       unsigned RegNo, SMLoc S,
733                                       SMLoc E,
734                                       bool ForceVOP3) {
735     auto Op = llvm::make_unique<AMDGPUOperand>(Register, AsmParser);
736     Op->Reg.RegNo = RegNo;
737     Op->Reg.Mods = Modifiers();
738     Op->Reg.IsForcedVOP3 = ForceVOP3;
739     Op->StartLoc = S;
740     Op->EndLoc = E;
741     return Op;
742   }
743 
744   static AMDGPUOperand::Ptr CreateExpr(const AMDGPUAsmParser *AsmParser,
745                                        const class MCExpr *Expr, SMLoc S) {
746     auto Op = llvm::make_unique<AMDGPUOperand>(Expression, AsmParser);
747     Op->Expr = Expr;
748     Op->StartLoc = S;
749     Op->EndLoc = S;
750     return Op;
751   }
752 };
753 
754 raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods) {
755   OS << "abs:" << Mods.Abs << " neg: " << Mods.Neg << " sext:" << Mods.Sext;
756   return OS;
757 }
758 
759 //===----------------------------------------------------------------------===//
760 // AsmParser
761 //===----------------------------------------------------------------------===//
762 
763 // Holds info related to the current kernel, e.g. count of SGPRs used.
764 // Kernel scope begins at .amdgpu_hsa_kernel directive, ends at next
765 // .amdgpu_hsa_kernel or at EOF.
766 class KernelScopeInfo {
767   int SgprIndexUnusedMin = -1;
768   int VgprIndexUnusedMin = -1;
769   MCContext *Ctx = nullptr;
770 
771   void usesSgprAt(int i) {
772     if (i >= SgprIndexUnusedMin) {
773       SgprIndexUnusedMin = ++i;
774       if (Ctx) {
775         MCSymbol * const Sym = Ctx->getOrCreateSymbol(Twine(".kernel.sgpr_count"));
776         Sym->setVariableValue(MCConstantExpr::create(SgprIndexUnusedMin, *Ctx));
777       }
778     }
779   }
780 
781   void usesVgprAt(int i) {
782     if (i >= VgprIndexUnusedMin) {
783       VgprIndexUnusedMin = ++i;
784       if (Ctx) {
785         MCSymbol * const Sym = Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count"));
786         Sym->setVariableValue(MCConstantExpr::create(VgprIndexUnusedMin, *Ctx));
787       }
788     }
789   }
790 
791 public:
792   KernelScopeInfo() = default;
793 
794   void initialize(MCContext &Context) {
795     Ctx = &Context;
796     usesSgprAt(SgprIndexUnusedMin = -1);
797     usesVgprAt(VgprIndexUnusedMin = -1);
798   }
799 
800   void usesRegister(RegisterKind RegKind, unsigned DwordRegIndex, unsigned RegWidth) {
801     switch (RegKind) {
802       case IS_SGPR: usesSgprAt(DwordRegIndex + RegWidth - 1); break;
803       case IS_VGPR: usesVgprAt(DwordRegIndex + RegWidth - 1); break;
804       default: break;
805     }
806   }
807 };
808 
809 class AMDGPUAsmParser : public MCTargetAsmParser {
810   const MCInstrInfo &MII;
811   MCAsmParser &Parser;
812 
813   unsigned ForcedEncodingSize = 0;
814   bool ForcedDPP = false;
815   bool ForcedSDWA = false;
816   KernelScopeInfo KernelScope;
817 
818   /// @name Auto-generated Match Functions
819   /// {
820 
821 #define GET_ASSEMBLER_HEADER
822 #include "AMDGPUGenAsmMatcher.inc"
823 
824   /// }
825 
826 private:
827   bool ParseAsAbsoluteExpression(uint32_t &Ret);
828   bool ParseDirectiveMajorMinor(uint32_t &Major, uint32_t &Minor);
829   bool ParseDirectiveHSACodeObjectVersion();
830   bool ParseDirectiveHSACodeObjectISA();
831   bool ParseDirectiveCodeObjectMetadata();
832   bool ParseAMDKernelCodeTValue(StringRef ID, amd_kernel_code_t &Header);
833   bool ParseDirectiveAMDKernelCodeT();
834   bool subtargetHasRegister(const MCRegisterInfo &MRI, unsigned RegNo) const;
835   bool ParseDirectiveAMDGPUHsaKernel();
836   bool ParseDirectivePalMetadata();
837   bool AddNextRegisterToList(unsigned& Reg, unsigned& RegWidth,
838                              RegisterKind RegKind, unsigned Reg1,
839                              unsigned RegNum);
840   bool ParseAMDGPURegister(RegisterKind& RegKind, unsigned& Reg,
841                            unsigned& RegNum, unsigned& RegWidth,
842                            unsigned *DwordRegIndex);
843   void cvtMubufImpl(MCInst &Inst, const OperandVector &Operands,
844                     bool IsAtomic, bool IsAtomicReturn);
845   void cvtDSImpl(MCInst &Inst, const OperandVector &Operands,
846                  bool IsGdsHardcoded);
847 
848 public:
849   enum AMDGPUMatchResultTy {
850     Match_PreferE32 = FIRST_TARGET_MATCH_RESULT_TY
851   };
852 
853   using OptionalImmIndexMap = std::map<AMDGPUOperand::ImmTy, unsigned>;
854 
855   AMDGPUAsmParser(const MCSubtargetInfo &STI, MCAsmParser &_Parser,
856                const MCInstrInfo &MII,
857                const MCTargetOptions &Options)
858       : MCTargetAsmParser(Options, STI), MII(MII), Parser(_Parser) {
859     MCAsmParserExtension::Initialize(Parser);
860 
861     if (getFeatureBits().none()) {
862       // Set default features.
863       copySTI().ToggleFeature("SOUTHERN_ISLANDS");
864     }
865 
866     setAvailableFeatures(ComputeAvailableFeatures(getFeatureBits()));
867 
868     {
869       // TODO: make those pre-defined variables read-only.
870       // Currently there is none suitable machinery in the core llvm-mc for this.
871       // MCSymbol::isRedefinable is intended for another purpose, and
872       // AsmParser::parseDirectiveSet() cannot be specialized for specific target.
873       AMDGPU::IsaInfo::IsaVersion ISA =
874           AMDGPU::IsaInfo::getIsaVersion(getFeatureBits());
875       MCContext &Ctx = getContext();
876       MCSymbol *Sym =
877           Ctx.getOrCreateSymbol(Twine(".option.machine_version_major"));
878       Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
879       Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_minor"));
880       Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
881       Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_stepping"));
882       Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
883     }
884     KernelScope.initialize(getContext());
885   }
886 
887   bool isSI() const {
888     return AMDGPU::isSI(getSTI());
889   }
890 
891   bool isCI() const {
892     return AMDGPU::isCI(getSTI());
893   }
894 
895   bool isVI() const {
896     return AMDGPU::isVI(getSTI());
897   }
898 
899   bool isGFX9() const {
900     return AMDGPU::isGFX9(getSTI());
901   }
902 
903   bool hasInv2PiInlineImm() const {
904     return getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm];
905   }
906 
907   bool hasFlatOffsets() const {
908     return getFeatureBits()[AMDGPU::FeatureFlatInstOffsets];
909   }
910 
911   bool hasSGPR102_SGPR103() const {
912     return !isVI();
913   }
914 
915   bool hasIntClamp() const {
916     return getFeatureBits()[AMDGPU::FeatureIntClamp];
917   }
918 
919   AMDGPUTargetStreamer &getTargetStreamer() {
920     MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer();
921     return static_cast<AMDGPUTargetStreamer &>(TS);
922   }
923 
924   const MCRegisterInfo *getMRI() const {
925     // We need this const_cast because for some reason getContext() is not const
926     // in MCAsmParser.
927     return const_cast<AMDGPUAsmParser*>(this)->getContext().getRegisterInfo();
928   }
929 
930   const MCInstrInfo *getMII() const {
931     return &MII;
932   }
933 
934   const FeatureBitset &getFeatureBits() const {
935     return getSTI().getFeatureBits();
936   }
937 
938   void setForcedEncodingSize(unsigned Size) { ForcedEncodingSize = Size; }
939   void setForcedDPP(bool ForceDPP_) { ForcedDPP = ForceDPP_; }
940   void setForcedSDWA(bool ForceSDWA_) { ForcedSDWA = ForceSDWA_; }
941 
942   unsigned getForcedEncodingSize() const { return ForcedEncodingSize; }
943   bool isForcedVOP3() const { return ForcedEncodingSize == 64; }
944   bool isForcedDPP() const { return ForcedDPP; }
945   bool isForcedSDWA() const { return ForcedSDWA; }
946   ArrayRef<unsigned> getMatchedVariants() const;
947 
948   std::unique_ptr<AMDGPUOperand> parseRegister();
949   bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override;
950   unsigned checkTargetMatchPredicate(MCInst &Inst) override;
951   unsigned validateTargetOperandClass(MCParsedAsmOperand &Op,
952                                       unsigned Kind) override;
953   bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
954                                OperandVector &Operands, MCStreamer &Out,
955                                uint64_t &ErrorInfo,
956                                bool MatchingInlineAsm) override;
957   bool ParseDirective(AsmToken DirectiveID) override;
958   OperandMatchResultTy parseOperand(OperandVector &Operands, StringRef Mnemonic);
959   StringRef parseMnemonicSuffix(StringRef Name);
960   bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
961                         SMLoc NameLoc, OperandVector &Operands) override;
962   //bool ProcessInstruction(MCInst &Inst);
963 
964   OperandMatchResultTy parseIntWithPrefix(const char *Prefix, int64_t &Int);
965 
966   OperandMatchResultTy
967   parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
968                      AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
969                      bool (*ConvertResult)(int64_t &) = nullptr);
970 
971   OperandMatchResultTy parseOperandArrayWithPrefix(
972     const char *Prefix,
973     OperandVector &Operands,
974     AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
975     bool (*ConvertResult)(int64_t&) = nullptr);
976 
977   OperandMatchResultTy
978   parseNamedBit(const char *Name, OperandVector &Operands,
979                 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone);
980   OperandMatchResultTy parseStringWithPrefix(StringRef Prefix,
981                                              StringRef &Value);
982 
983   bool parseAbsoluteExpr(int64_t &Val, bool AbsMod = false);
984   OperandMatchResultTy parseImm(OperandVector &Operands, bool AbsMod = false);
985   OperandMatchResultTy parseReg(OperandVector &Operands);
986   OperandMatchResultTy parseRegOrImm(OperandVector &Operands, bool AbsMod = false);
987   OperandMatchResultTy parseRegOrImmWithFPInputMods(OperandVector &Operands, bool AllowImm = true);
988   OperandMatchResultTy parseRegOrImmWithIntInputMods(OperandVector &Operands, bool AllowImm = true);
989   OperandMatchResultTy parseRegWithFPInputMods(OperandVector &Operands);
990   OperandMatchResultTy parseRegWithIntInputMods(OperandVector &Operands);
991   OperandMatchResultTy parseVReg32OrOff(OperandVector &Operands);
992 
993   void cvtDSOffset01(MCInst &Inst, const OperandVector &Operands);
994   void cvtDS(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, false); }
995   void cvtDSGds(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, true); }
996   void cvtExp(MCInst &Inst, const OperandVector &Operands);
997 
998   bool parseCnt(int64_t &IntVal);
999   OperandMatchResultTy parseSWaitCntOps(OperandVector &Operands);
1000   OperandMatchResultTy parseHwreg(OperandVector &Operands);
1001 
1002 private:
1003   struct OperandInfoTy {
1004     int64_t Id;
1005     bool IsSymbolic = false;
1006 
1007     OperandInfoTy(int64_t Id_) : Id(Id_) {}
1008   };
1009 
1010   bool parseSendMsgConstruct(OperandInfoTy &Msg, OperandInfoTy &Operation, int64_t &StreamId);
1011   bool parseHwregConstruct(OperandInfoTy &HwReg, int64_t &Offset, int64_t &Width);
1012 
1013   void errorExpTgt();
1014   OperandMatchResultTy parseExpTgtImpl(StringRef Str, uint8_t &Val);
1015 
1016   bool validateInstruction(const MCInst &Inst, const SMLoc &IDLoc);
1017   bool validateConstantBusLimitations(const MCInst &Inst);
1018   bool validateEarlyClobberLimitations(const MCInst &Inst);
1019   bool validateIntClampSupported(const MCInst &Inst);
1020   bool usesConstantBus(const MCInst &Inst, unsigned OpIdx);
1021   bool isInlineConstant(const MCInst &Inst, unsigned OpIdx) const;
1022   unsigned findImplicitSGPRReadInVOP(const MCInst &Inst) const;
1023 
1024   bool trySkipId(const StringRef Id);
1025   bool trySkipToken(const AsmToken::TokenKind Kind);
1026   bool skipToken(const AsmToken::TokenKind Kind, const StringRef ErrMsg);
1027   bool parseString(StringRef &Val, const StringRef ErrMsg = "expected a string");
1028   bool parseExpr(int64_t &Imm);
1029 
1030 public:
1031   OperandMatchResultTy parseOptionalOperand(OperandVector &Operands);
1032 
1033   OperandMatchResultTy parseExpTgt(OperandVector &Operands);
1034   OperandMatchResultTy parseSendMsgOp(OperandVector &Operands);
1035   OperandMatchResultTy parseInterpSlot(OperandVector &Operands);
1036   OperandMatchResultTy parseInterpAttr(OperandVector &Operands);
1037   OperandMatchResultTy parseSOppBrTarget(OperandVector &Operands);
1038 
1039   bool parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
1040                             const unsigned MinVal,
1041                             const unsigned MaxVal,
1042                             const StringRef ErrMsg);
1043   OperandMatchResultTy parseSwizzleOp(OperandVector &Operands);
1044   bool parseSwizzleOffset(int64_t &Imm);
1045   bool parseSwizzleMacro(int64_t &Imm);
1046   bool parseSwizzleQuadPerm(int64_t &Imm);
1047   bool parseSwizzleBitmaskPerm(int64_t &Imm);
1048   bool parseSwizzleBroadcast(int64_t &Imm);
1049   bool parseSwizzleSwap(int64_t &Imm);
1050   bool parseSwizzleReverse(int64_t &Imm);
1051 
1052   void cvtMubuf(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, false); }
1053   void cvtMubufAtomic(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true, false); }
1054   void cvtMubufAtomicReturn(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true, true); }
1055   void cvtMtbuf(MCInst &Inst, const OperandVector &Operands);
1056 
1057   AMDGPUOperand::Ptr defaultGLC() const;
1058   AMDGPUOperand::Ptr defaultSLC() const;
1059   AMDGPUOperand::Ptr defaultTFE() const;
1060 
1061   AMDGPUOperand::Ptr defaultDMask() const;
1062   AMDGPUOperand::Ptr defaultUNorm() const;
1063   AMDGPUOperand::Ptr defaultDA() const;
1064   AMDGPUOperand::Ptr defaultR128() const;
1065   AMDGPUOperand::Ptr defaultLWE() const;
1066   AMDGPUOperand::Ptr defaultSMRDOffset8() const;
1067   AMDGPUOperand::Ptr defaultSMRDOffset20() const;
1068   AMDGPUOperand::Ptr defaultSMRDLiteralOffset() const;
1069   AMDGPUOperand::Ptr defaultOffsetU12() const;
1070   AMDGPUOperand::Ptr defaultOffsetS13() const;
1071 
1072   OperandMatchResultTy parseOModOperand(OperandVector &Operands);
1073 
1074   void cvtVOP3(MCInst &Inst, const OperandVector &Operands,
1075                OptionalImmIndexMap &OptionalIdx);
1076   void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands);
1077   void cvtVOP3(MCInst &Inst, const OperandVector &Operands);
1078   void cvtVOP3PImpl(MCInst &Inst, const OperandVector &Operands,
1079                     bool IsPacked);
1080   void cvtVOP3P(MCInst &Inst, const OperandVector &Operands);
1081   void cvtVOP3P_NotPacked(MCInst &Inst, const OperandVector &Operands);
1082 
1083   void cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands);
1084 
1085   void cvtMIMG(MCInst &Inst, const OperandVector &Operands,
1086                bool IsAtomic = false);
1087   void cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands);
1088 
1089   OperandMatchResultTy parseDPPCtrl(OperandVector &Operands);
1090   AMDGPUOperand::Ptr defaultRowMask() const;
1091   AMDGPUOperand::Ptr defaultBankMask() const;
1092   AMDGPUOperand::Ptr defaultBoundCtrl() const;
1093   void cvtDPP(MCInst &Inst, const OperandVector &Operands);
1094 
1095   OperandMatchResultTy parseSDWASel(OperandVector &Operands, StringRef Prefix,
1096                                     AMDGPUOperand::ImmTy Type);
1097   OperandMatchResultTy parseSDWADstUnused(OperandVector &Operands);
1098   void cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands);
1099   void cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands);
1100   void cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands);
1101   void cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands);
1102   void cvtSDWA(MCInst &Inst, const OperandVector &Operands,
1103                 uint64_t BasicInstType, bool skipVcc = false);
1104 };
1105 
1106 struct OptionalOperand {
1107   const char *Name;
1108   AMDGPUOperand::ImmTy Type;
1109   bool IsBit;
1110   bool (*ConvertResult)(int64_t&);
1111 };
1112 
1113 } // end anonymous namespace
1114 
1115 // May be called with integer type with equivalent bitwidth.
1116 static const fltSemantics *getFltSemantics(unsigned Size) {
1117   switch (Size) {
1118   case 4:
1119     return &APFloat::IEEEsingle();
1120   case 8:
1121     return &APFloat::IEEEdouble();
1122   case 2:
1123     return &APFloat::IEEEhalf();
1124   default:
1125     llvm_unreachable("unsupported fp type");
1126   }
1127 }
1128 
1129 static const fltSemantics *getFltSemantics(MVT VT) {
1130   return getFltSemantics(VT.getSizeInBits() / 8);
1131 }
1132 
1133 static const fltSemantics *getOpFltSemantics(uint8_t OperandType) {
1134   switch (OperandType) {
1135   case AMDGPU::OPERAND_REG_IMM_INT32:
1136   case AMDGPU::OPERAND_REG_IMM_FP32:
1137   case AMDGPU::OPERAND_REG_INLINE_C_INT32:
1138   case AMDGPU::OPERAND_REG_INLINE_C_FP32:
1139     return &APFloat::IEEEsingle();
1140   case AMDGPU::OPERAND_REG_IMM_INT64:
1141   case AMDGPU::OPERAND_REG_IMM_FP64:
1142   case AMDGPU::OPERAND_REG_INLINE_C_INT64:
1143   case AMDGPU::OPERAND_REG_INLINE_C_FP64:
1144     return &APFloat::IEEEdouble();
1145   case AMDGPU::OPERAND_REG_IMM_INT16:
1146   case AMDGPU::OPERAND_REG_IMM_FP16:
1147   case AMDGPU::OPERAND_REG_INLINE_C_INT16:
1148   case AMDGPU::OPERAND_REG_INLINE_C_FP16:
1149   case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
1150   case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
1151     return &APFloat::IEEEhalf();
1152   default:
1153     llvm_unreachable("unsupported fp type");
1154   }
1155 }
1156 
1157 //===----------------------------------------------------------------------===//
1158 // Operand
1159 //===----------------------------------------------------------------------===//
1160 
1161 static bool canLosslesslyConvertToFPType(APFloat &FPLiteral, MVT VT) {
1162   bool Lost;
1163 
1164   // Convert literal to single precision
1165   APFloat::opStatus Status = FPLiteral.convert(*getFltSemantics(VT),
1166                                                APFloat::rmNearestTiesToEven,
1167                                                &Lost);
1168   // We allow precision lost but not overflow or underflow
1169   if (Status != APFloat::opOK &&
1170       Lost &&
1171       ((Status & APFloat::opOverflow)  != 0 ||
1172        (Status & APFloat::opUnderflow) != 0)) {
1173     return false;
1174   }
1175 
1176   return true;
1177 }
1178 
1179 bool AMDGPUOperand::isInlinableImm(MVT type) const {
1180   if (!isImmTy(ImmTyNone)) {
1181     // Only plain immediates are inlinable (e.g. "clamp" attribute is not)
1182     return false;
1183   }
1184   // TODO: We should avoid using host float here. It would be better to
1185   // check the float bit values which is what a few other places do.
1186   // We've had bot failures before due to weird NaN support on mips hosts.
1187 
1188   APInt Literal(64, Imm.Val);
1189 
1190   if (Imm.IsFPImm) { // We got fp literal token
1191     if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
1192       return AMDGPU::isInlinableLiteral64(Imm.Val,
1193                                           AsmParser->hasInv2PiInlineImm());
1194     }
1195 
1196     APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
1197     if (!canLosslesslyConvertToFPType(FPLiteral, type))
1198       return false;
1199 
1200     if (type.getScalarSizeInBits() == 16) {
1201       return AMDGPU::isInlinableLiteral16(
1202         static_cast<int16_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
1203         AsmParser->hasInv2PiInlineImm());
1204     }
1205 
1206     // Check if single precision literal is inlinable
1207     return AMDGPU::isInlinableLiteral32(
1208       static_cast<int32_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
1209       AsmParser->hasInv2PiInlineImm());
1210   }
1211 
1212   // We got int literal token.
1213   if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
1214     return AMDGPU::isInlinableLiteral64(Imm.Val,
1215                                         AsmParser->hasInv2PiInlineImm());
1216   }
1217 
1218   if (type.getScalarSizeInBits() == 16) {
1219     return AMDGPU::isInlinableLiteral16(
1220       static_cast<int16_t>(Literal.getLoBits(16).getSExtValue()),
1221       AsmParser->hasInv2PiInlineImm());
1222   }
1223 
1224   return AMDGPU::isInlinableLiteral32(
1225     static_cast<int32_t>(Literal.getLoBits(32).getZExtValue()),
1226     AsmParser->hasInv2PiInlineImm());
1227 }
1228 
1229 bool AMDGPUOperand::isLiteralImm(MVT type) const {
1230   // Check that this immediate can be added as literal
1231   if (!isImmTy(ImmTyNone)) {
1232     return false;
1233   }
1234 
1235   if (!Imm.IsFPImm) {
1236     // We got int literal token.
1237 
1238     if (type == MVT::f64 && hasFPModifiers()) {
1239       // Cannot apply fp modifiers to int literals preserving the same semantics
1240       // for VOP1/2/C and VOP3 because of integer truncation. To avoid ambiguity,
1241       // disable these cases.
1242       return false;
1243     }
1244 
1245     unsigned Size = type.getSizeInBits();
1246     if (Size == 64)
1247       Size = 32;
1248 
1249     // FIXME: 64-bit operands can zero extend, sign extend, or pad zeroes for FP
1250     // types.
1251     return isUIntN(Size, Imm.Val) || isIntN(Size, Imm.Val);
1252   }
1253 
1254   // We got fp literal token
1255   if (type == MVT::f64) { // Expected 64-bit fp operand
1256     // We would set low 64-bits of literal to zeroes but we accept this literals
1257     return true;
1258   }
1259 
1260   if (type == MVT::i64) { // Expected 64-bit int operand
1261     // We don't allow fp literals in 64-bit integer instructions. It is
1262     // unclear how we should encode them.
1263     return false;
1264   }
1265 
1266   APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
1267   return canLosslesslyConvertToFPType(FPLiteral, type);
1268 }
1269 
1270 bool AMDGPUOperand::isRegClass(unsigned RCID) const {
1271   return isRegKind() && AsmParser->getMRI()->getRegClass(RCID).contains(getReg());
1272 }
1273 
1274 bool AMDGPUOperand::isSDWARegKind() const {
1275   if (AsmParser->isVI())
1276     return isVReg();
1277   else if (AsmParser->isGFX9())
1278     return isRegKind();
1279   else
1280     return false;
1281 }
1282 
1283 uint64_t AMDGPUOperand::applyInputFPModifiers(uint64_t Val, unsigned Size) const
1284 {
1285   assert(isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
1286   assert(Size == 2 || Size == 4 || Size == 8);
1287 
1288   const uint64_t FpSignMask = (1ULL << (Size * 8 - 1));
1289 
1290   if (Imm.Mods.Abs) {
1291     Val &= ~FpSignMask;
1292   }
1293   if (Imm.Mods.Neg) {
1294     Val ^= FpSignMask;
1295   }
1296 
1297   return Val;
1298 }
1299 
1300 void AMDGPUOperand::addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers) const {
1301   if (AMDGPU::isSISrcOperand(AsmParser->getMII()->get(Inst.getOpcode()),
1302                              Inst.getNumOperands())) {
1303     addLiteralImmOperand(Inst, Imm.Val,
1304                          ApplyModifiers &
1305                          isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
1306   } else {
1307     assert(!isImmTy(ImmTyNone) || !hasModifiers());
1308     Inst.addOperand(MCOperand::createImm(Imm.Val));
1309   }
1310 }
1311 
1312 void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const {
1313   const auto& InstDesc = AsmParser->getMII()->get(Inst.getOpcode());
1314   auto OpNum = Inst.getNumOperands();
1315   // Check that this operand accepts literals
1316   assert(AMDGPU::isSISrcOperand(InstDesc, OpNum));
1317 
1318   if (ApplyModifiers) {
1319     assert(AMDGPU::isSISrcFPOperand(InstDesc, OpNum));
1320     const unsigned Size = Imm.IsFPImm ? sizeof(double) : getOperandSize(InstDesc, OpNum);
1321     Val = applyInputFPModifiers(Val, Size);
1322   }
1323 
1324   APInt Literal(64, Val);
1325   uint8_t OpTy = InstDesc.OpInfo[OpNum].OperandType;
1326 
1327   if (Imm.IsFPImm) { // We got fp literal token
1328     switch (OpTy) {
1329     case AMDGPU::OPERAND_REG_IMM_INT64:
1330     case AMDGPU::OPERAND_REG_IMM_FP64:
1331     case AMDGPU::OPERAND_REG_INLINE_C_INT64:
1332     case AMDGPU::OPERAND_REG_INLINE_C_FP64:
1333       if (AMDGPU::isInlinableLiteral64(Literal.getZExtValue(),
1334                                        AsmParser->hasInv2PiInlineImm())) {
1335         Inst.addOperand(MCOperand::createImm(Literal.getZExtValue()));
1336         return;
1337       }
1338 
1339       // Non-inlineable
1340       if (AMDGPU::isSISrcFPOperand(InstDesc, OpNum)) { // Expected 64-bit fp operand
1341         // For fp operands we check if low 32 bits are zeros
1342         if (Literal.getLoBits(32) != 0) {
1343           const_cast<AMDGPUAsmParser *>(AsmParser)->Warning(Inst.getLoc(),
1344           "Can't encode literal as exact 64-bit floating-point operand. "
1345           "Low 32-bits will be set to zero");
1346         }
1347 
1348         Inst.addOperand(MCOperand::createImm(Literal.lshr(32).getZExtValue()));
1349         return;
1350       }
1351 
1352       // We don't allow fp literals in 64-bit integer instructions. It is
1353       // unclear how we should encode them. This case should be checked earlier
1354       // in predicate methods (isLiteralImm())
1355       llvm_unreachable("fp literal in 64-bit integer instruction.");
1356 
1357     case AMDGPU::OPERAND_REG_IMM_INT32:
1358     case AMDGPU::OPERAND_REG_IMM_FP32:
1359     case AMDGPU::OPERAND_REG_INLINE_C_INT32:
1360     case AMDGPU::OPERAND_REG_INLINE_C_FP32:
1361     case AMDGPU::OPERAND_REG_IMM_INT16:
1362     case AMDGPU::OPERAND_REG_IMM_FP16:
1363     case AMDGPU::OPERAND_REG_INLINE_C_INT16:
1364     case AMDGPU::OPERAND_REG_INLINE_C_FP16:
1365     case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
1366     case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: {
1367       bool lost;
1368       APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
1369       // Convert literal to single precision
1370       FPLiteral.convert(*getOpFltSemantics(OpTy),
1371                         APFloat::rmNearestTiesToEven, &lost);
1372       // We allow precision lost but not overflow or underflow. This should be
1373       // checked earlier in isLiteralImm()
1374 
1375       uint64_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue();
1376       if (OpTy == AMDGPU::OPERAND_REG_INLINE_C_V2INT16 ||
1377           OpTy == AMDGPU::OPERAND_REG_INLINE_C_V2FP16) {
1378         ImmVal |= (ImmVal << 16);
1379       }
1380 
1381       Inst.addOperand(MCOperand::createImm(ImmVal));
1382       return;
1383     }
1384     default:
1385       llvm_unreachable("invalid operand size");
1386     }
1387 
1388     return;
1389   }
1390 
1391    // We got int literal token.
1392   // Only sign extend inline immediates.
1393   // FIXME: No errors on truncation
1394   switch (OpTy) {
1395   case AMDGPU::OPERAND_REG_IMM_INT32:
1396   case AMDGPU::OPERAND_REG_IMM_FP32:
1397   case AMDGPU::OPERAND_REG_INLINE_C_INT32:
1398   case AMDGPU::OPERAND_REG_INLINE_C_FP32:
1399     if (isInt<32>(Val) &&
1400         AMDGPU::isInlinableLiteral32(static_cast<int32_t>(Val),
1401                                      AsmParser->hasInv2PiInlineImm())) {
1402       Inst.addOperand(MCOperand::createImm(Val));
1403       return;
1404     }
1405 
1406     Inst.addOperand(MCOperand::createImm(Val & 0xffffffff));
1407     return;
1408 
1409   case AMDGPU::OPERAND_REG_IMM_INT64:
1410   case AMDGPU::OPERAND_REG_IMM_FP64:
1411   case AMDGPU::OPERAND_REG_INLINE_C_INT64:
1412   case AMDGPU::OPERAND_REG_INLINE_C_FP64:
1413     if (AMDGPU::isInlinableLiteral64(Val, AsmParser->hasInv2PiInlineImm())) {
1414       Inst.addOperand(MCOperand::createImm(Val));
1415       return;
1416     }
1417 
1418     Inst.addOperand(MCOperand::createImm(Lo_32(Val)));
1419     return;
1420 
1421   case AMDGPU::OPERAND_REG_IMM_INT16:
1422   case AMDGPU::OPERAND_REG_IMM_FP16:
1423   case AMDGPU::OPERAND_REG_INLINE_C_INT16:
1424   case AMDGPU::OPERAND_REG_INLINE_C_FP16:
1425     if (isInt<16>(Val) &&
1426         AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val),
1427                                      AsmParser->hasInv2PiInlineImm())) {
1428       Inst.addOperand(MCOperand::createImm(Val));
1429       return;
1430     }
1431 
1432     Inst.addOperand(MCOperand::createImm(Val & 0xffff));
1433     return;
1434 
1435   case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
1436   case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: {
1437     auto LiteralVal = static_cast<uint16_t>(Literal.getLoBits(16).getZExtValue());
1438     assert(AMDGPU::isInlinableLiteral16(LiteralVal,
1439                                         AsmParser->hasInv2PiInlineImm()));
1440 
1441     uint32_t ImmVal = static_cast<uint32_t>(LiteralVal) << 16 |
1442                       static_cast<uint32_t>(LiteralVal);
1443     Inst.addOperand(MCOperand::createImm(ImmVal));
1444     return;
1445   }
1446   default:
1447     llvm_unreachable("invalid operand size");
1448   }
1449 }
1450 
1451 template <unsigned Bitwidth>
1452 void AMDGPUOperand::addKImmFPOperands(MCInst &Inst, unsigned N) const {
1453   APInt Literal(64, Imm.Val);
1454 
1455   if (!Imm.IsFPImm) {
1456     // We got int literal token.
1457     Inst.addOperand(MCOperand::createImm(Literal.getLoBits(Bitwidth).getZExtValue()));
1458     return;
1459   }
1460 
1461   bool Lost;
1462   APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
1463   FPLiteral.convert(*getFltSemantics(Bitwidth / 8),
1464                     APFloat::rmNearestTiesToEven, &Lost);
1465   Inst.addOperand(MCOperand::createImm(FPLiteral.bitcastToAPInt().getZExtValue()));
1466 }
1467 
1468 void AMDGPUOperand::addRegOperands(MCInst &Inst, unsigned N) const {
1469   Inst.addOperand(MCOperand::createReg(AMDGPU::getMCReg(getReg(), AsmParser->getSTI())));
1470 }
1471 
1472 //===----------------------------------------------------------------------===//
1473 // AsmParser
1474 //===----------------------------------------------------------------------===//
1475 
1476 static int getRegClass(RegisterKind Is, unsigned RegWidth) {
1477   if (Is == IS_VGPR) {
1478     switch (RegWidth) {
1479       default: return -1;
1480       case 1: return AMDGPU::VGPR_32RegClassID;
1481       case 2: return AMDGPU::VReg_64RegClassID;
1482       case 3: return AMDGPU::VReg_96RegClassID;
1483       case 4: return AMDGPU::VReg_128RegClassID;
1484       case 8: return AMDGPU::VReg_256RegClassID;
1485       case 16: return AMDGPU::VReg_512RegClassID;
1486     }
1487   } else if (Is == IS_TTMP) {
1488     switch (RegWidth) {
1489       default: return -1;
1490       case 1: return AMDGPU::TTMP_32RegClassID;
1491       case 2: return AMDGPU::TTMP_64RegClassID;
1492       case 4: return AMDGPU::TTMP_128RegClassID;
1493     }
1494   } else if (Is == IS_SGPR) {
1495     switch (RegWidth) {
1496       default: return -1;
1497       case 1: return AMDGPU::SGPR_32RegClassID;
1498       case 2: return AMDGPU::SGPR_64RegClassID;
1499       case 4: return AMDGPU::SGPR_128RegClassID;
1500       case 8: return AMDGPU::SReg_256RegClassID;
1501       case 16: return AMDGPU::SReg_512RegClassID;
1502     }
1503   }
1504   return -1;
1505 }
1506 
1507 static unsigned getSpecialRegForName(StringRef RegName) {
1508   return StringSwitch<unsigned>(RegName)
1509     .Case("exec", AMDGPU::EXEC)
1510     .Case("vcc", AMDGPU::VCC)
1511     .Case("flat_scratch", AMDGPU::FLAT_SCR)
1512     .Case("m0", AMDGPU::M0)
1513     .Case("scc", AMDGPU::SCC)
1514     .Case("tba", AMDGPU::TBA)
1515     .Case("tma", AMDGPU::TMA)
1516     .Case("flat_scratch_lo", AMDGPU::FLAT_SCR_LO)
1517     .Case("flat_scratch_hi", AMDGPU::FLAT_SCR_HI)
1518     .Case("vcc_lo", AMDGPU::VCC_LO)
1519     .Case("vcc_hi", AMDGPU::VCC_HI)
1520     .Case("exec_lo", AMDGPU::EXEC_LO)
1521     .Case("exec_hi", AMDGPU::EXEC_HI)
1522     .Case("tma_lo", AMDGPU::TMA_LO)
1523     .Case("tma_hi", AMDGPU::TMA_HI)
1524     .Case("tba_lo", AMDGPU::TBA_LO)
1525     .Case("tba_hi", AMDGPU::TBA_HI)
1526     .Default(0);
1527 }
1528 
1529 bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
1530                                     SMLoc &EndLoc) {
1531   auto R = parseRegister();
1532   if (!R) return true;
1533   assert(R->isReg());
1534   RegNo = R->getReg();
1535   StartLoc = R->getStartLoc();
1536   EndLoc = R->getEndLoc();
1537   return false;
1538 }
1539 
1540 bool AMDGPUAsmParser::AddNextRegisterToList(unsigned &Reg, unsigned &RegWidth,
1541                                             RegisterKind RegKind, unsigned Reg1,
1542                                             unsigned RegNum) {
1543   switch (RegKind) {
1544   case IS_SPECIAL:
1545     if (Reg == AMDGPU::EXEC_LO && Reg1 == AMDGPU::EXEC_HI) {
1546       Reg = AMDGPU::EXEC;
1547       RegWidth = 2;
1548       return true;
1549     }
1550     if (Reg == AMDGPU::FLAT_SCR_LO && Reg1 == AMDGPU::FLAT_SCR_HI) {
1551       Reg = AMDGPU::FLAT_SCR;
1552       RegWidth = 2;
1553       return true;
1554     }
1555     if (Reg == AMDGPU::VCC_LO && Reg1 == AMDGPU::VCC_HI) {
1556       Reg = AMDGPU::VCC;
1557       RegWidth = 2;
1558       return true;
1559     }
1560     if (Reg == AMDGPU::TBA_LO && Reg1 == AMDGPU::TBA_HI) {
1561       Reg = AMDGPU::TBA;
1562       RegWidth = 2;
1563       return true;
1564     }
1565     if (Reg == AMDGPU::TMA_LO && Reg1 == AMDGPU::TMA_HI) {
1566       Reg = AMDGPU::TMA;
1567       RegWidth = 2;
1568       return true;
1569     }
1570     return false;
1571   case IS_VGPR:
1572   case IS_SGPR:
1573   case IS_TTMP:
1574     if (Reg1 != Reg + RegWidth) {
1575       return false;
1576     }
1577     RegWidth++;
1578     return true;
1579   default:
1580     llvm_unreachable("unexpected register kind");
1581   }
1582 }
1583 
1584 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
1585                                           unsigned &RegNum, unsigned &RegWidth,
1586                                           unsigned *DwordRegIndex) {
1587   if (DwordRegIndex) { *DwordRegIndex = 0; }
1588   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
1589   if (getLexer().is(AsmToken::Identifier)) {
1590     StringRef RegName = Parser.getTok().getString();
1591     if ((Reg = getSpecialRegForName(RegName))) {
1592       Parser.Lex();
1593       RegKind = IS_SPECIAL;
1594     } else {
1595       unsigned RegNumIndex = 0;
1596       if (RegName[0] == 'v') {
1597         RegNumIndex = 1;
1598         RegKind = IS_VGPR;
1599       } else if (RegName[0] == 's') {
1600         RegNumIndex = 1;
1601         RegKind = IS_SGPR;
1602       } else if (RegName.startswith("ttmp")) {
1603         RegNumIndex = strlen("ttmp");
1604         RegKind = IS_TTMP;
1605       } else {
1606         return false;
1607       }
1608       if (RegName.size() > RegNumIndex) {
1609         // Single 32-bit register: vXX.
1610         if (RegName.substr(RegNumIndex).getAsInteger(10, RegNum))
1611           return false;
1612         Parser.Lex();
1613         RegWidth = 1;
1614       } else {
1615         // Range of registers: v[XX:YY]. ":YY" is optional.
1616         Parser.Lex();
1617         int64_t RegLo, RegHi;
1618         if (getLexer().isNot(AsmToken::LBrac))
1619           return false;
1620         Parser.Lex();
1621 
1622         if (getParser().parseAbsoluteExpression(RegLo))
1623           return false;
1624 
1625         const bool isRBrace = getLexer().is(AsmToken::RBrac);
1626         if (!isRBrace && getLexer().isNot(AsmToken::Colon))
1627           return false;
1628         Parser.Lex();
1629 
1630         if (isRBrace) {
1631           RegHi = RegLo;
1632         } else {
1633           if (getParser().parseAbsoluteExpression(RegHi))
1634             return false;
1635 
1636           if (getLexer().isNot(AsmToken::RBrac))
1637             return false;
1638           Parser.Lex();
1639         }
1640         RegNum = (unsigned) RegLo;
1641         RegWidth = (RegHi - RegLo) + 1;
1642       }
1643     }
1644   } else if (getLexer().is(AsmToken::LBrac)) {
1645     // List of consecutive registers: [s0,s1,s2,s3]
1646     Parser.Lex();
1647     if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, nullptr))
1648       return false;
1649     if (RegWidth != 1)
1650       return false;
1651     RegisterKind RegKind1;
1652     unsigned Reg1, RegNum1, RegWidth1;
1653     do {
1654       if (getLexer().is(AsmToken::Comma)) {
1655         Parser.Lex();
1656       } else if (getLexer().is(AsmToken::RBrac)) {
1657         Parser.Lex();
1658         break;
1659       } else if (ParseAMDGPURegister(RegKind1, Reg1, RegNum1, RegWidth1, nullptr)) {
1660         if (RegWidth1 != 1) {
1661           return false;
1662         }
1663         if (RegKind1 != RegKind) {
1664           return false;
1665         }
1666         if (!AddNextRegisterToList(Reg, RegWidth, RegKind1, Reg1, RegNum1)) {
1667           return false;
1668         }
1669       } else {
1670         return false;
1671       }
1672     } while (true);
1673   } else {
1674     return false;
1675   }
1676   switch (RegKind) {
1677   case IS_SPECIAL:
1678     RegNum = 0;
1679     RegWidth = 1;
1680     break;
1681   case IS_VGPR:
1682   case IS_SGPR:
1683   case IS_TTMP:
1684   {
1685     unsigned Size = 1;
1686     if (RegKind == IS_SGPR || RegKind == IS_TTMP) {
1687       // SGPR and TTMP registers must be aligned. Max required alignment is 4 dwords.
1688       Size = std::min(RegWidth, 4u);
1689     }
1690     if (RegNum % Size != 0)
1691       return false;
1692     if (DwordRegIndex) { *DwordRegIndex = RegNum; }
1693     RegNum = RegNum / Size;
1694     int RCID = getRegClass(RegKind, RegWidth);
1695     if (RCID == -1)
1696       return false;
1697     const MCRegisterClass RC = TRI->getRegClass(RCID);
1698     if (RegNum >= RC.getNumRegs())
1699       return false;
1700     Reg = RC.getRegister(RegNum);
1701     break;
1702   }
1703 
1704   default:
1705     llvm_unreachable("unexpected register kind");
1706   }
1707 
1708   if (!subtargetHasRegister(*TRI, Reg))
1709     return false;
1710   return true;
1711 }
1712 
1713 std::unique_ptr<AMDGPUOperand> AMDGPUAsmParser::parseRegister() {
1714   const auto &Tok = Parser.getTok();
1715   SMLoc StartLoc = Tok.getLoc();
1716   SMLoc EndLoc = Tok.getEndLoc();
1717   RegisterKind RegKind;
1718   unsigned Reg, RegNum, RegWidth, DwordRegIndex;
1719 
1720   if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, &DwordRegIndex)) {
1721     return nullptr;
1722   }
1723   KernelScope.usesRegister(RegKind, DwordRegIndex, RegWidth);
1724   return AMDGPUOperand::CreateReg(this, Reg, StartLoc, EndLoc, false);
1725 }
1726 
1727 bool
1728 AMDGPUAsmParser::parseAbsoluteExpr(int64_t &Val, bool AbsMod) {
1729   if (AbsMod && getLexer().peekTok().is(AsmToken::Pipe) &&
1730       (getLexer().getKind() == AsmToken::Integer ||
1731        getLexer().getKind() == AsmToken::Real)) {
1732     // This is a workaround for handling operands like these:
1733     //     |1.0|
1734     //     |-1|
1735     // This syntax is not compatible with syntax of standard
1736     // MC expressions (due to the trailing '|').
1737 
1738     SMLoc EndLoc;
1739     const MCExpr *Expr;
1740 
1741     if (getParser().parsePrimaryExpr(Expr, EndLoc)) {
1742       return true;
1743     }
1744 
1745     return !Expr->evaluateAsAbsolute(Val);
1746   }
1747 
1748   return getParser().parseAbsoluteExpression(Val);
1749 }
1750 
1751 OperandMatchResultTy
1752 AMDGPUAsmParser::parseImm(OperandVector &Operands, bool AbsMod) {
1753   // TODO: add syntactic sugar for 1/(2*PI)
1754   bool Minus = false;
1755   if (getLexer().getKind() == AsmToken::Minus) {
1756     Minus = true;
1757     Parser.Lex();
1758   }
1759 
1760   SMLoc S = Parser.getTok().getLoc();
1761   switch(getLexer().getKind()) {
1762   case AsmToken::Integer: {
1763     int64_t IntVal;
1764     if (parseAbsoluteExpr(IntVal, AbsMod))
1765       return MatchOperand_ParseFail;
1766     if (Minus)
1767       IntVal *= -1;
1768     Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S));
1769     return MatchOperand_Success;
1770   }
1771   case AsmToken::Real: {
1772     int64_t IntVal;
1773     if (parseAbsoluteExpr(IntVal, AbsMod))
1774       return MatchOperand_ParseFail;
1775 
1776     APFloat F(BitsToDouble(IntVal));
1777     if (Minus)
1778       F.changeSign();
1779     Operands.push_back(
1780         AMDGPUOperand::CreateImm(this, F.bitcastToAPInt().getZExtValue(), S,
1781                                  AMDGPUOperand::ImmTyNone, true));
1782     return MatchOperand_Success;
1783   }
1784   default:
1785     return Minus ? MatchOperand_ParseFail : MatchOperand_NoMatch;
1786   }
1787 }
1788 
1789 OperandMatchResultTy
1790 AMDGPUAsmParser::parseReg(OperandVector &Operands) {
1791   if (auto R = parseRegister()) {
1792     assert(R->isReg());
1793     R->Reg.IsForcedVOP3 = isForcedVOP3();
1794     Operands.push_back(std::move(R));
1795     return MatchOperand_Success;
1796   }
1797   return MatchOperand_NoMatch;
1798 }
1799 
1800 OperandMatchResultTy
1801 AMDGPUAsmParser::parseRegOrImm(OperandVector &Operands, bool AbsMod) {
1802   auto res = parseImm(Operands, AbsMod);
1803   if (res != MatchOperand_NoMatch) {
1804     return res;
1805   }
1806 
1807   return parseReg(Operands);
1808 }
1809 
1810 OperandMatchResultTy
1811 AMDGPUAsmParser::parseRegOrImmWithFPInputMods(OperandVector &Operands,
1812                                               bool AllowImm) {
1813   bool Negate = false, Negate2 = false, Abs = false, Abs2 = false;
1814 
1815   if (getLexer().getKind()== AsmToken::Minus) {
1816     const AsmToken NextToken = getLexer().peekTok();
1817 
1818     // Disable ambiguous constructs like '--1' etc. Should use neg(-1) instead.
1819     if (NextToken.is(AsmToken::Minus)) {
1820       Error(Parser.getTok().getLoc(), "invalid syntax, expected 'neg' modifier");
1821       return MatchOperand_ParseFail;
1822     }
1823 
1824     // '-' followed by an integer literal N should be interpreted as integer
1825     // negation rather than a floating-point NEG modifier applied to N.
1826     // Beside being contr-intuitive, such use of floating-point NEG modifier
1827     // results in different meaning of integer literals used with VOP1/2/C
1828     // and VOP3, for example:
1829     //    v_exp_f32_e32 v5, -1 // VOP1: src0 = 0xFFFFFFFF
1830     //    v_exp_f32_e64 v5, -1 // VOP3: src0 = 0x80000001
1831     // Negative fp literals should be handled likewise for unifomtity
1832     if (!NextToken.is(AsmToken::Integer) && !NextToken.is(AsmToken::Real)) {
1833       Parser.Lex();
1834       Negate = true;
1835     }
1836   }
1837 
1838   if (getLexer().getKind() == AsmToken::Identifier &&
1839       Parser.getTok().getString() == "neg") {
1840     if (Negate) {
1841       Error(Parser.getTok().getLoc(), "expected register or immediate");
1842       return MatchOperand_ParseFail;
1843     }
1844     Parser.Lex();
1845     Negate2 = true;
1846     if (getLexer().isNot(AsmToken::LParen)) {
1847       Error(Parser.getTok().getLoc(), "expected left paren after neg");
1848       return MatchOperand_ParseFail;
1849     }
1850     Parser.Lex();
1851   }
1852 
1853   if (getLexer().getKind() == AsmToken::Identifier &&
1854       Parser.getTok().getString() == "abs") {
1855     Parser.Lex();
1856     Abs2 = true;
1857     if (getLexer().isNot(AsmToken::LParen)) {
1858       Error(Parser.getTok().getLoc(), "expected left paren after abs");
1859       return MatchOperand_ParseFail;
1860     }
1861     Parser.Lex();
1862   }
1863 
1864   if (getLexer().getKind() == AsmToken::Pipe) {
1865     if (Abs2) {
1866       Error(Parser.getTok().getLoc(), "expected register or immediate");
1867       return MatchOperand_ParseFail;
1868     }
1869     Parser.Lex();
1870     Abs = true;
1871   }
1872 
1873   OperandMatchResultTy Res;
1874   if (AllowImm) {
1875     Res = parseRegOrImm(Operands, Abs);
1876   } else {
1877     Res = parseReg(Operands);
1878   }
1879   if (Res != MatchOperand_Success) {
1880     return Res;
1881   }
1882 
1883   AMDGPUOperand::Modifiers Mods;
1884   if (Abs) {
1885     if (getLexer().getKind() != AsmToken::Pipe) {
1886       Error(Parser.getTok().getLoc(), "expected vertical bar");
1887       return MatchOperand_ParseFail;
1888     }
1889     Parser.Lex();
1890     Mods.Abs = true;
1891   }
1892   if (Abs2) {
1893     if (getLexer().isNot(AsmToken::RParen)) {
1894       Error(Parser.getTok().getLoc(), "expected closing parentheses");
1895       return MatchOperand_ParseFail;
1896     }
1897     Parser.Lex();
1898     Mods.Abs = true;
1899   }
1900 
1901   if (Negate) {
1902     Mods.Neg = true;
1903   } else if (Negate2) {
1904     if (getLexer().isNot(AsmToken::RParen)) {
1905       Error(Parser.getTok().getLoc(), "expected closing parentheses");
1906       return MatchOperand_ParseFail;
1907     }
1908     Parser.Lex();
1909     Mods.Neg = true;
1910   }
1911 
1912   if (Mods.hasFPModifiers()) {
1913     AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
1914     Op.setModifiers(Mods);
1915   }
1916   return MatchOperand_Success;
1917 }
1918 
1919 OperandMatchResultTy
1920 AMDGPUAsmParser::parseRegOrImmWithIntInputMods(OperandVector &Operands,
1921                                                bool AllowImm) {
1922   bool Sext = false;
1923 
1924   if (getLexer().getKind() == AsmToken::Identifier &&
1925       Parser.getTok().getString() == "sext") {
1926     Parser.Lex();
1927     Sext = true;
1928     if (getLexer().isNot(AsmToken::LParen)) {
1929       Error(Parser.getTok().getLoc(), "expected left paren after sext");
1930       return MatchOperand_ParseFail;
1931     }
1932     Parser.Lex();
1933   }
1934 
1935   OperandMatchResultTy Res;
1936   if (AllowImm) {
1937     Res = parseRegOrImm(Operands);
1938   } else {
1939     Res = parseReg(Operands);
1940   }
1941   if (Res != MatchOperand_Success) {
1942     return Res;
1943   }
1944 
1945   AMDGPUOperand::Modifiers Mods;
1946   if (Sext) {
1947     if (getLexer().isNot(AsmToken::RParen)) {
1948       Error(Parser.getTok().getLoc(), "expected closing parentheses");
1949       return MatchOperand_ParseFail;
1950     }
1951     Parser.Lex();
1952     Mods.Sext = true;
1953   }
1954 
1955   if (Mods.hasIntModifiers()) {
1956     AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
1957     Op.setModifiers(Mods);
1958   }
1959 
1960   return MatchOperand_Success;
1961 }
1962 
1963 OperandMatchResultTy
1964 AMDGPUAsmParser::parseRegWithFPInputMods(OperandVector &Operands) {
1965   return parseRegOrImmWithFPInputMods(Operands, false);
1966 }
1967 
1968 OperandMatchResultTy
1969 AMDGPUAsmParser::parseRegWithIntInputMods(OperandVector &Operands) {
1970   return parseRegOrImmWithIntInputMods(Operands, false);
1971 }
1972 
1973 OperandMatchResultTy AMDGPUAsmParser::parseVReg32OrOff(OperandVector &Operands) {
1974   std::unique_ptr<AMDGPUOperand> Reg = parseRegister();
1975   if (Reg) {
1976     Operands.push_back(std::move(Reg));
1977     return MatchOperand_Success;
1978   }
1979 
1980   const AsmToken &Tok = Parser.getTok();
1981   if (Tok.getString() == "off") {
1982     Operands.push_back(AMDGPUOperand::CreateImm(this, 0, Tok.getLoc(),
1983                                                 AMDGPUOperand::ImmTyOff, false));
1984     Parser.Lex();
1985     return MatchOperand_Success;
1986   }
1987 
1988   return MatchOperand_NoMatch;
1989 }
1990 
1991 unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) {
1992   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
1993 
1994   if ((getForcedEncodingSize() == 32 && (TSFlags & SIInstrFlags::VOP3)) ||
1995       (getForcedEncodingSize() == 64 && !(TSFlags & SIInstrFlags::VOP3)) ||
1996       (isForcedDPP() && !(TSFlags & SIInstrFlags::DPP)) ||
1997       (isForcedSDWA() && !(TSFlags & SIInstrFlags::SDWA)) )
1998     return Match_InvalidOperand;
1999 
2000   if ((TSFlags & SIInstrFlags::VOP3) &&
2001       (TSFlags & SIInstrFlags::VOPAsmPrefer32Bit) &&
2002       getForcedEncodingSize() != 64)
2003     return Match_PreferE32;
2004 
2005   if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
2006       Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) {
2007     // v_mac_f32/16 allow only dst_sel == DWORD;
2008     auto OpNum =
2009         AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::dst_sel);
2010     const auto &Op = Inst.getOperand(OpNum);
2011     if (!Op.isImm() || Op.getImm() != AMDGPU::SDWA::SdwaSel::DWORD) {
2012       return Match_InvalidOperand;
2013     }
2014   }
2015 
2016   if ((TSFlags & SIInstrFlags::FLAT) && !hasFlatOffsets()) {
2017     // FIXME: Produces error without correct column reported.
2018     auto OpNum =
2019         AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::offset);
2020     const auto &Op = Inst.getOperand(OpNum);
2021     if (Op.getImm() != 0)
2022       return Match_InvalidOperand;
2023   }
2024 
2025   return Match_Success;
2026 }
2027 
2028 // What asm variants we should check
2029 ArrayRef<unsigned> AMDGPUAsmParser::getMatchedVariants() const {
2030   if (getForcedEncodingSize() == 32) {
2031     static const unsigned Variants[] = {AMDGPUAsmVariants::DEFAULT};
2032     return makeArrayRef(Variants);
2033   }
2034 
2035   if (isForcedVOP3()) {
2036     static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3};
2037     return makeArrayRef(Variants);
2038   }
2039 
2040   if (isForcedSDWA()) {
2041     static const unsigned Variants[] = {AMDGPUAsmVariants::SDWA,
2042                                         AMDGPUAsmVariants::SDWA9};
2043     return makeArrayRef(Variants);
2044   }
2045 
2046   if (isForcedDPP()) {
2047     static const unsigned Variants[] = {AMDGPUAsmVariants::DPP};
2048     return makeArrayRef(Variants);
2049   }
2050 
2051   static const unsigned Variants[] = {
2052     AMDGPUAsmVariants::DEFAULT, AMDGPUAsmVariants::VOP3,
2053     AMDGPUAsmVariants::SDWA, AMDGPUAsmVariants::SDWA9, AMDGPUAsmVariants::DPP
2054   };
2055 
2056   return makeArrayRef(Variants);
2057 }
2058 
2059 unsigned AMDGPUAsmParser::findImplicitSGPRReadInVOP(const MCInst &Inst) const {
2060   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
2061   const unsigned Num = Desc.getNumImplicitUses();
2062   for (unsigned i = 0; i < Num; ++i) {
2063     unsigned Reg = Desc.ImplicitUses[i];
2064     switch (Reg) {
2065     case AMDGPU::FLAT_SCR:
2066     case AMDGPU::VCC:
2067     case AMDGPU::M0:
2068       return Reg;
2069     default:
2070       break;
2071     }
2072   }
2073   return AMDGPU::NoRegister;
2074 }
2075 
2076 // NB: This code is correct only when used to check constant
2077 // bus limitations because GFX7 support no f16 inline constants.
2078 // Note that there are no cases when a GFX7 opcode violates
2079 // constant bus limitations due to the use of an f16 constant.
2080 bool AMDGPUAsmParser::isInlineConstant(const MCInst &Inst,
2081                                        unsigned OpIdx) const {
2082   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
2083 
2084   if (!AMDGPU::isSISrcOperand(Desc, OpIdx)) {
2085     return false;
2086   }
2087 
2088   const MCOperand &MO = Inst.getOperand(OpIdx);
2089 
2090   int64_t Val = MO.getImm();
2091   auto OpSize = AMDGPU::getOperandSize(Desc, OpIdx);
2092 
2093   switch (OpSize) { // expected operand size
2094   case 8:
2095     return AMDGPU::isInlinableLiteral64(Val, hasInv2PiInlineImm());
2096   case 4:
2097     return AMDGPU::isInlinableLiteral32(Val, hasInv2PiInlineImm());
2098   case 2: {
2099     const unsigned OperandType = Desc.OpInfo[OpIdx].OperandType;
2100     if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2INT16 ||
2101         OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2FP16) {
2102       return AMDGPU::isInlinableLiteralV216(Val, hasInv2PiInlineImm());
2103     } else {
2104       return AMDGPU::isInlinableLiteral16(Val, hasInv2PiInlineImm());
2105     }
2106   }
2107   default:
2108     llvm_unreachable("invalid operand size");
2109   }
2110 }
2111 
2112 bool AMDGPUAsmParser::usesConstantBus(const MCInst &Inst, unsigned OpIdx) {
2113   const MCOperand &MO = Inst.getOperand(OpIdx);
2114   if (MO.isImm()) {
2115     return !isInlineConstant(Inst, OpIdx);
2116   }
2117   return !MO.isReg() ||
2118          isSGPR(mc2PseudoReg(MO.getReg()), getContext().getRegisterInfo());
2119 }
2120 
2121 bool AMDGPUAsmParser::validateConstantBusLimitations(const MCInst &Inst) {
2122   const unsigned Opcode = Inst.getOpcode();
2123   const MCInstrDesc &Desc = MII.get(Opcode);
2124   unsigned ConstantBusUseCount = 0;
2125 
2126   if (Desc.TSFlags &
2127       (SIInstrFlags::VOPC |
2128        SIInstrFlags::VOP1 | SIInstrFlags::VOP2 |
2129        SIInstrFlags::VOP3 | SIInstrFlags::VOP3P |
2130        SIInstrFlags::SDWA)) {
2131     // Check special imm operands (used by madmk, etc)
2132     if (AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm) != -1) {
2133       ++ConstantBusUseCount;
2134     }
2135 
2136     unsigned SGPRUsed = findImplicitSGPRReadInVOP(Inst);
2137     if (SGPRUsed != AMDGPU::NoRegister) {
2138       ++ConstantBusUseCount;
2139     }
2140 
2141     const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
2142     const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
2143     const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
2144 
2145     const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx };
2146 
2147     for (int OpIdx : OpIndices) {
2148       if (OpIdx == -1) break;
2149 
2150       const MCOperand &MO = Inst.getOperand(OpIdx);
2151       if (usesConstantBus(Inst, OpIdx)) {
2152         if (MO.isReg()) {
2153           const unsigned Reg = mc2PseudoReg(MO.getReg());
2154           // Pairs of registers with a partial intersections like these
2155           //   s0, s[0:1]
2156           //   flat_scratch_lo, flat_scratch
2157           //   flat_scratch_lo, flat_scratch_hi
2158           // are theoretically valid but they are disabled anyway.
2159           // Note that this code mimics SIInstrInfo::verifyInstruction
2160           if (Reg != SGPRUsed) {
2161             ++ConstantBusUseCount;
2162           }
2163           SGPRUsed = Reg;
2164         } else { // Expression or a literal
2165           ++ConstantBusUseCount;
2166         }
2167       }
2168     }
2169   }
2170 
2171   return ConstantBusUseCount <= 1;
2172 }
2173 
2174 bool AMDGPUAsmParser::validateEarlyClobberLimitations(const MCInst &Inst) {
2175   const unsigned Opcode = Inst.getOpcode();
2176   const MCInstrDesc &Desc = MII.get(Opcode);
2177 
2178   const int DstIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vdst);
2179   if (DstIdx == -1 ||
2180       Desc.getOperandConstraint(DstIdx, MCOI::EARLY_CLOBBER) == -1) {
2181     return true;
2182   }
2183 
2184   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
2185 
2186   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
2187   const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
2188   const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
2189 
2190   assert(DstIdx != -1);
2191   const MCOperand &Dst = Inst.getOperand(DstIdx);
2192   assert(Dst.isReg());
2193   const unsigned DstReg = mc2PseudoReg(Dst.getReg());
2194 
2195   const int SrcIndices[] = { Src0Idx, Src1Idx, Src2Idx };
2196 
2197   for (int SrcIdx : SrcIndices) {
2198     if (SrcIdx == -1) break;
2199     const MCOperand &Src = Inst.getOperand(SrcIdx);
2200     if (Src.isReg()) {
2201       const unsigned SrcReg = mc2PseudoReg(Src.getReg());
2202       if (isRegIntersect(DstReg, SrcReg, TRI)) {
2203         return false;
2204       }
2205     }
2206   }
2207 
2208   return true;
2209 }
2210 
2211 bool AMDGPUAsmParser::validateIntClampSupported(const MCInst &Inst) {
2212 
2213   const unsigned Opc = Inst.getOpcode();
2214   const MCInstrDesc &Desc = MII.get(Opc);
2215 
2216   if ((Desc.TSFlags & SIInstrFlags::IntClamp) != 0 && !hasIntClamp()) {
2217     int ClampIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp);
2218     assert(ClampIdx != -1);
2219     return Inst.getOperand(ClampIdx).getImm() == 0;
2220   }
2221 
2222   return true;
2223 }
2224 
2225 bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst,
2226                                           const SMLoc &IDLoc) {
2227   if (!validateConstantBusLimitations(Inst)) {
2228     Error(IDLoc,
2229       "invalid operand (violates constant bus restrictions)");
2230     return false;
2231   }
2232   if (!validateEarlyClobberLimitations(Inst)) {
2233     Error(IDLoc,
2234       "destination must be different than all sources");
2235     return false;
2236   }
2237   if (!validateIntClampSupported(Inst)) {
2238     Error(IDLoc,
2239       "integer clamping is not supported on this GPU");
2240     return false;
2241   }
2242 
2243   return true;
2244 }
2245 
2246 bool AMDGPUAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
2247                                               OperandVector &Operands,
2248                                               MCStreamer &Out,
2249                                               uint64_t &ErrorInfo,
2250                                               bool MatchingInlineAsm) {
2251   MCInst Inst;
2252   unsigned Result = Match_Success;
2253   for (auto Variant : getMatchedVariants()) {
2254     uint64_t EI;
2255     auto R = MatchInstructionImpl(Operands, Inst, EI, MatchingInlineAsm,
2256                                   Variant);
2257     // We order match statuses from least to most specific. We use most specific
2258     // status as resulting
2259     // Match_MnemonicFail < Match_InvalidOperand < Match_MissingFeature < Match_PreferE32
2260     if ((R == Match_Success) ||
2261         (R == Match_PreferE32) ||
2262         (R == Match_MissingFeature && Result != Match_PreferE32) ||
2263         (R == Match_InvalidOperand && Result != Match_MissingFeature
2264                                    && Result != Match_PreferE32) ||
2265         (R == Match_MnemonicFail   && Result != Match_InvalidOperand
2266                                    && Result != Match_MissingFeature
2267                                    && Result != Match_PreferE32)) {
2268       Result = R;
2269       ErrorInfo = EI;
2270     }
2271     if (R == Match_Success)
2272       break;
2273   }
2274 
2275   switch (Result) {
2276   default: break;
2277   case Match_Success:
2278     if (!validateInstruction(Inst, IDLoc)) {
2279       return true;
2280     }
2281     Inst.setLoc(IDLoc);
2282     Out.EmitInstruction(Inst, getSTI());
2283     return false;
2284 
2285   case Match_MissingFeature:
2286     return Error(IDLoc, "instruction not supported on this GPU");
2287 
2288   case Match_MnemonicFail:
2289     return Error(IDLoc, "unrecognized instruction mnemonic");
2290 
2291   case Match_InvalidOperand: {
2292     SMLoc ErrorLoc = IDLoc;
2293     if (ErrorInfo != ~0ULL) {
2294       if (ErrorInfo >= Operands.size()) {
2295         return Error(IDLoc, "too few operands for instruction");
2296       }
2297       ErrorLoc = ((AMDGPUOperand &)*Operands[ErrorInfo]).getStartLoc();
2298       if (ErrorLoc == SMLoc())
2299         ErrorLoc = IDLoc;
2300     }
2301     return Error(ErrorLoc, "invalid operand for instruction");
2302   }
2303 
2304   case Match_PreferE32:
2305     return Error(IDLoc, "internal error: instruction without _e64 suffix "
2306                         "should be encoded as e32");
2307   }
2308   llvm_unreachable("Implement any new match types added!");
2309 }
2310 
2311 bool AMDGPUAsmParser::ParseAsAbsoluteExpression(uint32_t &Ret) {
2312   int64_t Tmp = -1;
2313   if (getLexer().isNot(AsmToken::Integer) && getLexer().isNot(AsmToken::Identifier)) {
2314     return true;
2315   }
2316   if (getParser().parseAbsoluteExpression(Tmp)) {
2317     return true;
2318   }
2319   Ret = static_cast<uint32_t>(Tmp);
2320   return false;
2321 }
2322 
2323 bool AMDGPUAsmParser::ParseDirectiveMajorMinor(uint32_t &Major,
2324                                                uint32_t &Minor) {
2325   if (ParseAsAbsoluteExpression(Major))
2326     return TokError("invalid major version");
2327 
2328   if (getLexer().isNot(AsmToken::Comma))
2329     return TokError("minor version number required, comma expected");
2330   Lex();
2331 
2332   if (ParseAsAbsoluteExpression(Minor))
2333     return TokError("invalid minor version");
2334 
2335   return false;
2336 }
2337 
2338 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectVersion() {
2339   uint32_t Major;
2340   uint32_t Minor;
2341 
2342   if (ParseDirectiveMajorMinor(Major, Minor))
2343     return true;
2344 
2345   getTargetStreamer().EmitDirectiveHSACodeObjectVersion(Major, Minor);
2346   return false;
2347 }
2348 
2349 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectISA() {
2350   uint32_t Major;
2351   uint32_t Minor;
2352   uint32_t Stepping;
2353   StringRef VendorName;
2354   StringRef ArchName;
2355 
2356   // If this directive has no arguments, then use the ISA version for the
2357   // targeted GPU.
2358   if (getLexer().is(AsmToken::EndOfStatement)) {
2359     AMDGPU::IsaInfo::IsaVersion ISA =
2360         AMDGPU::IsaInfo::getIsaVersion(getFeatureBits());
2361     getTargetStreamer().EmitDirectiveHSACodeObjectISA(ISA.Major, ISA.Minor,
2362                                                       ISA.Stepping,
2363                                                       "AMD", "AMDGPU");
2364     return false;
2365   }
2366 
2367   if (ParseDirectiveMajorMinor(Major, Minor))
2368     return true;
2369 
2370   if (getLexer().isNot(AsmToken::Comma))
2371     return TokError("stepping version number required, comma expected");
2372   Lex();
2373 
2374   if (ParseAsAbsoluteExpression(Stepping))
2375     return TokError("invalid stepping version");
2376 
2377   if (getLexer().isNot(AsmToken::Comma))
2378     return TokError("vendor name required, comma expected");
2379   Lex();
2380 
2381   if (getLexer().isNot(AsmToken::String))
2382     return TokError("invalid vendor name");
2383 
2384   VendorName = getLexer().getTok().getStringContents();
2385   Lex();
2386 
2387   if (getLexer().isNot(AsmToken::Comma))
2388     return TokError("arch name required, comma expected");
2389   Lex();
2390 
2391   if (getLexer().isNot(AsmToken::String))
2392     return TokError("invalid arch name");
2393 
2394   ArchName = getLexer().getTok().getStringContents();
2395   Lex();
2396 
2397   getTargetStreamer().EmitDirectiveHSACodeObjectISA(Major, Minor, Stepping,
2398                                                     VendorName, ArchName);
2399   return false;
2400 }
2401 
2402 bool AMDGPUAsmParser::ParseDirectiveCodeObjectMetadata() {
2403   std::string YamlString;
2404   raw_string_ostream YamlStream(YamlString);
2405 
2406   getLexer().setSkipSpace(false);
2407 
2408   bool FoundEnd = false;
2409   while (!getLexer().is(AsmToken::Eof)) {
2410     while (getLexer().is(AsmToken::Space)) {
2411       YamlStream << getLexer().getTok().getString();
2412       Lex();
2413     }
2414 
2415     if (getLexer().is(AsmToken::Identifier)) {
2416       StringRef ID = getLexer().getTok().getIdentifier();
2417       if (ID == AMDGPU::CodeObject::MetadataAssemblerDirectiveEnd) {
2418         Lex();
2419         FoundEnd = true;
2420         break;
2421       }
2422     }
2423 
2424     YamlStream << Parser.parseStringToEndOfStatement()
2425                << getContext().getAsmInfo()->getSeparatorString();
2426 
2427     Parser.eatToEndOfStatement();
2428   }
2429 
2430   getLexer().setSkipSpace(true);
2431 
2432   if (getLexer().is(AsmToken::Eof) && !FoundEnd) {
2433     return TokError(
2434         "expected directive .end_amdgpu_code_object_metadata not found");
2435   }
2436 
2437   YamlStream.flush();
2438 
2439   if (!getTargetStreamer().EmitCodeObjectMetadata(YamlString))
2440     return Error(getParser().getTok().getLoc(), "invalid code object metadata");
2441 
2442   return false;
2443 }
2444 
2445 bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID,
2446                                                amd_kernel_code_t &Header) {
2447   SmallString<40> ErrStr;
2448   raw_svector_ostream Err(ErrStr);
2449   if (!parseAmdKernelCodeField(ID, getParser(), Header, Err)) {
2450     return TokError(Err.str());
2451   }
2452   Lex();
2453   return false;
2454 }
2455 
2456 bool AMDGPUAsmParser::ParseDirectiveAMDKernelCodeT() {
2457   amd_kernel_code_t Header;
2458   AMDGPU::initDefaultAMDKernelCodeT(Header, getFeatureBits());
2459 
2460   while (true) {
2461     // Lex EndOfStatement.  This is in a while loop, because lexing a comment
2462     // will set the current token to EndOfStatement.
2463     while(getLexer().is(AsmToken::EndOfStatement))
2464       Lex();
2465 
2466     if (getLexer().isNot(AsmToken::Identifier))
2467       return TokError("expected value identifier or .end_amd_kernel_code_t");
2468 
2469     StringRef ID = getLexer().getTok().getIdentifier();
2470     Lex();
2471 
2472     if (ID == ".end_amd_kernel_code_t")
2473       break;
2474 
2475     if (ParseAMDKernelCodeTValue(ID, Header))
2476       return true;
2477   }
2478 
2479   getTargetStreamer().EmitAMDKernelCodeT(Header);
2480 
2481   return false;
2482 }
2483 
2484 bool AMDGPUAsmParser::ParseDirectiveAMDGPUHsaKernel() {
2485   if (getLexer().isNot(AsmToken::Identifier))
2486     return TokError("expected symbol name");
2487 
2488   StringRef KernelName = Parser.getTok().getString();
2489 
2490   getTargetStreamer().EmitAMDGPUSymbolType(KernelName,
2491                                            ELF::STT_AMDGPU_HSA_KERNEL);
2492   Lex();
2493   KernelScope.initialize(getContext());
2494   return false;
2495 }
2496 
2497 bool AMDGPUAsmParser::ParseDirectivePalMetadata() {
2498   std::vector<uint32_t> Data;
2499   for (;;) {
2500     uint32_t Value;
2501     if (ParseAsAbsoluteExpression(Value))
2502       return TokError("invalid value in .amdgpu_pal_metadata");
2503     Data.push_back(Value);
2504     if (getLexer().isNot(AsmToken::Comma))
2505       break;
2506     Lex();
2507   }
2508   getTargetStreamer().EmitPalMetadata(Data);
2509   return false;
2510 }
2511 
2512 bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) {
2513   StringRef IDVal = DirectiveID.getString();
2514 
2515   if (IDVal == ".hsa_code_object_version")
2516     return ParseDirectiveHSACodeObjectVersion();
2517 
2518   if (IDVal == ".hsa_code_object_isa")
2519     return ParseDirectiveHSACodeObjectISA();
2520 
2521   if (IDVal == AMDGPU::CodeObject::MetadataAssemblerDirectiveBegin)
2522     return ParseDirectiveCodeObjectMetadata();
2523 
2524   if (IDVal == ".amd_kernel_code_t")
2525     return ParseDirectiveAMDKernelCodeT();
2526 
2527   if (IDVal == ".amdgpu_hsa_kernel")
2528     return ParseDirectiveAMDGPUHsaKernel();
2529 
2530   if (IDVal == ".amdgpu_pal_metadata")
2531     return ParseDirectivePalMetadata();
2532 
2533   return true;
2534 }
2535 
2536 bool AMDGPUAsmParser::subtargetHasRegister(const MCRegisterInfo &MRI,
2537                                            unsigned RegNo) const {
2538   if (isCI())
2539     return true;
2540 
2541   if (isSI()) {
2542     // No flat_scr
2543     switch (RegNo) {
2544     case AMDGPU::FLAT_SCR:
2545     case AMDGPU::FLAT_SCR_LO:
2546     case AMDGPU::FLAT_SCR_HI:
2547       return false;
2548     default:
2549       return true;
2550     }
2551   }
2552 
2553   // VI only has 102 SGPRs, so make sure we aren't trying to use the 2 more that
2554   // SI/CI have.
2555   for (MCRegAliasIterator R(AMDGPU::SGPR102_SGPR103, &MRI, true);
2556        R.isValid(); ++R) {
2557     if (*R == RegNo)
2558       return false;
2559   }
2560 
2561   return true;
2562 }
2563 
2564 OperandMatchResultTy
2565 AMDGPUAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic) {
2566   // Try to parse with a custom parser
2567   OperandMatchResultTy ResTy = MatchOperandParserImpl(Operands, Mnemonic);
2568 
2569   // If we successfully parsed the operand or if there as an error parsing,
2570   // we are done.
2571   //
2572   // If we are parsing after we reach EndOfStatement then this means we
2573   // are appending default values to the Operands list.  This is only done
2574   // by custom parser, so we shouldn't continue on to the generic parsing.
2575   if (ResTy == MatchOperand_Success || ResTy == MatchOperand_ParseFail ||
2576       getLexer().is(AsmToken::EndOfStatement))
2577     return ResTy;
2578 
2579   ResTy = parseRegOrImm(Operands);
2580 
2581   if (ResTy == MatchOperand_Success)
2582     return ResTy;
2583 
2584   const auto &Tok = Parser.getTok();
2585   SMLoc S = Tok.getLoc();
2586 
2587   const MCExpr *Expr = nullptr;
2588   if (!Parser.parseExpression(Expr)) {
2589     Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S));
2590     return MatchOperand_Success;
2591   }
2592 
2593   // Possibly this is an instruction flag like 'gds'.
2594   if (Tok.getKind() == AsmToken::Identifier) {
2595     Operands.push_back(AMDGPUOperand::CreateToken(this, Tok.getString(), S));
2596     Parser.Lex();
2597     return MatchOperand_Success;
2598   }
2599 
2600   return MatchOperand_NoMatch;
2601 }
2602 
2603 StringRef AMDGPUAsmParser::parseMnemonicSuffix(StringRef Name) {
2604   // Clear any forced encodings from the previous instruction.
2605   setForcedEncodingSize(0);
2606   setForcedDPP(false);
2607   setForcedSDWA(false);
2608 
2609   if (Name.endswith("_e64")) {
2610     setForcedEncodingSize(64);
2611     return Name.substr(0, Name.size() - 4);
2612   } else if (Name.endswith("_e32")) {
2613     setForcedEncodingSize(32);
2614     return Name.substr(0, Name.size() - 4);
2615   } else if (Name.endswith("_dpp")) {
2616     setForcedDPP(true);
2617     return Name.substr(0, Name.size() - 4);
2618   } else if (Name.endswith("_sdwa")) {
2619     setForcedSDWA(true);
2620     return Name.substr(0, Name.size() - 5);
2621   }
2622   return Name;
2623 }
2624 
2625 bool AMDGPUAsmParser::ParseInstruction(ParseInstructionInfo &Info,
2626                                        StringRef Name,
2627                                        SMLoc NameLoc, OperandVector &Operands) {
2628   // Add the instruction mnemonic
2629   Name = parseMnemonicSuffix(Name);
2630   Operands.push_back(AMDGPUOperand::CreateToken(this, Name, NameLoc));
2631 
2632   while (!getLexer().is(AsmToken::EndOfStatement)) {
2633     OperandMatchResultTy Res = parseOperand(Operands, Name);
2634 
2635     // Eat the comma or space if there is one.
2636     if (getLexer().is(AsmToken::Comma))
2637       Parser.Lex();
2638 
2639     switch (Res) {
2640       case MatchOperand_Success: break;
2641       case MatchOperand_ParseFail:
2642         Error(getLexer().getLoc(), "failed parsing operand.");
2643         while (!getLexer().is(AsmToken::EndOfStatement)) {
2644           Parser.Lex();
2645         }
2646         return true;
2647       case MatchOperand_NoMatch:
2648         Error(getLexer().getLoc(), "not a valid operand.");
2649         while (!getLexer().is(AsmToken::EndOfStatement)) {
2650           Parser.Lex();
2651         }
2652         return true;
2653     }
2654   }
2655 
2656   return false;
2657 }
2658 
2659 //===----------------------------------------------------------------------===//
2660 // Utility functions
2661 //===----------------------------------------------------------------------===//
2662 
2663 OperandMatchResultTy
2664 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, int64_t &Int) {
2665   switch(getLexer().getKind()) {
2666     default: return MatchOperand_NoMatch;
2667     case AsmToken::Identifier: {
2668       StringRef Name = Parser.getTok().getString();
2669       if (!Name.equals(Prefix)) {
2670         return MatchOperand_NoMatch;
2671       }
2672 
2673       Parser.Lex();
2674       if (getLexer().isNot(AsmToken::Colon))
2675         return MatchOperand_ParseFail;
2676 
2677       Parser.Lex();
2678 
2679       bool IsMinus = false;
2680       if (getLexer().getKind() == AsmToken::Minus) {
2681         Parser.Lex();
2682         IsMinus = true;
2683       }
2684 
2685       if (getLexer().isNot(AsmToken::Integer))
2686         return MatchOperand_ParseFail;
2687 
2688       if (getParser().parseAbsoluteExpression(Int))
2689         return MatchOperand_ParseFail;
2690 
2691       if (IsMinus)
2692         Int = -Int;
2693       break;
2694     }
2695   }
2696   return MatchOperand_Success;
2697 }
2698 
2699 OperandMatchResultTy
2700 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
2701                                     AMDGPUOperand::ImmTy ImmTy,
2702                                     bool (*ConvertResult)(int64_t&)) {
2703   SMLoc S = Parser.getTok().getLoc();
2704   int64_t Value = 0;
2705 
2706   OperandMatchResultTy Res = parseIntWithPrefix(Prefix, Value);
2707   if (Res != MatchOperand_Success)
2708     return Res;
2709 
2710   if (ConvertResult && !ConvertResult(Value)) {
2711     return MatchOperand_ParseFail;
2712   }
2713 
2714   Operands.push_back(AMDGPUOperand::CreateImm(this, Value, S, ImmTy));
2715   return MatchOperand_Success;
2716 }
2717 
2718 OperandMatchResultTy AMDGPUAsmParser::parseOperandArrayWithPrefix(
2719   const char *Prefix,
2720   OperandVector &Operands,
2721   AMDGPUOperand::ImmTy ImmTy,
2722   bool (*ConvertResult)(int64_t&)) {
2723   StringRef Name = Parser.getTok().getString();
2724   if (!Name.equals(Prefix))
2725     return MatchOperand_NoMatch;
2726 
2727   Parser.Lex();
2728   if (getLexer().isNot(AsmToken::Colon))
2729     return MatchOperand_ParseFail;
2730 
2731   Parser.Lex();
2732   if (getLexer().isNot(AsmToken::LBrac))
2733     return MatchOperand_ParseFail;
2734   Parser.Lex();
2735 
2736   unsigned Val = 0;
2737   SMLoc S = Parser.getTok().getLoc();
2738 
2739   // FIXME: How to verify the number of elements matches the number of src
2740   // operands?
2741   for (int I = 0; I < 4; ++I) {
2742     if (I != 0) {
2743       if (getLexer().is(AsmToken::RBrac))
2744         break;
2745 
2746       if (getLexer().isNot(AsmToken::Comma))
2747         return MatchOperand_ParseFail;
2748       Parser.Lex();
2749     }
2750 
2751     if (getLexer().isNot(AsmToken::Integer))
2752       return MatchOperand_ParseFail;
2753 
2754     int64_t Op;
2755     if (getParser().parseAbsoluteExpression(Op))
2756       return MatchOperand_ParseFail;
2757 
2758     if (Op != 0 && Op != 1)
2759       return MatchOperand_ParseFail;
2760     Val |= (Op << I);
2761   }
2762 
2763   Parser.Lex();
2764   Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S, ImmTy));
2765   return MatchOperand_Success;
2766 }
2767 
2768 OperandMatchResultTy
2769 AMDGPUAsmParser::parseNamedBit(const char *Name, OperandVector &Operands,
2770                                AMDGPUOperand::ImmTy ImmTy) {
2771   int64_t Bit = 0;
2772   SMLoc S = Parser.getTok().getLoc();
2773 
2774   // We are at the end of the statement, and this is a default argument, so
2775   // use a default value.
2776   if (getLexer().isNot(AsmToken::EndOfStatement)) {
2777     switch(getLexer().getKind()) {
2778       case AsmToken::Identifier: {
2779         StringRef Tok = Parser.getTok().getString();
2780         if (Tok == Name) {
2781           Bit = 1;
2782           Parser.Lex();
2783         } else if (Tok.startswith("no") && Tok.endswith(Name)) {
2784           Bit = 0;
2785           Parser.Lex();
2786         } else {
2787           return MatchOperand_NoMatch;
2788         }
2789         break;
2790       }
2791       default:
2792         return MatchOperand_NoMatch;
2793     }
2794   }
2795 
2796   Operands.push_back(AMDGPUOperand::CreateImm(this, Bit, S, ImmTy));
2797   return MatchOperand_Success;
2798 }
2799 
2800 static void addOptionalImmOperand(
2801   MCInst& Inst, const OperandVector& Operands,
2802   AMDGPUAsmParser::OptionalImmIndexMap& OptionalIdx,
2803   AMDGPUOperand::ImmTy ImmT,
2804   int64_t Default = 0) {
2805   auto i = OptionalIdx.find(ImmT);
2806   if (i != OptionalIdx.end()) {
2807     unsigned Idx = i->second;
2808     ((AMDGPUOperand &)*Operands[Idx]).addImmOperands(Inst, 1);
2809   } else {
2810     Inst.addOperand(MCOperand::createImm(Default));
2811   }
2812 }
2813 
2814 OperandMatchResultTy
2815 AMDGPUAsmParser::parseStringWithPrefix(StringRef Prefix, StringRef &Value) {
2816   if (getLexer().isNot(AsmToken::Identifier)) {
2817     return MatchOperand_NoMatch;
2818   }
2819   StringRef Tok = Parser.getTok().getString();
2820   if (Tok != Prefix) {
2821     return MatchOperand_NoMatch;
2822   }
2823 
2824   Parser.Lex();
2825   if (getLexer().isNot(AsmToken::Colon)) {
2826     return MatchOperand_ParseFail;
2827   }
2828 
2829   Parser.Lex();
2830   if (getLexer().isNot(AsmToken::Identifier)) {
2831     return MatchOperand_ParseFail;
2832   }
2833 
2834   Value = Parser.getTok().getString();
2835   return MatchOperand_Success;
2836 }
2837 
2838 //===----------------------------------------------------------------------===//
2839 // ds
2840 //===----------------------------------------------------------------------===//
2841 
2842 void AMDGPUAsmParser::cvtDSOffset01(MCInst &Inst,
2843                                     const OperandVector &Operands) {
2844   OptionalImmIndexMap OptionalIdx;
2845 
2846   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
2847     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
2848 
2849     // Add the register arguments
2850     if (Op.isReg()) {
2851       Op.addRegOperands(Inst, 1);
2852       continue;
2853     }
2854 
2855     // Handle optional arguments
2856     OptionalIdx[Op.getImmTy()] = i;
2857   }
2858 
2859   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset0);
2860   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset1);
2861   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS);
2862 
2863   Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0
2864 }
2865 
2866 void AMDGPUAsmParser::cvtDSImpl(MCInst &Inst, const OperandVector &Operands,
2867                                 bool IsGdsHardcoded) {
2868   OptionalImmIndexMap OptionalIdx;
2869 
2870   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
2871     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
2872 
2873     // Add the register arguments
2874     if (Op.isReg()) {
2875       Op.addRegOperands(Inst, 1);
2876       continue;
2877     }
2878 
2879     if (Op.isToken() && Op.getToken() == "gds") {
2880       IsGdsHardcoded = true;
2881       continue;
2882     }
2883 
2884     // Handle optional arguments
2885     OptionalIdx[Op.getImmTy()] = i;
2886   }
2887 
2888   AMDGPUOperand::ImmTy OffsetType =
2889     (Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_si ||
2890      Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_vi) ? AMDGPUOperand::ImmTySwizzle :
2891                                                       AMDGPUOperand::ImmTyOffset;
2892 
2893   addOptionalImmOperand(Inst, Operands, OptionalIdx, OffsetType);
2894 
2895   if (!IsGdsHardcoded) {
2896     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS);
2897   }
2898   Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0
2899 }
2900 
2901 void AMDGPUAsmParser::cvtExp(MCInst &Inst, const OperandVector &Operands) {
2902   OptionalImmIndexMap OptionalIdx;
2903 
2904   unsigned OperandIdx[4];
2905   unsigned EnMask = 0;
2906   int SrcIdx = 0;
2907 
2908   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
2909     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
2910 
2911     // Add the register arguments
2912     if (Op.isReg()) {
2913       assert(SrcIdx < 4);
2914       OperandIdx[SrcIdx] = Inst.size();
2915       Op.addRegOperands(Inst, 1);
2916       ++SrcIdx;
2917       continue;
2918     }
2919 
2920     if (Op.isOff()) {
2921       assert(SrcIdx < 4);
2922       OperandIdx[SrcIdx] = Inst.size();
2923       Inst.addOperand(MCOperand::createReg(AMDGPU::NoRegister));
2924       ++SrcIdx;
2925       continue;
2926     }
2927 
2928     if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyExpTgt) {
2929       Op.addImmOperands(Inst, 1);
2930       continue;
2931     }
2932 
2933     if (Op.isToken() && Op.getToken() == "done")
2934       continue;
2935 
2936     // Handle optional arguments
2937     OptionalIdx[Op.getImmTy()] = i;
2938   }
2939 
2940   assert(SrcIdx == 4);
2941 
2942   bool Compr = false;
2943   if (OptionalIdx.find(AMDGPUOperand::ImmTyExpCompr) != OptionalIdx.end()) {
2944     Compr = true;
2945     Inst.getOperand(OperandIdx[1]) = Inst.getOperand(OperandIdx[2]);
2946     Inst.getOperand(OperandIdx[2]).setReg(AMDGPU::NoRegister);
2947     Inst.getOperand(OperandIdx[3]).setReg(AMDGPU::NoRegister);
2948   }
2949 
2950   for (auto i = 0; i < SrcIdx; ++i) {
2951     if (Inst.getOperand(OperandIdx[i]).getReg() != AMDGPU::NoRegister) {
2952       EnMask |= Compr? (0x3 << i * 2) : (0x1 << i);
2953     }
2954   }
2955 
2956   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpVM);
2957   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpCompr);
2958 
2959   Inst.addOperand(MCOperand::createImm(EnMask));
2960 }
2961 
2962 //===----------------------------------------------------------------------===//
2963 // s_waitcnt
2964 //===----------------------------------------------------------------------===//
2965 
2966 static bool
2967 encodeCnt(
2968   const AMDGPU::IsaInfo::IsaVersion ISA,
2969   int64_t &IntVal,
2970   int64_t CntVal,
2971   bool Saturate,
2972   unsigned (*encode)(const IsaInfo::IsaVersion &Version, unsigned, unsigned),
2973   unsigned (*decode)(const IsaInfo::IsaVersion &Version, unsigned))
2974 {
2975   bool Failed = false;
2976 
2977   IntVal = encode(ISA, IntVal, CntVal);
2978   if (CntVal != decode(ISA, IntVal)) {
2979     if (Saturate) {
2980       IntVal = encode(ISA, IntVal, -1);
2981     } else {
2982       Failed = true;
2983     }
2984   }
2985   return Failed;
2986 }
2987 
2988 bool AMDGPUAsmParser::parseCnt(int64_t &IntVal) {
2989   StringRef CntName = Parser.getTok().getString();
2990   int64_t CntVal;
2991 
2992   Parser.Lex();
2993   if (getLexer().isNot(AsmToken::LParen))
2994     return true;
2995 
2996   Parser.Lex();
2997   if (getLexer().isNot(AsmToken::Integer))
2998     return true;
2999 
3000   SMLoc ValLoc = Parser.getTok().getLoc();
3001   if (getParser().parseAbsoluteExpression(CntVal))
3002     return true;
3003 
3004   AMDGPU::IsaInfo::IsaVersion ISA =
3005       AMDGPU::IsaInfo::getIsaVersion(getFeatureBits());
3006 
3007   bool Failed = true;
3008   bool Sat = CntName.endswith("_sat");
3009 
3010   if (CntName == "vmcnt" || CntName == "vmcnt_sat") {
3011     Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeVmcnt, decodeVmcnt);
3012   } else if (CntName == "expcnt" || CntName == "expcnt_sat") {
3013     Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeExpcnt, decodeExpcnt);
3014   } else if (CntName == "lgkmcnt" || CntName == "lgkmcnt_sat") {
3015     Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeLgkmcnt, decodeLgkmcnt);
3016   }
3017 
3018   if (Failed) {
3019     Error(ValLoc, "too large value for " + CntName);
3020     return true;
3021   }
3022 
3023   if (getLexer().isNot(AsmToken::RParen)) {
3024     return true;
3025   }
3026 
3027   Parser.Lex();
3028   if (getLexer().is(AsmToken::Amp) || getLexer().is(AsmToken::Comma)) {
3029     const AsmToken NextToken = getLexer().peekTok();
3030     if (NextToken.is(AsmToken::Identifier)) {
3031       Parser.Lex();
3032     }
3033   }
3034 
3035   return false;
3036 }
3037 
3038 OperandMatchResultTy
3039 AMDGPUAsmParser::parseSWaitCntOps(OperandVector &Operands) {
3040   AMDGPU::IsaInfo::IsaVersion ISA =
3041       AMDGPU::IsaInfo::getIsaVersion(getFeatureBits());
3042   int64_t Waitcnt = getWaitcntBitMask(ISA);
3043   SMLoc S = Parser.getTok().getLoc();
3044 
3045   switch(getLexer().getKind()) {
3046     default: return MatchOperand_ParseFail;
3047     case AsmToken::Integer:
3048       // The operand can be an integer value.
3049       if (getParser().parseAbsoluteExpression(Waitcnt))
3050         return MatchOperand_ParseFail;
3051       break;
3052 
3053     case AsmToken::Identifier:
3054       do {
3055         if (parseCnt(Waitcnt))
3056           return MatchOperand_ParseFail;
3057       } while(getLexer().isNot(AsmToken::EndOfStatement));
3058       break;
3059   }
3060   Operands.push_back(AMDGPUOperand::CreateImm(this, Waitcnt, S));
3061   return MatchOperand_Success;
3062 }
3063 
3064 bool AMDGPUAsmParser::parseHwregConstruct(OperandInfoTy &HwReg, int64_t &Offset,
3065                                           int64_t &Width) {
3066   using namespace llvm::AMDGPU::Hwreg;
3067 
3068   if (Parser.getTok().getString() != "hwreg")
3069     return true;
3070   Parser.Lex();
3071 
3072   if (getLexer().isNot(AsmToken::LParen))
3073     return true;
3074   Parser.Lex();
3075 
3076   if (getLexer().is(AsmToken::Identifier)) {
3077     HwReg.IsSymbolic = true;
3078     HwReg.Id = ID_UNKNOWN_;
3079     const StringRef tok = Parser.getTok().getString();
3080     for (int i = ID_SYMBOLIC_FIRST_; i < ID_SYMBOLIC_LAST_; ++i) {
3081       if (tok == IdSymbolic[i]) {
3082         HwReg.Id = i;
3083         break;
3084       }
3085     }
3086     Parser.Lex();
3087   } else {
3088     HwReg.IsSymbolic = false;
3089     if (getLexer().isNot(AsmToken::Integer))
3090       return true;
3091     if (getParser().parseAbsoluteExpression(HwReg.Id))
3092       return true;
3093   }
3094 
3095   if (getLexer().is(AsmToken::RParen)) {
3096     Parser.Lex();
3097     return false;
3098   }
3099 
3100   // optional params
3101   if (getLexer().isNot(AsmToken::Comma))
3102     return true;
3103   Parser.Lex();
3104 
3105   if (getLexer().isNot(AsmToken::Integer))
3106     return true;
3107   if (getParser().parseAbsoluteExpression(Offset))
3108     return true;
3109 
3110   if (getLexer().isNot(AsmToken::Comma))
3111     return true;
3112   Parser.Lex();
3113 
3114   if (getLexer().isNot(AsmToken::Integer))
3115     return true;
3116   if (getParser().parseAbsoluteExpression(Width))
3117     return true;
3118 
3119   if (getLexer().isNot(AsmToken::RParen))
3120     return true;
3121   Parser.Lex();
3122 
3123   return false;
3124 }
3125 
3126 OperandMatchResultTy AMDGPUAsmParser::parseHwreg(OperandVector &Operands) {
3127   using namespace llvm::AMDGPU::Hwreg;
3128 
3129   int64_t Imm16Val = 0;
3130   SMLoc S = Parser.getTok().getLoc();
3131 
3132   switch(getLexer().getKind()) {
3133     default: return MatchOperand_NoMatch;
3134     case AsmToken::Integer:
3135       // The operand can be an integer value.
3136       if (getParser().parseAbsoluteExpression(Imm16Val))
3137         return MatchOperand_NoMatch;
3138       if (Imm16Val < 0 || !isUInt<16>(Imm16Val)) {
3139         Error(S, "invalid immediate: only 16-bit values are legal");
3140         // Do not return error code, but create an imm operand anyway and proceed
3141         // to the next operand, if any. That avoids unneccessary error messages.
3142       }
3143       break;
3144 
3145     case AsmToken::Identifier: {
3146         OperandInfoTy HwReg(ID_UNKNOWN_);
3147         int64_t Offset = OFFSET_DEFAULT_;
3148         int64_t Width = WIDTH_M1_DEFAULT_ + 1;
3149         if (parseHwregConstruct(HwReg, Offset, Width))
3150           return MatchOperand_ParseFail;
3151         if (HwReg.Id < 0 || !isUInt<ID_WIDTH_>(HwReg.Id)) {
3152           if (HwReg.IsSymbolic)
3153             Error(S, "invalid symbolic name of hardware register");
3154           else
3155             Error(S, "invalid code of hardware register: only 6-bit values are legal");
3156         }
3157         if (Offset < 0 || !isUInt<OFFSET_WIDTH_>(Offset))
3158           Error(S, "invalid bit offset: only 5-bit values are legal");
3159         if ((Width-1) < 0 || !isUInt<WIDTH_M1_WIDTH_>(Width-1))
3160           Error(S, "invalid bitfield width: only values from 1 to 32 are legal");
3161         Imm16Val = (HwReg.Id << ID_SHIFT_) | (Offset << OFFSET_SHIFT_) | ((Width-1) << WIDTH_M1_SHIFT_);
3162       }
3163       break;
3164   }
3165   Operands.push_back(AMDGPUOperand::CreateImm(this, Imm16Val, S, AMDGPUOperand::ImmTyHwreg));
3166   return MatchOperand_Success;
3167 }
3168 
3169 bool AMDGPUOperand::isSWaitCnt() const {
3170   return isImm();
3171 }
3172 
3173 bool AMDGPUOperand::isHwreg() const {
3174   return isImmTy(ImmTyHwreg);
3175 }
3176 
3177 bool AMDGPUAsmParser::parseSendMsgConstruct(OperandInfoTy &Msg, OperandInfoTy &Operation, int64_t &StreamId) {
3178   using namespace llvm::AMDGPU::SendMsg;
3179 
3180   if (Parser.getTok().getString() != "sendmsg")
3181     return true;
3182   Parser.Lex();
3183 
3184   if (getLexer().isNot(AsmToken::LParen))
3185     return true;
3186   Parser.Lex();
3187 
3188   if (getLexer().is(AsmToken::Identifier)) {
3189     Msg.IsSymbolic = true;
3190     Msg.Id = ID_UNKNOWN_;
3191     const std::string tok = Parser.getTok().getString();
3192     for (int i = ID_GAPS_FIRST_; i < ID_GAPS_LAST_; ++i) {
3193       switch(i) {
3194         default: continue; // Omit gaps.
3195         case ID_INTERRUPT: case ID_GS: case ID_GS_DONE:  case ID_SYSMSG: break;
3196       }
3197       if (tok == IdSymbolic[i]) {
3198         Msg.Id = i;
3199         break;
3200       }
3201     }
3202     Parser.Lex();
3203   } else {
3204     Msg.IsSymbolic = false;
3205     if (getLexer().isNot(AsmToken::Integer))
3206       return true;
3207     if (getParser().parseAbsoluteExpression(Msg.Id))
3208       return true;
3209     if (getLexer().is(AsmToken::Integer))
3210       if (getParser().parseAbsoluteExpression(Msg.Id))
3211         Msg.Id = ID_UNKNOWN_;
3212   }
3213   if (Msg.Id == ID_UNKNOWN_) // Don't know how to parse the rest.
3214     return false;
3215 
3216   if (!(Msg.Id == ID_GS || Msg.Id == ID_GS_DONE || Msg.Id == ID_SYSMSG)) {
3217     if (getLexer().isNot(AsmToken::RParen))
3218       return true;
3219     Parser.Lex();
3220     return false;
3221   }
3222 
3223   if (getLexer().isNot(AsmToken::Comma))
3224     return true;
3225   Parser.Lex();
3226 
3227   assert(Msg.Id == ID_GS || Msg.Id == ID_GS_DONE || Msg.Id == ID_SYSMSG);
3228   Operation.Id = ID_UNKNOWN_;
3229   if (getLexer().is(AsmToken::Identifier)) {
3230     Operation.IsSymbolic = true;
3231     const char* const *S = (Msg.Id == ID_SYSMSG) ? OpSysSymbolic : OpGsSymbolic;
3232     const int F = (Msg.Id == ID_SYSMSG) ? OP_SYS_FIRST_ : OP_GS_FIRST_;
3233     const int L = (Msg.Id == ID_SYSMSG) ? OP_SYS_LAST_ : OP_GS_LAST_;
3234     const StringRef Tok = Parser.getTok().getString();
3235     for (int i = F; i < L; ++i) {
3236       if (Tok == S[i]) {
3237         Operation.Id = i;
3238         break;
3239       }
3240     }
3241     Parser.Lex();
3242   } else {
3243     Operation.IsSymbolic = false;
3244     if (getLexer().isNot(AsmToken::Integer))
3245       return true;
3246     if (getParser().parseAbsoluteExpression(Operation.Id))
3247       return true;
3248   }
3249 
3250   if ((Msg.Id == ID_GS || Msg.Id == ID_GS_DONE) && Operation.Id != OP_GS_NOP) {
3251     // Stream id is optional.
3252     if (getLexer().is(AsmToken::RParen)) {
3253       Parser.Lex();
3254       return false;
3255     }
3256 
3257     if (getLexer().isNot(AsmToken::Comma))
3258       return true;
3259     Parser.Lex();
3260 
3261     if (getLexer().isNot(AsmToken::Integer))
3262       return true;
3263     if (getParser().parseAbsoluteExpression(StreamId))
3264       return true;
3265   }
3266 
3267   if (getLexer().isNot(AsmToken::RParen))
3268     return true;
3269   Parser.Lex();
3270   return false;
3271 }
3272 
3273 OperandMatchResultTy AMDGPUAsmParser::parseInterpSlot(OperandVector &Operands) {
3274   if (getLexer().getKind() != AsmToken::Identifier)
3275     return MatchOperand_NoMatch;
3276 
3277   StringRef Str = Parser.getTok().getString();
3278   int Slot = StringSwitch<int>(Str)
3279     .Case("p10", 0)
3280     .Case("p20", 1)
3281     .Case("p0", 2)
3282     .Default(-1);
3283 
3284   SMLoc S = Parser.getTok().getLoc();
3285   if (Slot == -1)
3286     return MatchOperand_ParseFail;
3287 
3288   Parser.Lex();
3289   Operands.push_back(AMDGPUOperand::CreateImm(this, Slot, S,
3290                                               AMDGPUOperand::ImmTyInterpSlot));
3291   return MatchOperand_Success;
3292 }
3293 
3294 OperandMatchResultTy AMDGPUAsmParser::parseInterpAttr(OperandVector &Operands) {
3295   if (getLexer().getKind() != AsmToken::Identifier)
3296     return MatchOperand_NoMatch;
3297 
3298   StringRef Str = Parser.getTok().getString();
3299   if (!Str.startswith("attr"))
3300     return MatchOperand_NoMatch;
3301 
3302   StringRef Chan = Str.take_back(2);
3303   int AttrChan = StringSwitch<int>(Chan)
3304     .Case(".x", 0)
3305     .Case(".y", 1)
3306     .Case(".z", 2)
3307     .Case(".w", 3)
3308     .Default(-1);
3309   if (AttrChan == -1)
3310     return MatchOperand_ParseFail;
3311 
3312   Str = Str.drop_back(2).drop_front(4);
3313 
3314   uint8_t Attr;
3315   if (Str.getAsInteger(10, Attr))
3316     return MatchOperand_ParseFail;
3317 
3318   SMLoc S = Parser.getTok().getLoc();
3319   Parser.Lex();
3320   if (Attr > 63) {
3321     Error(S, "out of bounds attr");
3322     return MatchOperand_Success;
3323   }
3324 
3325   SMLoc SChan = SMLoc::getFromPointer(Chan.data());
3326 
3327   Operands.push_back(AMDGPUOperand::CreateImm(this, Attr, S,
3328                                               AMDGPUOperand::ImmTyInterpAttr));
3329   Operands.push_back(AMDGPUOperand::CreateImm(this, AttrChan, SChan,
3330                                               AMDGPUOperand::ImmTyAttrChan));
3331   return MatchOperand_Success;
3332 }
3333 
3334 void AMDGPUAsmParser::errorExpTgt() {
3335   Error(Parser.getTok().getLoc(), "invalid exp target");
3336 }
3337 
3338 OperandMatchResultTy AMDGPUAsmParser::parseExpTgtImpl(StringRef Str,
3339                                                       uint8_t &Val) {
3340   if (Str == "null") {
3341     Val = 9;
3342     return MatchOperand_Success;
3343   }
3344 
3345   if (Str.startswith("mrt")) {
3346     Str = Str.drop_front(3);
3347     if (Str == "z") { // == mrtz
3348       Val = 8;
3349       return MatchOperand_Success;
3350     }
3351 
3352     if (Str.getAsInteger(10, Val))
3353       return MatchOperand_ParseFail;
3354 
3355     if (Val > 7)
3356       errorExpTgt();
3357 
3358     return MatchOperand_Success;
3359   }
3360 
3361   if (Str.startswith("pos")) {
3362     Str = Str.drop_front(3);
3363     if (Str.getAsInteger(10, Val))
3364       return MatchOperand_ParseFail;
3365 
3366     if (Val > 3)
3367       errorExpTgt();
3368 
3369     Val += 12;
3370     return MatchOperand_Success;
3371   }
3372 
3373   if (Str.startswith("param")) {
3374     Str = Str.drop_front(5);
3375     if (Str.getAsInteger(10, Val))
3376       return MatchOperand_ParseFail;
3377 
3378     if (Val >= 32)
3379       errorExpTgt();
3380 
3381     Val += 32;
3382     return MatchOperand_Success;
3383   }
3384 
3385   if (Str.startswith("invalid_target_")) {
3386     Str = Str.drop_front(15);
3387     if (Str.getAsInteger(10, Val))
3388       return MatchOperand_ParseFail;
3389 
3390     errorExpTgt();
3391     return MatchOperand_Success;
3392   }
3393 
3394   return MatchOperand_NoMatch;
3395 }
3396 
3397 OperandMatchResultTy AMDGPUAsmParser::parseExpTgt(OperandVector &Operands) {
3398   uint8_t Val;
3399   StringRef Str = Parser.getTok().getString();
3400 
3401   auto Res = parseExpTgtImpl(Str, Val);
3402   if (Res != MatchOperand_Success)
3403     return Res;
3404 
3405   SMLoc S = Parser.getTok().getLoc();
3406   Parser.Lex();
3407 
3408   Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S,
3409                                               AMDGPUOperand::ImmTyExpTgt));
3410   return MatchOperand_Success;
3411 }
3412 
3413 OperandMatchResultTy
3414 AMDGPUAsmParser::parseSendMsgOp(OperandVector &Operands) {
3415   using namespace llvm::AMDGPU::SendMsg;
3416 
3417   int64_t Imm16Val = 0;
3418   SMLoc S = Parser.getTok().getLoc();
3419 
3420   switch(getLexer().getKind()) {
3421   default:
3422     return MatchOperand_NoMatch;
3423   case AsmToken::Integer:
3424     // The operand can be an integer value.
3425     if (getParser().parseAbsoluteExpression(Imm16Val))
3426       return MatchOperand_NoMatch;
3427     if (Imm16Val < 0 || !isUInt<16>(Imm16Val)) {
3428       Error(S, "invalid immediate: only 16-bit values are legal");
3429       // Do not return error code, but create an imm operand anyway and proceed
3430       // to the next operand, if any. That avoids unneccessary error messages.
3431     }
3432     break;
3433   case AsmToken::Identifier: {
3434       OperandInfoTy Msg(ID_UNKNOWN_);
3435       OperandInfoTy Operation(OP_UNKNOWN_);
3436       int64_t StreamId = STREAM_ID_DEFAULT_;
3437       if (parseSendMsgConstruct(Msg, Operation, StreamId))
3438         return MatchOperand_ParseFail;
3439       do {
3440         // Validate and encode message ID.
3441         if (! ((ID_INTERRUPT <= Msg.Id && Msg.Id <= ID_GS_DONE)
3442                 || Msg.Id == ID_SYSMSG)) {
3443           if (Msg.IsSymbolic)
3444             Error(S, "invalid/unsupported symbolic name of message");
3445           else
3446             Error(S, "invalid/unsupported code of message");
3447           break;
3448         }
3449         Imm16Val = (Msg.Id << ID_SHIFT_);
3450         // Validate and encode operation ID.
3451         if (Msg.Id == ID_GS || Msg.Id == ID_GS_DONE) {
3452           if (! (OP_GS_FIRST_ <= Operation.Id && Operation.Id < OP_GS_LAST_)) {
3453             if (Operation.IsSymbolic)
3454               Error(S, "invalid symbolic name of GS_OP");
3455             else
3456               Error(S, "invalid code of GS_OP: only 2-bit values are legal");
3457             break;
3458           }
3459           if (Operation.Id == OP_GS_NOP
3460               && Msg.Id != ID_GS_DONE) {
3461             Error(S, "invalid GS_OP: NOP is for GS_DONE only");
3462             break;
3463           }
3464           Imm16Val |= (Operation.Id << OP_SHIFT_);
3465         }
3466         if (Msg.Id == ID_SYSMSG) {
3467           if (! (OP_SYS_FIRST_ <= Operation.Id && Operation.Id < OP_SYS_LAST_)) {
3468             if (Operation.IsSymbolic)
3469               Error(S, "invalid/unsupported symbolic name of SYSMSG_OP");
3470             else
3471               Error(S, "invalid/unsupported code of SYSMSG_OP");
3472             break;
3473           }
3474           Imm16Val |= (Operation.Id << OP_SHIFT_);
3475         }
3476         // Validate and encode stream ID.
3477         if ((Msg.Id == ID_GS || Msg.Id == ID_GS_DONE) && Operation.Id != OP_GS_NOP) {
3478           if (! (STREAM_ID_FIRST_ <= StreamId && StreamId < STREAM_ID_LAST_)) {
3479             Error(S, "invalid stream id: only 2-bit values are legal");
3480             break;
3481           }
3482           Imm16Val |= (StreamId << STREAM_ID_SHIFT_);
3483         }
3484       } while (false);
3485     }
3486     break;
3487   }
3488   Operands.push_back(AMDGPUOperand::CreateImm(this, Imm16Val, S, AMDGPUOperand::ImmTySendMsg));
3489   return MatchOperand_Success;
3490 }
3491 
3492 bool AMDGPUOperand::isSendMsg() const {
3493   return isImmTy(ImmTySendMsg);
3494 }
3495 
3496 //===----------------------------------------------------------------------===//
3497 // parser helpers
3498 //===----------------------------------------------------------------------===//
3499 
3500 bool
3501 AMDGPUAsmParser::trySkipId(const StringRef Id) {
3502   if (getLexer().getKind() == AsmToken::Identifier &&
3503       Parser.getTok().getString() == Id) {
3504     Parser.Lex();
3505     return true;
3506   }
3507   return false;
3508 }
3509 
3510 bool
3511 AMDGPUAsmParser::trySkipToken(const AsmToken::TokenKind Kind) {
3512   if (getLexer().getKind() == Kind) {
3513     Parser.Lex();
3514     return true;
3515   }
3516   return false;
3517 }
3518 
3519 bool
3520 AMDGPUAsmParser::skipToken(const AsmToken::TokenKind Kind,
3521                            const StringRef ErrMsg) {
3522   if (!trySkipToken(Kind)) {
3523     Error(Parser.getTok().getLoc(), ErrMsg);
3524     return false;
3525   }
3526   return true;
3527 }
3528 
3529 bool
3530 AMDGPUAsmParser::parseExpr(int64_t &Imm) {
3531   return !getParser().parseAbsoluteExpression(Imm);
3532 }
3533 
3534 bool
3535 AMDGPUAsmParser::parseString(StringRef &Val, const StringRef ErrMsg) {
3536   SMLoc S = Parser.getTok().getLoc();
3537   if (getLexer().getKind() == AsmToken::String) {
3538     Val = Parser.getTok().getStringContents();
3539     Parser.Lex();
3540     return true;
3541   } else {
3542     Error(S, ErrMsg);
3543     return false;
3544   }
3545 }
3546 
3547 //===----------------------------------------------------------------------===//
3548 // swizzle
3549 //===----------------------------------------------------------------------===//
3550 
3551 LLVM_READNONE
3552 static unsigned
3553 encodeBitmaskPerm(const unsigned AndMask,
3554                   const unsigned OrMask,
3555                   const unsigned XorMask) {
3556   using namespace llvm::AMDGPU::Swizzle;
3557 
3558   return BITMASK_PERM_ENC |
3559          (AndMask << BITMASK_AND_SHIFT) |
3560          (OrMask  << BITMASK_OR_SHIFT)  |
3561          (XorMask << BITMASK_XOR_SHIFT);
3562 }
3563 
3564 bool
3565 AMDGPUAsmParser::parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
3566                                       const unsigned MinVal,
3567                                       const unsigned MaxVal,
3568                                       const StringRef ErrMsg) {
3569   for (unsigned i = 0; i < OpNum; ++i) {
3570     if (!skipToken(AsmToken::Comma, "expected a comma")){
3571       return false;
3572     }
3573     SMLoc ExprLoc = Parser.getTok().getLoc();
3574     if (!parseExpr(Op[i])) {
3575       return false;
3576     }
3577     if (Op[i] < MinVal || Op[i] > MaxVal) {
3578       Error(ExprLoc, ErrMsg);
3579       return false;
3580     }
3581   }
3582 
3583   return true;
3584 }
3585 
3586 bool
3587 AMDGPUAsmParser::parseSwizzleQuadPerm(int64_t &Imm) {
3588   using namespace llvm::AMDGPU::Swizzle;
3589 
3590   int64_t Lane[LANE_NUM];
3591   if (parseSwizzleOperands(LANE_NUM, Lane, 0, LANE_MAX,
3592                            "expected a 2-bit lane id")) {
3593     Imm = QUAD_PERM_ENC;
3594     for (auto i = 0; i < LANE_NUM; ++i) {
3595       Imm |= Lane[i] << (LANE_SHIFT * i);
3596     }
3597     return true;
3598   }
3599   return false;
3600 }
3601 
3602 bool
3603 AMDGPUAsmParser::parseSwizzleBroadcast(int64_t &Imm) {
3604   using namespace llvm::AMDGPU::Swizzle;
3605 
3606   SMLoc S = Parser.getTok().getLoc();
3607   int64_t GroupSize;
3608   int64_t LaneIdx;
3609 
3610   if (!parseSwizzleOperands(1, &GroupSize,
3611                             2, 32,
3612                             "group size must be in the interval [2,32]")) {
3613     return false;
3614   }
3615   if (!isPowerOf2_64(GroupSize)) {
3616     Error(S, "group size must be a power of two");
3617     return false;
3618   }
3619   if (parseSwizzleOperands(1, &LaneIdx,
3620                            0, GroupSize - 1,
3621                            "lane id must be in the interval [0,group size - 1]")) {
3622     Imm = encodeBitmaskPerm(BITMASK_MAX - GroupSize + 1, LaneIdx, 0);
3623     return true;
3624   }
3625   return false;
3626 }
3627 
3628 bool
3629 AMDGPUAsmParser::parseSwizzleReverse(int64_t &Imm) {
3630   using namespace llvm::AMDGPU::Swizzle;
3631 
3632   SMLoc S = Parser.getTok().getLoc();
3633   int64_t GroupSize;
3634 
3635   if (!parseSwizzleOperands(1, &GroupSize,
3636       2, 32, "group size must be in the interval [2,32]")) {
3637     return false;
3638   }
3639   if (!isPowerOf2_64(GroupSize)) {
3640     Error(S, "group size must be a power of two");
3641     return false;
3642   }
3643 
3644   Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize - 1);
3645   return true;
3646 }
3647 
3648 bool
3649 AMDGPUAsmParser::parseSwizzleSwap(int64_t &Imm) {
3650   using namespace llvm::AMDGPU::Swizzle;
3651 
3652   SMLoc S = Parser.getTok().getLoc();
3653   int64_t GroupSize;
3654 
3655   if (!parseSwizzleOperands(1, &GroupSize,
3656       1, 16, "group size must be in the interval [1,16]")) {
3657     return false;
3658   }
3659   if (!isPowerOf2_64(GroupSize)) {
3660     Error(S, "group size must be a power of two");
3661     return false;
3662   }
3663 
3664   Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize);
3665   return true;
3666 }
3667 
3668 bool
3669 AMDGPUAsmParser::parseSwizzleBitmaskPerm(int64_t &Imm) {
3670   using namespace llvm::AMDGPU::Swizzle;
3671 
3672   if (!skipToken(AsmToken::Comma, "expected a comma")) {
3673     return false;
3674   }
3675 
3676   StringRef Ctl;
3677   SMLoc StrLoc = Parser.getTok().getLoc();
3678   if (!parseString(Ctl)) {
3679     return false;
3680   }
3681   if (Ctl.size() != BITMASK_WIDTH) {
3682     Error(StrLoc, "expected a 5-character mask");
3683     return false;
3684   }
3685 
3686   unsigned AndMask = 0;
3687   unsigned OrMask = 0;
3688   unsigned XorMask = 0;
3689 
3690   for (size_t i = 0; i < Ctl.size(); ++i) {
3691     unsigned Mask = 1 << (BITMASK_WIDTH - 1 - i);
3692     switch(Ctl[i]) {
3693     default:
3694       Error(StrLoc, "invalid mask");
3695       return false;
3696     case '0':
3697       break;
3698     case '1':
3699       OrMask |= Mask;
3700       break;
3701     case 'p':
3702       AndMask |= Mask;
3703       break;
3704     case 'i':
3705       AndMask |= Mask;
3706       XorMask |= Mask;
3707       break;
3708     }
3709   }
3710 
3711   Imm = encodeBitmaskPerm(AndMask, OrMask, XorMask);
3712   return true;
3713 }
3714 
3715 bool
3716 AMDGPUAsmParser::parseSwizzleOffset(int64_t &Imm) {
3717 
3718   SMLoc OffsetLoc = Parser.getTok().getLoc();
3719 
3720   if (!parseExpr(Imm)) {
3721     return false;
3722   }
3723   if (!isUInt<16>(Imm)) {
3724     Error(OffsetLoc, "expected a 16-bit offset");
3725     return false;
3726   }
3727   return true;
3728 }
3729 
3730 bool
3731 AMDGPUAsmParser::parseSwizzleMacro(int64_t &Imm) {
3732   using namespace llvm::AMDGPU::Swizzle;
3733 
3734   if (skipToken(AsmToken::LParen, "expected a left parentheses")) {
3735 
3736     SMLoc ModeLoc = Parser.getTok().getLoc();
3737     bool Ok = false;
3738 
3739     if (trySkipId(IdSymbolic[ID_QUAD_PERM])) {
3740       Ok = parseSwizzleQuadPerm(Imm);
3741     } else if (trySkipId(IdSymbolic[ID_BITMASK_PERM])) {
3742       Ok = parseSwizzleBitmaskPerm(Imm);
3743     } else if (trySkipId(IdSymbolic[ID_BROADCAST])) {
3744       Ok = parseSwizzleBroadcast(Imm);
3745     } else if (trySkipId(IdSymbolic[ID_SWAP])) {
3746       Ok = parseSwizzleSwap(Imm);
3747     } else if (trySkipId(IdSymbolic[ID_REVERSE])) {
3748       Ok = parseSwizzleReverse(Imm);
3749     } else {
3750       Error(ModeLoc, "expected a swizzle mode");
3751     }
3752 
3753     return Ok && skipToken(AsmToken::RParen, "expected a closing parentheses");
3754   }
3755 
3756   return false;
3757 }
3758 
3759 OperandMatchResultTy
3760 AMDGPUAsmParser::parseSwizzleOp(OperandVector &Operands) {
3761   SMLoc S = Parser.getTok().getLoc();
3762   int64_t Imm = 0;
3763 
3764   if (trySkipId("offset")) {
3765 
3766     bool Ok = false;
3767     if (skipToken(AsmToken::Colon, "expected a colon")) {
3768       if (trySkipId("swizzle")) {
3769         Ok = parseSwizzleMacro(Imm);
3770       } else {
3771         Ok = parseSwizzleOffset(Imm);
3772       }
3773     }
3774 
3775     Operands.push_back(AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTySwizzle));
3776 
3777     return Ok? MatchOperand_Success : MatchOperand_ParseFail;
3778   } else {
3779     return MatchOperand_NoMatch;
3780   }
3781 }
3782 
3783 bool
3784 AMDGPUOperand::isSwizzle() const {
3785   return isImmTy(ImmTySwizzle);
3786 }
3787 
3788 //===----------------------------------------------------------------------===//
3789 // sopp branch targets
3790 //===----------------------------------------------------------------------===//
3791 
3792 OperandMatchResultTy
3793 AMDGPUAsmParser::parseSOppBrTarget(OperandVector &Operands) {
3794   SMLoc S = Parser.getTok().getLoc();
3795 
3796   switch (getLexer().getKind()) {
3797     default: return MatchOperand_ParseFail;
3798     case AsmToken::Integer: {
3799       int64_t Imm;
3800       if (getParser().parseAbsoluteExpression(Imm))
3801         return MatchOperand_ParseFail;
3802       Operands.push_back(AMDGPUOperand::CreateImm(this, Imm, S));
3803       return MatchOperand_Success;
3804     }
3805 
3806     case AsmToken::Identifier:
3807       Operands.push_back(AMDGPUOperand::CreateExpr(this,
3808           MCSymbolRefExpr::create(getContext().getOrCreateSymbol(
3809                                   Parser.getTok().getString()), getContext()), S));
3810       Parser.Lex();
3811       return MatchOperand_Success;
3812   }
3813 }
3814 
3815 //===----------------------------------------------------------------------===//
3816 // mubuf
3817 //===----------------------------------------------------------------------===//
3818 
3819 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultGLC() const {
3820   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyGLC);
3821 }
3822 
3823 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSLC() const {
3824   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTySLC);
3825 }
3826 
3827 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultTFE() const {
3828   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyTFE);
3829 }
3830 
3831 void AMDGPUAsmParser::cvtMubufImpl(MCInst &Inst,
3832                                const OperandVector &Operands,
3833                                bool IsAtomic, bool IsAtomicReturn) {
3834   OptionalImmIndexMap OptionalIdx;
3835   assert(IsAtomicReturn ? IsAtomic : true);
3836 
3837   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
3838     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
3839 
3840     // Add the register arguments
3841     if (Op.isReg()) {
3842       Op.addRegOperands(Inst, 1);
3843       continue;
3844     }
3845 
3846     // Handle the case where soffset is an immediate
3847     if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
3848       Op.addImmOperands(Inst, 1);
3849       continue;
3850     }
3851 
3852     // Handle tokens like 'offen' which are sometimes hard-coded into the
3853     // asm string.  There are no MCInst operands for these.
3854     if (Op.isToken()) {
3855       continue;
3856     }
3857     assert(Op.isImm());
3858 
3859     // Handle optional arguments
3860     OptionalIdx[Op.getImmTy()] = i;
3861   }
3862 
3863   // Copy $vdata_in operand and insert as $vdata for MUBUF_Atomic RTN insns.
3864   if (IsAtomicReturn) {
3865     MCInst::iterator I = Inst.begin(); // $vdata_in is always at the beginning.
3866     Inst.insert(I, *I);
3867   }
3868 
3869   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset);
3870   if (!IsAtomic) { // glc is hard-coded.
3871     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC);
3872   }
3873   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC);
3874   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
3875 }
3876 
3877 void AMDGPUAsmParser::cvtMtbuf(MCInst &Inst, const OperandVector &Operands) {
3878   OptionalImmIndexMap OptionalIdx;
3879 
3880   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
3881     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
3882 
3883     // Add the register arguments
3884     if (Op.isReg()) {
3885       Op.addRegOperands(Inst, 1);
3886       continue;
3887     }
3888 
3889     // Handle the case where soffset is an immediate
3890     if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
3891       Op.addImmOperands(Inst, 1);
3892       continue;
3893     }
3894 
3895     // Handle tokens like 'offen' which are sometimes hard-coded into the
3896     // asm string.  There are no MCInst operands for these.
3897     if (Op.isToken()) {
3898       continue;
3899     }
3900     assert(Op.isImm());
3901 
3902     // Handle optional arguments
3903     OptionalIdx[Op.getImmTy()] = i;
3904   }
3905 
3906   addOptionalImmOperand(Inst, Operands, OptionalIdx,
3907                         AMDGPUOperand::ImmTyOffset);
3908   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDFMT);
3909   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyNFMT);
3910   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC);
3911   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC);
3912   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
3913 }
3914 
3915 //===----------------------------------------------------------------------===//
3916 // mimg
3917 //===----------------------------------------------------------------------===//
3918 
3919 void AMDGPUAsmParser::cvtMIMG(MCInst &Inst, const OperandVector &Operands,
3920                               bool IsAtomic) {
3921   unsigned I = 1;
3922   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
3923   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
3924     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
3925   }
3926 
3927   if (IsAtomic) {
3928     // Add src, same as dst
3929     ((AMDGPUOperand &)*Operands[I]).addRegOperands(Inst, 1);
3930   }
3931 
3932   OptionalImmIndexMap OptionalIdx;
3933 
3934   for (unsigned E = Operands.size(); I != E; ++I) {
3935     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
3936 
3937     // Add the register arguments
3938     if (Op.isRegOrImm()) {
3939       Op.addRegOrImmOperands(Inst, 1);
3940       continue;
3941     } else if (Op.isImmModifier()) {
3942       OptionalIdx[Op.getImmTy()] = I;
3943     } else {
3944       llvm_unreachable("unexpected operand type");
3945     }
3946   }
3947 
3948   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDMask);
3949   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyUNorm);
3950   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC);
3951   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDA);
3952   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyR128);
3953   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
3954   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyLWE);
3955   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC);
3956 }
3957 
3958 void AMDGPUAsmParser::cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands) {
3959   cvtMIMG(Inst, Operands, true);
3960 }
3961 
3962 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultDMask() const {
3963   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDMask);
3964 }
3965 
3966 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultUNorm() const {
3967   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyUNorm);
3968 }
3969 
3970 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultDA() const {
3971   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDA);
3972 }
3973 
3974 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultR128() const {
3975   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyR128);
3976 }
3977 
3978 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultLWE() const {
3979   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyLWE);
3980 }
3981 
3982 //===----------------------------------------------------------------------===//
3983 // smrd
3984 //===----------------------------------------------------------------------===//
3985 
3986 bool AMDGPUOperand::isSMRDOffset8() const {
3987   return isImm() && isUInt<8>(getImm());
3988 }
3989 
3990 bool AMDGPUOperand::isSMRDOffset20() const {
3991   return isImm() && isUInt<20>(getImm());
3992 }
3993 
3994 bool AMDGPUOperand::isSMRDLiteralOffset() const {
3995   // 32-bit literals are only supported on CI and we only want to use them
3996   // when the offset is > 8-bits.
3997   return isImm() && !isUInt<8>(getImm()) && isUInt<32>(getImm());
3998 }
3999 
4000 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDOffset8() const {
4001   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
4002 }
4003 
4004 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDOffset20() const {
4005   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
4006 }
4007 
4008 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDLiteralOffset() const {
4009   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
4010 }
4011 
4012 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultOffsetU12() const {
4013   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
4014 }
4015 
4016 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultOffsetS13() const {
4017   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
4018 }
4019 
4020 //===----------------------------------------------------------------------===//
4021 // vop3
4022 //===----------------------------------------------------------------------===//
4023 
4024 static bool ConvertOmodMul(int64_t &Mul) {
4025   if (Mul != 1 && Mul != 2 && Mul != 4)
4026     return false;
4027 
4028   Mul >>= 1;
4029   return true;
4030 }
4031 
4032 static bool ConvertOmodDiv(int64_t &Div) {
4033   if (Div == 1) {
4034     Div = 0;
4035     return true;
4036   }
4037 
4038   if (Div == 2) {
4039     Div = 3;
4040     return true;
4041   }
4042 
4043   return false;
4044 }
4045 
4046 static bool ConvertBoundCtrl(int64_t &BoundCtrl) {
4047   if (BoundCtrl == 0) {
4048     BoundCtrl = 1;
4049     return true;
4050   }
4051 
4052   if (BoundCtrl == -1) {
4053     BoundCtrl = 0;
4054     return true;
4055   }
4056 
4057   return false;
4058 }
4059 
4060 // Note: the order in this table matches the order of operands in AsmString.
4061 static const OptionalOperand AMDGPUOptionalOperandTable[] = {
4062   {"offen",   AMDGPUOperand::ImmTyOffen, true, nullptr},
4063   {"idxen",   AMDGPUOperand::ImmTyIdxen, true, nullptr},
4064   {"addr64",  AMDGPUOperand::ImmTyAddr64, true, nullptr},
4065   {"offset0", AMDGPUOperand::ImmTyOffset0, false, nullptr},
4066   {"offset1", AMDGPUOperand::ImmTyOffset1, false, nullptr},
4067   {"gds",     AMDGPUOperand::ImmTyGDS, true, nullptr},
4068   {"offset",  AMDGPUOperand::ImmTyOffset, false, nullptr},
4069   {"dfmt",    AMDGPUOperand::ImmTyDFMT, false, nullptr},
4070   {"nfmt",    AMDGPUOperand::ImmTyNFMT, false, nullptr},
4071   {"glc",     AMDGPUOperand::ImmTyGLC, true, nullptr},
4072   {"slc",     AMDGPUOperand::ImmTySLC, true, nullptr},
4073   {"tfe",     AMDGPUOperand::ImmTyTFE, true, nullptr},
4074   {"high",    AMDGPUOperand::ImmTyHigh, true, nullptr},
4075   {"clamp",   AMDGPUOperand::ImmTyClampSI, true, nullptr},
4076   {"omod",    AMDGPUOperand::ImmTyOModSI, false, ConvertOmodMul},
4077   {"unorm",   AMDGPUOperand::ImmTyUNorm, true, nullptr},
4078   {"da",      AMDGPUOperand::ImmTyDA,    true, nullptr},
4079   {"r128",    AMDGPUOperand::ImmTyR128,  true, nullptr},
4080   {"lwe",     AMDGPUOperand::ImmTyLWE,   true, nullptr},
4081   {"dmask",   AMDGPUOperand::ImmTyDMask, false, nullptr},
4082   {"row_mask",   AMDGPUOperand::ImmTyDppRowMask, false, nullptr},
4083   {"bank_mask",  AMDGPUOperand::ImmTyDppBankMask, false, nullptr},
4084   {"bound_ctrl", AMDGPUOperand::ImmTyDppBoundCtrl, false, ConvertBoundCtrl},
4085   {"dst_sel",    AMDGPUOperand::ImmTySdwaDstSel, false, nullptr},
4086   {"src0_sel",   AMDGPUOperand::ImmTySdwaSrc0Sel, false, nullptr},
4087   {"src1_sel",   AMDGPUOperand::ImmTySdwaSrc1Sel, false, nullptr},
4088   {"dst_unused", AMDGPUOperand::ImmTySdwaDstUnused, false, nullptr},
4089   {"compr", AMDGPUOperand::ImmTyExpCompr, true, nullptr },
4090   {"vm", AMDGPUOperand::ImmTyExpVM, true, nullptr},
4091   {"op_sel", AMDGPUOperand::ImmTyOpSel, false, nullptr},
4092   {"op_sel_hi", AMDGPUOperand::ImmTyOpSelHi, false, nullptr},
4093   {"neg_lo", AMDGPUOperand::ImmTyNegLo, false, nullptr},
4094   {"neg_hi", AMDGPUOperand::ImmTyNegHi, false, nullptr}
4095 };
4096 
4097 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOperand(OperandVector &Operands) {
4098   OperandMatchResultTy res;
4099   for (const OptionalOperand &Op : AMDGPUOptionalOperandTable) {
4100     // try to parse any optional operand here
4101     if (Op.IsBit) {
4102       res = parseNamedBit(Op.Name, Operands, Op.Type);
4103     } else if (Op.Type == AMDGPUOperand::ImmTyOModSI) {
4104       res = parseOModOperand(Operands);
4105     } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstSel ||
4106                Op.Type == AMDGPUOperand::ImmTySdwaSrc0Sel ||
4107                Op.Type == AMDGPUOperand::ImmTySdwaSrc1Sel) {
4108       res = parseSDWASel(Operands, Op.Name, Op.Type);
4109     } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstUnused) {
4110       res = parseSDWADstUnused(Operands);
4111     } else if (Op.Type == AMDGPUOperand::ImmTyOpSel ||
4112                Op.Type == AMDGPUOperand::ImmTyOpSelHi ||
4113                Op.Type == AMDGPUOperand::ImmTyNegLo ||
4114                Op.Type == AMDGPUOperand::ImmTyNegHi) {
4115       res = parseOperandArrayWithPrefix(Op.Name, Operands, Op.Type,
4116                                         Op.ConvertResult);
4117     } else {
4118       res = parseIntWithPrefix(Op.Name, Operands, Op.Type, Op.ConvertResult);
4119     }
4120     if (res != MatchOperand_NoMatch) {
4121       return res;
4122     }
4123   }
4124   return MatchOperand_NoMatch;
4125 }
4126 
4127 OperandMatchResultTy AMDGPUAsmParser::parseOModOperand(OperandVector &Operands) {
4128   StringRef Name = Parser.getTok().getString();
4129   if (Name == "mul") {
4130     return parseIntWithPrefix("mul", Operands,
4131                               AMDGPUOperand::ImmTyOModSI, ConvertOmodMul);
4132   }
4133 
4134   if (Name == "div") {
4135     return parseIntWithPrefix("div", Operands,
4136                               AMDGPUOperand::ImmTyOModSI, ConvertOmodDiv);
4137   }
4138 
4139   return MatchOperand_NoMatch;
4140 }
4141 
4142 void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands) {
4143   cvtVOP3P(Inst, Operands);
4144 
4145   int Opc = Inst.getOpcode();
4146 
4147   int SrcNum;
4148   const int Ops[] = { AMDGPU::OpName::src0,
4149                       AMDGPU::OpName::src1,
4150                       AMDGPU::OpName::src2 };
4151   for (SrcNum = 0;
4152        SrcNum < 3 && AMDGPU::getNamedOperandIdx(Opc, Ops[SrcNum]) != -1;
4153        ++SrcNum);
4154   assert(SrcNum > 0);
4155 
4156   int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
4157   unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
4158 
4159   if ((OpSel & (1 << SrcNum)) != 0) {
4160     int ModIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers);
4161     uint32_t ModVal = Inst.getOperand(ModIdx).getImm();
4162     Inst.getOperand(ModIdx).setImm(ModVal | SISrcMods::DST_OP_SEL);
4163   }
4164 }
4165 
4166 static bool isRegOrImmWithInputMods(const MCInstrDesc &Desc, unsigned OpNum) {
4167       // 1. This operand is input modifiers
4168   return Desc.OpInfo[OpNum].OperandType == AMDGPU::OPERAND_INPUT_MODS
4169       // 2. This is not last operand
4170       && Desc.NumOperands > (OpNum + 1)
4171       // 3. Next operand is register class
4172       && Desc.OpInfo[OpNum + 1].RegClass != -1
4173       // 4. Next register is not tied to any other operand
4174       && Desc.getOperandConstraint(OpNum + 1, MCOI::OperandConstraint::TIED_TO) == -1;
4175 }
4176 
4177 void AMDGPUAsmParser::cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands)
4178 {
4179   OptionalImmIndexMap OptionalIdx;
4180   unsigned Opc = Inst.getOpcode();
4181 
4182   unsigned I = 1;
4183   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
4184   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
4185     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
4186   }
4187 
4188   for (unsigned E = Operands.size(); I != E; ++I) {
4189     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
4190     if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
4191       Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
4192     } else if (Op.isInterpSlot() ||
4193                Op.isInterpAttr() ||
4194                Op.isAttrChan()) {
4195       Inst.addOperand(MCOperand::createImm(Op.Imm.Val));
4196     } else if (Op.isImmModifier()) {
4197       OptionalIdx[Op.getImmTy()] = I;
4198     } else {
4199       llvm_unreachable("unhandled operand type");
4200     }
4201   }
4202 
4203   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::high) != -1) {
4204     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyHigh);
4205   }
4206 
4207   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) {
4208     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI);
4209   }
4210 
4211   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) {
4212     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI);
4213   }
4214 }
4215 
4216 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands,
4217                               OptionalImmIndexMap &OptionalIdx) {
4218   unsigned Opc = Inst.getOpcode();
4219 
4220   unsigned I = 1;
4221   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
4222   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
4223     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
4224   }
4225 
4226   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers) != -1) {
4227     // This instruction has src modifiers
4228     for (unsigned E = Operands.size(); I != E; ++I) {
4229       AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
4230       if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
4231         Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
4232       } else if (Op.isImmModifier()) {
4233         OptionalIdx[Op.getImmTy()] = I;
4234       } else if (Op.isRegOrImm()) {
4235         Op.addRegOrImmOperands(Inst, 1);
4236       } else {
4237         llvm_unreachable("unhandled operand type");
4238       }
4239     }
4240   } else {
4241     // No src modifiers
4242     for (unsigned E = Operands.size(); I != E; ++I) {
4243       AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
4244       if (Op.isMod()) {
4245         OptionalIdx[Op.getImmTy()] = I;
4246       } else {
4247         Op.addRegOrImmOperands(Inst, 1);
4248       }
4249     }
4250   }
4251 
4252   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) {
4253     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI);
4254   }
4255 
4256   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) {
4257     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI);
4258   }
4259 
4260   // special case v_mac_{f16, f32}:
4261   // it has src2 register operand that is tied to dst operand
4262   // we don't allow modifiers for this operand in assembler so src2_modifiers
4263   // should be 0
4264   if (Opc == AMDGPU::V_MAC_F32_e64_si || Opc == AMDGPU::V_MAC_F32_e64_vi ||
4265       Opc == AMDGPU::V_MAC_F16_e64_vi) {
4266     auto it = Inst.begin();
4267     std::advance(it, AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2_modifiers));
4268     it = Inst.insert(it, MCOperand::createImm(0)); // no modifiers for src2
4269     ++it;
4270     Inst.insert(it, Inst.getOperand(0)); // src2 = dst
4271   }
4272 }
4273 
4274 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands) {
4275   OptionalImmIndexMap OptionalIdx;
4276   cvtVOP3(Inst, Operands, OptionalIdx);
4277 }
4278 
4279 void AMDGPUAsmParser::cvtVOP3PImpl(MCInst &Inst,
4280                                    const OperandVector &Operands,
4281                                    bool IsPacked) {
4282   OptionalImmIndexMap OptIdx;
4283   int Opc = Inst.getOpcode();
4284 
4285   cvtVOP3(Inst, Operands, OptIdx);
4286 
4287   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst_in) != -1) {
4288     assert(!IsPacked);
4289     Inst.addOperand(Inst.getOperand(0));
4290   }
4291 
4292   // FIXME: This is messy. Parse the modifiers as if it was a normal VOP3
4293   // instruction, and then figure out where to actually put the modifiers
4294 
4295   addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSel);
4296 
4297   int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi);
4298   if (OpSelHiIdx != -1) {
4299     // TODO: Should we change the printing to match?
4300     int DefaultVal = IsPacked ? -1 : 0;
4301     addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSelHi,
4302                           DefaultVal);
4303   }
4304 
4305   int NegLoIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_lo);
4306   if (NegLoIdx != -1) {
4307     assert(IsPacked);
4308     addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegLo);
4309     addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegHi);
4310   }
4311 
4312   const int Ops[] = { AMDGPU::OpName::src0,
4313                       AMDGPU::OpName::src1,
4314                       AMDGPU::OpName::src2 };
4315   const int ModOps[] = { AMDGPU::OpName::src0_modifiers,
4316                          AMDGPU::OpName::src1_modifiers,
4317                          AMDGPU::OpName::src2_modifiers };
4318 
4319   int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
4320 
4321   unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
4322   unsigned OpSelHi = 0;
4323   unsigned NegLo = 0;
4324   unsigned NegHi = 0;
4325 
4326   if (OpSelHiIdx != -1) {
4327     OpSelHi = Inst.getOperand(OpSelHiIdx).getImm();
4328   }
4329 
4330   if (NegLoIdx != -1) {
4331     int NegHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_hi);
4332     NegLo = Inst.getOperand(NegLoIdx).getImm();
4333     NegHi = Inst.getOperand(NegHiIdx).getImm();
4334   }
4335 
4336   for (int J = 0; J < 3; ++J) {
4337     int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]);
4338     if (OpIdx == -1)
4339       break;
4340 
4341     uint32_t ModVal = 0;
4342 
4343     if ((OpSel & (1 << J)) != 0)
4344       ModVal |= SISrcMods::OP_SEL_0;
4345 
4346     if ((OpSelHi & (1 << J)) != 0)
4347       ModVal |= SISrcMods::OP_SEL_1;
4348 
4349     if ((NegLo & (1 << J)) != 0)
4350       ModVal |= SISrcMods::NEG;
4351 
4352     if ((NegHi & (1 << J)) != 0)
4353       ModVal |= SISrcMods::NEG_HI;
4354 
4355     int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]);
4356 
4357     Inst.getOperand(ModIdx).setImm(Inst.getOperand(ModIdx).getImm() | ModVal);
4358   }
4359 }
4360 
4361 void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands) {
4362   cvtVOP3PImpl(Inst, Operands, true);
4363 }
4364 
4365 void AMDGPUAsmParser::cvtVOP3P_NotPacked(MCInst &Inst,
4366                                          const OperandVector &Operands) {
4367   cvtVOP3PImpl(Inst, Operands, false);
4368 }
4369 
4370 //===----------------------------------------------------------------------===//
4371 // dpp
4372 //===----------------------------------------------------------------------===//
4373 
4374 bool AMDGPUOperand::isDPPCtrl() const {
4375   bool result = isImm() && getImmTy() == ImmTyDppCtrl && isUInt<9>(getImm());
4376   if (result) {
4377     int64_t Imm = getImm();
4378     return ((Imm >= 0x000) && (Imm <= 0x0ff)) ||
4379            ((Imm >= 0x101) && (Imm <= 0x10f)) ||
4380            ((Imm >= 0x111) && (Imm <= 0x11f)) ||
4381            ((Imm >= 0x121) && (Imm <= 0x12f)) ||
4382            (Imm == 0x130) ||
4383            (Imm == 0x134) ||
4384            (Imm == 0x138) ||
4385            (Imm == 0x13c) ||
4386            (Imm == 0x140) ||
4387            (Imm == 0x141) ||
4388            (Imm == 0x142) ||
4389            (Imm == 0x143);
4390   }
4391   return false;
4392 }
4393 
4394 bool AMDGPUOperand::isGPRIdxMode() const {
4395   return isImm() && isUInt<4>(getImm());
4396 }
4397 
4398 bool AMDGPUOperand::isS16Imm() const {
4399   return isImm() && (isInt<16>(getImm()) || isUInt<16>(getImm()));
4400 }
4401 
4402 bool AMDGPUOperand::isU16Imm() const {
4403   return isImm() && isUInt<16>(getImm());
4404 }
4405 
4406 OperandMatchResultTy
4407 AMDGPUAsmParser::parseDPPCtrl(OperandVector &Operands) {
4408   SMLoc S = Parser.getTok().getLoc();
4409   StringRef Prefix;
4410   int64_t Int;
4411 
4412   if (getLexer().getKind() == AsmToken::Identifier) {
4413     Prefix = Parser.getTok().getString();
4414   } else {
4415     return MatchOperand_NoMatch;
4416   }
4417 
4418   if (Prefix == "row_mirror") {
4419     Int = 0x140;
4420     Parser.Lex();
4421   } else if (Prefix == "row_half_mirror") {
4422     Int = 0x141;
4423     Parser.Lex();
4424   } else {
4425     // Check to prevent parseDPPCtrlOps from eating invalid tokens
4426     if (Prefix != "quad_perm"
4427         && Prefix != "row_shl"
4428         && Prefix != "row_shr"
4429         && Prefix != "row_ror"
4430         && Prefix != "wave_shl"
4431         && Prefix != "wave_rol"
4432         && Prefix != "wave_shr"
4433         && Prefix != "wave_ror"
4434         && Prefix != "row_bcast") {
4435       return MatchOperand_NoMatch;
4436     }
4437 
4438     Parser.Lex();
4439     if (getLexer().isNot(AsmToken::Colon))
4440       return MatchOperand_ParseFail;
4441 
4442     if (Prefix == "quad_perm") {
4443       // quad_perm:[%d,%d,%d,%d]
4444       Parser.Lex();
4445       if (getLexer().isNot(AsmToken::LBrac))
4446         return MatchOperand_ParseFail;
4447       Parser.Lex();
4448 
4449       if (getParser().parseAbsoluteExpression(Int) || !(0 <= Int && Int <=3))
4450         return MatchOperand_ParseFail;
4451 
4452       for (int i = 0; i < 3; ++i) {
4453         if (getLexer().isNot(AsmToken::Comma))
4454           return MatchOperand_ParseFail;
4455         Parser.Lex();
4456 
4457         int64_t Temp;
4458         if (getParser().parseAbsoluteExpression(Temp) || !(0 <= Temp && Temp <=3))
4459           return MatchOperand_ParseFail;
4460         const int shift = i*2 + 2;
4461         Int += (Temp << shift);
4462       }
4463 
4464       if (getLexer().isNot(AsmToken::RBrac))
4465         return MatchOperand_ParseFail;
4466       Parser.Lex();
4467     } else {
4468       // sel:%d
4469       Parser.Lex();
4470       if (getParser().parseAbsoluteExpression(Int))
4471         return MatchOperand_ParseFail;
4472 
4473       if (Prefix == "row_shl" && 1 <= Int && Int <= 15) {
4474         Int |= 0x100;
4475       } else if (Prefix == "row_shr" && 1 <= Int && Int <= 15) {
4476         Int |= 0x110;
4477       } else if (Prefix == "row_ror" && 1 <= Int && Int <= 15) {
4478         Int |= 0x120;
4479       } else if (Prefix == "wave_shl" && 1 == Int) {
4480         Int = 0x130;
4481       } else if (Prefix == "wave_rol" && 1 == Int) {
4482         Int = 0x134;
4483       } else if (Prefix == "wave_shr" && 1 == Int) {
4484         Int = 0x138;
4485       } else if (Prefix == "wave_ror" && 1 == Int) {
4486         Int = 0x13C;
4487       } else if (Prefix == "row_bcast") {
4488         if (Int == 15) {
4489           Int = 0x142;
4490         } else if (Int == 31) {
4491           Int = 0x143;
4492         } else {
4493           return MatchOperand_ParseFail;
4494         }
4495       } else {
4496         return MatchOperand_ParseFail;
4497       }
4498     }
4499   }
4500 
4501   Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, AMDGPUOperand::ImmTyDppCtrl));
4502   return MatchOperand_Success;
4503 }
4504 
4505 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultRowMask() const {
4506   return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppRowMask);
4507 }
4508 
4509 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBankMask() const {
4510   return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppBankMask);
4511 }
4512 
4513 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBoundCtrl() const {
4514   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppBoundCtrl);
4515 }
4516 
4517 void AMDGPUAsmParser::cvtDPP(MCInst &Inst, const OperandVector &Operands) {
4518   OptionalImmIndexMap OptionalIdx;
4519 
4520   unsigned I = 1;
4521   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
4522   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
4523     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
4524   }
4525 
4526   // All DPP instructions with at least one source operand have a fake "old"
4527   // source at the beginning that's tied to the dst operand. Handle it here.
4528   if (Desc.getNumOperands() >= 2)
4529     Inst.addOperand(Inst.getOperand(0));
4530 
4531   for (unsigned E = Operands.size(); I != E; ++I) {
4532     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
4533     // Add the register arguments
4534     if (Op.isReg() && Op.Reg.RegNo == AMDGPU::VCC) {
4535       // VOP2b (v_add_u32, v_sub_u32 ...) dpp use "vcc" token.
4536       // Skip it.
4537       continue;
4538     } if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
4539       Op.addRegWithFPInputModsOperands(Inst, 2);
4540     } else if (Op.isDPPCtrl()) {
4541       Op.addImmOperands(Inst, 1);
4542     } else if (Op.isImm()) {
4543       // Handle optional arguments
4544       OptionalIdx[Op.getImmTy()] = I;
4545     } else {
4546       llvm_unreachable("Invalid operand type");
4547     }
4548   }
4549 
4550   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppRowMask, 0xf);
4551   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBankMask, 0xf);
4552   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBoundCtrl);
4553 }
4554 
4555 //===----------------------------------------------------------------------===//
4556 // sdwa
4557 //===----------------------------------------------------------------------===//
4558 
4559 OperandMatchResultTy
4560 AMDGPUAsmParser::parseSDWASel(OperandVector &Operands, StringRef Prefix,
4561                               AMDGPUOperand::ImmTy Type) {
4562   using namespace llvm::AMDGPU::SDWA;
4563 
4564   SMLoc S = Parser.getTok().getLoc();
4565   StringRef Value;
4566   OperandMatchResultTy res;
4567 
4568   res = parseStringWithPrefix(Prefix, Value);
4569   if (res != MatchOperand_Success) {
4570     return res;
4571   }
4572 
4573   int64_t Int;
4574   Int = StringSwitch<int64_t>(Value)
4575         .Case("BYTE_0", SdwaSel::BYTE_0)
4576         .Case("BYTE_1", SdwaSel::BYTE_1)
4577         .Case("BYTE_2", SdwaSel::BYTE_2)
4578         .Case("BYTE_3", SdwaSel::BYTE_3)
4579         .Case("WORD_0", SdwaSel::WORD_0)
4580         .Case("WORD_1", SdwaSel::WORD_1)
4581         .Case("DWORD", SdwaSel::DWORD)
4582         .Default(0xffffffff);
4583   Parser.Lex(); // eat last token
4584 
4585   if (Int == 0xffffffff) {
4586     return MatchOperand_ParseFail;
4587   }
4588 
4589   Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, Type));
4590   return MatchOperand_Success;
4591 }
4592 
4593 OperandMatchResultTy
4594 AMDGPUAsmParser::parseSDWADstUnused(OperandVector &Operands) {
4595   using namespace llvm::AMDGPU::SDWA;
4596 
4597   SMLoc S = Parser.getTok().getLoc();
4598   StringRef Value;
4599   OperandMatchResultTy res;
4600 
4601   res = parseStringWithPrefix("dst_unused", Value);
4602   if (res != MatchOperand_Success) {
4603     return res;
4604   }
4605 
4606   int64_t Int;
4607   Int = StringSwitch<int64_t>(Value)
4608         .Case("UNUSED_PAD", DstUnused::UNUSED_PAD)
4609         .Case("UNUSED_SEXT", DstUnused::UNUSED_SEXT)
4610         .Case("UNUSED_PRESERVE", DstUnused::UNUSED_PRESERVE)
4611         .Default(0xffffffff);
4612   Parser.Lex(); // eat last token
4613 
4614   if (Int == 0xffffffff) {
4615     return MatchOperand_ParseFail;
4616   }
4617 
4618   Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, AMDGPUOperand::ImmTySdwaDstUnused));
4619   return MatchOperand_Success;
4620 }
4621 
4622 void AMDGPUAsmParser::cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands) {
4623   cvtSDWA(Inst, Operands, SIInstrFlags::VOP1);
4624 }
4625 
4626 void AMDGPUAsmParser::cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands) {
4627   cvtSDWA(Inst, Operands, SIInstrFlags::VOP2);
4628 }
4629 
4630 void AMDGPUAsmParser::cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands) {
4631   cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, true);
4632 }
4633 
4634 void AMDGPUAsmParser::cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands) {
4635   cvtSDWA(Inst, Operands, SIInstrFlags::VOPC, isVI());
4636 }
4637 
4638 void AMDGPUAsmParser::cvtSDWA(MCInst &Inst, const OperandVector &Operands,
4639                               uint64_t BasicInstType, bool skipVcc) {
4640   using namespace llvm::AMDGPU::SDWA;
4641 
4642   OptionalImmIndexMap OptionalIdx;
4643   bool skippedVcc = false;
4644 
4645   unsigned I = 1;
4646   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
4647   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
4648     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
4649   }
4650 
4651   for (unsigned E = Operands.size(); I != E; ++I) {
4652     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
4653     if (skipVcc && !skippedVcc && Op.isReg() && Op.Reg.RegNo == AMDGPU::VCC) {
4654       // VOP2b (v_add_u32, v_sub_u32 ...) sdwa use "vcc" token as dst.
4655       // Skip it if it's 2nd (e.g. v_add_i32_sdwa v1, vcc, v2, v3)
4656       // or 4th (v_addc_u32_sdwa v1, vcc, v2, v3, vcc) operand.
4657       // Skip VCC only if we didn't skip it on previous iteration.
4658       if (BasicInstType == SIInstrFlags::VOP2 &&
4659           (Inst.getNumOperands() == 1 || Inst.getNumOperands() == 5)) {
4660         skippedVcc = true;
4661         continue;
4662       } else if (BasicInstType == SIInstrFlags::VOPC &&
4663                  Inst.getNumOperands() == 0) {
4664         skippedVcc = true;
4665         continue;
4666       }
4667     }
4668     if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
4669       Op.addRegWithInputModsOperands(Inst, 2);
4670     } else if (Op.isImm()) {
4671       // Handle optional arguments
4672       OptionalIdx[Op.getImmTy()] = I;
4673     } else {
4674       llvm_unreachable("Invalid operand type");
4675     }
4676     skippedVcc = false;
4677   }
4678 
4679   if (Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx9 &&
4680       Inst.getOpcode() != AMDGPU::V_NOP_sdwa_vi) {
4681     // v_nop_sdwa_sdwa_vi/gfx9 has no optional sdwa arguments
4682     switch (BasicInstType) {
4683     case SIInstrFlags::VOP1:
4684       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
4685       if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) {
4686         addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
4687       }
4688       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD);
4689       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE);
4690       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
4691       break;
4692 
4693     case SIInstrFlags::VOP2:
4694       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
4695       if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) {
4696         addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
4697       }
4698       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD);
4699       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE);
4700       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
4701       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD);
4702       break;
4703 
4704     case SIInstrFlags::VOPC:
4705       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
4706       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
4707       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD);
4708       break;
4709 
4710     default:
4711       llvm_unreachable("Invalid instruction type. Only VOP1, VOP2 and VOPC allowed");
4712     }
4713   }
4714 
4715   // special case v_mac_{f16, f32}:
4716   // it has src2 register operand that is tied to dst operand
4717   if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
4718       Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi)  {
4719     auto it = Inst.begin();
4720     std::advance(
4721       it, AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::src2));
4722     Inst.insert(it, Inst.getOperand(0)); // src2 = dst
4723   }
4724 }
4725 
4726 /// Force static initialization.
4727 extern "C" void LLVMInitializeAMDGPUAsmParser() {
4728   RegisterMCAsmParser<AMDGPUAsmParser> A(getTheAMDGPUTarget());
4729   RegisterMCAsmParser<AMDGPUAsmParser> B(getTheGCNTarget());
4730 }
4731 
4732 #define GET_REGISTER_MATCHER
4733 #define GET_MATCHER_IMPLEMENTATION
4734 #include "AMDGPUGenAsmMatcher.inc"
4735 
4736 // This fuction should be defined after auto-generated include so that we have
4737 // MatchClassKind enum defined
4738 unsigned AMDGPUAsmParser::validateTargetOperandClass(MCParsedAsmOperand &Op,
4739                                                      unsigned Kind) {
4740   // Tokens like "glc" would be parsed as immediate operands in ParseOperand().
4741   // But MatchInstructionImpl() expects to meet token and fails to validate
4742   // operand. This method checks if we are given immediate operand but expect to
4743   // get corresponding token.
4744   AMDGPUOperand &Operand = (AMDGPUOperand&)Op;
4745   switch (Kind) {
4746   case MCK_addr64:
4747     return Operand.isAddr64() ? Match_Success : Match_InvalidOperand;
4748   case MCK_gds:
4749     return Operand.isGDS() ? Match_Success : Match_InvalidOperand;
4750   case MCK_glc:
4751     return Operand.isGLC() ? Match_Success : Match_InvalidOperand;
4752   case MCK_idxen:
4753     return Operand.isIdxen() ? Match_Success : Match_InvalidOperand;
4754   case MCK_offen:
4755     return Operand.isOffen() ? Match_Success : Match_InvalidOperand;
4756   case MCK_SSrcB32:
4757     // When operands have expression values, they will return true for isToken,
4758     // because it is not possible to distinguish between a token and an
4759     // expression at parse time. MatchInstructionImpl() will always try to
4760     // match an operand as a token, when isToken returns true, and when the
4761     // name of the expression is not a valid token, the match will fail,
4762     // so we need to handle it here.
4763     return Operand.isSSrcB32() ? Match_Success : Match_InvalidOperand;
4764   case MCK_SSrcF32:
4765     return Operand.isSSrcF32() ? Match_Success : Match_InvalidOperand;
4766   case MCK_SoppBrTarget:
4767     return Operand.isSoppBrTarget() ? Match_Success : Match_InvalidOperand;
4768   case MCK_VReg32OrOff:
4769     return Operand.isVReg32OrOff() ? Match_Success : Match_InvalidOperand;
4770   case MCK_InterpSlot:
4771     return Operand.isInterpSlot() ? Match_Success : Match_InvalidOperand;
4772   case MCK_Attr:
4773     return Operand.isInterpAttr() ? Match_Success : Match_InvalidOperand;
4774   case MCK_AttrChan:
4775     return Operand.isAttrChan() ? Match_Success : Match_InvalidOperand;
4776   default:
4777     return Match_InvalidOperand;
4778   }
4779 }
4780