1 //===- AMDGPUAsmParser.cpp - Parse SI asm to MCInst instructions ----------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 
10 #include "AMDGPU.h"
11 #include "AMDKernelCodeT.h"
12 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
13 #include "MCTargetDesc/AMDGPUTargetStreamer.h"
14 #include "SIDefines.h"
15 #include "Utils/AMDGPUAsmUtils.h"
16 #include "Utils/AMDGPUBaseInfo.h"
17 #include "Utils/AMDKernelCodeTUtils.h"
18 #include "llvm/ADT/APFloat.h"
19 #include "llvm/ADT/APInt.h"
20 #include "llvm/ADT/ArrayRef.h"
21 #include "llvm/ADT/STLExtras.h"
22 #include "llvm/ADT/SmallBitVector.h"
23 #include "llvm/ADT/SmallString.h"
24 #include "llvm/ADT/StringRef.h"
25 #include "llvm/ADT/StringSwitch.h"
26 #include "llvm/ADT/Twine.h"
27 #include "llvm/BinaryFormat/ELF.h"
28 #include "llvm/CodeGen/MachineValueType.h"
29 #include "llvm/MC/MCAsmInfo.h"
30 #include "llvm/MC/MCContext.h"
31 #include "llvm/MC/MCExpr.h"
32 #include "llvm/MC/MCInst.h"
33 #include "llvm/MC/MCInstrDesc.h"
34 #include "llvm/MC/MCInstrInfo.h"
35 #include "llvm/MC/MCParser/MCAsmLexer.h"
36 #include "llvm/MC/MCParser/MCAsmParser.h"
37 #include "llvm/MC/MCParser/MCAsmParserExtension.h"
38 #include "llvm/MC/MCParser/MCParsedAsmOperand.h"
39 #include "llvm/MC/MCParser/MCTargetAsmParser.h"
40 #include "llvm/MC/MCRegisterInfo.h"
41 #include "llvm/MC/MCStreamer.h"
42 #include "llvm/MC/MCSubtargetInfo.h"
43 #include "llvm/MC/MCSymbol.h"
44 #include "llvm/Support/AMDGPUCodeObjectMetadata.h"
45 #include "llvm/Support/Casting.h"
46 #include "llvm/Support/Compiler.h"
47 #include "llvm/Support/ErrorHandling.h"
48 #include "llvm/Support/MathExtras.h"
49 #include "llvm/Support/SMLoc.h"
50 #include "llvm/Support/TargetRegistry.h"
51 #include "llvm/Support/raw_ostream.h"
52 #include <algorithm>
53 #include <cassert>
54 #include <cstdint>
55 #include <cstring>
56 #include <iterator>
57 #include <map>
58 #include <memory>
59 #include <string>
60 
61 using namespace llvm;
62 using namespace llvm::AMDGPU;
63 
64 namespace {
65 
66 class AMDGPUAsmParser;
67 
68 enum RegisterKind { IS_UNKNOWN, IS_VGPR, IS_SGPR, IS_TTMP, IS_SPECIAL };
69 
70 //===----------------------------------------------------------------------===//
71 // Operand
72 //===----------------------------------------------------------------------===//
73 
74 class AMDGPUOperand : public MCParsedAsmOperand {
75   enum KindTy {
76     Token,
77     Immediate,
78     Register,
79     Expression
80   } Kind;
81 
82   SMLoc StartLoc, EndLoc;
83   const AMDGPUAsmParser *AsmParser;
84 
85 public:
86   AMDGPUOperand(KindTy Kind_, const AMDGPUAsmParser *AsmParser_)
87     : MCParsedAsmOperand(), Kind(Kind_), AsmParser(AsmParser_) {}
88 
89   using Ptr = std::unique_ptr<AMDGPUOperand>;
90 
91   struct Modifiers {
92     bool Abs = false;
93     bool Neg = false;
94     bool Sext = false;
95 
96     bool hasFPModifiers() const { return Abs || Neg; }
97     bool hasIntModifiers() const { return Sext; }
98     bool hasModifiers() const { return hasFPModifiers() || hasIntModifiers(); }
99 
100     int64_t getFPModifiersOperand() const {
101       int64_t Operand = 0;
102       Operand |= Abs ? SISrcMods::ABS : 0;
103       Operand |= Neg ? SISrcMods::NEG : 0;
104       return Operand;
105     }
106 
107     int64_t getIntModifiersOperand() const {
108       int64_t Operand = 0;
109       Operand |= Sext ? SISrcMods::SEXT : 0;
110       return Operand;
111     }
112 
113     int64_t getModifiersOperand() const {
114       assert(!(hasFPModifiers() && hasIntModifiers())
115            && "fp and int modifiers should not be used simultaneously");
116       if (hasFPModifiers()) {
117         return getFPModifiersOperand();
118       } else if (hasIntModifiers()) {
119         return getIntModifiersOperand();
120       } else {
121         return 0;
122       }
123     }
124 
125     friend raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods);
126   };
127 
128   enum ImmTy {
129     ImmTyNone,
130     ImmTyGDS,
131     ImmTyOffen,
132     ImmTyIdxen,
133     ImmTyAddr64,
134     ImmTyOffset,
135     ImmTyOffset0,
136     ImmTyOffset1,
137     ImmTyGLC,
138     ImmTySLC,
139     ImmTyTFE,
140     ImmTyClampSI,
141     ImmTyOModSI,
142     ImmTyDppCtrl,
143     ImmTyDppRowMask,
144     ImmTyDppBankMask,
145     ImmTyDppBoundCtrl,
146     ImmTySdwaDstSel,
147     ImmTySdwaSrc0Sel,
148     ImmTySdwaSrc1Sel,
149     ImmTySdwaDstUnused,
150     ImmTyDMask,
151     ImmTyUNorm,
152     ImmTyDA,
153     ImmTyR128,
154     ImmTyLWE,
155     ImmTyExpTgt,
156     ImmTyExpCompr,
157     ImmTyExpVM,
158     ImmTyDFMT,
159     ImmTyNFMT,
160     ImmTyHwreg,
161     ImmTyOff,
162     ImmTySendMsg,
163     ImmTyInterpSlot,
164     ImmTyInterpAttr,
165     ImmTyAttrChan,
166     ImmTyOpSel,
167     ImmTyOpSelHi,
168     ImmTyNegLo,
169     ImmTyNegHi,
170     ImmTySwizzle,
171     ImmTyHigh
172   };
173 
174   struct TokOp {
175     const char *Data;
176     unsigned Length;
177   };
178 
179   struct ImmOp {
180     int64_t Val;
181     ImmTy Type;
182     bool IsFPImm;
183     Modifiers Mods;
184   };
185 
186   struct RegOp {
187     unsigned RegNo;
188     bool IsForcedVOP3;
189     Modifiers Mods;
190   };
191 
192   union {
193     TokOp Tok;
194     ImmOp Imm;
195     RegOp Reg;
196     const MCExpr *Expr;
197   };
198 
199   bool isToken() const override {
200     if (Kind == Token)
201       return true;
202 
203     if (Kind != Expression || !Expr)
204       return false;
205 
206     // When parsing operands, we can't always tell if something was meant to be
207     // a token, like 'gds', or an expression that references a global variable.
208     // In this case, we assume the string is an expression, and if we need to
209     // interpret is a token, then we treat the symbol name as the token.
210     return isa<MCSymbolRefExpr>(Expr);
211   }
212 
213   bool isImm() const override {
214     return Kind == Immediate;
215   }
216 
217   bool isInlinableImm(MVT type) const;
218   bool isLiteralImm(MVT type) const;
219 
220   bool isRegKind() const {
221     return Kind == Register;
222   }
223 
224   bool isReg() const override {
225     return isRegKind() && !hasModifiers();
226   }
227 
228   bool isRegOrImmWithInputMods(MVT type) const {
229     return isRegKind() || isInlinableImm(type);
230   }
231 
232   bool isRegOrImmWithInt16InputMods() const {
233     return isRegOrImmWithInputMods(MVT::i16);
234   }
235 
236   bool isRegOrImmWithInt32InputMods() const {
237     return isRegOrImmWithInputMods(MVT::i32);
238   }
239 
240   bool isRegOrImmWithInt64InputMods() const {
241     return isRegOrImmWithInputMods(MVT::i64);
242   }
243 
244   bool isRegOrImmWithFP16InputMods() const {
245     return isRegOrImmWithInputMods(MVT::f16);
246   }
247 
248   bool isRegOrImmWithFP32InputMods() const {
249     return isRegOrImmWithInputMods(MVT::f32);
250   }
251 
252   bool isRegOrImmWithFP64InputMods() const {
253     return isRegOrImmWithInputMods(MVT::f64);
254   }
255 
256   bool isVReg() const {
257     return isRegClass(AMDGPU::VGPR_32RegClassID) ||
258            isRegClass(AMDGPU::VReg_64RegClassID) ||
259            isRegClass(AMDGPU::VReg_96RegClassID) ||
260            isRegClass(AMDGPU::VReg_128RegClassID) ||
261            isRegClass(AMDGPU::VReg_256RegClassID) ||
262            isRegClass(AMDGPU::VReg_512RegClassID);
263   }
264 
265   bool isVReg32OrOff() const {
266     return isOff() || isRegClass(AMDGPU::VGPR_32RegClassID);
267   }
268 
269   bool isSDWARegKind() const;
270 
271   bool isImmTy(ImmTy ImmT) const {
272     return isImm() && Imm.Type == ImmT;
273   }
274 
275   bool isImmModifier() const {
276     return isImm() && Imm.Type != ImmTyNone;
277   }
278 
279   bool isClampSI() const { return isImmTy(ImmTyClampSI); }
280   bool isOModSI() const { return isImmTy(ImmTyOModSI); }
281   bool isDMask() const { return isImmTy(ImmTyDMask); }
282   bool isUNorm() const { return isImmTy(ImmTyUNorm); }
283   bool isDA() const { return isImmTy(ImmTyDA); }
284   bool isR128() const { return isImmTy(ImmTyUNorm); }
285   bool isLWE() const { return isImmTy(ImmTyLWE); }
286   bool isOff() const { return isImmTy(ImmTyOff); }
287   bool isExpTgt() const { return isImmTy(ImmTyExpTgt); }
288   bool isExpVM() const { return isImmTy(ImmTyExpVM); }
289   bool isExpCompr() const { return isImmTy(ImmTyExpCompr); }
290   bool isOffen() const { return isImmTy(ImmTyOffen); }
291   bool isIdxen() const { return isImmTy(ImmTyIdxen); }
292   bool isAddr64() const { return isImmTy(ImmTyAddr64); }
293   bool isOffset() const { return isImmTy(ImmTyOffset) && isUInt<16>(getImm()); }
294   bool isOffset0() const { return isImmTy(ImmTyOffset0) && isUInt<16>(getImm()); }
295   bool isOffset1() const { return isImmTy(ImmTyOffset1) && isUInt<8>(getImm()); }
296 
297   bool isOffsetU12() const { return isImmTy(ImmTyOffset) && isUInt<12>(getImm()); }
298   bool isOffsetS13() const { return isImmTy(ImmTyOffset) && isInt<13>(getImm()); }
299   bool isGDS() const { return isImmTy(ImmTyGDS); }
300   bool isGLC() const { return isImmTy(ImmTyGLC); }
301   bool isSLC() const { return isImmTy(ImmTySLC); }
302   bool isTFE() const { return isImmTy(ImmTyTFE); }
303   bool isDFMT() const { return isImmTy(ImmTyDFMT) && isUInt<8>(getImm()); }
304   bool isNFMT() const { return isImmTy(ImmTyNFMT) && isUInt<8>(getImm()); }
305   bool isBankMask() const { return isImmTy(ImmTyDppBankMask); }
306   bool isRowMask() const { return isImmTy(ImmTyDppRowMask); }
307   bool isBoundCtrl() const { return isImmTy(ImmTyDppBoundCtrl); }
308   bool isSDWADstSel() const { return isImmTy(ImmTySdwaDstSel); }
309   bool isSDWASrc0Sel() const { return isImmTy(ImmTySdwaSrc0Sel); }
310   bool isSDWASrc1Sel() const { return isImmTy(ImmTySdwaSrc1Sel); }
311   bool isSDWADstUnused() const { return isImmTy(ImmTySdwaDstUnused); }
312   bool isInterpSlot() const { return isImmTy(ImmTyInterpSlot); }
313   bool isInterpAttr() const { return isImmTy(ImmTyInterpAttr); }
314   bool isAttrChan() const { return isImmTy(ImmTyAttrChan); }
315   bool isOpSel() const { return isImmTy(ImmTyOpSel); }
316   bool isOpSelHi() const { return isImmTy(ImmTyOpSelHi); }
317   bool isNegLo() const { return isImmTy(ImmTyNegLo); }
318   bool isNegHi() const { return isImmTy(ImmTyNegHi); }
319   bool isHigh() const { return isImmTy(ImmTyHigh); }
320 
321   bool isMod() const {
322     return isClampSI() || isOModSI();
323   }
324 
325   bool isRegOrImm() const {
326     return isReg() || isImm();
327   }
328 
329   bool isRegClass(unsigned RCID) const;
330 
331   bool isRegOrInlineNoMods(unsigned RCID, MVT type) const {
332     return (isRegClass(RCID) || isInlinableImm(type)) && !hasModifiers();
333   }
334 
335   bool isSCSrcB16() const {
336     return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i16);
337   }
338 
339   bool isSCSrcV2B16() const {
340     return isSCSrcB16();
341   }
342 
343   bool isSCSrcB32() const {
344     return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i32);
345   }
346 
347   bool isSCSrcB64() const {
348     return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::i64);
349   }
350 
351   bool isSCSrcF16() const {
352     return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f16);
353   }
354 
355   bool isSCSrcV2F16() const {
356     return isSCSrcF16();
357   }
358 
359   bool isSCSrcF32() const {
360     return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f32);
361   }
362 
363   bool isSCSrcF64() const {
364     return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::f64);
365   }
366 
367   bool isSSrcB32() const {
368     return isSCSrcB32() || isLiteralImm(MVT::i32) || isExpr();
369   }
370 
371   bool isSSrcB16() const {
372     return isSCSrcB16() || isLiteralImm(MVT::i16);
373   }
374 
375   bool isSSrcV2B16() const {
376     llvm_unreachable("cannot happen");
377     return isSSrcB16();
378   }
379 
380   bool isSSrcB64() const {
381     // TODO: Find out how SALU supports extension of 32-bit literals to 64 bits.
382     // See isVSrc64().
383     return isSCSrcB64() || isLiteralImm(MVT::i64);
384   }
385 
386   bool isSSrcF32() const {
387     return isSCSrcB32() || isLiteralImm(MVT::f32) || isExpr();
388   }
389 
390   bool isSSrcF64() const {
391     return isSCSrcB64() || isLiteralImm(MVT::f64);
392   }
393 
394   bool isSSrcF16() const {
395     return isSCSrcB16() || isLiteralImm(MVT::f16);
396   }
397 
398   bool isSSrcV2F16() const {
399     llvm_unreachable("cannot happen");
400     return isSSrcF16();
401   }
402 
403   bool isVCSrcB32() const {
404     return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i32);
405   }
406 
407   bool isVCSrcB64() const {
408     return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::i64);
409   }
410 
411   bool isVCSrcB16() const {
412     return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i16);
413   }
414 
415   bool isVCSrcV2B16() const {
416     return isVCSrcB16();
417   }
418 
419   bool isVCSrcF32() const {
420     return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f32);
421   }
422 
423   bool isVCSrcF64() const {
424     return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::f64);
425   }
426 
427   bool isVCSrcF16() const {
428     return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f16);
429   }
430 
431   bool isVCSrcV2F16() const {
432     return isVCSrcF16();
433   }
434 
435   bool isVSrcB32() const {
436     return isVCSrcF32() || isLiteralImm(MVT::i32);
437   }
438 
439   bool isVSrcB64() const {
440     return isVCSrcF64() || isLiteralImm(MVT::i64);
441   }
442 
443   bool isVSrcB16() const {
444     return isVCSrcF16() || isLiteralImm(MVT::i16);
445   }
446 
447   bool isVSrcV2B16() const {
448     llvm_unreachable("cannot happen");
449     return isVSrcB16();
450   }
451 
452   bool isVSrcF32() const {
453     return isVCSrcF32() || isLiteralImm(MVT::f32);
454   }
455 
456   bool isVSrcF64() const {
457     return isVCSrcF64() || isLiteralImm(MVT::f64);
458   }
459 
460   bool isVSrcF16() const {
461     return isVCSrcF16() || isLiteralImm(MVT::f16);
462   }
463 
464   bool isVSrcV2F16() const {
465     llvm_unreachable("cannot happen");
466     return isVSrcF16();
467   }
468 
469   bool isKImmFP32() const {
470     return isLiteralImm(MVT::f32);
471   }
472 
473   bool isKImmFP16() const {
474     return isLiteralImm(MVT::f16);
475   }
476 
477   bool isMem() const override {
478     return false;
479   }
480 
481   bool isExpr() const {
482     return Kind == Expression;
483   }
484 
485   bool isSoppBrTarget() const {
486     return isExpr() || isImm();
487   }
488 
489   bool isSWaitCnt() const;
490   bool isHwreg() const;
491   bool isSendMsg() const;
492   bool isSwizzle() const;
493   bool isSMRDOffset8() const;
494   bool isSMRDOffset20() const;
495   bool isSMRDLiteralOffset() const;
496   bool isDPPCtrl() const;
497   bool isGPRIdxMode() const;
498   bool isS16Imm() const;
499   bool isU16Imm() const;
500 
501   StringRef getExpressionAsToken() const {
502     assert(isExpr());
503     const MCSymbolRefExpr *S = cast<MCSymbolRefExpr>(Expr);
504     return S->getSymbol().getName();
505   }
506 
507   StringRef getToken() const {
508     assert(isToken());
509 
510     if (Kind == Expression)
511       return getExpressionAsToken();
512 
513     return StringRef(Tok.Data, Tok.Length);
514   }
515 
516   int64_t getImm() const {
517     assert(isImm());
518     return Imm.Val;
519   }
520 
521   ImmTy getImmTy() const {
522     assert(isImm());
523     return Imm.Type;
524   }
525 
526   unsigned getReg() const override {
527     return Reg.RegNo;
528   }
529 
530   SMLoc getStartLoc() const override {
531     return StartLoc;
532   }
533 
534   SMLoc getEndLoc() const override {
535     return EndLoc;
536   }
537 
538   Modifiers getModifiers() const {
539     assert(isRegKind() || isImmTy(ImmTyNone));
540     return isRegKind() ? Reg.Mods : Imm.Mods;
541   }
542 
543   void setModifiers(Modifiers Mods) {
544     assert(isRegKind() || isImmTy(ImmTyNone));
545     if (isRegKind())
546       Reg.Mods = Mods;
547     else
548       Imm.Mods = Mods;
549   }
550 
551   bool hasModifiers() const {
552     return getModifiers().hasModifiers();
553   }
554 
555   bool hasFPModifiers() const {
556     return getModifiers().hasFPModifiers();
557   }
558 
559   bool hasIntModifiers() const {
560     return getModifiers().hasIntModifiers();
561   }
562 
563   uint64_t applyInputFPModifiers(uint64_t Val, unsigned Size) const;
564 
565   void addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers = true) const;
566 
567   void addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const;
568 
569   template <unsigned Bitwidth>
570   void addKImmFPOperands(MCInst &Inst, unsigned N) const;
571 
572   void addKImmFP16Operands(MCInst &Inst, unsigned N) const {
573     addKImmFPOperands<16>(Inst, N);
574   }
575 
576   void addKImmFP32Operands(MCInst &Inst, unsigned N) const {
577     addKImmFPOperands<32>(Inst, N);
578   }
579 
580   void addRegOperands(MCInst &Inst, unsigned N) const;
581 
582   void addRegOrImmOperands(MCInst &Inst, unsigned N) const {
583     if (isRegKind())
584       addRegOperands(Inst, N);
585     else if (isExpr())
586       Inst.addOperand(MCOperand::createExpr(Expr));
587     else
588       addImmOperands(Inst, N);
589   }
590 
591   void addRegOrImmWithInputModsOperands(MCInst &Inst, unsigned N) const {
592     Modifiers Mods = getModifiers();
593     Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
594     if (isRegKind()) {
595       addRegOperands(Inst, N);
596     } else {
597       addImmOperands(Inst, N, false);
598     }
599   }
600 
601   void addRegOrImmWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
602     assert(!hasIntModifiers());
603     addRegOrImmWithInputModsOperands(Inst, N);
604   }
605 
606   void addRegOrImmWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
607     assert(!hasFPModifiers());
608     addRegOrImmWithInputModsOperands(Inst, N);
609   }
610 
611   void addRegWithInputModsOperands(MCInst &Inst, unsigned N) const {
612     Modifiers Mods = getModifiers();
613     Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
614     assert(isRegKind());
615     addRegOperands(Inst, N);
616   }
617 
618   void addRegWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
619     assert(!hasIntModifiers());
620     addRegWithInputModsOperands(Inst, N);
621   }
622 
623   void addRegWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
624     assert(!hasFPModifiers());
625     addRegWithInputModsOperands(Inst, N);
626   }
627 
628   void addSoppBrTargetOperands(MCInst &Inst, unsigned N) const {
629     if (isImm())
630       addImmOperands(Inst, N);
631     else {
632       assert(isExpr());
633       Inst.addOperand(MCOperand::createExpr(Expr));
634     }
635   }
636 
637   static void printImmTy(raw_ostream& OS, ImmTy Type) {
638     switch (Type) {
639     case ImmTyNone: OS << "None"; break;
640     case ImmTyGDS: OS << "GDS"; break;
641     case ImmTyOffen: OS << "Offen"; break;
642     case ImmTyIdxen: OS << "Idxen"; break;
643     case ImmTyAddr64: OS << "Addr64"; break;
644     case ImmTyOffset: OS << "Offset"; break;
645     case ImmTyOffset0: OS << "Offset0"; break;
646     case ImmTyOffset1: OS << "Offset1"; break;
647     case ImmTyGLC: OS << "GLC"; break;
648     case ImmTySLC: OS << "SLC"; break;
649     case ImmTyTFE: OS << "TFE"; break;
650     case ImmTyDFMT: OS << "DFMT"; break;
651     case ImmTyNFMT: OS << "NFMT"; break;
652     case ImmTyClampSI: OS << "ClampSI"; break;
653     case ImmTyOModSI: OS << "OModSI"; break;
654     case ImmTyDppCtrl: OS << "DppCtrl"; break;
655     case ImmTyDppRowMask: OS << "DppRowMask"; break;
656     case ImmTyDppBankMask: OS << "DppBankMask"; break;
657     case ImmTyDppBoundCtrl: OS << "DppBoundCtrl"; break;
658     case ImmTySdwaDstSel: OS << "SdwaDstSel"; break;
659     case ImmTySdwaSrc0Sel: OS << "SdwaSrc0Sel"; break;
660     case ImmTySdwaSrc1Sel: OS << "SdwaSrc1Sel"; break;
661     case ImmTySdwaDstUnused: OS << "SdwaDstUnused"; break;
662     case ImmTyDMask: OS << "DMask"; break;
663     case ImmTyUNorm: OS << "UNorm"; break;
664     case ImmTyDA: OS << "DA"; break;
665     case ImmTyR128: OS << "R128"; break;
666     case ImmTyLWE: OS << "LWE"; break;
667     case ImmTyOff: OS << "Off"; break;
668     case ImmTyExpTgt: OS << "ExpTgt"; break;
669     case ImmTyExpCompr: OS << "ExpCompr"; break;
670     case ImmTyExpVM: OS << "ExpVM"; break;
671     case ImmTyHwreg: OS << "Hwreg"; break;
672     case ImmTySendMsg: OS << "SendMsg"; break;
673     case ImmTyInterpSlot: OS << "InterpSlot"; break;
674     case ImmTyInterpAttr: OS << "InterpAttr"; break;
675     case ImmTyAttrChan: OS << "AttrChan"; break;
676     case ImmTyOpSel: OS << "OpSel"; break;
677     case ImmTyOpSelHi: OS << "OpSelHi"; break;
678     case ImmTyNegLo: OS << "NegLo"; break;
679     case ImmTyNegHi: OS << "NegHi"; break;
680     case ImmTySwizzle: OS << "Swizzle"; break;
681     case ImmTyHigh: OS << "High"; break;
682     }
683   }
684 
685   void print(raw_ostream &OS) const override {
686     switch (Kind) {
687     case Register:
688       OS << "<register " << getReg() << " mods: " << Reg.Mods << '>';
689       break;
690     case Immediate:
691       OS << '<' << getImm();
692       if (getImmTy() != ImmTyNone) {
693         OS << " type: "; printImmTy(OS, getImmTy());
694       }
695       OS << " mods: " << Imm.Mods << '>';
696       break;
697     case Token:
698       OS << '\'' << getToken() << '\'';
699       break;
700     case Expression:
701       OS << "<expr " << *Expr << '>';
702       break;
703     }
704   }
705 
706   static AMDGPUOperand::Ptr CreateImm(const AMDGPUAsmParser *AsmParser,
707                                       int64_t Val, SMLoc Loc,
708                                       ImmTy Type = ImmTyNone,
709                                       bool IsFPImm = false) {
710     auto Op = llvm::make_unique<AMDGPUOperand>(Immediate, AsmParser);
711     Op->Imm.Val = Val;
712     Op->Imm.IsFPImm = IsFPImm;
713     Op->Imm.Type = Type;
714     Op->Imm.Mods = Modifiers();
715     Op->StartLoc = Loc;
716     Op->EndLoc = Loc;
717     return Op;
718   }
719 
720   static AMDGPUOperand::Ptr CreateToken(const AMDGPUAsmParser *AsmParser,
721                                         StringRef Str, SMLoc Loc,
722                                         bool HasExplicitEncodingSize = true) {
723     auto Res = llvm::make_unique<AMDGPUOperand>(Token, AsmParser);
724     Res->Tok.Data = Str.data();
725     Res->Tok.Length = Str.size();
726     Res->StartLoc = Loc;
727     Res->EndLoc = Loc;
728     return Res;
729   }
730 
731   static AMDGPUOperand::Ptr CreateReg(const AMDGPUAsmParser *AsmParser,
732                                       unsigned RegNo, SMLoc S,
733                                       SMLoc E,
734                                       bool ForceVOP3) {
735     auto Op = llvm::make_unique<AMDGPUOperand>(Register, AsmParser);
736     Op->Reg.RegNo = RegNo;
737     Op->Reg.Mods = Modifiers();
738     Op->Reg.IsForcedVOP3 = ForceVOP3;
739     Op->StartLoc = S;
740     Op->EndLoc = E;
741     return Op;
742   }
743 
744   static AMDGPUOperand::Ptr CreateExpr(const AMDGPUAsmParser *AsmParser,
745                                        const class MCExpr *Expr, SMLoc S) {
746     auto Op = llvm::make_unique<AMDGPUOperand>(Expression, AsmParser);
747     Op->Expr = Expr;
748     Op->StartLoc = S;
749     Op->EndLoc = S;
750     return Op;
751   }
752 };
753 
754 raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods) {
755   OS << "abs:" << Mods.Abs << " neg: " << Mods.Neg << " sext:" << Mods.Sext;
756   return OS;
757 }
758 
759 //===----------------------------------------------------------------------===//
760 // AsmParser
761 //===----------------------------------------------------------------------===//
762 
763 // Holds info related to the current kernel, e.g. count of SGPRs used.
764 // Kernel scope begins at .amdgpu_hsa_kernel directive, ends at next
765 // .amdgpu_hsa_kernel or at EOF.
766 class KernelScopeInfo {
767   int SgprIndexUnusedMin = -1;
768   int VgprIndexUnusedMin = -1;
769   MCContext *Ctx = nullptr;
770 
771   void usesSgprAt(int i) {
772     if (i >= SgprIndexUnusedMin) {
773       SgprIndexUnusedMin = ++i;
774       if (Ctx) {
775         MCSymbol * const Sym = Ctx->getOrCreateSymbol(Twine(".kernel.sgpr_count"));
776         Sym->setVariableValue(MCConstantExpr::create(SgprIndexUnusedMin, *Ctx));
777       }
778     }
779   }
780 
781   void usesVgprAt(int i) {
782     if (i >= VgprIndexUnusedMin) {
783       VgprIndexUnusedMin = ++i;
784       if (Ctx) {
785         MCSymbol * const Sym = Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count"));
786         Sym->setVariableValue(MCConstantExpr::create(VgprIndexUnusedMin, *Ctx));
787       }
788     }
789   }
790 
791 public:
792   KernelScopeInfo() = default;
793 
794   void initialize(MCContext &Context) {
795     Ctx = &Context;
796     usesSgprAt(SgprIndexUnusedMin = -1);
797     usesVgprAt(VgprIndexUnusedMin = -1);
798   }
799 
800   void usesRegister(RegisterKind RegKind, unsigned DwordRegIndex, unsigned RegWidth) {
801     switch (RegKind) {
802       case IS_SGPR: usesSgprAt(DwordRegIndex + RegWidth - 1); break;
803       case IS_VGPR: usesVgprAt(DwordRegIndex + RegWidth - 1); break;
804       default: break;
805     }
806   }
807 };
808 
809 class AMDGPUAsmParser : public MCTargetAsmParser {
810   const MCInstrInfo &MII;
811   MCAsmParser &Parser;
812 
813   unsigned ForcedEncodingSize = 0;
814   bool ForcedDPP = false;
815   bool ForcedSDWA = false;
816   KernelScopeInfo KernelScope;
817 
818   /// @name Auto-generated Match Functions
819   /// {
820 
821 #define GET_ASSEMBLER_HEADER
822 #include "AMDGPUGenAsmMatcher.inc"
823 
824   /// }
825 
826 private:
827   bool ParseAsAbsoluteExpression(uint32_t &Ret);
828   bool ParseDirectiveMajorMinor(uint32_t &Major, uint32_t &Minor);
829   bool ParseDirectiveHSACodeObjectVersion();
830   bool ParseDirectiveHSACodeObjectISA();
831   bool ParseDirectiveCodeObjectMetadata();
832   bool ParseAMDKernelCodeTValue(StringRef ID, amd_kernel_code_t &Header);
833   bool ParseDirectiveAMDKernelCodeT();
834   bool subtargetHasRegister(const MCRegisterInfo &MRI, unsigned RegNo) const;
835   bool ParseDirectiveAMDGPUHsaKernel();
836   bool AddNextRegisterToList(unsigned& Reg, unsigned& RegWidth,
837                              RegisterKind RegKind, unsigned Reg1,
838                              unsigned RegNum);
839   bool ParseAMDGPURegister(RegisterKind& RegKind, unsigned& Reg,
840                            unsigned& RegNum, unsigned& RegWidth,
841                            unsigned *DwordRegIndex);
842   void cvtMubufImpl(MCInst &Inst, const OperandVector &Operands,
843                     bool IsAtomic, bool IsAtomicReturn);
844   void cvtDSImpl(MCInst &Inst, const OperandVector &Operands,
845                  bool IsGdsHardcoded);
846 
847 public:
848   enum AMDGPUMatchResultTy {
849     Match_PreferE32 = FIRST_TARGET_MATCH_RESULT_TY
850   };
851 
852   using OptionalImmIndexMap = std::map<AMDGPUOperand::ImmTy, unsigned>;
853 
854   AMDGPUAsmParser(const MCSubtargetInfo &STI, MCAsmParser &_Parser,
855                const MCInstrInfo &MII,
856                const MCTargetOptions &Options)
857       : MCTargetAsmParser(Options, STI), MII(MII), Parser(_Parser) {
858     MCAsmParserExtension::Initialize(Parser);
859 
860     if (getFeatureBits().none()) {
861       // Set default features.
862       copySTI().ToggleFeature("SOUTHERN_ISLANDS");
863     }
864 
865     setAvailableFeatures(ComputeAvailableFeatures(getFeatureBits()));
866 
867     {
868       // TODO: make those pre-defined variables read-only.
869       // Currently there is none suitable machinery in the core llvm-mc for this.
870       // MCSymbol::isRedefinable is intended for another purpose, and
871       // AsmParser::parseDirectiveSet() cannot be specialized for specific target.
872       AMDGPU::IsaInfo::IsaVersion ISA =
873           AMDGPU::IsaInfo::getIsaVersion(getFeatureBits());
874       MCContext &Ctx = getContext();
875       MCSymbol *Sym =
876           Ctx.getOrCreateSymbol(Twine(".option.machine_version_major"));
877       Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
878       Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_minor"));
879       Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
880       Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_stepping"));
881       Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
882     }
883     KernelScope.initialize(getContext());
884   }
885 
886   bool isSI() const {
887     return AMDGPU::isSI(getSTI());
888   }
889 
890   bool isCI() const {
891     return AMDGPU::isCI(getSTI());
892   }
893 
894   bool isVI() const {
895     return AMDGPU::isVI(getSTI());
896   }
897 
898   bool isGFX9() const {
899     return AMDGPU::isGFX9(getSTI());
900   }
901 
902   bool hasInv2PiInlineImm() const {
903     return getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm];
904   }
905 
906   bool hasFlatOffsets() const {
907     return getFeatureBits()[AMDGPU::FeatureFlatInstOffsets];
908   }
909 
910   bool hasSGPR102_SGPR103() const {
911     return !isVI();
912   }
913 
914   bool hasIntClamp() const {
915     return getFeatureBits()[AMDGPU::FeatureIntClamp];
916   }
917 
918   AMDGPUTargetStreamer &getTargetStreamer() {
919     MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer();
920     return static_cast<AMDGPUTargetStreamer &>(TS);
921   }
922 
923   const MCRegisterInfo *getMRI() const {
924     // We need this const_cast because for some reason getContext() is not const
925     // in MCAsmParser.
926     return const_cast<AMDGPUAsmParser*>(this)->getContext().getRegisterInfo();
927   }
928 
929   const MCInstrInfo *getMII() const {
930     return &MII;
931   }
932 
933   const FeatureBitset &getFeatureBits() const {
934     return getSTI().getFeatureBits();
935   }
936 
937   void setForcedEncodingSize(unsigned Size) { ForcedEncodingSize = Size; }
938   void setForcedDPP(bool ForceDPP_) { ForcedDPP = ForceDPP_; }
939   void setForcedSDWA(bool ForceSDWA_) { ForcedSDWA = ForceSDWA_; }
940 
941   unsigned getForcedEncodingSize() const { return ForcedEncodingSize; }
942   bool isForcedVOP3() const { return ForcedEncodingSize == 64; }
943   bool isForcedDPP() const { return ForcedDPP; }
944   bool isForcedSDWA() const { return ForcedSDWA; }
945   ArrayRef<unsigned> getMatchedVariants() const;
946 
947   std::unique_ptr<AMDGPUOperand> parseRegister();
948   bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override;
949   unsigned checkTargetMatchPredicate(MCInst &Inst) override;
950   unsigned validateTargetOperandClass(MCParsedAsmOperand &Op,
951                                       unsigned Kind) override;
952   bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
953                                OperandVector &Operands, MCStreamer &Out,
954                                uint64_t &ErrorInfo,
955                                bool MatchingInlineAsm) override;
956   bool ParseDirective(AsmToken DirectiveID) override;
957   OperandMatchResultTy parseOperand(OperandVector &Operands, StringRef Mnemonic);
958   StringRef parseMnemonicSuffix(StringRef Name);
959   bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
960                         SMLoc NameLoc, OperandVector &Operands) override;
961   //bool ProcessInstruction(MCInst &Inst);
962 
963   OperandMatchResultTy parseIntWithPrefix(const char *Prefix, int64_t &Int);
964 
965   OperandMatchResultTy
966   parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
967                      AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
968                      bool (*ConvertResult)(int64_t &) = nullptr);
969 
970   OperandMatchResultTy parseOperandArrayWithPrefix(
971     const char *Prefix,
972     OperandVector &Operands,
973     AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
974     bool (*ConvertResult)(int64_t&) = nullptr);
975 
976   OperandMatchResultTy
977   parseNamedBit(const char *Name, OperandVector &Operands,
978                 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone);
979   OperandMatchResultTy parseStringWithPrefix(StringRef Prefix,
980                                              StringRef &Value);
981 
982   bool parseAbsoluteExpr(int64_t &Val, bool AbsMod = false);
983   OperandMatchResultTy parseImm(OperandVector &Operands, bool AbsMod = false);
984   OperandMatchResultTy parseReg(OperandVector &Operands);
985   OperandMatchResultTy parseRegOrImm(OperandVector &Operands, bool AbsMod = false);
986   OperandMatchResultTy parseRegOrImmWithFPInputMods(OperandVector &Operands, bool AllowImm = true);
987   OperandMatchResultTy parseRegOrImmWithIntInputMods(OperandVector &Operands, bool AllowImm = true);
988   OperandMatchResultTy parseRegWithFPInputMods(OperandVector &Operands);
989   OperandMatchResultTy parseRegWithIntInputMods(OperandVector &Operands);
990   OperandMatchResultTy parseVReg32OrOff(OperandVector &Operands);
991 
992   void cvtDSOffset01(MCInst &Inst, const OperandVector &Operands);
993   void cvtDS(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, false); }
994   void cvtDSGds(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, true); }
995   void cvtExp(MCInst &Inst, const OperandVector &Operands);
996 
997   bool parseCnt(int64_t &IntVal);
998   OperandMatchResultTy parseSWaitCntOps(OperandVector &Operands);
999   OperandMatchResultTy parseHwreg(OperandVector &Operands);
1000 
1001 private:
1002   struct OperandInfoTy {
1003     int64_t Id;
1004     bool IsSymbolic = false;
1005 
1006     OperandInfoTy(int64_t Id_) : Id(Id_) {}
1007   };
1008 
1009   bool parseSendMsgConstruct(OperandInfoTy &Msg, OperandInfoTy &Operation, int64_t &StreamId);
1010   bool parseHwregConstruct(OperandInfoTy &HwReg, int64_t &Offset, int64_t &Width);
1011 
1012   void errorExpTgt();
1013   OperandMatchResultTy parseExpTgtImpl(StringRef Str, uint8_t &Val);
1014 
1015   bool validateInstruction(const MCInst &Inst, const SMLoc &IDLoc);
1016   bool validateConstantBusLimitations(const MCInst &Inst);
1017   bool validateEarlyClobberLimitations(const MCInst &Inst);
1018   bool validateIntClampSupported(const MCInst &Inst);
1019   bool usesConstantBus(const MCInst &Inst, unsigned OpIdx);
1020   bool isInlineConstant(const MCInst &Inst, unsigned OpIdx) const;
1021   unsigned findImplicitSGPRReadInVOP(const MCInst &Inst) const;
1022 
1023   bool trySkipId(const StringRef Id);
1024   bool trySkipToken(const AsmToken::TokenKind Kind);
1025   bool skipToken(const AsmToken::TokenKind Kind, const StringRef ErrMsg);
1026   bool parseString(StringRef &Val, const StringRef ErrMsg = "expected a string");
1027   bool parseExpr(int64_t &Imm);
1028 
1029 public:
1030   OperandMatchResultTy parseOptionalOperand(OperandVector &Operands);
1031 
1032   OperandMatchResultTy parseExpTgt(OperandVector &Operands);
1033   OperandMatchResultTy parseSendMsgOp(OperandVector &Operands);
1034   OperandMatchResultTy parseInterpSlot(OperandVector &Operands);
1035   OperandMatchResultTy parseInterpAttr(OperandVector &Operands);
1036   OperandMatchResultTy parseSOppBrTarget(OperandVector &Operands);
1037 
1038   bool parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
1039                             const unsigned MinVal,
1040                             const unsigned MaxVal,
1041                             const StringRef ErrMsg);
1042   OperandMatchResultTy parseSwizzleOp(OperandVector &Operands);
1043   bool parseSwizzleOffset(int64_t &Imm);
1044   bool parseSwizzleMacro(int64_t &Imm);
1045   bool parseSwizzleQuadPerm(int64_t &Imm);
1046   bool parseSwizzleBitmaskPerm(int64_t &Imm);
1047   bool parseSwizzleBroadcast(int64_t &Imm);
1048   bool parseSwizzleSwap(int64_t &Imm);
1049   bool parseSwizzleReverse(int64_t &Imm);
1050 
1051   void cvtMubuf(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, false); }
1052   void cvtMubufAtomic(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true, false); }
1053   void cvtMubufAtomicReturn(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true, true); }
1054   void cvtMtbuf(MCInst &Inst, const OperandVector &Operands);
1055 
1056   AMDGPUOperand::Ptr defaultGLC() const;
1057   AMDGPUOperand::Ptr defaultSLC() const;
1058   AMDGPUOperand::Ptr defaultTFE() const;
1059 
1060   AMDGPUOperand::Ptr defaultDMask() const;
1061   AMDGPUOperand::Ptr defaultUNorm() const;
1062   AMDGPUOperand::Ptr defaultDA() const;
1063   AMDGPUOperand::Ptr defaultR128() const;
1064   AMDGPUOperand::Ptr defaultLWE() const;
1065   AMDGPUOperand::Ptr defaultSMRDOffset8() const;
1066   AMDGPUOperand::Ptr defaultSMRDOffset20() const;
1067   AMDGPUOperand::Ptr defaultSMRDLiteralOffset() const;
1068   AMDGPUOperand::Ptr defaultOffsetU12() const;
1069   AMDGPUOperand::Ptr defaultOffsetS13() const;
1070 
1071   OperandMatchResultTy parseOModOperand(OperandVector &Operands);
1072 
1073   void cvtVOP3(MCInst &Inst, const OperandVector &Operands,
1074                OptionalImmIndexMap &OptionalIdx);
1075   void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands);
1076   void cvtVOP3(MCInst &Inst, const OperandVector &Operands);
1077   void cvtVOP3PImpl(MCInst &Inst, const OperandVector &Operands,
1078                     bool IsPacked);
1079   void cvtVOP3P(MCInst &Inst, const OperandVector &Operands);
1080   void cvtVOP3P_NotPacked(MCInst &Inst, const OperandVector &Operands);
1081 
1082   void cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands);
1083 
1084   void cvtMIMG(MCInst &Inst, const OperandVector &Operands,
1085                bool IsAtomic = false);
1086   void cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands);
1087 
1088   OperandMatchResultTy parseDPPCtrl(OperandVector &Operands);
1089   AMDGPUOperand::Ptr defaultRowMask() const;
1090   AMDGPUOperand::Ptr defaultBankMask() const;
1091   AMDGPUOperand::Ptr defaultBoundCtrl() const;
1092   void cvtDPP(MCInst &Inst, const OperandVector &Operands);
1093 
1094   OperandMatchResultTy parseSDWASel(OperandVector &Operands, StringRef Prefix,
1095                                     AMDGPUOperand::ImmTy Type);
1096   OperandMatchResultTy parseSDWADstUnused(OperandVector &Operands);
1097   void cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands);
1098   void cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands);
1099   void cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands);
1100   void cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands);
1101   void cvtSDWA(MCInst &Inst, const OperandVector &Operands,
1102                 uint64_t BasicInstType, bool skipVcc = false);
1103 };
1104 
1105 struct OptionalOperand {
1106   const char *Name;
1107   AMDGPUOperand::ImmTy Type;
1108   bool IsBit;
1109   bool (*ConvertResult)(int64_t&);
1110 };
1111 
1112 } // end anonymous namespace
1113 
1114 // May be called with integer type with equivalent bitwidth.
1115 static const fltSemantics *getFltSemantics(unsigned Size) {
1116   switch (Size) {
1117   case 4:
1118     return &APFloat::IEEEsingle();
1119   case 8:
1120     return &APFloat::IEEEdouble();
1121   case 2:
1122     return &APFloat::IEEEhalf();
1123   default:
1124     llvm_unreachable("unsupported fp type");
1125   }
1126 }
1127 
1128 static const fltSemantics *getFltSemantics(MVT VT) {
1129   return getFltSemantics(VT.getSizeInBits() / 8);
1130 }
1131 
1132 static const fltSemantics *getOpFltSemantics(uint8_t OperandType) {
1133   switch (OperandType) {
1134   case AMDGPU::OPERAND_REG_IMM_INT32:
1135   case AMDGPU::OPERAND_REG_IMM_FP32:
1136   case AMDGPU::OPERAND_REG_INLINE_C_INT32:
1137   case AMDGPU::OPERAND_REG_INLINE_C_FP32:
1138     return &APFloat::IEEEsingle();
1139   case AMDGPU::OPERAND_REG_IMM_INT64:
1140   case AMDGPU::OPERAND_REG_IMM_FP64:
1141   case AMDGPU::OPERAND_REG_INLINE_C_INT64:
1142   case AMDGPU::OPERAND_REG_INLINE_C_FP64:
1143     return &APFloat::IEEEdouble();
1144   case AMDGPU::OPERAND_REG_IMM_INT16:
1145   case AMDGPU::OPERAND_REG_IMM_FP16:
1146   case AMDGPU::OPERAND_REG_INLINE_C_INT16:
1147   case AMDGPU::OPERAND_REG_INLINE_C_FP16:
1148   case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
1149   case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
1150     return &APFloat::IEEEhalf();
1151   default:
1152     llvm_unreachable("unsupported fp type");
1153   }
1154 }
1155 
1156 //===----------------------------------------------------------------------===//
1157 // Operand
1158 //===----------------------------------------------------------------------===//
1159 
1160 static bool canLosslesslyConvertToFPType(APFloat &FPLiteral, MVT VT) {
1161   bool Lost;
1162 
1163   // Convert literal to single precision
1164   APFloat::opStatus Status = FPLiteral.convert(*getFltSemantics(VT),
1165                                                APFloat::rmNearestTiesToEven,
1166                                                &Lost);
1167   // We allow precision lost but not overflow or underflow
1168   if (Status != APFloat::opOK &&
1169       Lost &&
1170       ((Status & APFloat::opOverflow)  != 0 ||
1171        (Status & APFloat::opUnderflow) != 0)) {
1172     return false;
1173   }
1174 
1175   return true;
1176 }
1177 
1178 bool AMDGPUOperand::isInlinableImm(MVT type) const {
1179   if (!isImmTy(ImmTyNone)) {
1180     // Only plain immediates are inlinable (e.g. "clamp" attribute is not)
1181     return false;
1182   }
1183   // TODO: We should avoid using host float here. It would be better to
1184   // check the float bit values which is what a few other places do.
1185   // We've had bot failures before due to weird NaN support on mips hosts.
1186 
1187   APInt Literal(64, Imm.Val);
1188 
1189   if (Imm.IsFPImm) { // We got fp literal token
1190     if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
1191       return AMDGPU::isInlinableLiteral64(Imm.Val,
1192                                           AsmParser->hasInv2PiInlineImm());
1193     }
1194 
1195     APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
1196     if (!canLosslesslyConvertToFPType(FPLiteral, type))
1197       return false;
1198 
1199     if (type.getScalarSizeInBits() == 16) {
1200       return AMDGPU::isInlinableLiteral16(
1201         static_cast<int16_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
1202         AsmParser->hasInv2PiInlineImm());
1203     }
1204 
1205     // Check if single precision literal is inlinable
1206     return AMDGPU::isInlinableLiteral32(
1207       static_cast<int32_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
1208       AsmParser->hasInv2PiInlineImm());
1209   }
1210 
1211   // We got int literal token.
1212   if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
1213     return AMDGPU::isInlinableLiteral64(Imm.Val,
1214                                         AsmParser->hasInv2PiInlineImm());
1215   }
1216 
1217   if (type.getScalarSizeInBits() == 16) {
1218     return AMDGPU::isInlinableLiteral16(
1219       static_cast<int16_t>(Literal.getLoBits(16).getSExtValue()),
1220       AsmParser->hasInv2PiInlineImm());
1221   }
1222 
1223   return AMDGPU::isInlinableLiteral32(
1224     static_cast<int32_t>(Literal.getLoBits(32).getZExtValue()),
1225     AsmParser->hasInv2PiInlineImm());
1226 }
1227 
1228 bool AMDGPUOperand::isLiteralImm(MVT type) const {
1229   // Check that this immediate can be added as literal
1230   if (!isImmTy(ImmTyNone)) {
1231     return false;
1232   }
1233 
1234   if (!Imm.IsFPImm) {
1235     // We got int literal token.
1236 
1237     if (type == MVT::f64 && hasFPModifiers()) {
1238       // Cannot apply fp modifiers to int literals preserving the same semantics
1239       // for VOP1/2/C and VOP3 because of integer truncation. To avoid ambiguity,
1240       // disable these cases.
1241       return false;
1242     }
1243 
1244     unsigned Size = type.getSizeInBits();
1245     if (Size == 64)
1246       Size = 32;
1247 
1248     // FIXME: 64-bit operands can zero extend, sign extend, or pad zeroes for FP
1249     // types.
1250     return isUIntN(Size, Imm.Val) || isIntN(Size, Imm.Val);
1251   }
1252 
1253   // We got fp literal token
1254   if (type == MVT::f64) { // Expected 64-bit fp operand
1255     // We would set low 64-bits of literal to zeroes but we accept this literals
1256     return true;
1257   }
1258 
1259   if (type == MVT::i64) { // Expected 64-bit int operand
1260     // We don't allow fp literals in 64-bit integer instructions. It is
1261     // unclear how we should encode them.
1262     return false;
1263   }
1264 
1265   APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
1266   return canLosslesslyConvertToFPType(FPLiteral, type);
1267 }
1268 
1269 bool AMDGPUOperand::isRegClass(unsigned RCID) const {
1270   return isRegKind() && AsmParser->getMRI()->getRegClass(RCID).contains(getReg());
1271 }
1272 
1273 bool AMDGPUOperand::isSDWARegKind() const {
1274   if (AsmParser->isVI())
1275     return isVReg();
1276   else if (AsmParser->isGFX9())
1277     return isRegKind();
1278   else
1279     return false;
1280 }
1281 
1282 uint64_t AMDGPUOperand::applyInputFPModifiers(uint64_t Val, unsigned Size) const
1283 {
1284   assert(isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
1285   assert(Size == 2 || Size == 4 || Size == 8);
1286 
1287   const uint64_t FpSignMask = (1ULL << (Size * 8 - 1));
1288 
1289   if (Imm.Mods.Abs) {
1290     Val &= ~FpSignMask;
1291   }
1292   if (Imm.Mods.Neg) {
1293     Val ^= FpSignMask;
1294   }
1295 
1296   return Val;
1297 }
1298 
1299 void AMDGPUOperand::addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers) const {
1300   if (AMDGPU::isSISrcOperand(AsmParser->getMII()->get(Inst.getOpcode()),
1301                              Inst.getNumOperands())) {
1302     addLiteralImmOperand(Inst, Imm.Val,
1303                          ApplyModifiers &
1304                          isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
1305   } else {
1306     assert(!isImmTy(ImmTyNone) || !hasModifiers());
1307     Inst.addOperand(MCOperand::createImm(Imm.Val));
1308   }
1309 }
1310 
1311 void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const {
1312   const auto& InstDesc = AsmParser->getMII()->get(Inst.getOpcode());
1313   auto OpNum = Inst.getNumOperands();
1314   // Check that this operand accepts literals
1315   assert(AMDGPU::isSISrcOperand(InstDesc, OpNum));
1316 
1317   if (ApplyModifiers) {
1318     assert(AMDGPU::isSISrcFPOperand(InstDesc, OpNum));
1319     const unsigned Size = Imm.IsFPImm ? sizeof(double) : getOperandSize(InstDesc, OpNum);
1320     Val = applyInputFPModifiers(Val, Size);
1321   }
1322 
1323   APInt Literal(64, Val);
1324   uint8_t OpTy = InstDesc.OpInfo[OpNum].OperandType;
1325 
1326   if (Imm.IsFPImm) { // We got fp literal token
1327     switch (OpTy) {
1328     case AMDGPU::OPERAND_REG_IMM_INT64:
1329     case AMDGPU::OPERAND_REG_IMM_FP64:
1330     case AMDGPU::OPERAND_REG_INLINE_C_INT64:
1331     case AMDGPU::OPERAND_REG_INLINE_C_FP64:
1332       if (AMDGPU::isInlinableLiteral64(Literal.getZExtValue(),
1333                                        AsmParser->hasInv2PiInlineImm())) {
1334         Inst.addOperand(MCOperand::createImm(Literal.getZExtValue()));
1335         return;
1336       }
1337 
1338       // Non-inlineable
1339       if (AMDGPU::isSISrcFPOperand(InstDesc, OpNum)) { // Expected 64-bit fp operand
1340         // For fp operands we check if low 32 bits are zeros
1341         if (Literal.getLoBits(32) != 0) {
1342           const_cast<AMDGPUAsmParser *>(AsmParser)->Warning(Inst.getLoc(),
1343           "Can't encode literal as exact 64-bit floating-point operand. "
1344           "Low 32-bits will be set to zero");
1345         }
1346 
1347         Inst.addOperand(MCOperand::createImm(Literal.lshr(32).getZExtValue()));
1348         return;
1349       }
1350 
1351       // We don't allow fp literals in 64-bit integer instructions. It is
1352       // unclear how we should encode them. This case should be checked earlier
1353       // in predicate methods (isLiteralImm())
1354       llvm_unreachable("fp literal in 64-bit integer instruction.");
1355 
1356     case AMDGPU::OPERAND_REG_IMM_INT32:
1357     case AMDGPU::OPERAND_REG_IMM_FP32:
1358     case AMDGPU::OPERAND_REG_INLINE_C_INT32:
1359     case AMDGPU::OPERAND_REG_INLINE_C_FP32:
1360     case AMDGPU::OPERAND_REG_IMM_INT16:
1361     case AMDGPU::OPERAND_REG_IMM_FP16:
1362     case AMDGPU::OPERAND_REG_INLINE_C_INT16:
1363     case AMDGPU::OPERAND_REG_INLINE_C_FP16:
1364     case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
1365     case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: {
1366       bool lost;
1367       APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
1368       // Convert literal to single precision
1369       FPLiteral.convert(*getOpFltSemantics(OpTy),
1370                         APFloat::rmNearestTiesToEven, &lost);
1371       // We allow precision lost but not overflow or underflow. This should be
1372       // checked earlier in isLiteralImm()
1373 
1374       uint64_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue();
1375       if (OpTy == AMDGPU::OPERAND_REG_INLINE_C_V2INT16 ||
1376           OpTy == AMDGPU::OPERAND_REG_INLINE_C_V2FP16) {
1377         ImmVal |= (ImmVal << 16);
1378       }
1379 
1380       Inst.addOperand(MCOperand::createImm(ImmVal));
1381       return;
1382     }
1383     default:
1384       llvm_unreachable("invalid operand size");
1385     }
1386 
1387     return;
1388   }
1389 
1390    // We got int literal token.
1391   // Only sign extend inline immediates.
1392   // FIXME: No errors on truncation
1393   switch (OpTy) {
1394   case AMDGPU::OPERAND_REG_IMM_INT32:
1395   case AMDGPU::OPERAND_REG_IMM_FP32:
1396   case AMDGPU::OPERAND_REG_INLINE_C_INT32:
1397   case AMDGPU::OPERAND_REG_INLINE_C_FP32:
1398     if (isInt<32>(Val) &&
1399         AMDGPU::isInlinableLiteral32(static_cast<int32_t>(Val),
1400                                      AsmParser->hasInv2PiInlineImm())) {
1401       Inst.addOperand(MCOperand::createImm(Val));
1402       return;
1403     }
1404 
1405     Inst.addOperand(MCOperand::createImm(Val & 0xffffffff));
1406     return;
1407 
1408   case AMDGPU::OPERAND_REG_IMM_INT64:
1409   case AMDGPU::OPERAND_REG_IMM_FP64:
1410   case AMDGPU::OPERAND_REG_INLINE_C_INT64:
1411   case AMDGPU::OPERAND_REG_INLINE_C_FP64:
1412     if (AMDGPU::isInlinableLiteral64(Val, AsmParser->hasInv2PiInlineImm())) {
1413       Inst.addOperand(MCOperand::createImm(Val));
1414       return;
1415     }
1416 
1417     Inst.addOperand(MCOperand::createImm(Lo_32(Val)));
1418     return;
1419 
1420   case AMDGPU::OPERAND_REG_IMM_INT16:
1421   case AMDGPU::OPERAND_REG_IMM_FP16:
1422   case AMDGPU::OPERAND_REG_INLINE_C_INT16:
1423   case AMDGPU::OPERAND_REG_INLINE_C_FP16:
1424     if (isInt<16>(Val) &&
1425         AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val),
1426                                      AsmParser->hasInv2PiInlineImm())) {
1427       Inst.addOperand(MCOperand::createImm(Val));
1428       return;
1429     }
1430 
1431     Inst.addOperand(MCOperand::createImm(Val & 0xffff));
1432     return;
1433 
1434   case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
1435   case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: {
1436     auto LiteralVal = static_cast<uint16_t>(Literal.getLoBits(16).getZExtValue());
1437     assert(AMDGPU::isInlinableLiteral16(LiteralVal,
1438                                         AsmParser->hasInv2PiInlineImm()));
1439 
1440     uint32_t ImmVal = static_cast<uint32_t>(LiteralVal) << 16 |
1441                       static_cast<uint32_t>(LiteralVal);
1442     Inst.addOperand(MCOperand::createImm(ImmVal));
1443     return;
1444   }
1445   default:
1446     llvm_unreachable("invalid operand size");
1447   }
1448 }
1449 
1450 template <unsigned Bitwidth>
1451 void AMDGPUOperand::addKImmFPOperands(MCInst &Inst, unsigned N) const {
1452   APInt Literal(64, Imm.Val);
1453 
1454   if (!Imm.IsFPImm) {
1455     // We got int literal token.
1456     Inst.addOperand(MCOperand::createImm(Literal.getLoBits(Bitwidth).getZExtValue()));
1457     return;
1458   }
1459 
1460   bool Lost;
1461   APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
1462   FPLiteral.convert(*getFltSemantics(Bitwidth / 8),
1463                     APFloat::rmNearestTiesToEven, &Lost);
1464   Inst.addOperand(MCOperand::createImm(FPLiteral.bitcastToAPInt().getZExtValue()));
1465 }
1466 
1467 void AMDGPUOperand::addRegOperands(MCInst &Inst, unsigned N) const {
1468   Inst.addOperand(MCOperand::createReg(AMDGPU::getMCReg(getReg(), AsmParser->getSTI())));
1469 }
1470 
1471 //===----------------------------------------------------------------------===//
1472 // AsmParser
1473 //===----------------------------------------------------------------------===//
1474 
1475 static int getRegClass(RegisterKind Is, unsigned RegWidth) {
1476   if (Is == IS_VGPR) {
1477     switch (RegWidth) {
1478       default: return -1;
1479       case 1: return AMDGPU::VGPR_32RegClassID;
1480       case 2: return AMDGPU::VReg_64RegClassID;
1481       case 3: return AMDGPU::VReg_96RegClassID;
1482       case 4: return AMDGPU::VReg_128RegClassID;
1483       case 8: return AMDGPU::VReg_256RegClassID;
1484       case 16: return AMDGPU::VReg_512RegClassID;
1485     }
1486   } else if (Is == IS_TTMP) {
1487     switch (RegWidth) {
1488       default: return -1;
1489       case 1: return AMDGPU::TTMP_32RegClassID;
1490       case 2: return AMDGPU::TTMP_64RegClassID;
1491       case 4: return AMDGPU::TTMP_128RegClassID;
1492     }
1493   } else if (Is == IS_SGPR) {
1494     switch (RegWidth) {
1495       default: return -1;
1496       case 1: return AMDGPU::SGPR_32RegClassID;
1497       case 2: return AMDGPU::SGPR_64RegClassID;
1498       case 4: return AMDGPU::SGPR_128RegClassID;
1499       case 8: return AMDGPU::SReg_256RegClassID;
1500       case 16: return AMDGPU::SReg_512RegClassID;
1501     }
1502   }
1503   return -1;
1504 }
1505 
1506 static unsigned getSpecialRegForName(StringRef RegName) {
1507   return StringSwitch<unsigned>(RegName)
1508     .Case("exec", AMDGPU::EXEC)
1509     .Case("vcc", AMDGPU::VCC)
1510     .Case("flat_scratch", AMDGPU::FLAT_SCR)
1511     .Case("m0", AMDGPU::M0)
1512     .Case("scc", AMDGPU::SCC)
1513     .Case("tba", AMDGPU::TBA)
1514     .Case("tma", AMDGPU::TMA)
1515     .Case("flat_scratch_lo", AMDGPU::FLAT_SCR_LO)
1516     .Case("flat_scratch_hi", AMDGPU::FLAT_SCR_HI)
1517     .Case("vcc_lo", AMDGPU::VCC_LO)
1518     .Case("vcc_hi", AMDGPU::VCC_HI)
1519     .Case("exec_lo", AMDGPU::EXEC_LO)
1520     .Case("exec_hi", AMDGPU::EXEC_HI)
1521     .Case("tma_lo", AMDGPU::TMA_LO)
1522     .Case("tma_hi", AMDGPU::TMA_HI)
1523     .Case("tba_lo", AMDGPU::TBA_LO)
1524     .Case("tba_hi", AMDGPU::TBA_HI)
1525     .Default(0);
1526 }
1527 
1528 bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
1529                                     SMLoc &EndLoc) {
1530   auto R = parseRegister();
1531   if (!R) return true;
1532   assert(R->isReg());
1533   RegNo = R->getReg();
1534   StartLoc = R->getStartLoc();
1535   EndLoc = R->getEndLoc();
1536   return false;
1537 }
1538 
1539 bool AMDGPUAsmParser::AddNextRegisterToList(unsigned &Reg, unsigned &RegWidth,
1540                                             RegisterKind RegKind, unsigned Reg1,
1541                                             unsigned RegNum) {
1542   switch (RegKind) {
1543   case IS_SPECIAL:
1544     if (Reg == AMDGPU::EXEC_LO && Reg1 == AMDGPU::EXEC_HI) {
1545       Reg = AMDGPU::EXEC;
1546       RegWidth = 2;
1547       return true;
1548     }
1549     if (Reg == AMDGPU::FLAT_SCR_LO && Reg1 == AMDGPU::FLAT_SCR_HI) {
1550       Reg = AMDGPU::FLAT_SCR;
1551       RegWidth = 2;
1552       return true;
1553     }
1554     if (Reg == AMDGPU::VCC_LO && Reg1 == AMDGPU::VCC_HI) {
1555       Reg = AMDGPU::VCC;
1556       RegWidth = 2;
1557       return true;
1558     }
1559     if (Reg == AMDGPU::TBA_LO && Reg1 == AMDGPU::TBA_HI) {
1560       Reg = AMDGPU::TBA;
1561       RegWidth = 2;
1562       return true;
1563     }
1564     if (Reg == AMDGPU::TMA_LO && Reg1 == AMDGPU::TMA_HI) {
1565       Reg = AMDGPU::TMA;
1566       RegWidth = 2;
1567       return true;
1568     }
1569     return false;
1570   case IS_VGPR:
1571   case IS_SGPR:
1572   case IS_TTMP:
1573     if (Reg1 != Reg + RegWidth) {
1574       return false;
1575     }
1576     RegWidth++;
1577     return true;
1578   default:
1579     llvm_unreachable("unexpected register kind");
1580   }
1581 }
1582 
1583 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
1584                                           unsigned &RegNum, unsigned &RegWidth,
1585                                           unsigned *DwordRegIndex) {
1586   if (DwordRegIndex) { *DwordRegIndex = 0; }
1587   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
1588   if (getLexer().is(AsmToken::Identifier)) {
1589     StringRef RegName = Parser.getTok().getString();
1590     if ((Reg = getSpecialRegForName(RegName))) {
1591       Parser.Lex();
1592       RegKind = IS_SPECIAL;
1593     } else {
1594       unsigned RegNumIndex = 0;
1595       if (RegName[0] == 'v') {
1596         RegNumIndex = 1;
1597         RegKind = IS_VGPR;
1598       } else if (RegName[0] == 's') {
1599         RegNumIndex = 1;
1600         RegKind = IS_SGPR;
1601       } else if (RegName.startswith("ttmp")) {
1602         RegNumIndex = strlen("ttmp");
1603         RegKind = IS_TTMP;
1604       } else {
1605         return false;
1606       }
1607       if (RegName.size() > RegNumIndex) {
1608         // Single 32-bit register: vXX.
1609         if (RegName.substr(RegNumIndex).getAsInteger(10, RegNum))
1610           return false;
1611         Parser.Lex();
1612         RegWidth = 1;
1613       } else {
1614         // Range of registers: v[XX:YY]. ":YY" is optional.
1615         Parser.Lex();
1616         int64_t RegLo, RegHi;
1617         if (getLexer().isNot(AsmToken::LBrac))
1618           return false;
1619         Parser.Lex();
1620 
1621         if (getParser().parseAbsoluteExpression(RegLo))
1622           return false;
1623 
1624         const bool isRBrace = getLexer().is(AsmToken::RBrac);
1625         if (!isRBrace && getLexer().isNot(AsmToken::Colon))
1626           return false;
1627         Parser.Lex();
1628 
1629         if (isRBrace) {
1630           RegHi = RegLo;
1631         } else {
1632           if (getParser().parseAbsoluteExpression(RegHi))
1633             return false;
1634 
1635           if (getLexer().isNot(AsmToken::RBrac))
1636             return false;
1637           Parser.Lex();
1638         }
1639         RegNum = (unsigned) RegLo;
1640         RegWidth = (RegHi - RegLo) + 1;
1641       }
1642     }
1643   } else if (getLexer().is(AsmToken::LBrac)) {
1644     // List of consecutive registers: [s0,s1,s2,s3]
1645     Parser.Lex();
1646     if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, nullptr))
1647       return false;
1648     if (RegWidth != 1)
1649       return false;
1650     RegisterKind RegKind1;
1651     unsigned Reg1, RegNum1, RegWidth1;
1652     do {
1653       if (getLexer().is(AsmToken::Comma)) {
1654         Parser.Lex();
1655       } else if (getLexer().is(AsmToken::RBrac)) {
1656         Parser.Lex();
1657         break;
1658       } else if (ParseAMDGPURegister(RegKind1, Reg1, RegNum1, RegWidth1, nullptr)) {
1659         if (RegWidth1 != 1) {
1660           return false;
1661         }
1662         if (RegKind1 != RegKind) {
1663           return false;
1664         }
1665         if (!AddNextRegisterToList(Reg, RegWidth, RegKind1, Reg1, RegNum1)) {
1666           return false;
1667         }
1668       } else {
1669         return false;
1670       }
1671     } while (true);
1672   } else {
1673     return false;
1674   }
1675   switch (RegKind) {
1676   case IS_SPECIAL:
1677     RegNum = 0;
1678     RegWidth = 1;
1679     break;
1680   case IS_VGPR:
1681   case IS_SGPR:
1682   case IS_TTMP:
1683   {
1684     unsigned Size = 1;
1685     if (RegKind == IS_SGPR || RegKind == IS_TTMP) {
1686       // SGPR and TTMP registers must be aligned. Max required alignment is 4 dwords.
1687       Size = std::min(RegWidth, 4u);
1688     }
1689     if (RegNum % Size != 0)
1690       return false;
1691     if (DwordRegIndex) { *DwordRegIndex = RegNum; }
1692     RegNum = RegNum / Size;
1693     int RCID = getRegClass(RegKind, RegWidth);
1694     if (RCID == -1)
1695       return false;
1696     const MCRegisterClass RC = TRI->getRegClass(RCID);
1697     if (RegNum >= RC.getNumRegs())
1698       return false;
1699     Reg = RC.getRegister(RegNum);
1700     break;
1701   }
1702 
1703   default:
1704     llvm_unreachable("unexpected register kind");
1705   }
1706 
1707   if (!subtargetHasRegister(*TRI, Reg))
1708     return false;
1709   return true;
1710 }
1711 
1712 std::unique_ptr<AMDGPUOperand> AMDGPUAsmParser::parseRegister() {
1713   const auto &Tok = Parser.getTok();
1714   SMLoc StartLoc = Tok.getLoc();
1715   SMLoc EndLoc = Tok.getEndLoc();
1716   RegisterKind RegKind;
1717   unsigned Reg, RegNum, RegWidth, DwordRegIndex;
1718 
1719   if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, &DwordRegIndex)) {
1720     return nullptr;
1721   }
1722   KernelScope.usesRegister(RegKind, DwordRegIndex, RegWidth);
1723   return AMDGPUOperand::CreateReg(this, Reg, StartLoc, EndLoc, false);
1724 }
1725 
1726 bool
1727 AMDGPUAsmParser::parseAbsoluteExpr(int64_t &Val, bool AbsMod) {
1728   if (AbsMod && getLexer().peekTok().is(AsmToken::Pipe) &&
1729       (getLexer().getKind() == AsmToken::Integer ||
1730        getLexer().getKind() == AsmToken::Real)) {
1731     // This is a workaround for handling operands like these:
1732     //     |1.0|
1733     //     |-1|
1734     // This syntax is not compatible with syntax of standard
1735     // MC expressions (due to the trailing '|').
1736 
1737     SMLoc EndLoc;
1738     const MCExpr *Expr;
1739 
1740     if (getParser().parsePrimaryExpr(Expr, EndLoc)) {
1741       return true;
1742     }
1743 
1744     return !Expr->evaluateAsAbsolute(Val);
1745   }
1746 
1747   return getParser().parseAbsoluteExpression(Val);
1748 }
1749 
1750 OperandMatchResultTy
1751 AMDGPUAsmParser::parseImm(OperandVector &Operands, bool AbsMod) {
1752   // TODO: add syntactic sugar for 1/(2*PI)
1753   bool Minus = false;
1754   if (getLexer().getKind() == AsmToken::Minus) {
1755     Minus = true;
1756     Parser.Lex();
1757   }
1758 
1759   SMLoc S = Parser.getTok().getLoc();
1760   switch(getLexer().getKind()) {
1761   case AsmToken::Integer: {
1762     int64_t IntVal;
1763     if (parseAbsoluteExpr(IntVal, AbsMod))
1764       return MatchOperand_ParseFail;
1765     if (Minus)
1766       IntVal *= -1;
1767     Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S));
1768     return MatchOperand_Success;
1769   }
1770   case AsmToken::Real: {
1771     int64_t IntVal;
1772     if (parseAbsoluteExpr(IntVal, AbsMod))
1773       return MatchOperand_ParseFail;
1774 
1775     APFloat F(BitsToDouble(IntVal));
1776     if (Minus)
1777       F.changeSign();
1778     Operands.push_back(
1779         AMDGPUOperand::CreateImm(this, F.bitcastToAPInt().getZExtValue(), S,
1780                                  AMDGPUOperand::ImmTyNone, true));
1781     return MatchOperand_Success;
1782   }
1783   default:
1784     return Minus ? MatchOperand_ParseFail : MatchOperand_NoMatch;
1785   }
1786 }
1787 
1788 OperandMatchResultTy
1789 AMDGPUAsmParser::parseReg(OperandVector &Operands) {
1790   if (auto R = parseRegister()) {
1791     assert(R->isReg());
1792     R->Reg.IsForcedVOP3 = isForcedVOP3();
1793     Operands.push_back(std::move(R));
1794     return MatchOperand_Success;
1795   }
1796   return MatchOperand_NoMatch;
1797 }
1798 
1799 OperandMatchResultTy
1800 AMDGPUAsmParser::parseRegOrImm(OperandVector &Operands, bool AbsMod) {
1801   auto res = parseImm(Operands, AbsMod);
1802   if (res != MatchOperand_NoMatch) {
1803     return res;
1804   }
1805 
1806   return parseReg(Operands);
1807 }
1808 
1809 OperandMatchResultTy
1810 AMDGPUAsmParser::parseRegOrImmWithFPInputMods(OperandVector &Operands,
1811                                               bool AllowImm) {
1812   bool Negate = false, Negate2 = false, Abs = false, Abs2 = false;
1813 
1814   if (getLexer().getKind()== AsmToken::Minus) {
1815     const AsmToken NextToken = getLexer().peekTok();
1816 
1817     // Disable ambiguous constructs like '--1' etc. Should use neg(-1) instead.
1818     if (NextToken.is(AsmToken::Minus)) {
1819       Error(Parser.getTok().getLoc(), "invalid syntax, expected 'neg' modifier");
1820       return MatchOperand_ParseFail;
1821     }
1822 
1823     // '-' followed by an integer literal N should be interpreted as integer
1824     // negation rather than a floating-point NEG modifier applied to N.
1825     // Beside being contr-intuitive, such use of floating-point NEG modifier
1826     // results in different meaning of integer literals used with VOP1/2/C
1827     // and VOP3, for example:
1828     //    v_exp_f32_e32 v5, -1 // VOP1: src0 = 0xFFFFFFFF
1829     //    v_exp_f32_e64 v5, -1 // VOP3: src0 = 0x80000001
1830     // Negative fp literals should be handled likewise for unifomtity
1831     if (!NextToken.is(AsmToken::Integer) && !NextToken.is(AsmToken::Real)) {
1832       Parser.Lex();
1833       Negate = true;
1834     }
1835   }
1836 
1837   if (getLexer().getKind() == AsmToken::Identifier &&
1838       Parser.getTok().getString() == "neg") {
1839     if (Negate) {
1840       Error(Parser.getTok().getLoc(), "expected register or immediate");
1841       return MatchOperand_ParseFail;
1842     }
1843     Parser.Lex();
1844     Negate2 = true;
1845     if (getLexer().isNot(AsmToken::LParen)) {
1846       Error(Parser.getTok().getLoc(), "expected left paren after neg");
1847       return MatchOperand_ParseFail;
1848     }
1849     Parser.Lex();
1850   }
1851 
1852   if (getLexer().getKind() == AsmToken::Identifier &&
1853       Parser.getTok().getString() == "abs") {
1854     Parser.Lex();
1855     Abs2 = true;
1856     if (getLexer().isNot(AsmToken::LParen)) {
1857       Error(Parser.getTok().getLoc(), "expected left paren after abs");
1858       return MatchOperand_ParseFail;
1859     }
1860     Parser.Lex();
1861   }
1862 
1863   if (getLexer().getKind() == AsmToken::Pipe) {
1864     if (Abs2) {
1865       Error(Parser.getTok().getLoc(), "expected register or immediate");
1866       return MatchOperand_ParseFail;
1867     }
1868     Parser.Lex();
1869     Abs = true;
1870   }
1871 
1872   OperandMatchResultTy Res;
1873   if (AllowImm) {
1874     Res = parseRegOrImm(Operands, Abs);
1875   } else {
1876     Res = parseReg(Operands);
1877   }
1878   if (Res != MatchOperand_Success) {
1879     return Res;
1880   }
1881 
1882   AMDGPUOperand::Modifiers Mods;
1883   if (Abs) {
1884     if (getLexer().getKind() != AsmToken::Pipe) {
1885       Error(Parser.getTok().getLoc(), "expected vertical bar");
1886       return MatchOperand_ParseFail;
1887     }
1888     Parser.Lex();
1889     Mods.Abs = true;
1890   }
1891   if (Abs2) {
1892     if (getLexer().isNot(AsmToken::RParen)) {
1893       Error(Parser.getTok().getLoc(), "expected closing parentheses");
1894       return MatchOperand_ParseFail;
1895     }
1896     Parser.Lex();
1897     Mods.Abs = true;
1898   }
1899 
1900   if (Negate) {
1901     Mods.Neg = true;
1902   } else if (Negate2) {
1903     if (getLexer().isNot(AsmToken::RParen)) {
1904       Error(Parser.getTok().getLoc(), "expected closing parentheses");
1905       return MatchOperand_ParseFail;
1906     }
1907     Parser.Lex();
1908     Mods.Neg = true;
1909   }
1910 
1911   if (Mods.hasFPModifiers()) {
1912     AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
1913     Op.setModifiers(Mods);
1914   }
1915   return MatchOperand_Success;
1916 }
1917 
1918 OperandMatchResultTy
1919 AMDGPUAsmParser::parseRegOrImmWithIntInputMods(OperandVector &Operands,
1920                                                bool AllowImm) {
1921   bool Sext = false;
1922 
1923   if (getLexer().getKind() == AsmToken::Identifier &&
1924       Parser.getTok().getString() == "sext") {
1925     Parser.Lex();
1926     Sext = true;
1927     if (getLexer().isNot(AsmToken::LParen)) {
1928       Error(Parser.getTok().getLoc(), "expected left paren after sext");
1929       return MatchOperand_ParseFail;
1930     }
1931     Parser.Lex();
1932   }
1933 
1934   OperandMatchResultTy Res;
1935   if (AllowImm) {
1936     Res = parseRegOrImm(Operands);
1937   } else {
1938     Res = parseReg(Operands);
1939   }
1940   if (Res != MatchOperand_Success) {
1941     return Res;
1942   }
1943 
1944   AMDGPUOperand::Modifiers Mods;
1945   if (Sext) {
1946     if (getLexer().isNot(AsmToken::RParen)) {
1947       Error(Parser.getTok().getLoc(), "expected closing parentheses");
1948       return MatchOperand_ParseFail;
1949     }
1950     Parser.Lex();
1951     Mods.Sext = true;
1952   }
1953 
1954   if (Mods.hasIntModifiers()) {
1955     AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
1956     Op.setModifiers(Mods);
1957   }
1958 
1959   return MatchOperand_Success;
1960 }
1961 
1962 OperandMatchResultTy
1963 AMDGPUAsmParser::parseRegWithFPInputMods(OperandVector &Operands) {
1964   return parseRegOrImmWithFPInputMods(Operands, false);
1965 }
1966 
1967 OperandMatchResultTy
1968 AMDGPUAsmParser::parseRegWithIntInputMods(OperandVector &Operands) {
1969   return parseRegOrImmWithIntInputMods(Operands, false);
1970 }
1971 
1972 OperandMatchResultTy AMDGPUAsmParser::parseVReg32OrOff(OperandVector &Operands) {
1973   std::unique_ptr<AMDGPUOperand> Reg = parseRegister();
1974   if (Reg) {
1975     Operands.push_back(std::move(Reg));
1976     return MatchOperand_Success;
1977   }
1978 
1979   const AsmToken &Tok = Parser.getTok();
1980   if (Tok.getString() == "off") {
1981     Operands.push_back(AMDGPUOperand::CreateImm(this, 0, Tok.getLoc(),
1982                                                 AMDGPUOperand::ImmTyOff, false));
1983     Parser.Lex();
1984     return MatchOperand_Success;
1985   }
1986 
1987   return MatchOperand_NoMatch;
1988 }
1989 
1990 unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) {
1991   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
1992 
1993   if ((getForcedEncodingSize() == 32 && (TSFlags & SIInstrFlags::VOP3)) ||
1994       (getForcedEncodingSize() == 64 && !(TSFlags & SIInstrFlags::VOP3)) ||
1995       (isForcedDPP() && !(TSFlags & SIInstrFlags::DPP)) ||
1996       (isForcedSDWA() && !(TSFlags & SIInstrFlags::SDWA)) )
1997     return Match_InvalidOperand;
1998 
1999   if ((TSFlags & SIInstrFlags::VOP3) &&
2000       (TSFlags & SIInstrFlags::VOPAsmPrefer32Bit) &&
2001       getForcedEncodingSize() != 64)
2002     return Match_PreferE32;
2003 
2004   if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
2005       Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) {
2006     // v_mac_f32/16 allow only dst_sel == DWORD;
2007     auto OpNum =
2008         AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::dst_sel);
2009     const auto &Op = Inst.getOperand(OpNum);
2010     if (!Op.isImm() || Op.getImm() != AMDGPU::SDWA::SdwaSel::DWORD) {
2011       return Match_InvalidOperand;
2012     }
2013   }
2014 
2015   if ((TSFlags & SIInstrFlags::FLAT) && !hasFlatOffsets()) {
2016     // FIXME: Produces error without correct column reported.
2017     auto OpNum =
2018         AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::offset);
2019     const auto &Op = Inst.getOperand(OpNum);
2020     if (Op.getImm() != 0)
2021       return Match_InvalidOperand;
2022   }
2023 
2024   return Match_Success;
2025 }
2026 
2027 // What asm variants we should check
2028 ArrayRef<unsigned> AMDGPUAsmParser::getMatchedVariants() const {
2029   if (getForcedEncodingSize() == 32) {
2030     static const unsigned Variants[] = {AMDGPUAsmVariants::DEFAULT};
2031     return makeArrayRef(Variants);
2032   }
2033 
2034   if (isForcedVOP3()) {
2035     static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3};
2036     return makeArrayRef(Variants);
2037   }
2038 
2039   if (isForcedSDWA()) {
2040     static const unsigned Variants[] = {AMDGPUAsmVariants::SDWA,
2041                                         AMDGPUAsmVariants::SDWA9};
2042     return makeArrayRef(Variants);
2043   }
2044 
2045   if (isForcedDPP()) {
2046     static const unsigned Variants[] = {AMDGPUAsmVariants::DPP};
2047     return makeArrayRef(Variants);
2048   }
2049 
2050   static const unsigned Variants[] = {
2051     AMDGPUAsmVariants::DEFAULT, AMDGPUAsmVariants::VOP3,
2052     AMDGPUAsmVariants::SDWA, AMDGPUAsmVariants::SDWA9, AMDGPUAsmVariants::DPP
2053   };
2054 
2055   return makeArrayRef(Variants);
2056 }
2057 
2058 unsigned AMDGPUAsmParser::findImplicitSGPRReadInVOP(const MCInst &Inst) const {
2059   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
2060   const unsigned Num = Desc.getNumImplicitUses();
2061   for (unsigned i = 0; i < Num; ++i) {
2062     unsigned Reg = Desc.ImplicitUses[i];
2063     switch (Reg) {
2064     case AMDGPU::FLAT_SCR:
2065     case AMDGPU::VCC:
2066     case AMDGPU::M0:
2067       return Reg;
2068     default:
2069       break;
2070     }
2071   }
2072   return AMDGPU::NoRegister;
2073 }
2074 
2075 // NB: This code is correct only when used to check constant
2076 // bus limitations because GFX7 support no f16 inline constants.
2077 // Note that there are no cases when a GFX7 opcode violates
2078 // constant bus limitations due to the use of an f16 constant.
2079 bool AMDGPUAsmParser::isInlineConstant(const MCInst &Inst,
2080                                        unsigned OpIdx) const {
2081   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
2082 
2083   if (!AMDGPU::isSISrcOperand(Desc, OpIdx)) {
2084     return false;
2085   }
2086 
2087   const MCOperand &MO = Inst.getOperand(OpIdx);
2088 
2089   int64_t Val = MO.getImm();
2090   auto OpSize = AMDGPU::getOperandSize(Desc, OpIdx);
2091 
2092   switch (OpSize) { // expected operand size
2093   case 8:
2094     return AMDGPU::isInlinableLiteral64(Val, hasInv2PiInlineImm());
2095   case 4:
2096     return AMDGPU::isInlinableLiteral32(Val, hasInv2PiInlineImm());
2097   case 2: {
2098     const unsigned OperandType = Desc.OpInfo[OpIdx].OperandType;
2099     if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2INT16 ||
2100         OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2FP16) {
2101       return AMDGPU::isInlinableLiteralV216(Val, hasInv2PiInlineImm());
2102     } else {
2103       return AMDGPU::isInlinableLiteral16(Val, hasInv2PiInlineImm());
2104     }
2105   }
2106   default:
2107     llvm_unreachable("invalid operand size");
2108   }
2109 }
2110 
2111 bool AMDGPUAsmParser::usesConstantBus(const MCInst &Inst, unsigned OpIdx) {
2112   const MCOperand &MO = Inst.getOperand(OpIdx);
2113   if (MO.isImm()) {
2114     return !isInlineConstant(Inst, OpIdx);
2115   }
2116   return !MO.isReg() ||
2117          isSGPR(mc2PseudoReg(MO.getReg()), getContext().getRegisterInfo());
2118 }
2119 
2120 bool AMDGPUAsmParser::validateConstantBusLimitations(const MCInst &Inst) {
2121   const unsigned Opcode = Inst.getOpcode();
2122   const MCInstrDesc &Desc = MII.get(Opcode);
2123   unsigned ConstantBusUseCount = 0;
2124 
2125   if (Desc.TSFlags &
2126       (SIInstrFlags::VOPC |
2127        SIInstrFlags::VOP1 | SIInstrFlags::VOP2 |
2128        SIInstrFlags::VOP3 | SIInstrFlags::VOP3P |
2129        SIInstrFlags::SDWA)) {
2130     // Check special imm operands (used by madmk, etc)
2131     if (AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm) != -1) {
2132       ++ConstantBusUseCount;
2133     }
2134 
2135     unsigned SGPRUsed = findImplicitSGPRReadInVOP(Inst);
2136     if (SGPRUsed != AMDGPU::NoRegister) {
2137       ++ConstantBusUseCount;
2138     }
2139 
2140     const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
2141     const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
2142     const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
2143 
2144     const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx };
2145 
2146     for (int OpIdx : OpIndices) {
2147       if (OpIdx == -1) break;
2148 
2149       const MCOperand &MO = Inst.getOperand(OpIdx);
2150       if (usesConstantBus(Inst, OpIdx)) {
2151         if (MO.isReg()) {
2152           const unsigned Reg = mc2PseudoReg(MO.getReg());
2153           // Pairs of registers with a partial intersections like these
2154           //   s0, s[0:1]
2155           //   flat_scratch_lo, flat_scratch
2156           //   flat_scratch_lo, flat_scratch_hi
2157           // are theoretically valid but they are disabled anyway.
2158           // Note that this code mimics SIInstrInfo::verifyInstruction
2159           if (Reg != SGPRUsed) {
2160             ++ConstantBusUseCount;
2161           }
2162           SGPRUsed = Reg;
2163         } else { // Expression or a literal
2164           ++ConstantBusUseCount;
2165         }
2166       }
2167     }
2168   }
2169 
2170   return ConstantBusUseCount <= 1;
2171 }
2172 
2173 bool AMDGPUAsmParser::validateEarlyClobberLimitations(const MCInst &Inst) {
2174   const unsigned Opcode = Inst.getOpcode();
2175   const MCInstrDesc &Desc = MII.get(Opcode);
2176 
2177   const int DstIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vdst);
2178   if (DstIdx == -1 ||
2179       Desc.getOperandConstraint(DstIdx, MCOI::EARLY_CLOBBER) == -1) {
2180     return true;
2181   }
2182 
2183   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
2184 
2185   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
2186   const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
2187   const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
2188 
2189   assert(DstIdx != -1);
2190   const MCOperand &Dst = Inst.getOperand(DstIdx);
2191   assert(Dst.isReg());
2192   const unsigned DstReg = mc2PseudoReg(Dst.getReg());
2193 
2194   const int SrcIndices[] = { Src0Idx, Src1Idx, Src2Idx };
2195 
2196   for (int SrcIdx : SrcIndices) {
2197     if (SrcIdx == -1) break;
2198     const MCOperand &Src = Inst.getOperand(SrcIdx);
2199     if (Src.isReg()) {
2200       const unsigned SrcReg = mc2PseudoReg(Src.getReg());
2201       if (isRegIntersect(DstReg, SrcReg, TRI)) {
2202         return false;
2203       }
2204     }
2205   }
2206 
2207   return true;
2208 }
2209 
2210 bool AMDGPUAsmParser::validateIntClampSupported(const MCInst &Inst) {
2211 
2212   const unsigned Opc = Inst.getOpcode();
2213   const MCInstrDesc &Desc = MII.get(Opc);
2214 
2215   if ((Desc.TSFlags & SIInstrFlags::IntClamp) != 0 && !hasIntClamp()) {
2216     int ClampIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp);
2217     assert(ClampIdx != -1);
2218     return Inst.getOperand(ClampIdx).getImm() == 0;
2219   }
2220 
2221   return true;
2222 }
2223 
2224 bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst,
2225                                           const SMLoc &IDLoc) {
2226   if (!validateConstantBusLimitations(Inst)) {
2227     Error(IDLoc,
2228       "invalid operand (violates constant bus restrictions)");
2229     return false;
2230   }
2231   if (!validateEarlyClobberLimitations(Inst)) {
2232     Error(IDLoc,
2233       "destination must be different than all sources");
2234     return false;
2235   }
2236   if (!validateIntClampSupported(Inst)) {
2237     Error(IDLoc,
2238       "integer clamping is not supported on this GPU");
2239     return false;
2240   }
2241 
2242   return true;
2243 }
2244 
2245 bool AMDGPUAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
2246                                               OperandVector &Operands,
2247                                               MCStreamer &Out,
2248                                               uint64_t &ErrorInfo,
2249                                               bool MatchingInlineAsm) {
2250   MCInst Inst;
2251   unsigned Result = Match_Success;
2252   for (auto Variant : getMatchedVariants()) {
2253     uint64_t EI;
2254     auto R = MatchInstructionImpl(Operands, Inst, EI, MatchingInlineAsm,
2255                                   Variant);
2256     // We order match statuses from least to most specific. We use most specific
2257     // status as resulting
2258     // Match_MnemonicFail < Match_InvalidOperand < Match_MissingFeature < Match_PreferE32
2259     if ((R == Match_Success) ||
2260         (R == Match_PreferE32) ||
2261         (R == Match_MissingFeature && Result != Match_PreferE32) ||
2262         (R == Match_InvalidOperand && Result != Match_MissingFeature
2263                                    && Result != Match_PreferE32) ||
2264         (R == Match_MnemonicFail   && Result != Match_InvalidOperand
2265                                    && Result != Match_MissingFeature
2266                                    && Result != Match_PreferE32)) {
2267       Result = R;
2268       ErrorInfo = EI;
2269     }
2270     if (R == Match_Success)
2271       break;
2272   }
2273 
2274   switch (Result) {
2275   default: break;
2276   case Match_Success:
2277     if (!validateInstruction(Inst, IDLoc)) {
2278       return true;
2279     }
2280     Inst.setLoc(IDLoc);
2281     Out.EmitInstruction(Inst, getSTI());
2282     return false;
2283 
2284   case Match_MissingFeature:
2285     return Error(IDLoc, "instruction not supported on this GPU");
2286 
2287   case Match_MnemonicFail:
2288     return Error(IDLoc, "unrecognized instruction mnemonic");
2289 
2290   case Match_InvalidOperand: {
2291     SMLoc ErrorLoc = IDLoc;
2292     if (ErrorInfo != ~0ULL) {
2293       if (ErrorInfo >= Operands.size()) {
2294         return Error(IDLoc, "too few operands for instruction");
2295       }
2296       ErrorLoc = ((AMDGPUOperand &)*Operands[ErrorInfo]).getStartLoc();
2297       if (ErrorLoc == SMLoc())
2298         ErrorLoc = IDLoc;
2299     }
2300     return Error(ErrorLoc, "invalid operand for instruction");
2301   }
2302 
2303   case Match_PreferE32:
2304     return Error(IDLoc, "internal error: instruction without _e64 suffix "
2305                         "should be encoded as e32");
2306   }
2307   llvm_unreachable("Implement any new match types added!");
2308 }
2309 
2310 bool AMDGPUAsmParser::ParseAsAbsoluteExpression(uint32_t &Ret) {
2311   int64_t Tmp = -1;
2312   if (getLexer().isNot(AsmToken::Integer) && getLexer().isNot(AsmToken::Identifier)) {
2313     return true;
2314   }
2315   if (getParser().parseAbsoluteExpression(Tmp)) {
2316     return true;
2317   }
2318   Ret = static_cast<uint32_t>(Tmp);
2319   return false;
2320 }
2321 
2322 bool AMDGPUAsmParser::ParseDirectiveMajorMinor(uint32_t &Major,
2323                                                uint32_t &Minor) {
2324   if (ParseAsAbsoluteExpression(Major))
2325     return TokError("invalid major version");
2326 
2327   if (getLexer().isNot(AsmToken::Comma))
2328     return TokError("minor version number required, comma expected");
2329   Lex();
2330 
2331   if (ParseAsAbsoluteExpression(Minor))
2332     return TokError("invalid minor version");
2333 
2334   return false;
2335 }
2336 
2337 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectVersion() {
2338   uint32_t Major;
2339   uint32_t Minor;
2340 
2341   if (ParseDirectiveMajorMinor(Major, Minor))
2342     return true;
2343 
2344   getTargetStreamer().EmitDirectiveHSACodeObjectVersion(Major, Minor);
2345   return false;
2346 }
2347 
2348 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectISA() {
2349   uint32_t Major;
2350   uint32_t Minor;
2351   uint32_t Stepping;
2352   StringRef VendorName;
2353   StringRef ArchName;
2354 
2355   // If this directive has no arguments, then use the ISA version for the
2356   // targeted GPU.
2357   if (getLexer().is(AsmToken::EndOfStatement)) {
2358     AMDGPU::IsaInfo::IsaVersion ISA =
2359         AMDGPU::IsaInfo::getIsaVersion(getFeatureBits());
2360     getTargetStreamer().EmitDirectiveHSACodeObjectISA(ISA.Major, ISA.Minor,
2361                                                       ISA.Stepping,
2362                                                       "AMD", "AMDGPU");
2363     return false;
2364   }
2365 
2366   if (ParseDirectiveMajorMinor(Major, Minor))
2367     return true;
2368 
2369   if (getLexer().isNot(AsmToken::Comma))
2370     return TokError("stepping version number required, comma expected");
2371   Lex();
2372 
2373   if (ParseAsAbsoluteExpression(Stepping))
2374     return TokError("invalid stepping version");
2375 
2376   if (getLexer().isNot(AsmToken::Comma))
2377     return TokError("vendor name required, comma expected");
2378   Lex();
2379 
2380   if (getLexer().isNot(AsmToken::String))
2381     return TokError("invalid vendor name");
2382 
2383   VendorName = getLexer().getTok().getStringContents();
2384   Lex();
2385 
2386   if (getLexer().isNot(AsmToken::Comma))
2387     return TokError("arch name required, comma expected");
2388   Lex();
2389 
2390   if (getLexer().isNot(AsmToken::String))
2391     return TokError("invalid arch name");
2392 
2393   ArchName = getLexer().getTok().getStringContents();
2394   Lex();
2395 
2396   getTargetStreamer().EmitDirectiveHSACodeObjectISA(Major, Minor, Stepping,
2397                                                     VendorName, ArchName);
2398   return false;
2399 }
2400 
2401 bool AMDGPUAsmParser::ParseDirectiveCodeObjectMetadata() {
2402   std::string YamlString;
2403   raw_string_ostream YamlStream(YamlString);
2404 
2405   getLexer().setSkipSpace(false);
2406 
2407   bool FoundEnd = false;
2408   while (!getLexer().is(AsmToken::Eof)) {
2409     while (getLexer().is(AsmToken::Space)) {
2410       YamlStream << getLexer().getTok().getString();
2411       Lex();
2412     }
2413 
2414     if (getLexer().is(AsmToken::Identifier)) {
2415       StringRef ID = getLexer().getTok().getIdentifier();
2416       if (ID == AMDGPU::CodeObject::MetadataAssemblerDirectiveEnd) {
2417         Lex();
2418         FoundEnd = true;
2419         break;
2420       }
2421     }
2422 
2423     YamlStream << Parser.parseStringToEndOfStatement()
2424                << getContext().getAsmInfo()->getSeparatorString();
2425 
2426     Parser.eatToEndOfStatement();
2427   }
2428 
2429   getLexer().setSkipSpace(true);
2430 
2431   if (getLexer().is(AsmToken::Eof) && !FoundEnd) {
2432     return TokError(
2433         "expected directive .end_amdgpu_code_object_metadata not found");
2434   }
2435 
2436   YamlStream.flush();
2437 
2438   if (!getTargetStreamer().EmitCodeObjectMetadata(YamlString))
2439     return Error(getParser().getTok().getLoc(), "invalid code object metadata");
2440 
2441   return false;
2442 }
2443 
2444 bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID,
2445                                                amd_kernel_code_t &Header) {
2446   SmallString<40> ErrStr;
2447   raw_svector_ostream Err(ErrStr);
2448   if (!parseAmdKernelCodeField(ID, getParser(), Header, Err)) {
2449     return TokError(Err.str());
2450   }
2451   Lex();
2452   return false;
2453 }
2454 
2455 bool AMDGPUAsmParser::ParseDirectiveAMDKernelCodeT() {
2456   amd_kernel_code_t Header;
2457   AMDGPU::initDefaultAMDKernelCodeT(Header, getFeatureBits());
2458 
2459   while (true) {
2460     // Lex EndOfStatement.  This is in a while loop, because lexing a comment
2461     // will set the current token to EndOfStatement.
2462     while(getLexer().is(AsmToken::EndOfStatement))
2463       Lex();
2464 
2465     if (getLexer().isNot(AsmToken::Identifier))
2466       return TokError("expected value identifier or .end_amd_kernel_code_t");
2467 
2468     StringRef ID = getLexer().getTok().getIdentifier();
2469     Lex();
2470 
2471     if (ID == ".end_amd_kernel_code_t")
2472       break;
2473 
2474     if (ParseAMDKernelCodeTValue(ID, Header))
2475       return true;
2476   }
2477 
2478   getTargetStreamer().EmitAMDKernelCodeT(Header);
2479 
2480   return false;
2481 }
2482 
2483 bool AMDGPUAsmParser::ParseDirectiveAMDGPUHsaKernel() {
2484   if (getLexer().isNot(AsmToken::Identifier))
2485     return TokError("expected symbol name");
2486 
2487   StringRef KernelName = Parser.getTok().getString();
2488 
2489   getTargetStreamer().EmitAMDGPUSymbolType(KernelName,
2490                                            ELF::STT_AMDGPU_HSA_KERNEL);
2491   Lex();
2492   KernelScope.initialize(getContext());
2493   return false;
2494 }
2495 
2496 bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) {
2497   StringRef IDVal = DirectiveID.getString();
2498 
2499   if (IDVal == ".hsa_code_object_version")
2500     return ParseDirectiveHSACodeObjectVersion();
2501 
2502   if (IDVal == ".hsa_code_object_isa")
2503     return ParseDirectiveHSACodeObjectISA();
2504 
2505   if (IDVal == AMDGPU::CodeObject::MetadataAssemblerDirectiveBegin)
2506     return ParseDirectiveCodeObjectMetadata();
2507 
2508   if (IDVal == ".amd_kernel_code_t")
2509     return ParseDirectiveAMDKernelCodeT();
2510 
2511   if (IDVal == ".amdgpu_hsa_kernel")
2512     return ParseDirectiveAMDGPUHsaKernel();
2513 
2514   return true;
2515 }
2516 
2517 bool AMDGPUAsmParser::subtargetHasRegister(const MCRegisterInfo &MRI,
2518                                            unsigned RegNo) const {
2519   if (isCI())
2520     return true;
2521 
2522   if (isSI()) {
2523     // No flat_scr
2524     switch (RegNo) {
2525     case AMDGPU::FLAT_SCR:
2526     case AMDGPU::FLAT_SCR_LO:
2527     case AMDGPU::FLAT_SCR_HI:
2528       return false;
2529     default:
2530       return true;
2531     }
2532   }
2533 
2534   // VI only has 102 SGPRs, so make sure we aren't trying to use the 2 more that
2535   // SI/CI have.
2536   for (MCRegAliasIterator R(AMDGPU::SGPR102_SGPR103, &MRI, true);
2537        R.isValid(); ++R) {
2538     if (*R == RegNo)
2539       return false;
2540   }
2541 
2542   return true;
2543 }
2544 
2545 OperandMatchResultTy
2546 AMDGPUAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic) {
2547   // Try to parse with a custom parser
2548   OperandMatchResultTy ResTy = MatchOperandParserImpl(Operands, Mnemonic);
2549 
2550   // If we successfully parsed the operand or if there as an error parsing,
2551   // we are done.
2552   //
2553   // If we are parsing after we reach EndOfStatement then this means we
2554   // are appending default values to the Operands list.  This is only done
2555   // by custom parser, so we shouldn't continue on to the generic parsing.
2556   if (ResTy == MatchOperand_Success || ResTy == MatchOperand_ParseFail ||
2557       getLexer().is(AsmToken::EndOfStatement))
2558     return ResTy;
2559 
2560   ResTy = parseRegOrImm(Operands);
2561 
2562   if (ResTy == MatchOperand_Success)
2563     return ResTy;
2564 
2565   const auto &Tok = Parser.getTok();
2566   SMLoc S = Tok.getLoc();
2567 
2568   const MCExpr *Expr = nullptr;
2569   if (!Parser.parseExpression(Expr)) {
2570     Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S));
2571     return MatchOperand_Success;
2572   }
2573 
2574   // Possibly this is an instruction flag like 'gds'.
2575   if (Tok.getKind() == AsmToken::Identifier) {
2576     Operands.push_back(AMDGPUOperand::CreateToken(this, Tok.getString(), S));
2577     Parser.Lex();
2578     return MatchOperand_Success;
2579   }
2580 
2581   return MatchOperand_NoMatch;
2582 }
2583 
2584 StringRef AMDGPUAsmParser::parseMnemonicSuffix(StringRef Name) {
2585   // Clear any forced encodings from the previous instruction.
2586   setForcedEncodingSize(0);
2587   setForcedDPP(false);
2588   setForcedSDWA(false);
2589 
2590   if (Name.endswith("_e64")) {
2591     setForcedEncodingSize(64);
2592     return Name.substr(0, Name.size() - 4);
2593   } else if (Name.endswith("_e32")) {
2594     setForcedEncodingSize(32);
2595     return Name.substr(0, Name.size() - 4);
2596   } else if (Name.endswith("_dpp")) {
2597     setForcedDPP(true);
2598     return Name.substr(0, Name.size() - 4);
2599   } else if (Name.endswith("_sdwa")) {
2600     setForcedSDWA(true);
2601     return Name.substr(0, Name.size() - 5);
2602   }
2603   return Name;
2604 }
2605 
2606 bool AMDGPUAsmParser::ParseInstruction(ParseInstructionInfo &Info,
2607                                        StringRef Name,
2608                                        SMLoc NameLoc, OperandVector &Operands) {
2609   // Add the instruction mnemonic
2610   Name = parseMnemonicSuffix(Name);
2611   Operands.push_back(AMDGPUOperand::CreateToken(this, Name, NameLoc));
2612 
2613   while (!getLexer().is(AsmToken::EndOfStatement)) {
2614     OperandMatchResultTy Res = parseOperand(Operands, Name);
2615 
2616     // Eat the comma or space if there is one.
2617     if (getLexer().is(AsmToken::Comma))
2618       Parser.Lex();
2619 
2620     switch (Res) {
2621       case MatchOperand_Success: break;
2622       case MatchOperand_ParseFail:
2623         Error(getLexer().getLoc(), "failed parsing operand.");
2624         while (!getLexer().is(AsmToken::EndOfStatement)) {
2625           Parser.Lex();
2626         }
2627         return true;
2628       case MatchOperand_NoMatch:
2629         Error(getLexer().getLoc(), "not a valid operand.");
2630         while (!getLexer().is(AsmToken::EndOfStatement)) {
2631           Parser.Lex();
2632         }
2633         return true;
2634     }
2635   }
2636 
2637   return false;
2638 }
2639 
2640 //===----------------------------------------------------------------------===//
2641 // Utility functions
2642 //===----------------------------------------------------------------------===//
2643 
2644 OperandMatchResultTy
2645 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, int64_t &Int) {
2646   switch(getLexer().getKind()) {
2647     default: return MatchOperand_NoMatch;
2648     case AsmToken::Identifier: {
2649       StringRef Name = Parser.getTok().getString();
2650       if (!Name.equals(Prefix)) {
2651         return MatchOperand_NoMatch;
2652       }
2653 
2654       Parser.Lex();
2655       if (getLexer().isNot(AsmToken::Colon))
2656         return MatchOperand_ParseFail;
2657 
2658       Parser.Lex();
2659 
2660       bool IsMinus = false;
2661       if (getLexer().getKind() == AsmToken::Minus) {
2662         Parser.Lex();
2663         IsMinus = true;
2664       }
2665 
2666       if (getLexer().isNot(AsmToken::Integer))
2667         return MatchOperand_ParseFail;
2668 
2669       if (getParser().parseAbsoluteExpression(Int))
2670         return MatchOperand_ParseFail;
2671 
2672       if (IsMinus)
2673         Int = -Int;
2674       break;
2675     }
2676   }
2677   return MatchOperand_Success;
2678 }
2679 
2680 OperandMatchResultTy
2681 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
2682                                     AMDGPUOperand::ImmTy ImmTy,
2683                                     bool (*ConvertResult)(int64_t&)) {
2684   SMLoc S = Parser.getTok().getLoc();
2685   int64_t Value = 0;
2686 
2687   OperandMatchResultTy Res = parseIntWithPrefix(Prefix, Value);
2688   if (Res != MatchOperand_Success)
2689     return Res;
2690 
2691   if (ConvertResult && !ConvertResult(Value)) {
2692     return MatchOperand_ParseFail;
2693   }
2694 
2695   Operands.push_back(AMDGPUOperand::CreateImm(this, Value, S, ImmTy));
2696   return MatchOperand_Success;
2697 }
2698 
2699 OperandMatchResultTy AMDGPUAsmParser::parseOperandArrayWithPrefix(
2700   const char *Prefix,
2701   OperandVector &Operands,
2702   AMDGPUOperand::ImmTy ImmTy,
2703   bool (*ConvertResult)(int64_t&)) {
2704   StringRef Name = Parser.getTok().getString();
2705   if (!Name.equals(Prefix))
2706     return MatchOperand_NoMatch;
2707 
2708   Parser.Lex();
2709   if (getLexer().isNot(AsmToken::Colon))
2710     return MatchOperand_ParseFail;
2711 
2712   Parser.Lex();
2713   if (getLexer().isNot(AsmToken::LBrac))
2714     return MatchOperand_ParseFail;
2715   Parser.Lex();
2716 
2717   unsigned Val = 0;
2718   SMLoc S = Parser.getTok().getLoc();
2719 
2720   // FIXME: How to verify the number of elements matches the number of src
2721   // operands?
2722   for (int I = 0; I < 4; ++I) {
2723     if (I != 0) {
2724       if (getLexer().is(AsmToken::RBrac))
2725         break;
2726 
2727       if (getLexer().isNot(AsmToken::Comma))
2728         return MatchOperand_ParseFail;
2729       Parser.Lex();
2730     }
2731 
2732     if (getLexer().isNot(AsmToken::Integer))
2733       return MatchOperand_ParseFail;
2734 
2735     int64_t Op;
2736     if (getParser().parseAbsoluteExpression(Op))
2737       return MatchOperand_ParseFail;
2738 
2739     if (Op != 0 && Op != 1)
2740       return MatchOperand_ParseFail;
2741     Val |= (Op << I);
2742   }
2743 
2744   Parser.Lex();
2745   Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S, ImmTy));
2746   return MatchOperand_Success;
2747 }
2748 
2749 OperandMatchResultTy
2750 AMDGPUAsmParser::parseNamedBit(const char *Name, OperandVector &Operands,
2751                                AMDGPUOperand::ImmTy ImmTy) {
2752   int64_t Bit = 0;
2753   SMLoc S = Parser.getTok().getLoc();
2754 
2755   // We are at the end of the statement, and this is a default argument, so
2756   // use a default value.
2757   if (getLexer().isNot(AsmToken::EndOfStatement)) {
2758     switch(getLexer().getKind()) {
2759       case AsmToken::Identifier: {
2760         StringRef Tok = Parser.getTok().getString();
2761         if (Tok == Name) {
2762           Bit = 1;
2763           Parser.Lex();
2764         } else if (Tok.startswith("no") && Tok.endswith(Name)) {
2765           Bit = 0;
2766           Parser.Lex();
2767         } else {
2768           return MatchOperand_NoMatch;
2769         }
2770         break;
2771       }
2772       default:
2773         return MatchOperand_NoMatch;
2774     }
2775   }
2776 
2777   Operands.push_back(AMDGPUOperand::CreateImm(this, Bit, S, ImmTy));
2778   return MatchOperand_Success;
2779 }
2780 
2781 static void addOptionalImmOperand(
2782   MCInst& Inst, const OperandVector& Operands,
2783   AMDGPUAsmParser::OptionalImmIndexMap& OptionalIdx,
2784   AMDGPUOperand::ImmTy ImmT,
2785   int64_t Default = 0) {
2786   auto i = OptionalIdx.find(ImmT);
2787   if (i != OptionalIdx.end()) {
2788     unsigned Idx = i->second;
2789     ((AMDGPUOperand &)*Operands[Idx]).addImmOperands(Inst, 1);
2790   } else {
2791     Inst.addOperand(MCOperand::createImm(Default));
2792   }
2793 }
2794 
2795 OperandMatchResultTy
2796 AMDGPUAsmParser::parseStringWithPrefix(StringRef Prefix, StringRef &Value) {
2797   if (getLexer().isNot(AsmToken::Identifier)) {
2798     return MatchOperand_NoMatch;
2799   }
2800   StringRef Tok = Parser.getTok().getString();
2801   if (Tok != Prefix) {
2802     return MatchOperand_NoMatch;
2803   }
2804 
2805   Parser.Lex();
2806   if (getLexer().isNot(AsmToken::Colon)) {
2807     return MatchOperand_ParseFail;
2808   }
2809 
2810   Parser.Lex();
2811   if (getLexer().isNot(AsmToken::Identifier)) {
2812     return MatchOperand_ParseFail;
2813   }
2814 
2815   Value = Parser.getTok().getString();
2816   return MatchOperand_Success;
2817 }
2818 
2819 //===----------------------------------------------------------------------===//
2820 // ds
2821 //===----------------------------------------------------------------------===//
2822 
2823 void AMDGPUAsmParser::cvtDSOffset01(MCInst &Inst,
2824                                     const OperandVector &Operands) {
2825   OptionalImmIndexMap OptionalIdx;
2826 
2827   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
2828     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
2829 
2830     // Add the register arguments
2831     if (Op.isReg()) {
2832       Op.addRegOperands(Inst, 1);
2833       continue;
2834     }
2835 
2836     // Handle optional arguments
2837     OptionalIdx[Op.getImmTy()] = i;
2838   }
2839 
2840   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset0);
2841   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset1);
2842   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS);
2843 
2844   Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0
2845 }
2846 
2847 void AMDGPUAsmParser::cvtDSImpl(MCInst &Inst, const OperandVector &Operands,
2848                                 bool IsGdsHardcoded) {
2849   OptionalImmIndexMap OptionalIdx;
2850 
2851   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
2852     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
2853 
2854     // Add the register arguments
2855     if (Op.isReg()) {
2856       Op.addRegOperands(Inst, 1);
2857       continue;
2858     }
2859 
2860     if (Op.isToken() && Op.getToken() == "gds") {
2861       IsGdsHardcoded = true;
2862       continue;
2863     }
2864 
2865     // Handle optional arguments
2866     OptionalIdx[Op.getImmTy()] = i;
2867   }
2868 
2869   AMDGPUOperand::ImmTy OffsetType =
2870     (Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_si ||
2871      Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_vi) ? AMDGPUOperand::ImmTySwizzle :
2872                                                       AMDGPUOperand::ImmTyOffset;
2873 
2874   addOptionalImmOperand(Inst, Operands, OptionalIdx, OffsetType);
2875 
2876   if (!IsGdsHardcoded) {
2877     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS);
2878   }
2879   Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0
2880 }
2881 
2882 void AMDGPUAsmParser::cvtExp(MCInst &Inst, const OperandVector &Operands) {
2883   OptionalImmIndexMap OptionalIdx;
2884 
2885   unsigned OperandIdx[4];
2886   unsigned EnMask = 0;
2887   int SrcIdx = 0;
2888 
2889   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
2890     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
2891 
2892     // Add the register arguments
2893     if (Op.isReg()) {
2894       assert(SrcIdx < 4);
2895       OperandIdx[SrcIdx] = Inst.size();
2896       Op.addRegOperands(Inst, 1);
2897       ++SrcIdx;
2898       continue;
2899     }
2900 
2901     if (Op.isOff()) {
2902       assert(SrcIdx < 4);
2903       OperandIdx[SrcIdx] = Inst.size();
2904       Inst.addOperand(MCOperand::createReg(AMDGPU::NoRegister));
2905       ++SrcIdx;
2906       continue;
2907     }
2908 
2909     if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyExpTgt) {
2910       Op.addImmOperands(Inst, 1);
2911       continue;
2912     }
2913 
2914     if (Op.isToken() && Op.getToken() == "done")
2915       continue;
2916 
2917     // Handle optional arguments
2918     OptionalIdx[Op.getImmTy()] = i;
2919   }
2920 
2921   assert(SrcIdx == 4);
2922 
2923   bool Compr = false;
2924   if (OptionalIdx.find(AMDGPUOperand::ImmTyExpCompr) != OptionalIdx.end()) {
2925     Compr = true;
2926     Inst.getOperand(OperandIdx[1]) = Inst.getOperand(OperandIdx[2]);
2927     Inst.getOperand(OperandIdx[2]).setReg(AMDGPU::NoRegister);
2928     Inst.getOperand(OperandIdx[3]).setReg(AMDGPU::NoRegister);
2929   }
2930 
2931   for (auto i = 0; i < SrcIdx; ++i) {
2932     if (Inst.getOperand(OperandIdx[i]).getReg() != AMDGPU::NoRegister) {
2933       EnMask |= Compr? (0x3 << i * 2) : (0x1 << i);
2934     }
2935   }
2936 
2937   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpVM);
2938   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpCompr);
2939 
2940   Inst.addOperand(MCOperand::createImm(EnMask));
2941 }
2942 
2943 //===----------------------------------------------------------------------===//
2944 // s_waitcnt
2945 //===----------------------------------------------------------------------===//
2946 
2947 static bool
2948 encodeCnt(
2949   const AMDGPU::IsaInfo::IsaVersion ISA,
2950   int64_t &IntVal,
2951   int64_t CntVal,
2952   bool Saturate,
2953   unsigned (*encode)(const IsaInfo::IsaVersion &Version, unsigned, unsigned),
2954   unsigned (*decode)(const IsaInfo::IsaVersion &Version, unsigned))
2955 {
2956   bool Failed = false;
2957 
2958   IntVal = encode(ISA, IntVal, CntVal);
2959   if (CntVal != decode(ISA, IntVal)) {
2960     if (Saturate) {
2961       IntVal = encode(ISA, IntVal, -1);
2962     } else {
2963       Failed = true;
2964     }
2965   }
2966   return Failed;
2967 }
2968 
2969 bool AMDGPUAsmParser::parseCnt(int64_t &IntVal) {
2970   StringRef CntName = Parser.getTok().getString();
2971   int64_t CntVal;
2972 
2973   Parser.Lex();
2974   if (getLexer().isNot(AsmToken::LParen))
2975     return true;
2976 
2977   Parser.Lex();
2978   if (getLexer().isNot(AsmToken::Integer))
2979     return true;
2980 
2981   SMLoc ValLoc = Parser.getTok().getLoc();
2982   if (getParser().parseAbsoluteExpression(CntVal))
2983     return true;
2984 
2985   AMDGPU::IsaInfo::IsaVersion ISA =
2986       AMDGPU::IsaInfo::getIsaVersion(getFeatureBits());
2987 
2988   bool Failed = true;
2989   bool Sat = CntName.endswith("_sat");
2990 
2991   if (CntName == "vmcnt" || CntName == "vmcnt_sat") {
2992     Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeVmcnt, decodeVmcnt);
2993   } else if (CntName == "expcnt" || CntName == "expcnt_sat") {
2994     Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeExpcnt, decodeExpcnt);
2995   } else if (CntName == "lgkmcnt" || CntName == "lgkmcnt_sat") {
2996     Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeLgkmcnt, decodeLgkmcnt);
2997   }
2998 
2999   if (Failed) {
3000     Error(ValLoc, "too large value for " + CntName);
3001     return true;
3002   }
3003 
3004   if (getLexer().isNot(AsmToken::RParen)) {
3005     return true;
3006   }
3007 
3008   Parser.Lex();
3009   if (getLexer().is(AsmToken::Amp) || getLexer().is(AsmToken::Comma)) {
3010     const AsmToken NextToken = getLexer().peekTok();
3011     if (NextToken.is(AsmToken::Identifier)) {
3012       Parser.Lex();
3013     }
3014   }
3015 
3016   return false;
3017 }
3018 
3019 OperandMatchResultTy
3020 AMDGPUAsmParser::parseSWaitCntOps(OperandVector &Operands) {
3021   AMDGPU::IsaInfo::IsaVersion ISA =
3022       AMDGPU::IsaInfo::getIsaVersion(getFeatureBits());
3023   int64_t Waitcnt = getWaitcntBitMask(ISA);
3024   SMLoc S = Parser.getTok().getLoc();
3025 
3026   switch(getLexer().getKind()) {
3027     default: return MatchOperand_ParseFail;
3028     case AsmToken::Integer:
3029       // The operand can be an integer value.
3030       if (getParser().parseAbsoluteExpression(Waitcnt))
3031         return MatchOperand_ParseFail;
3032       break;
3033 
3034     case AsmToken::Identifier:
3035       do {
3036         if (parseCnt(Waitcnt))
3037           return MatchOperand_ParseFail;
3038       } while(getLexer().isNot(AsmToken::EndOfStatement));
3039       break;
3040   }
3041   Operands.push_back(AMDGPUOperand::CreateImm(this, Waitcnt, S));
3042   return MatchOperand_Success;
3043 }
3044 
3045 bool AMDGPUAsmParser::parseHwregConstruct(OperandInfoTy &HwReg, int64_t &Offset,
3046                                           int64_t &Width) {
3047   using namespace llvm::AMDGPU::Hwreg;
3048 
3049   if (Parser.getTok().getString() != "hwreg")
3050     return true;
3051   Parser.Lex();
3052 
3053   if (getLexer().isNot(AsmToken::LParen))
3054     return true;
3055   Parser.Lex();
3056 
3057   if (getLexer().is(AsmToken::Identifier)) {
3058     HwReg.IsSymbolic = true;
3059     HwReg.Id = ID_UNKNOWN_;
3060     const StringRef tok = Parser.getTok().getString();
3061     for (int i = ID_SYMBOLIC_FIRST_; i < ID_SYMBOLIC_LAST_; ++i) {
3062       if (tok == IdSymbolic[i]) {
3063         HwReg.Id = i;
3064         break;
3065       }
3066     }
3067     Parser.Lex();
3068   } else {
3069     HwReg.IsSymbolic = false;
3070     if (getLexer().isNot(AsmToken::Integer))
3071       return true;
3072     if (getParser().parseAbsoluteExpression(HwReg.Id))
3073       return true;
3074   }
3075 
3076   if (getLexer().is(AsmToken::RParen)) {
3077     Parser.Lex();
3078     return false;
3079   }
3080 
3081   // optional params
3082   if (getLexer().isNot(AsmToken::Comma))
3083     return true;
3084   Parser.Lex();
3085 
3086   if (getLexer().isNot(AsmToken::Integer))
3087     return true;
3088   if (getParser().parseAbsoluteExpression(Offset))
3089     return true;
3090 
3091   if (getLexer().isNot(AsmToken::Comma))
3092     return true;
3093   Parser.Lex();
3094 
3095   if (getLexer().isNot(AsmToken::Integer))
3096     return true;
3097   if (getParser().parseAbsoluteExpression(Width))
3098     return true;
3099 
3100   if (getLexer().isNot(AsmToken::RParen))
3101     return true;
3102   Parser.Lex();
3103 
3104   return false;
3105 }
3106 
3107 OperandMatchResultTy AMDGPUAsmParser::parseHwreg(OperandVector &Operands) {
3108   using namespace llvm::AMDGPU::Hwreg;
3109 
3110   int64_t Imm16Val = 0;
3111   SMLoc S = Parser.getTok().getLoc();
3112 
3113   switch(getLexer().getKind()) {
3114     default: return MatchOperand_NoMatch;
3115     case AsmToken::Integer:
3116       // The operand can be an integer value.
3117       if (getParser().parseAbsoluteExpression(Imm16Val))
3118         return MatchOperand_NoMatch;
3119       if (Imm16Val < 0 || !isUInt<16>(Imm16Val)) {
3120         Error(S, "invalid immediate: only 16-bit values are legal");
3121         // Do not return error code, but create an imm operand anyway and proceed
3122         // to the next operand, if any. That avoids unneccessary error messages.
3123       }
3124       break;
3125 
3126     case AsmToken::Identifier: {
3127         OperandInfoTy HwReg(ID_UNKNOWN_);
3128         int64_t Offset = OFFSET_DEFAULT_;
3129         int64_t Width = WIDTH_M1_DEFAULT_ + 1;
3130         if (parseHwregConstruct(HwReg, Offset, Width))
3131           return MatchOperand_ParseFail;
3132         if (HwReg.Id < 0 || !isUInt<ID_WIDTH_>(HwReg.Id)) {
3133           if (HwReg.IsSymbolic)
3134             Error(S, "invalid symbolic name of hardware register");
3135           else
3136             Error(S, "invalid code of hardware register: only 6-bit values are legal");
3137         }
3138         if (Offset < 0 || !isUInt<OFFSET_WIDTH_>(Offset))
3139           Error(S, "invalid bit offset: only 5-bit values are legal");
3140         if ((Width-1) < 0 || !isUInt<WIDTH_M1_WIDTH_>(Width-1))
3141           Error(S, "invalid bitfield width: only values from 1 to 32 are legal");
3142         Imm16Val = (HwReg.Id << ID_SHIFT_) | (Offset << OFFSET_SHIFT_) | ((Width-1) << WIDTH_M1_SHIFT_);
3143       }
3144       break;
3145   }
3146   Operands.push_back(AMDGPUOperand::CreateImm(this, Imm16Val, S, AMDGPUOperand::ImmTyHwreg));
3147   return MatchOperand_Success;
3148 }
3149 
3150 bool AMDGPUOperand::isSWaitCnt() const {
3151   return isImm();
3152 }
3153 
3154 bool AMDGPUOperand::isHwreg() const {
3155   return isImmTy(ImmTyHwreg);
3156 }
3157 
3158 bool AMDGPUAsmParser::parseSendMsgConstruct(OperandInfoTy &Msg, OperandInfoTy &Operation, int64_t &StreamId) {
3159   using namespace llvm::AMDGPU::SendMsg;
3160 
3161   if (Parser.getTok().getString() != "sendmsg")
3162     return true;
3163   Parser.Lex();
3164 
3165   if (getLexer().isNot(AsmToken::LParen))
3166     return true;
3167   Parser.Lex();
3168 
3169   if (getLexer().is(AsmToken::Identifier)) {
3170     Msg.IsSymbolic = true;
3171     Msg.Id = ID_UNKNOWN_;
3172     const std::string tok = Parser.getTok().getString();
3173     for (int i = ID_GAPS_FIRST_; i < ID_GAPS_LAST_; ++i) {
3174       switch(i) {
3175         default: continue; // Omit gaps.
3176         case ID_INTERRUPT: case ID_GS: case ID_GS_DONE:  case ID_SYSMSG: break;
3177       }
3178       if (tok == IdSymbolic[i]) {
3179         Msg.Id = i;
3180         break;
3181       }
3182     }
3183     Parser.Lex();
3184   } else {
3185     Msg.IsSymbolic = false;
3186     if (getLexer().isNot(AsmToken::Integer))
3187       return true;
3188     if (getParser().parseAbsoluteExpression(Msg.Id))
3189       return true;
3190     if (getLexer().is(AsmToken::Integer))
3191       if (getParser().parseAbsoluteExpression(Msg.Id))
3192         Msg.Id = ID_UNKNOWN_;
3193   }
3194   if (Msg.Id == ID_UNKNOWN_) // Don't know how to parse the rest.
3195     return false;
3196 
3197   if (!(Msg.Id == ID_GS || Msg.Id == ID_GS_DONE || Msg.Id == ID_SYSMSG)) {
3198     if (getLexer().isNot(AsmToken::RParen))
3199       return true;
3200     Parser.Lex();
3201     return false;
3202   }
3203 
3204   if (getLexer().isNot(AsmToken::Comma))
3205     return true;
3206   Parser.Lex();
3207 
3208   assert(Msg.Id == ID_GS || Msg.Id == ID_GS_DONE || Msg.Id == ID_SYSMSG);
3209   Operation.Id = ID_UNKNOWN_;
3210   if (getLexer().is(AsmToken::Identifier)) {
3211     Operation.IsSymbolic = true;
3212     const char* const *S = (Msg.Id == ID_SYSMSG) ? OpSysSymbolic : OpGsSymbolic;
3213     const int F = (Msg.Id == ID_SYSMSG) ? OP_SYS_FIRST_ : OP_GS_FIRST_;
3214     const int L = (Msg.Id == ID_SYSMSG) ? OP_SYS_LAST_ : OP_GS_LAST_;
3215     const StringRef Tok = Parser.getTok().getString();
3216     for (int i = F; i < L; ++i) {
3217       if (Tok == S[i]) {
3218         Operation.Id = i;
3219         break;
3220       }
3221     }
3222     Parser.Lex();
3223   } else {
3224     Operation.IsSymbolic = false;
3225     if (getLexer().isNot(AsmToken::Integer))
3226       return true;
3227     if (getParser().parseAbsoluteExpression(Operation.Id))
3228       return true;
3229   }
3230 
3231   if ((Msg.Id == ID_GS || Msg.Id == ID_GS_DONE) && Operation.Id != OP_GS_NOP) {
3232     // Stream id is optional.
3233     if (getLexer().is(AsmToken::RParen)) {
3234       Parser.Lex();
3235       return false;
3236     }
3237 
3238     if (getLexer().isNot(AsmToken::Comma))
3239       return true;
3240     Parser.Lex();
3241 
3242     if (getLexer().isNot(AsmToken::Integer))
3243       return true;
3244     if (getParser().parseAbsoluteExpression(StreamId))
3245       return true;
3246   }
3247 
3248   if (getLexer().isNot(AsmToken::RParen))
3249     return true;
3250   Parser.Lex();
3251   return false;
3252 }
3253 
3254 OperandMatchResultTy AMDGPUAsmParser::parseInterpSlot(OperandVector &Operands) {
3255   if (getLexer().getKind() != AsmToken::Identifier)
3256     return MatchOperand_NoMatch;
3257 
3258   StringRef Str = Parser.getTok().getString();
3259   int Slot = StringSwitch<int>(Str)
3260     .Case("p10", 0)
3261     .Case("p20", 1)
3262     .Case("p0", 2)
3263     .Default(-1);
3264 
3265   SMLoc S = Parser.getTok().getLoc();
3266   if (Slot == -1)
3267     return MatchOperand_ParseFail;
3268 
3269   Parser.Lex();
3270   Operands.push_back(AMDGPUOperand::CreateImm(this, Slot, S,
3271                                               AMDGPUOperand::ImmTyInterpSlot));
3272   return MatchOperand_Success;
3273 }
3274 
3275 OperandMatchResultTy AMDGPUAsmParser::parseInterpAttr(OperandVector &Operands) {
3276   if (getLexer().getKind() != AsmToken::Identifier)
3277     return MatchOperand_NoMatch;
3278 
3279   StringRef Str = Parser.getTok().getString();
3280   if (!Str.startswith("attr"))
3281     return MatchOperand_NoMatch;
3282 
3283   StringRef Chan = Str.take_back(2);
3284   int AttrChan = StringSwitch<int>(Chan)
3285     .Case(".x", 0)
3286     .Case(".y", 1)
3287     .Case(".z", 2)
3288     .Case(".w", 3)
3289     .Default(-1);
3290   if (AttrChan == -1)
3291     return MatchOperand_ParseFail;
3292 
3293   Str = Str.drop_back(2).drop_front(4);
3294 
3295   uint8_t Attr;
3296   if (Str.getAsInteger(10, Attr))
3297     return MatchOperand_ParseFail;
3298 
3299   SMLoc S = Parser.getTok().getLoc();
3300   Parser.Lex();
3301   if (Attr > 63) {
3302     Error(S, "out of bounds attr");
3303     return MatchOperand_Success;
3304   }
3305 
3306   SMLoc SChan = SMLoc::getFromPointer(Chan.data());
3307 
3308   Operands.push_back(AMDGPUOperand::CreateImm(this, Attr, S,
3309                                               AMDGPUOperand::ImmTyInterpAttr));
3310   Operands.push_back(AMDGPUOperand::CreateImm(this, AttrChan, SChan,
3311                                               AMDGPUOperand::ImmTyAttrChan));
3312   return MatchOperand_Success;
3313 }
3314 
3315 void AMDGPUAsmParser::errorExpTgt() {
3316   Error(Parser.getTok().getLoc(), "invalid exp target");
3317 }
3318 
3319 OperandMatchResultTy AMDGPUAsmParser::parseExpTgtImpl(StringRef Str,
3320                                                       uint8_t &Val) {
3321   if (Str == "null") {
3322     Val = 9;
3323     return MatchOperand_Success;
3324   }
3325 
3326   if (Str.startswith("mrt")) {
3327     Str = Str.drop_front(3);
3328     if (Str == "z") { // == mrtz
3329       Val = 8;
3330       return MatchOperand_Success;
3331     }
3332 
3333     if (Str.getAsInteger(10, Val))
3334       return MatchOperand_ParseFail;
3335 
3336     if (Val > 7)
3337       errorExpTgt();
3338 
3339     return MatchOperand_Success;
3340   }
3341 
3342   if (Str.startswith("pos")) {
3343     Str = Str.drop_front(3);
3344     if (Str.getAsInteger(10, Val))
3345       return MatchOperand_ParseFail;
3346 
3347     if (Val > 3)
3348       errorExpTgt();
3349 
3350     Val += 12;
3351     return MatchOperand_Success;
3352   }
3353 
3354   if (Str.startswith("param")) {
3355     Str = Str.drop_front(5);
3356     if (Str.getAsInteger(10, Val))
3357       return MatchOperand_ParseFail;
3358 
3359     if (Val >= 32)
3360       errorExpTgt();
3361 
3362     Val += 32;
3363     return MatchOperand_Success;
3364   }
3365 
3366   if (Str.startswith("invalid_target_")) {
3367     Str = Str.drop_front(15);
3368     if (Str.getAsInteger(10, Val))
3369       return MatchOperand_ParseFail;
3370 
3371     errorExpTgt();
3372     return MatchOperand_Success;
3373   }
3374 
3375   return MatchOperand_NoMatch;
3376 }
3377 
3378 OperandMatchResultTy AMDGPUAsmParser::parseExpTgt(OperandVector &Operands) {
3379   uint8_t Val;
3380   StringRef Str = Parser.getTok().getString();
3381 
3382   auto Res = parseExpTgtImpl(Str, Val);
3383   if (Res != MatchOperand_Success)
3384     return Res;
3385 
3386   SMLoc S = Parser.getTok().getLoc();
3387   Parser.Lex();
3388 
3389   Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S,
3390                                               AMDGPUOperand::ImmTyExpTgt));
3391   return MatchOperand_Success;
3392 }
3393 
3394 OperandMatchResultTy
3395 AMDGPUAsmParser::parseSendMsgOp(OperandVector &Operands) {
3396   using namespace llvm::AMDGPU::SendMsg;
3397 
3398   int64_t Imm16Val = 0;
3399   SMLoc S = Parser.getTok().getLoc();
3400 
3401   switch(getLexer().getKind()) {
3402   default:
3403     return MatchOperand_NoMatch;
3404   case AsmToken::Integer:
3405     // The operand can be an integer value.
3406     if (getParser().parseAbsoluteExpression(Imm16Val))
3407       return MatchOperand_NoMatch;
3408     if (Imm16Val < 0 || !isUInt<16>(Imm16Val)) {
3409       Error(S, "invalid immediate: only 16-bit values are legal");
3410       // Do not return error code, but create an imm operand anyway and proceed
3411       // to the next operand, if any. That avoids unneccessary error messages.
3412     }
3413     break;
3414   case AsmToken::Identifier: {
3415       OperandInfoTy Msg(ID_UNKNOWN_);
3416       OperandInfoTy Operation(OP_UNKNOWN_);
3417       int64_t StreamId = STREAM_ID_DEFAULT_;
3418       if (parseSendMsgConstruct(Msg, Operation, StreamId))
3419         return MatchOperand_ParseFail;
3420       do {
3421         // Validate and encode message ID.
3422         if (! ((ID_INTERRUPT <= Msg.Id && Msg.Id <= ID_GS_DONE)
3423                 || Msg.Id == ID_SYSMSG)) {
3424           if (Msg.IsSymbolic)
3425             Error(S, "invalid/unsupported symbolic name of message");
3426           else
3427             Error(S, "invalid/unsupported code of message");
3428           break;
3429         }
3430         Imm16Val = (Msg.Id << ID_SHIFT_);
3431         // Validate and encode operation ID.
3432         if (Msg.Id == ID_GS || Msg.Id == ID_GS_DONE) {
3433           if (! (OP_GS_FIRST_ <= Operation.Id && Operation.Id < OP_GS_LAST_)) {
3434             if (Operation.IsSymbolic)
3435               Error(S, "invalid symbolic name of GS_OP");
3436             else
3437               Error(S, "invalid code of GS_OP: only 2-bit values are legal");
3438             break;
3439           }
3440           if (Operation.Id == OP_GS_NOP
3441               && Msg.Id != ID_GS_DONE) {
3442             Error(S, "invalid GS_OP: NOP is for GS_DONE only");
3443             break;
3444           }
3445           Imm16Val |= (Operation.Id << OP_SHIFT_);
3446         }
3447         if (Msg.Id == ID_SYSMSG) {
3448           if (! (OP_SYS_FIRST_ <= Operation.Id && Operation.Id < OP_SYS_LAST_)) {
3449             if (Operation.IsSymbolic)
3450               Error(S, "invalid/unsupported symbolic name of SYSMSG_OP");
3451             else
3452               Error(S, "invalid/unsupported code of SYSMSG_OP");
3453             break;
3454           }
3455           Imm16Val |= (Operation.Id << OP_SHIFT_);
3456         }
3457         // Validate and encode stream ID.
3458         if ((Msg.Id == ID_GS || Msg.Id == ID_GS_DONE) && Operation.Id != OP_GS_NOP) {
3459           if (! (STREAM_ID_FIRST_ <= StreamId && StreamId < STREAM_ID_LAST_)) {
3460             Error(S, "invalid stream id: only 2-bit values are legal");
3461             break;
3462           }
3463           Imm16Val |= (StreamId << STREAM_ID_SHIFT_);
3464         }
3465       } while (false);
3466     }
3467     break;
3468   }
3469   Operands.push_back(AMDGPUOperand::CreateImm(this, Imm16Val, S, AMDGPUOperand::ImmTySendMsg));
3470   return MatchOperand_Success;
3471 }
3472 
3473 bool AMDGPUOperand::isSendMsg() const {
3474   return isImmTy(ImmTySendMsg);
3475 }
3476 
3477 //===----------------------------------------------------------------------===//
3478 // parser helpers
3479 //===----------------------------------------------------------------------===//
3480 
3481 bool
3482 AMDGPUAsmParser::trySkipId(const StringRef Id) {
3483   if (getLexer().getKind() == AsmToken::Identifier &&
3484       Parser.getTok().getString() == Id) {
3485     Parser.Lex();
3486     return true;
3487   }
3488   return false;
3489 }
3490 
3491 bool
3492 AMDGPUAsmParser::trySkipToken(const AsmToken::TokenKind Kind) {
3493   if (getLexer().getKind() == Kind) {
3494     Parser.Lex();
3495     return true;
3496   }
3497   return false;
3498 }
3499 
3500 bool
3501 AMDGPUAsmParser::skipToken(const AsmToken::TokenKind Kind,
3502                            const StringRef ErrMsg) {
3503   if (!trySkipToken(Kind)) {
3504     Error(Parser.getTok().getLoc(), ErrMsg);
3505     return false;
3506   }
3507   return true;
3508 }
3509 
3510 bool
3511 AMDGPUAsmParser::parseExpr(int64_t &Imm) {
3512   return !getParser().parseAbsoluteExpression(Imm);
3513 }
3514 
3515 bool
3516 AMDGPUAsmParser::parseString(StringRef &Val, const StringRef ErrMsg) {
3517   SMLoc S = Parser.getTok().getLoc();
3518   if (getLexer().getKind() == AsmToken::String) {
3519     Val = Parser.getTok().getStringContents();
3520     Parser.Lex();
3521     return true;
3522   } else {
3523     Error(S, ErrMsg);
3524     return false;
3525   }
3526 }
3527 
3528 //===----------------------------------------------------------------------===//
3529 // swizzle
3530 //===----------------------------------------------------------------------===//
3531 
3532 LLVM_READNONE
3533 static unsigned
3534 encodeBitmaskPerm(const unsigned AndMask,
3535                   const unsigned OrMask,
3536                   const unsigned XorMask) {
3537   using namespace llvm::AMDGPU::Swizzle;
3538 
3539   return BITMASK_PERM_ENC |
3540          (AndMask << BITMASK_AND_SHIFT) |
3541          (OrMask  << BITMASK_OR_SHIFT)  |
3542          (XorMask << BITMASK_XOR_SHIFT);
3543 }
3544 
3545 bool
3546 AMDGPUAsmParser::parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
3547                                       const unsigned MinVal,
3548                                       const unsigned MaxVal,
3549                                       const StringRef ErrMsg) {
3550   for (unsigned i = 0; i < OpNum; ++i) {
3551     if (!skipToken(AsmToken::Comma, "expected a comma")){
3552       return false;
3553     }
3554     SMLoc ExprLoc = Parser.getTok().getLoc();
3555     if (!parseExpr(Op[i])) {
3556       return false;
3557     }
3558     if (Op[i] < MinVal || Op[i] > MaxVal) {
3559       Error(ExprLoc, ErrMsg);
3560       return false;
3561     }
3562   }
3563 
3564   return true;
3565 }
3566 
3567 bool
3568 AMDGPUAsmParser::parseSwizzleQuadPerm(int64_t &Imm) {
3569   using namespace llvm::AMDGPU::Swizzle;
3570 
3571   int64_t Lane[LANE_NUM];
3572   if (parseSwizzleOperands(LANE_NUM, Lane, 0, LANE_MAX,
3573                            "expected a 2-bit lane id")) {
3574     Imm = QUAD_PERM_ENC;
3575     for (auto i = 0; i < LANE_NUM; ++i) {
3576       Imm |= Lane[i] << (LANE_SHIFT * i);
3577     }
3578     return true;
3579   }
3580   return false;
3581 }
3582 
3583 bool
3584 AMDGPUAsmParser::parseSwizzleBroadcast(int64_t &Imm) {
3585   using namespace llvm::AMDGPU::Swizzle;
3586 
3587   SMLoc S = Parser.getTok().getLoc();
3588   int64_t GroupSize;
3589   int64_t LaneIdx;
3590 
3591   if (!parseSwizzleOperands(1, &GroupSize,
3592                             2, 32,
3593                             "group size must be in the interval [2,32]")) {
3594     return false;
3595   }
3596   if (!isPowerOf2_64(GroupSize)) {
3597     Error(S, "group size must be a power of two");
3598     return false;
3599   }
3600   if (parseSwizzleOperands(1, &LaneIdx,
3601                            0, GroupSize - 1,
3602                            "lane id must be in the interval [0,group size - 1]")) {
3603     Imm = encodeBitmaskPerm(BITMASK_MAX - GroupSize + 1, LaneIdx, 0);
3604     return true;
3605   }
3606   return false;
3607 }
3608 
3609 bool
3610 AMDGPUAsmParser::parseSwizzleReverse(int64_t &Imm) {
3611   using namespace llvm::AMDGPU::Swizzle;
3612 
3613   SMLoc S = Parser.getTok().getLoc();
3614   int64_t GroupSize;
3615 
3616   if (!parseSwizzleOperands(1, &GroupSize,
3617       2, 32, "group size must be in the interval [2,32]")) {
3618     return false;
3619   }
3620   if (!isPowerOf2_64(GroupSize)) {
3621     Error(S, "group size must be a power of two");
3622     return false;
3623   }
3624 
3625   Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize - 1);
3626   return true;
3627 }
3628 
3629 bool
3630 AMDGPUAsmParser::parseSwizzleSwap(int64_t &Imm) {
3631   using namespace llvm::AMDGPU::Swizzle;
3632 
3633   SMLoc S = Parser.getTok().getLoc();
3634   int64_t GroupSize;
3635 
3636   if (!parseSwizzleOperands(1, &GroupSize,
3637       1, 16, "group size must be in the interval [1,16]")) {
3638     return false;
3639   }
3640   if (!isPowerOf2_64(GroupSize)) {
3641     Error(S, "group size must be a power of two");
3642     return false;
3643   }
3644 
3645   Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize);
3646   return true;
3647 }
3648 
3649 bool
3650 AMDGPUAsmParser::parseSwizzleBitmaskPerm(int64_t &Imm) {
3651   using namespace llvm::AMDGPU::Swizzle;
3652 
3653   if (!skipToken(AsmToken::Comma, "expected a comma")) {
3654     return false;
3655   }
3656 
3657   StringRef Ctl;
3658   SMLoc StrLoc = Parser.getTok().getLoc();
3659   if (!parseString(Ctl)) {
3660     return false;
3661   }
3662   if (Ctl.size() != BITMASK_WIDTH) {
3663     Error(StrLoc, "expected a 5-character mask");
3664     return false;
3665   }
3666 
3667   unsigned AndMask = 0;
3668   unsigned OrMask = 0;
3669   unsigned XorMask = 0;
3670 
3671   for (size_t i = 0; i < Ctl.size(); ++i) {
3672     unsigned Mask = 1 << (BITMASK_WIDTH - 1 - i);
3673     switch(Ctl[i]) {
3674     default:
3675       Error(StrLoc, "invalid mask");
3676       return false;
3677     case '0':
3678       break;
3679     case '1':
3680       OrMask |= Mask;
3681       break;
3682     case 'p':
3683       AndMask |= Mask;
3684       break;
3685     case 'i':
3686       AndMask |= Mask;
3687       XorMask |= Mask;
3688       break;
3689     }
3690   }
3691 
3692   Imm = encodeBitmaskPerm(AndMask, OrMask, XorMask);
3693   return true;
3694 }
3695 
3696 bool
3697 AMDGPUAsmParser::parseSwizzleOffset(int64_t &Imm) {
3698 
3699   SMLoc OffsetLoc = Parser.getTok().getLoc();
3700 
3701   if (!parseExpr(Imm)) {
3702     return false;
3703   }
3704   if (!isUInt<16>(Imm)) {
3705     Error(OffsetLoc, "expected a 16-bit offset");
3706     return false;
3707   }
3708   return true;
3709 }
3710 
3711 bool
3712 AMDGPUAsmParser::parseSwizzleMacro(int64_t &Imm) {
3713   using namespace llvm::AMDGPU::Swizzle;
3714 
3715   if (skipToken(AsmToken::LParen, "expected a left parentheses")) {
3716 
3717     SMLoc ModeLoc = Parser.getTok().getLoc();
3718     bool Ok = false;
3719 
3720     if (trySkipId(IdSymbolic[ID_QUAD_PERM])) {
3721       Ok = parseSwizzleQuadPerm(Imm);
3722     } else if (trySkipId(IdSymbolic[ID_BITMASK_PERM])) {
3723       Ok = parseSwizzleBitmaskPerm(Imm);
3724     } else if (trySkipId(IdSymbolic[ID_BROADCAST])) {
3725       Ok = parseSwizzleBroadcast(Imm);
3726     } else if (trySkipId(IdSymbolic[ID_SWAP])) {
3727       Ok = parseSwizzleSwap(Imm);
3728     } else if (trySkipId(IdSymbolic[ID_REVERSE])) {
3729       Ok = parseSwizzleReverse(Imm);
3730     } else {
3731       Error(ModeLoc, "expected a swizzle mode");
3732     }
3733 
3734     return Ok && skipToken(AsmToken::RParen, "expected a closing parentheses");
3735   }
3736 
3737   return false;
3738 }
3739 
3740 OperandMatchResultTy
3741 AMDGPUAsmParser::parseSwizzleOp(OperandVector &Operands) {
3742   SMLoc S = Parser.getTok().getLoc();
3743   int64_t Imm = 0;
3744 
3745   if (trySkipId("offset")) {
3746 
3747     bool Ok = false;
3748     if (skipToken(AsmToken::Colon, "expected a colon")) {
3749       if (trySkipId("swizzle")) {
3750         Ok = parseSwizzleMacro(Imm);
3751       } else {
3752         Ok = parseSwizzleOffset(Imm);
3753       }
3754     }
3755 
3756     Operands.push_back(AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTySwizzle));
3757 
3758     return Ok? MatchOperand_Success : MatchOperand_ParseFail;
3759   } else {
3760     return MatchOperand_NoMatch;
3761   }
3762 }
3763 
3764 bool
3765 AMDGPUOperand::isSwizzle() const {
3766   return isImmTy(ImmTySwizzle);
3767 }
3768 
3769 //===----------------------------------------------------------------------===//
3770 // sopp branch targets
3771 //===----------------------------------------------------------------------===//
3772 
3773 OperandMatchResultTy
3774 AMDGPUAsmParser::parseSOppBrTarget(OperandVector &Operands) {
3775   SMLoc S = Parser.getTok().getLoc();
3776 
3777   switch (getLexer().getKind()) {
3778     default: return MatchOperand_ParseFail;
3779     case AsmToken::Integer: {
3780       int64_t Imm;
3781       if (getParser().parseAbsoluteExpression(Imm))
3782         return MatchOperand_ParseFail;
3783       Operands.push_back(AMDGPUOperand::CreateImm(this, Imm, S));
3784       return MatchOperand_Success;
3785     }
3786 
3787     case AsmToken::Identifier:
3788       Operands.push_back(AMDGPUOperand::CreateExpr(this,
3789           MCSymbolRefExpr::create(getContext().getOrCreateSymbol(
3790                                   Parser.getTok().getString()), getContext()), S));
3791       Parser.Lex();
3792       return MatchOperand_Success;
3793   }
3794 }
3795 
3796 //===----------------------------------------------------------------------===//
3797 // mubuf
3798 //===----------------------------------------------------------------------===//
3799 
3800 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultGLC() const {
3801   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyGLC);
3802 }
3803 
3804 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSLC() const {
3805   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTySLC);
3806 }
3807 
3808 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultTFE() const {
3809   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyTFE);
3810 }
3811 
3812 void AMDGPUAsmParser::cvtMubufImpl(MCInst &Inst,
3813                                const OperandVector &Operands,
3814                                bool IsAtomic, bool IsAtomicReturn) {
3815   OptionalImmIndexMap OptionalIdx;
3816   assert(IsAtomicReturn ? IsAtomic : true);
3817 
3818   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
3819     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
3820 
3821     // Add the register arguments
3822     if (Op.isReg()) {
3823       Op.addRegOperands(Inst, 1);
3824       continue;
3825     }
3826 
3827     // Handle the case where soffset is an immediate
3828     if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
3829       Op.addImmOperands(Inst, 1);
3830       continue;
3831     }
3832 
3833     // Handle tokens like 'offen' which are sometimes hard-coded into the
3834     // asm string.  There are no MCInst operands for these.
3835     if (Op.isToken()) {
3836       continue;
3837     }
3838     assert(Op.isImm());
3839 
3840     // Handle optional arguments
3841     OptionalIdx[Op.getImmTy()] = i;
3842   }
3843 
3844   // Copy $vdata_in operand and insert as $vdata for MUBUF_Atomic RTN insns.
3845   if (IsAtomicReturn) {
3846     MCInst::iterator I = Inst.begin(); // $vdata_in is always at the beginning.
3847     Inst.insert(I, *I);
3848   }
3849 
3850   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset);
3851   if (!IsAtomic) { // glc is hard-coded.
3852     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC);
3853   }
3854   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC);
3855   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
3856 }
3857 
3858 void AMDGPUAsmParser::cvtMtbuf(MCInst &Inst, const OperandVector &Operands) {
3859   OptionalImmIndexMap OptionalIdx;
3860 
3861   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
3862     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
3863 
3864     // Add the register arguments
3865     if (Op.isReg()) {
3866       Op.addRegOperands(Inst, 1);
3867       continue;
3868     }
3869 
3870     // Handle the case where soffset is an immediate
3871     if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
3872       Op.addImmOperands(Inst, 1);
3873       continue;
3874     }
3875 
3876     // Handle tokens like 'offen' which are sometimes hard-coded into the
3877     // asm string.  There are no MCInst operands for these.
3878     if (Op.isToken()) {
3879       continue;
3880     }
3881     assert(Op.isImm());
3882 
3883     // Handle optional arguments
3884     OptionalIdx[Op.getImmTy()] = i;
3885   }
3886 
3887   addOptionalImmOperand(Inst, Operands, OptionalIdx,
3888                         AMDGPUOperand::ImmTyOffset);
3889   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDFMT);
3890   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyNFMT);
3891   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC);
3892   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC);
3893   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
3894 }
3895 
3896 //===----------------------------------------------------------------------===//
3897 // mimg
3898 //===----------------------------------------------------------------------===//
3899 
3900 void AMDGPUAsmParser::cvtMIMG(MCInst &Inst, const OperandVector &Operands,
3901                               bool IsAtomic) {
3902   unsigned I = 1;
3903   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
3904   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
3905     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
3906   }
3907 
3908   if (IsAtomic) {
3909     // Add src, same as dst
3910     ((AMDGPUOperand &)*Operands[I]).addRegOperands(Inst, 1);
3911   }
3912 
3913   OptionalImmIndexMap OptionalIdx;
3914 
3915   for (unsigned E = Operands.size(); I != E; ++I) {
3916     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
3917 
3918     // Add the register arguments
3919     if (Op.isRegOrImm()) {
3920       Op.addRegOrImmOperands(Inst, 1);
3921       continue;
3922     } else if (Op.isImmModifier()) {
3923       OptionalIdx[Op.getImmTy()] = I;
3924     } else {
3925       llvm_unreachable("unexpected operand type");
3926     }
3927   }
3928 
3929   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDMask);
3930   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyUNorm);
3931   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC);
3932   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDA);
3933   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyR128);
3934   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
3935   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyLWE);
3936   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC);
3937 }
3938 
3939 void AMDGPUAsmParser::cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands) {
3940   cvtMIMG(Inst, Operands, true);
3941 }
3942 
3943 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultDMask() const {
3944   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDMask);
3945 }
3946 
3947 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultUNorm() const {
3948   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyUNorm);
3949 }
3950 
3951 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultDA() const {
3952   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDA);
3953 }
3954 
3955 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultR128() const {
3956   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyR128);
3957 }
3958 
3959 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultLWE() const {
3960   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyLWE);
3961 }
3962 
3963 //===----------------------------------------------------------------------===//
3964 // smrd
3965 //===----------------------------------------------------------------------===//
3966 
3967 bool AMDGPUOperand::isSMRDOffset8() const {
3968   return isImm() && isUInt<8>(getImm());
3969 }
3970 
3971 bool AMDGPUOperand::isSMRDOffset20() const {
3972   return isImm() && isUInt<20>(getImm());
3973 }
3974 
3975 bool AMDGPUOperand::isSMRDLiteralOffset() const {
3976   // 32-bit literals are only supported on CI and we only want to use them
3977   // when the offset is > 8-bits.
3978   return isImm() && !isUInt<8>(getImm()) && isUInt<32>(getImm());
3979 }
3980 
3981 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDOffset8() const {
3982   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
3983 }
3984 
3985 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDOffset20() const {
3986   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
3987 }
3988 
3989 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDLiteralOffset() const {
3990   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
3991 }
3992 
3993 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultOffsetU12() const {
3994   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
3995 }
3996 
3997 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultOffsetS13() const {
3998   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
3999 }
4000 
4001 //===----------------------------------------------------------------------===//
4002 // vop3
4003 //===----------------------------------------------------------------------===//
4004 
4005 static bool ConvertOmodMul(int64_t &Mul) {
4006   if (Mul != 1 && Mul != 2 && Mul != 4)
4007     return false;
4008 
4009   Mul >>= 1;
4010   return true;
4011 }
4012 
4013 static bool ConvertOmodDiv(int64_t &Div) {
4014   if (Div == 1) {
4015     Div = 0;
4016     return true;
4017   }
4018 
4019   if (Div == 2) {
4020     Div = 3;
4021     return true;
4022   }
4023 
4024   return false;
4025 }
4026 
4027 static bool ConvertBoundCtrl(int64_t &BoundCtrl) {
4028   if (BoundCtrl == 0) {
4029     BoundCtrl = 1;
4030     return true;
4031   }
4032 
4033   if (BoundCtrl == -1) {
4034     BoundCtrl = 0;
4035     return true;
4036   }
4037 
4038   return false;
4039 }
4040 
4041 // Note: the order in this table matches the order of operands in AsmString.
4042 static const OptionalOperand AMDGPUOptionalOperandTable[] = {
4043   {"offen",   AMDGPUOperand::ImmTyOffen, true, nullptr},
4044   {"idxen",   AMDGPUOperand::ImmTyIdxen, true, nullptr},
4045   {"addr64",  AMDGPUOperand::ImmTyAddr64, true, nullptr},
4046   {"offset0", AMDGPUOperand::ImmTyOffset0, false, nullptr},
4047   {"offset1", AMDGPUOperand::ImmTyOffset1, false, nullptr},
4048   {"gds",     AMDGPUOperand::ImmTyGDS, true, nullptr},
4049   {"offset",  AMDGPUOperand::ImmTyOffset, false, nullptr},
4050   {"dfmt",    AMDGPUOperand::ImmTyDFMT, false, nullptr},
4051   {"nfmt",    AMDGPUOperand::ImmTyNFMT, false, nullptr},
4052   {"glc",     AMDGPUOperand::ImmTyGLC, true, nullptr},
4053   {"slc",     AMDGPUOperand::ImmTySLC, true, nullptr},
4054   {"tfe",     AMDGPUOperand::ImmTyTFE, true, nullptr},
4055   {"high",    AMDGPUOperand::ImmTyHigh, true, nullptr},
4056   {"clamp",   AMDGPUOperand::ImmTyClampSI, true, nullptr},
4057   {"omod",    AMDGPUOperand::ImmTyOModSI, false, ConvertOmodMul},
4058   {"unorm",   AMDGPUOperand::ImmTyUNorm, true, nullptr},
4059   {"da",      AMDGPUOperand::ImmTyDA,    true, nullptr},
4060   {"r128",    AMDGPUOperand::ImmTyR128,  true, nullptr},
4061   {"lwe",     AMDGPUOperand::ImmTyLWE,   true, nullptr},
4062   {"dmask",   AMDGPUOperand::ImmTyDMask, false, nullptr},
4063   {"row_mask",   AMDGPUOperand::ImmTyDppRowMask, false, nullptr},
4064   {"bank_mask",  AMDGPUOperand::ImmTyDppBankMask, false, nullptr},
4065   {"bound_ctrl", AMDGPUOperand::ImmTyDppBoundCtrl, false, ConvertBoundCtrl},
4066   {"dst_sel",    AMDGPUOperand::ImmTySdwaDstSel, false, nullptr},
4067   {"src0_sel",   AMDGPUOperand::ImmTySdwaSrc0Sel, false, nullptr},
4068   {"src1_sel",   AMDGPUOperand::ImmTySdwaSrc1Sel, false, nullptr},
4069   {"dst_unused", AMDGPUOperand::ImmTySdwaDstUnused, false, nullptr},
4070   {"compr", AMDGPUOperand::ImmTyExpCompr, true, nullptr },
4071   {"vm", AMDGPUOperand::ImmTyExpVM, true, nullptr},
4072   {"op_sel", AMDGPUOperand::ImmTyOpSel, false, nullptr},
4073   {"op_sel_hi", AMDGPUOperand::ImmTyOpSelHi, false, nullptr},
4074   {"neg_lo", AMDGPUOperand::ImmTyNegLo, false, nullptr},
4075   {"neg_hi", AMDGPUOperand::ImmTyNegHi, false, nullptr}
4076 };
4077 
4078 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOperand(OperandVector &Operands) {
4079   OperandMatchResultTy res;
4080   for (const OptionalOperand &Op : AMDGPUOptionalOperandTable) {
4081     // try to parse any optional operand here
4082     if (Op.IsBit) {
4083       res = parseNamedBit(Op.Name, Operands, Op.Type);
4084     } else if (Op.Type == AMDGPUOperand::ImmTyOModSI) {
4085       res = parseOModOperand(Operands);
4086     } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstSel ||
4087                Op.Type == AMDGPUOperand::ImmTySdwaSrc0Sel ||
4088                Op.Type == AMDGPUOperand::ImmTySdwaSrc1Sel) {
4089       res = parseSDWASel(Operands, Op.Name, Op.Type);
4090     } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstUnused) {
4091       res = parseSDWADstUnused(Operands);
4092     } else if (Op.Type == AMDGPUOperand::ImmTyOpSel ||
4093                Op.Type == AMDGPUOperand::ImmTyOpSelHi ||
4094                Op.Type == AMDGPUOperand::ImmTyNegLo ||
4095                Op.Type == AMDGPUOperand::ImmTyNegHi) {
4096       res = parseOperandArrayWithPrefix(Op.Name, Operands, Op.Type,
4097                                         Op.ConvertResult);
4098     } else {
4099       res = parseIntWithPrefix(Op.Name, Operands, Op.Type, Op.ConvertResult);
4100     }
4101     if (res != MatchOperand_NoMatch) {
4102       return res;
4103     }
4104   }
4105   return MatchOperand_NoMatch;
4106 }
4107 
4108 OperandMatchResultTy AMDGPUAsmParser::parseOModOperand(OperandVector &Operands) {
4109   StringRef Name = Parser.getTok().getString();
4110   if (Name == "mul") {
4111     return parseIntWithPrefix("mul", Operands,
4112                               AMDGPUOperand::ImmTyOModSI, ConvertOmodMul);
4113   }
4114 
4115   if (Name == "div") {
4116     return parseIntWithPrefix("div", Operands,
4117                               AMDGPUOperand::ImmTyOModSI, ConvertOmodDiv);
4118   }
4119 
4120   return MatchOperand_NoMatch;
4121 }
4122 
4123 void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands) {
4124   cvtVOP3P(Inst, Operands);
4125 
4126   int Opc = Inst.getOpcode();
4127 
4128   int SrcNum;
4129   const int Ops[] = { AMDGPU::OpName::src0,
4130                       AMDGPU::OpName::src1,
4131                       AMDGPU::OpName::src2 };
4132   for (SrcNum = 0;
4133        SrcNum < 3 && AMDGPU::getNamedOperandIdx(Opc, Ops[SrcNum]) != -1;
4134        ++SrcNum);
4135   assert(SrcNum > 0);
4136 
4137   int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
4138   unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
4139 
4140   if ((OpSel & (1 << SrcNum)) != 0) {
4141     int ModIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers);
4142     uint32_t ModVal = Inst.getOperand(ModIdx).getImm();
4143     Inst.getOperand(ModIdx).setImm(ModVal | SISrcMods::DST_OP_SEL);
4144   }
4145 }
4146 
4147 static bool isRegOrImmWithInputMods(const MCInstrDesc &Desc, unsigned OpNum) {
4148       // 1. This operand is input modifiers
4149   return Desc.OpInfo[OpNum].OperandType == AMDGPU::OPERAND_INPUT_MODS
4150       // 2. This is not last operand
4151       && Desc.NumOperands > (OpNum + 1)
4152       // 3. Next operand is register class
4153       && Desc.OpInfo[OpNum + 1].RegClass != -1
4154       // 4. Next register is not tied to any other operand
4155       && Desc.getOperandConstraint(OpNum + 1, MCOI::OperandConstraint::TIED_TO) == -1;
4156 }
4157 
4158 void AMDGPUAsmParser::cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands)
4159 {
4160   OptionalImmIndexMap OptionalIdx;
4161   unsigned Opc = Inst.getOpcode();
4162 
4163   unsigned I = 1;
4164   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
4165   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
4166     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
4167   }
4168 
4169   for (unsigned E = Operands.size(); I != E; ++I) {
4170     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
4171     if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
4172       Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
4173     } else if (Op.isInterpSlot() ||
4174                Op.isInterpAttr() ||
4175                Op.isAttrChan()) {
4176       Inst.addOperand(MCOperand::createImm(Op.Imm.Val));
4177     } else if (Op.isImmModifier()) {
4178       OptionalIdx[Op.getImmTy()] = I;
4179     } else {
4180       llvm_unreachable("unhandled operand type");
4181     }
4182   }
4183 
4184   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::high) != -1) {
4185     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyHigh);
4186   }
4187 
4188   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) {
4189     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI);
4190   }
4191 
4192   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) {
4193     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI);
4194   }
4195 }
4196 
4197 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands,
4198                               OptionalImmIndexMap &OptionalIdx) {
4199   unsigned Opc = Inst.getOpcode();
4200 
4201   unsigned I = 1;
4202   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
4203   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
4204     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
4205   }
4206 
4207   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers) != -1) {
4208     // This instruction has src modifiers
4209     for (unsigned E = Operands.size(); I != E; ++I) {
4210       AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
4211       if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
4212         Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
4213       } else if (Op.isImmModifier()) {
4214         OptionalIdx[Op.getImmTy()] = I;
4215       } else if (Op.isRegOrImm()) {
4216         Op.addRegOrImmOperands(Inst, 1);
4217       } else {
4218         llvm_unreachable("unhandled operand type");
4219       }
4220     }
4221   } else {
4222     // No src modifiers
4223     for (unsigned E = Operands.size(); I != E; ++I) {
4224       AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
4225       if (Op.isMod()) {
4226         OptionalIdx[Op.getImmTy()] = I;
4227       } else {
4228         Op.addRegOrImmOperands(Inst, 1);
4229       }
4230     }
4231   }
4232 
4233   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) {
4234     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI);
4235   }
4236 
4237   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) {
4238     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI);
4239   }
4240 
4241   // special case v_mac_{f16, f32}:
4242   // it has src2 register operand that is tied to dst operand
4243   // we don't allow modifiers for this operand in assembler so src2_modifiers
4244   // should be 0
4245   if (Opc == AMDGPU::V_MAC_F32_e64_si || Opc == AMDGPU::V_MAC_F32_e64_vi ||
4246       Opc == AMDGPU::V_MAC_F16_e64_vi) {
4247     auto it = Inst.begin();
4248     std::advance(it, AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2_modifiers));
4249     it = Inst.insert(it, MCOperand::createImm(0)); // no modifiers for src2
4250     ++it;
4251     Inst.insert(it, Inst.getOperand(0)); // src2 = dst
4252   }
4253 }
4254 
4255 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands) {
4256   OptionalImmIndexMap OptionalIdx;
4257   cvtVOP3(Inst, Operands, OptionalIdx);
4258 }
4259 
4260 void AMDGPUAsmParser::cvtVOP3PImpl(MCInst &Inst,
4261                                    const OperandVector &Operands,
4262                                    bool IsPacked) {
4263   OptionalImmIndexMap OptIdx;
4264 
4265   cvtVOP3(Inst, Operands, OptIdx);
4266 
4267   // FIXME: This is messy. Parse the modifiers as if it was a normal VOP3
4268   // instruction, and then figure out where to actually put the modifiers
4269   int Opc = Inst.getOpcode();
4270 
4271   addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSel);
4272 
4273   int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi);
4274   if (OpSelHiIdx != -1) {
4275     // TODO: Should we change the printing to match?
4276     int DefaultVal = IsPacked ? -1 : 0;
4277     addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSelHi,
4278                           DefaultVal);
4279   }
4280 
4281   int NegLoIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_lo);
4282   if (NegLoIdx != -1) {
4283     assert(IsPacked);
4284     addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegLo);
4285     addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegHi);
4286   }
4287 
4288   const int Ops[] = { AMDGPU::OpName::src0,
4289                       AMDGPU::OpName::src1,
4290                       AMDGPU::OpName::src2 };
4291   const int ModOps[] = { AMDGPU::OpName::src0_modifiers,
4292                          AMDGPU::OpName::src1_modifiers,
4293                          AMDGPU::OpName::src2_modifiers };
4294 
4295   int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
4296 
4297   unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
4298   unsigned OpSelHi = 0;
4299   unsigned NegLo = 0;
4300   unsigned NegHi = 0;
4301 
4302   if (OpSelHiIdx != -1) {
4303     OpSelHi = Inst.getOperand(OpSelHiIdx).getImm();
4304   }
4305 
4306   if (NegLoIdx != -1) {
4307     int NegHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_hi);
4308     NegLo = Inst.getOperand(NegLoIdx).getImm();
4309     NegHi = Inst.getOperand(NegHiIdx).getImm();
4310   }
4311 
4312   for (int J = 0; J < 3; ++J) {
4313     int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]);
4314     if (OpIdx == -1)
4315       break;
4316 
4317     uint32_t ModVal = 0;
4318 
4319     if ((OpSel & (1 << J)) != 0)
4320       ModVal |= SISrcMods::OP_SEL_0;
4321 
4322     if ((OpSelHi & (1 << J)) != 0)
4323       ModVal |= SISrcMods::OP_SEL_1;
4324 
4325     if ((NegLo & (1 << J)) != 0)
4326       ModVal |= SISrcMods::NEG;
4327 
4328     if ((NegHi & (1 << J)) != 0)
4329       ModVal |= SISrcMods::NEG_HI;
4330 
4331     int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]);
4332 
4333     Inst.getOperand(ModIdx).setImm(Inst.getOperand(ModIdx).getImm() | ModVal);
4334   }
4335 }
4336 
4337 void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands) {
4338   cvtVOP3PImpl(Inst, Operands, true);
4339 }
4340 
4341 void AMDGPUAsmParser::cvtVOP3P_NotPacked(MCInst &Inst,
4342                                          const OperandVector &Operands) {
4343   cvtVOP3PImpl(Inst, Operands, false);
4344 }
4345 
4346 //===----------------------------------------------------------------------===//
4347 // dpp
4348 //===----------------------------------------------------------------------===//
4349 
4350 bool AMDGPUOperand::isDPPCtrl() const {
4351   bool result = isImm() && getImmTy() == ImmTyDppCtrl && isUInt<9>(getImm());
4352   if (result) {
4353     int64_t Imm = getImm();
4354     return ((Imm >= 0x000) && (Imm <= 0x0ff)) ||
4355            ((Imm >= 0x101) && (Imm <= 0x10f)) ||
4356            ((Imm >= 0x111) && (Imm <= 0x11f)) ||
4357            ((Imm >= 0x121) && (Imm <= 0x12f)) ||
4358            (Imm == 0x130) ||
4359            (Imm == 0x134) ||
4360            (Imm == 0x138) ||
4361            (Imm == 0x13c) ||
4362            (Imm == 0x140) ||
4363            (Imm == 0x141) ||
4364            (Imm == 0x142) ||
4365            (Imm == 0x143);
4366   }
4367   return false;
4368 }
4369 
4370 bool AMDGPUOperand::isGPRIdxMode() const {
4371   return isImm() && isUInt<4>(getImm());
4372 }
4373 
4374 bool AMDGPUOperand::isS16Imm() const {
4375   return isImm() && (isInt<16>(getImm()) || isUInt<16>(getImm()));
4376 }
4377 
4378 bool AMDGPUOperand::isU16Imm() const {
4379   return isImm() && isUInt<16>(getImm());
4380 }
4381 
4382 OperandMatchResultTy
4383 AMDGPUAsmParser::parseDPPCtrl(OperandVector &Operands) {
4384   SMLoc S = Parser.getTok().getLoc();
4385   StringRef Prefix;
4386   int64_t Int;
4387 
4388   if (getLexer().getKind() == AsmToken::Identifier) {
4389     Prefix = Parser.getTok().getString();
4390   } else {
4391     return MatchOperand_NoMatch;
4392   }
4393 
4394   if (Prefix == "row_mirror") {
4395     Int = 0x140;
4396     Parser.Lex();
4397   } else if (Prefix == "row_half_mirror") {
4398     Int = 0x141;
4399     Parser.Lex();
4400   } else {
4401     // Check to prevent parseDPPCtrlOps from eating invalid tokens
4402     if (Prefix != "quad_perm"
4403         && Prefix != "row_shl"
4404         && Prefix != "row_shr"
4405         && Prefix != "row_ror"
4406         && Prefix != "wave_shl"
4407         && Prefix != "wave_rol"
4408         && Prefix != "wave_shr"
4409         && Prefix != "wave_ror"
4410         && Prefix != "row_bcast") {
4411       return MatchOperand_NoMatch;
4412     }
4413 
4414     Parser.Lex();
4415     if (getLexer().isNot(AsmToken::Colon))
4416       return MatchOperand_ParseFail;
4417 
4418     if (Prefix == "quad_perm") {
4419       // quad_perm:[%d,%d,%d,%d]
4420       Parser.Lex();
4421       if (getLexer().isNot(AsmToken::LBrac))
4422         return MatchOperand_ParseFail;
4423       Parser.Lex();
4424 
4425       if (getParser().parseAbsoluteExpression(Int) || !(0 <= Int && Int <=3))
4426         return MatchOperand_ParseFail;
4427 
4428       for (int i = 0; i < 3; ++i) {
4429         if (getLexer().isNot(AsmToken::Comma))
4430           return MatchOperand_ParseFail;
4431         Parser.Lex();
4432 
4433         int64_t Temp;
4434         if (getParser().parseAbsoluteExpression(Temp) || !(0 <= Temp && Temp <=3))
4435           return MatchOperand_ParseFail;
4436         const int shift = i*2 + 2;
4437         Int += (Temp << shift);
4438       }
4439 
4440       if (getLexer().isNot(AsmToken::RBrac))
4441         return MatchOperand_ParseFail;
4442       Parser.Lex();
4443     } else {
4444       // sel:%d
4445       Parser.Lex();
4446       if (getParser().parseAbsoluteExpression(Int))
4447         return MatchOperand_ParseFail;
4448 
4449       if (Prefix == "row_shl" && 1 <= Int && Int <= 15) {
4450         Int |= 0x100;
4451       } else if (Prefix == "row_shr" && 1 <= Int && Int <= 15) {
4452         Int |= 0x110;
4453       } else if (Prefix == "row_ror" && 1 <= Int && Int <= 15) {
4454         Int |= 0x120;
4455       } else if (Prefix == "wave_shl" && 1 == Int) {
4456         Int = 0x130;
4457       } else if (Prefix == "wave_rol" && 1 == Int) {
4458         Int = 0x134;
4459       } else if (Prefix == "wave_shr" && 1 == Int) {
4460         Int = 0x138;
4461       } else if (Prefix == "wave_ror" && 1 == Int) {
4462         Int = 0x13C;
4463       } else if (Prefix == "row_bcast") {
4464         if (Int == 15) {
4465           Int = 0x142;
4466         } else if (Int == 31) {
4467           Int = 0x143;
4468         } else {
4469           return MatchOperand_ParseFail;
4470         }
4471       } else {
4472         return MatchOperand_ParseFail;
4473       }
4474     }
4475   }
4476 
4477   Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, AMDGPUOperand::ImmTyDppCtrl));
4478   return MatchOperand_Success;
4479 }
4480 
4481 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultRowMask() const {
4482   return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppRowMask);
4483 }
4484 
4485 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBankMask() const {
4486   return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppBankMask);
4487 }
4488 
4489 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBoundCtrl() const {
4490   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppBoundCtrl);
4491 }
4492 
4493 void AMDGPUAsmParser::cvtDPP(MCInst &Inst, const OperandVector &Operands) {
4494   OptionalImmIndexMap OptionalIdx;
4495 
4496   unsigned I = 1;
4497   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
4498   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
4499     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
4500   }
4501 
4502   // All DPP instructions with at least one source operand have a fake "old"
4503   // source at the beginning that's tied to the dst operand. Handle it here.
4504   if (Desc.getNumOperands() >= 2)
4505     Inst.addOperand(Inst.getOperand(0));
4506 
4507   for (unsigned E = Operands.size(); I != E; ++I) {
4508     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
4509     // Add the register arguments
4510     if (Op.isReg() && Op.Reg.RegNo == AMDGPU::VCC) {
4511       // VOP2b (v_add_u32, v_sub_u32 ...) dpp use "vcc" token.
4512       // Skip it.
4513       continue;
4514     } if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
4515       Op.addRegWithFPInputModsOperands(Inst, 2);
4516     } else if (Op.isDPPCtrl()) {
4517       Op.addImmOperands(Inst, 1);
4518     } else if (Op.isImm()) {
4519       // Handle optional arguments
4520       OptionalIdx[Op.getImmTy()] = I;
4521     } else {
4522       llvm_unreachable("Invalid operand type");
4523     }
4524   }
4525 
4526   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppRowMask, 0xf);
4527   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBankMask, 0xf);
4528   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBoundCtrl);
4529 }
4530 
4531 //===----------------------------------------------------------------------===//
4532 // sdwa
4533 //===----------------------------------------------------------------------===//
4534 
4535 OperandMatchResultTy
4536 AMDGPUAsmParser::parseSDWASel(OperandVector &Operands, StringRef Prefix,
4537                               AMDGPUOperand::ImmTy Type) {
4538   using namespace llvm::AMDGPU::SDWA;
4539 
4540   SMLoc S = Parser.getTok().getLoc();
4541   StringRef Value;
4542   OperandMatchResultTy res;
4543 
4544   res = parseStringWithPrefix(Prefix, Value);
4545   if (res != MatchOperand_Success) {
4546     return res;
4547   }
4548 
4549   int64_t Int;
4550   Int = StringSwitch<int64_t>(Value)
4551         .Case("BYTE_0", SdwaSel::BYTE_0)
4552         .Case("BYTE_1", SdwaSel::BYTE_1)
4553         .Case("BYTE_2", SdwaSel::BYTE_2)
4554         .Case("BYTE_3", SdwaSel::BYTE_3)
4555         .Case("WORD_0", SdwaSel::WORD_0)
4556         .Case("WORD_1", SdwaSel::WORD_1)
4557         .Case("DWORD", SdwaSel::DWORD)
4558         .Default(0xffffffff);
4559   Parser.Lex(); // eat last token
4560 
4561   if (Int == 0xffffffff) {
4562     return MatchOperand_ParseFail;
4563   }
4564 
4565   Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, Type));
4566   return MatchOperand_Success;
4567 }
4568 
4569 OperandMatchResultTy
4570 AMDGPUAsmParser::parseSDWADstUnused(OperandVector &Operands) {
4571   using namespace llvm::AMDGPU::SDWA;
4572 
4573   SMLoc S = Parser.getTok().getLoc();
4574   StringRef Value;
4575   OperandMatchResultTy res;
4576 
4577   res = parseStringWithPrefix("dst_unused", Value);
4578   if (res != MatchOperand_Success) {
4579     return res;
4580   }
4581 
4582   int64_t Int;
4583   Int = StringSwitch<int64_t>(Value)
4584         .Case("UNUSED_PAD", DstUnused::UNUSED_PAD)
4585         .Case("UNUSED_SEXT", DstUnused::UNUSED_SEXT)
4586         .Case("UNUSED_PRESERVE", DstUnused::UNUSED_PRESERVE)
4587         .Default(0xffffffff);
4588   Parser.Lex(); // eat last token
4589 
4590   if (Int == 0xffffffff) {
4591     return MatchOperand_ParseFail;
4592   }
4593 
4594   Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, AMDGPUOperand::ImmTySdwaDstUnused));
4595   return MatchOperand_Success;
4596 }
4597 
4598 void AMDGPUAsmParser::cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands) {
4599   cvtSDWA(Inst, Operands, SIInstrFlags::VOP1);
4600 }
4601 
4602 void AMDGPUAsmParser::cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands) {
4603   cvtSDWA(Inst, Operands, SIInstrFlags::VOP2);
4604 }
4605 
4606 void AMDGPUAsmParser::cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands) {
4607   cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, true);
4608 }
4609 
4610 void AMDGPUAsmParser::cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands) {
4611   cvtSDWA(Inst, Operands, SIInstrFlags::VOPC, isVI());
4612 }
4613 
4614 void AMDGPUAsmParser::cvtSDWA(MCInst &Inst, const OperandVector &Operands,
4615                               uint64_t BasicInstType, bool skipVcc) {
4616   using namespace llvm::AMDGPU::SDWA;
4617 
4618   OptionalImmIndexMap OptionalIdx;
4619   bool skippedVcc = false;
4620 
4621   unsigned I = 1;
4622   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
4623   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
4624     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
4625   }
4626 
4627   for (unsigned E = Operands.size(); I != E; ++I) {
4628     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
4629     if (skipVcc && !skippedVcc && Op.isReg() && Op.Reg.RegNo == AMDGPU::VCC) {
4630       // VOP2b (v_add_u32, v_sub_u32 ...) sdwa use "vcc" token as dst.
4631       // Skip it if it's 2nd (e.g. v_add_i32_sdwa v1, vcc, v2, v3)
4632       // or 4th (v_addc_u32_sdwa v1, vcc, v2, v3, vcc) operand.
4633       // Skip VCC only if we didn't skip it on previous iteration.
4634       if (BasicInstType == SIInstrFlags::VOP2 &&
4635           (Inst.getNumOperands() == 1 || Inst.getNumOperands() == 5)) {
4636         skippedVcc = true;
4637         continue;
4638       } else if (BasicInstType == SIInstrFlags::VOPC &&
4639                  Inst.getNumOperands() == 0) {
4640         skippedVcc = true;
4641         continue;
4642       }
4643     }
4644     if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
4645       Op.addRegWithInputModsOperands(Inst, 2);
4646     } else if (Op.isImm()) {
4647       // Handle optional arguments
4648       OptionalIdx[Op.getImmTy()] = I;
4649     } else {
4650       llvm_unreachable("Invalid operand type");
4651     }
4652     skippedVcc = false;
4653   }
4654 
4655   if (Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx9 &&
4656       Inst.getOpcode() != AMDGPU::V_NOP_sdwa_vi) {
4657     // v_nop_sdwa_sdwa_vi/gfx9 has no optional sdwa arguments
4658     switch (BasicInstType) {
4659     case SIInstrFlags::VOP1:
4660       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
4661       if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) {
4662         addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
4663       }
4664       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD);
4665       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE);
4666       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
4667       break;
4668 
4669     case SIInstrFlags::VOP2:
4670       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
4671       if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) {
4672         addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
4673       }
4674       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD);
4675       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE);
4676       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
4677       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD);
4678       break;
4679 
4680     case SIInstrFlags::VOPC:
4681       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
4682       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
4683       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD);
4684       break;
4685 
4686     default:
4687       llvm_unreachable("Invalid instruction type. Only VOP1, VOP2 and VOPC allowed");
4688     }
4689   }
4690 
4691   // special case v_mac_{f16, f32}:
4692   // it has src2 register operand that is tied to dst operand
4693   if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
4694       Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi)  {
4695     auto it = Inst.begin();
4696     std::advance(
4697       it, AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::src2));
4698     Inst.insert(it, Inst.getOperand(0)); // src2 = dst
4699   }
4700 }
4701 
4702 /// Force static initialization.
4703 extern "C" void LLVMInitializeAMDGPUAsmParser() {
4704   RegisterMCAsmParser<AMDGPUAsmParser> A(getTheAMDGPUTarget());
4705   RegisterMCAsmParser<AMDGPUAsmParser> B(getTheGCNTarget());
4706 }
4707 
4708 #define GET_REGISTER_MATCHER
4709 #define GET_MATCHER_IMPLEMENTATION
4710 #include "AMDGPUGenAsmMatcher.inc"
4711 
4712 // This fuction should be defined after auto-generated include so that we have
4713 // MatchClassKind enum defined
4714 unsigned AMDGPUAsmParser::validateTargetOperandClass(MCParsedAsmOperand &Op,
4715                                                      unsigned Kind) {
4716   // Tokens like "glc" would be parsed as immediate operands in ParseOperand().
4717   // But MatchInstructionImpl() expects to meet token and fails to validate
4718   // operand. This method checks if we are given immediate operand but expect to
4719   // get corresponding token.
4720   AMDGPUOperand &Operand = (AMDGPUOperand&)Op;
4721   switch (Kind) {
4722   case MCK_addr64:
4723     return Operand.isAddr64() ? Match_Success : Match_InvalidOperand;
4724   case MCK_gds:
4725     return Operand.isGDS() ? Match_Success : Match_InvalidOperand;
4726   case MCK_glc:
4727     return Operand.isGLC() ? Match_Success : Match_InvalidOperand;
4728   case MCK_idxen:
4729     return Operand.isIdxen() ? Match_Success : Match_InvalidOperand;
4730   case MCK_offen:
4731     return Operand.isOffen() ? Match_Success : Match_InvalidOperand;
4732   case MCK_SSrcB32:
4733     // When operands have expression values, they will return true for isToken,
4734     // because it is not possible to distinguish between a token and an
4735     // expression at parse time. MatchInstructionImpl() will always try to
4736     // match an operand as a token, when isToken returns true, and when the
4737     // name of the expression is not a valid token, the match will fail,
4738     // so we need to handle it here.
4739     return Operand.isSSrcB32() ? Match_Success : Match_InvalidOperand;
4740   case MCK_SSrcF32:
4741     return Operand.isSSrcF32() ? Match_Success : Match_InvalidOperand;
4742   case MCK_SoppBrTarget:
4743     return Operand.isSoppBrTarget() ? Match_Success : Match_InvalidOperand;
4744   case MCK_VReg32OrOff:
4745     return Operand.isVReg32OrOff() ? Match_Success : Match_InvalidOperand;
4746   case MCK_InterpSlot:
4747     return Operand.isInterpSlot() ? Match_Success : Match_InvalidOperand;
4748   case MCK_Attr:
4749     return Operand.isInterpAttr() ? Match_Success : Match_InvalidOperand;
4750   case MCK_AttrChan:
4751     return Operand.isAttrChan() ? Match_Success : Match_InvalidOperand;
4752   default:
4753     return Match_InvalidOperand;
4754   }
4755 }
4756