//===- AMDGPUAsmParser.cpp - Parse SI asm to MCInst instructions ----------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//

#include "AMDGPU.h"
#include "AMDKernelCodeT.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "MCTargetDesc/AMDGPUTargetStreamer.h"
#include "SIDefines.h"
#include "Utils/AMDGPUAsmUtils.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "Utils/AMDKernelCodeTUtils.h"
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallBitVector.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/ADT/Twine.h"
#include "llvm/BinaryFormat/ELF.h"
#include "llvm/CodeGen/MachineValueType.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCParser/MCAsmLexer.h"
#include "llvm/MC/MCParser/MCAsmParser.h"
#include "llvm/MC/MCParser/MCAsmParserExtension.h"
#include "llvm/MC/MCParser/MCParsedAsmOperand.h"
#include "llvm/MC/MCParser/MCTargetAsmParser.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/Support/AMDGPUCodeObjectMetadata.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/SMLoc.h"
#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <cstring>
#include <iterator>
#include <map>
#include <memory>
#include <string>

using namespace llvm;
using namespace llvm::AMDGPU;

namespace {

class AMDGPUAsmParser;

enum RegisterKind { IS_UNKNOWN, IS_VGPR, IS_SGPR, IS_TTMP, IS_SPECIAL };

//===----------------------------------------------------------------------===//
// Operand
//===----------------------------------------------------------------------===//

class AMDGPUOperand : public MCParsedAsmOperand {
  enum KindTy {
    Token,
    Immediate,
    Register,
    Expression
  } Kind;

  SMLoc StartLoc, EndLoc;
  const AMDGPUAsmParser *AsmParser;

public:
  AMDGPUOperand(KindTy Kind_, const AMDGPUAsmParser *AsmParser_)
    : MCParsedAsmOperand(), Kind(Kind_), AsmParser(AsmParser_) {}

  using Ptr = std::unique_ptr<AMDGPUOperand>;

  struct Modifiers {
    bool Abs = false;
    bool Neg = false;
    bool Sext = false;

    bool hasFPModifiers() const { return Abs || Neg; }
    bool hasIntModifiers() const { return Sext; }
    bool hasModifiers() const { return hasFPModifiers() || hasIntModifiers(); }

    int64_t getFPModifiersOperand() const {
      int64_t Operand = 0;
      Operand |= Abs ? SISrcMods::ABS : 0;
      Operand |= Neg ? SISrcMods::NEG : 0;
      return Operand;
    }

    int64_t getIntModifiersOperand() const {
      int64_t Operand = 0;
      Operand |= Sext ? SISrcMods::SEXT : 0;
      return Operand;
    }

    int64_t getModifiersOperand() const {
      assert(!(hasFPModifiers() && hasIntModifiers())
           && "fp and int modifiers should not be used simultaneously");
      if (hasFPModifiers()) {
        return getFPModifiersOperand();
      } else if (hasIntModifiers()) {
        return getIntModifiersOperand();
      } else {
        return 0;
      }
    }

    friend raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods);
  };
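
  // Illustrative sketch (not used by the parser itself) of how modifiers
  // pack into a VOP3-style src*_modifiers operand:
  //   Modifiers Mods;
  //   Mods.Abs = true;
  //   Mods.Neg = true;
  //   int64_t Enc = Mods.getModifiersOperand();
  //   // Enc == (SISrcMods::ABS | SISrcMods::NEG)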

  enum ImmTy {
    ImmTyNone,
    ImmTyGDS,
    ImmTyOffen,
    ImmTyIdxen,
    ImmTyAddr64,
    ImmTyOffset,
    ImmTyOffset0,
    ImmTyOffset1,
    ImmTyGLC,
    ImmTySLC,
    ImmTyTFE,
    ImmTyClampSI,
    ImmTyOModSI,
    ImmTyDppCtrl,
    ImmTyDppRowMask,
    ImmTyDppBankMask,
    ImmTyDppBoundCtrl,
    ImmTySdwaDstSel,
    ImmTySdwaSrc0Sel,
    ImmTySdwaSrc1Sel,
    ImmTySdwaDstUnused,
    ImmTyDMask,
    ImmTyUNorm,
    ImmTyDA,
    ImmTyR128,
    ImmTyLWE,
    ImmTyExpTgt,
    ImmTyExpCompr,
    ImmTyExpVM,
    ImmTyDFMT,
    ImmTyNFMT,
    ImmTyHwreg,
    ImmTyOff,
    ImmTySendMsg,
    ImmTyInterpSlot,
    ImmTyInterpAttr,
    ImmTyAttrChan,
    ImmTyOpSel,
    ImmTyOpSelHi,
    ImmTyNegLo,
    ImmTyNegHi,
    ImmTySwizzle,
    ImmTyHigh
  };

  struct TokOp {
    const char *Data;
    unsigned Length;
  };

  struct ImmOp {
    int64_t Val;
    ImmTy Type;
    bool IsFPImm;
    Modifiers Mods;
  };

  struct RegOp {
    unsigned RegNo;
    bool IsForcedVOP3;
    Modifiers Mods;
  };

  union {
    TokOp Tok;
    ImmOp Imm;
    RegOp Reg;
    const MCExpr *Expr;
  };

  bool isToken() const override {
    if (Kind == Token)
      return true;

    if (Kind != Expression || !Expr)
      return false;

    // When parsing operands, we can't always tell if something was meant to be
    // a token, like 'gds', or an expression that references a global variable.
    // In this case, we assume the string is an expression, and if we need to
    // interpret it as a token, then we treat the symbol name as the token.
    return isa<MCSymbolRefExpr>(Expr);
  }
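
  // Illustrative example for isToken() above: in "ds_write_b32 v1, v2 gds",
  // the trailing "gds" may have been lexed as an expression referencing a
  // symbol named "gds"; exposing the symbol name via getToken() lets the
  // operand still match the named-bit form.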

  bool isImm() const override {
    return Kind == Immediate;
  }

  bool isInlinableImm(MVT type) const;
  bool isLiteralImm(MVT type) const;

  bool isRegKind() const {
    return Kind == Register;
  }

  bool isReg() const override {
    return isRegKind() && !hasModifiers();
  }

  bool isRegOrImmWithInputMods(MVT type) const {
    return isRegKind() || isInlinableImm(type);
  }

  bool isRegOrImmWithInt16InputMods() const {
    return isRegOrImmWithInputMods(MVT::i16);
  }

  bool isRegOrImmWithInt32InputMods() const {
    return isRegOrImmWithInputMods(MVT::i32);
  }

  bool isRegOrImmWithInt64InputMods() const {
    return isRegOrImmWithInputMods(MVT::i64);
  }

  bool isRegOrImmWithFP16InputMods() const {
    return isRegOrImmWithInputMods(MVT::f16);
  }

  bool isRegOrImmWithFP32InputMods() const {
    return isRegOrImmWithInputMods(MVT::f32);
  }

  bool isRegOrImmWithFP64InputMods() const {
    return isRegOrImmWithInputMods(MVT::f64);
  }

  bool isVReg() const {
    return isRegClass(AMDGPU::VGPR_32RegClassID) ||
           isRegClass(AMDGPU::VReg_64RegClassID) ||
           isRegClass(AMDGPU::VReg_96RegClassID) ||
           isRegClass(AMDGPU::VReg_128RegClassID) ||
           isRegClass(AMDGPU::VReg_256RegClassID) ||
           isRegClass(AMDGPU::VReg_512RegClassID);
  }

  bool isVReg32OrOff() const {
    return isOff() || isRegClass(AMDGPU::VGPR_32RegClassID);
  }

  bool isSDWARegKind() const;

  bool isImmTy(ImmTy ImmT) const {
    return isImm() && Imm.Type == ImmT;
  }

  bool isImmModifier() const {
    return isImm() && Imm.Type != ImmTyNone;
  }

  bool isClampSI() const { return isImmTy(ImmTyClampSI); }
  bool isOModSI() const { return isImmTy(ImmTyOModSI); }
  bool isDMask() const { return isImmTy(ImmTyDMask); }
  bool isUNorm() const { return isImmTy(ImmTyUNorm); }
  bool isDA() const { return isImmTy(ImmTyDA); }
  bool isR128() const { return isImmTy(ImmTyR128); }
  bool isLWE() const { return isImmTy(ImmTyLWE); }
  bool isOff() const { return isImmTy(ImmTyOff); }
  bool isExpTgt() const { return isImmTy(ImmTyExpTgt); }
  bool isExpVM() const { return isImmTy(ImmTyExpVM); }
  bool isExpCompr() const { return isImmTy(ImmTyExpCompr); }
  bool isOffen() const { return isImmTy(ImmTyOffen); }
  bool isIdxen() const { return isImmTy(ImmTyIdxen); }
  bool isAddr64() const { return isImmTy(ImmTyAddr64); }
  bool isOffset() const { return isImmTy(ImmTyOffset) && isUInt<16>(getImm()); }
  bool isOffset0() const { return isImmTy(ImmTyOffset0) && isUInt<16>(getImm()); }
  bool isOffset1() const { return isImmTy(ImmTyOffset1) && isUInt<8>(getImm()); }

  bool isOffsetU12() const { return isImmTy(ImmTyOffset) && isUInt<12>(getImm()); }
  bool isOffsetS13() const { return isImmTy(ImmTyOffset) && isInt<13>(getImm()); }
  bool isGDS() const { return isImmTy(ImmTyGDS); }
  bool isGLC() const { return isImmTy(ImmTyGLC); }
  bool isSLC() const { return isImmTy(ImmTySLC); }
  bool isTFE() const { return isImmTy(ImmTyTFE); }
  bool isDFMT() const { return isImmTy(ImmTyDFMT) && isUInt<8>(getImm()); }
  bool isNFMT() const { return isImmTy(ImmTyNFMT) && isUInt<8>(getImm()); }
  bool isBankMask() const { return isImmTy(ImmTyDppBankMask); }
  bool isRowMask() const { return isImmTy(ImmTyDppRowMask); }
  bool isBoundCtrl() const { return isImmTy(ImmTyDppBoundCtrl); }
  bool isSDWADstSel() const { return isImmTy(ImmTySdwaDstSel); }
  bool isSDWASrc0Sel() const { return isImmTy(ImmTySdwaSrc0Sel); }
  bool isSDWASrc1Sel() const { return isImmTy(ImmTySdwaSrc1Sel); }
  bool isSDWADstUnused() const { return isImmTy(ImmTySdwaDstUnused); }
  bool isInterpSlot() const { return isImmTy(ImmTyInterpSlot); }
  bool isInterpAttr() const { return isImmTy(ImmTyInterpAttr); }
  bool isAttrChan() const { return isImmTy(ImmTyAttrChan); }
  bool isOpSel() const { return isImmTy(ImmTyOpSel); }
  bool isOpSelHi() const { return isImmTy(ImmTyOpSelHi); }
  bool isNegLo() const { return isImmTy(ImmTyNegLo); }
  bool isNegHi() const { return isImmTy(ImmTyNegHi); }
  bool isHigh() const { return isImmTy(ImmTyHigh); }

  bool isMod() const {
    return isClampSI() || isOModSI();
  }

  bool isRegOrImm() const {
    return isReg() || isImm();
  }

  bool isRegClass(unsigned RCID) const;

  bool isRegOrInlineNoMods(unsigned RCID, MVT type) const {
    return (isRegClass(RCID) || isInlinableImm(type)) && !hasModifiers();
  }

  bool isSCSrcB16() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i16);
  }

  bool isSCSrcV2B16() const {
    return isSCSrcB16();
  }

  bool isSCSrcB32() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i32);
  }

  bool isSCSrcB64() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::i64);
  }

  bool isSCSrcF16() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f16);
  }

  bool isSCSrcV2F16() const {
    return isSCSrcF16();
  }

  bool isSCSrcF32() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f32);
  }

  bool isSCSrcF64() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::f64);
  }

  bool isSSrcB32() const {
    return isSCSrcB32() || isLiteralImm(MVT::i32) || isExpr();
  }

  bool isSSrcB16() const {
    return isSCSrcB16() || isLiteralImm(MVT::i16);
  }

  bool isSSrcV2B16() const {
    llvm_unreachable("cannot happen");
    return isSSrcB16();
  }

  bool isSSrcB64() const {
    // TODO: Find out how SALU supports extension of 32-bit literals to 64 bits.
    // See isVSrc64().
    return isSCSrcB64() || isLiteralImm(MVT::i64);
  }

  bool isSSrcF32() const {
    return isSCSrcB32() || isLiteralImm(MVT::f32) || isExpr();
  }

  bool isSSrcF64() const {
    return isSCSrcB64() || isLiteralImm(MVT::f64);
  }

  bool isSSrcF16() const {
    return isSCSrcB16() || isLiteralImm(MVT::f16);
  }

  bool isSSrcV2F16() const {
    llvm_unreachable("cannot happen");
    return isSSrcF16();
  }

  bool isVCSrcB32() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i32);
  }

  bool isVCSrcB64() const {
    return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::i64);
  }

  bool isVCSrcB16() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i16);
  }

  bool isVCSrcV2B16() const {
    return isVCSrcB16();
  }

  bool isVCSrcF32() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f32);
  }

  bool isVCSrcF64() const {
    return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::f64);
  }

  bool isVCSrcF16() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f16);
  }

  bool isVCSrcV2F16() const {
    return isVCSrcF16();
  }

  bool isVSrcB32() const {
    return isVCSrcF32() || isLiteralImm(MVT::i32);
  }

  bool isVSrcB64() const {
    return isVCSrcF64() || isLiteralImm(MVT::i64);
  }

  bool isVSrcB16() const {
    return isVCSrcF16() || isLiteralImm(MVT::i16);
  }

  bool isVSrcV2B16() const {
    llvm_unreachable("cannot happen");
    return isVSrcB16();
  }

  bool isVSrcF32() const {
    return isVCSrcF32() || isLiteralImm(MVT::f32);
  }

  bool isVSrcF64() const {
    return isVCSrcF64() || isLiteralImm(MVT::f64);
  }

  bool isVSrcF16() const {
    return isVCSrcF16() || isLiteralImm(MVT::f16);
  }

  bool isVSrcV2F16() const {
    llvm_unreachable("cannot happen");
    return isVSrcF16();
  }

  bool isKImmFP32() const {
    return isLiteralImm(MVT::f32);
  }

  bool isKImmFP16() const {
    return isLiteralImm(MVT::f16);
  }

  bool isMem() const override {
    return false;
  }

  bool isExpr() const {
    return Kind == Expression;
  }

  bool isSoppBrTarget() const {
    return isExpr() || isImm();
  }

  bool isSWaitCnt() const;
  bool isHwreg() const;
  bool isSendMsg() const;
  bool isSwizzle() const;
  bool isSMRDOffset8() const;
  bool isSMRDOffset20() const;
  bool isSMRDLiteralOffset() const;
  bool isDPPCtrl() const;
  bool isGPRIdxMode() const;
  bool isS16Imm() const;
  bool isU16Imm() const;

  StringRef getExpressionAsToken() const {
    assert(isExpr());
    const MCSymbolRefExpr *S = cast<MCSymbolRefExpr>(Expr);
    return S->getSymbol().getName();
  }

  StringRef getToken() const {
    assert(isToken());

    if (Kind == Expression)
      return getExpressionAsToken();

    return StringRef(Tok.Data, Tok.Length);
  }

  int64_t getImm() const {
    assert(isImm());
    return Imm.Val;
  }

  ImmTy getImmTy() const {
    assert(isImm());
    return Imm.Type;
  }

  unsigned getReg() const override {
    return Reg.RegNo;
  }

  SMLoc getStartLoc() const override {
    return StartLoc;
  }

  SMLoc getEndLoc() const override {
    return EndLoc;
  }

  Modifiers getModifiers() const {
    assert(isRegKind() || isImmTy(ImmTyNone));
    return isRegKind() ? Reg.Mods : Imm.Mods;
  }

  void setModifiers(Modifiers Mods) {
    assert(isRegKind() || isImmTy(ImmTyNone));
    if (isRegKind())
      Reg.Mods = Mods;
    else
      Imm.Mods = Mods;
  }

  bool hasModifiers() const {
    return getModifiers().hasModifiers();
  }

  bool hasFPModifiers() const {
    return getModifiers().hasFPModifiers();
  }

  bool hasIntModifiers() const {
    return getModifiers().hasIntModifiers();
  }

  uint64_t applyInputFPModifiers(uint64_t Val, unsigned Size) const;

  void addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers = true) const;

  void addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const;

  template <unsigned Bitwidth>
  void addKImmFPOperands(MCInst &Inst, unsigned N) const;

  void addKImmFP16Operands(MCInst &Inst, unsigned N) const {
    addKImmFPOperands<16>(Inst, N);
  }

  void addKImmFP32Operands(MCInst &Inst, unsigned N) const {
    addKImmFPOperands<32>(Inst, N);
  }

  void addRegOperands(MCInst &Inst, unsigned N) const;

  void addRegOrImmOperands(MCInst &Inst, unsigned N) const {
    if (isRegKind())
      addRegOperands(Inst, N);
    else if (isExpr())
      Inst.addOperand(MCOperand::createExpr(Expr));
    else
      addImmOperands(Inst, N);
  }

  void addRegOrImmWithInputModsOperands(MCInst &Inst, unsigned N) const {
    Modifiers Mods = getModifiers();
    Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
    if (isRegKind()) {
      addRegOperands(Inst, N);
    } else {
      addImmOperands(Inst, N, false);
    }
  }

  void addRegOrImmWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasIntModifiers());
    addRegOrImmWithInputModsOperands(Inst, N);
  }

  void addRegOrImmWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasFPModifiers());
    addRegOrImmWithInputModsOperands(Inst, N);
  }

  void addRegWithInputModsOperands(MCInst &Inst, unsigned N) const {
    Modifiers Mods = getModifiers();
    Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
    assert(isRegKind());
    addRegOperands(Inst, N);
  }

  void addRegWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasIntModifiers());
    addRegWithInputModsOperands(Inst, N);
  }

  void addRegWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasFPModifiers());
    addRegWithInputModsOperands(Inst, N);
  }

  void addSoppBrTargetOperands(MCInst &Inst, unsigned N) const {
    if (isImm())
      addImmOperands(Inst, N);
    else {
      assert(isExpr());
      Inst.addOperand(MCOperand::createExpr(Expr));
    }
  }

  static void printImmTy(raw_ostream& OS, ImmTy Type) {
    switch (Type) {
    case ImmTyNone: OS << "None"; break;
    case ImmTyGDS: OS << "GDS"; break;
    case ImmTyOffen: OS << "Offen"; break;
    case ImmTyIdxen: OS << "Idxen"; break;
    case ImmTyAddr64: OS << "Addr64"; break;
    case ImmTyOffset: OS << "Offset"; break;
    case ImmTyOffset0: OS << "Offset0"; break;
    case ImmTyOffset1: OS << "Offset1"; break;
    case ImmTyGLC: OS << "GLC"; break;
    case ImmTySLC: OS << "SLC"; break;
    case ImmTyTFE: OS << "TFE"; break;
    case ImmTyDFMT: OS << "DFMT"; break;
    case ImmTyNFMT: OS << "NFMT"; break;
    case ImmTyClampSI: OS << "ClampSI"; break;
    case ImmTyOModSI: OS << "OModSI"; break;
    case ImmTyDppCtrl: OS << "DppCtrl"; break;
    case ImmTyDppRowMask: OS << "DppRowMask"; break;
    case ImmTyDppBankMask: OS << "DppBankMask"; break;
    case ImmTyDppBoundCtrl: OS << "DppBoundCtrl"; break;
    case ImmTySdwaDstSel: OS << "SdwaDstSel"; break;
    case ImmTySdwaSrc0Sel: OS << "SdwaSrc0Sel"; break;
    case ImmTySdwaSrc1Sel: OS << "SdwaSrc1Sel"; break;
    case ImmTySdwaDstUnused: OS << "SdwaDstUnused"; break;
    case ImmTyDMask: OS << "DMask"; break;
    case ImmTyUNorm: OS << "UNorm"; break;
    case ImmTyDA: OS << "DA"; break;
    case ImmTyR128: OS << "R128"; break;
    case ImmTyLWE: OS << "LWE"; break;
    case ImmTyOff: OS << "Off"; break;
    case ImmTyExpTgt: OS << "ExpTgt"; break;
    case ImmTyExpCompr: OS << "ExpCompr"; break;
    case ImmTyExpVM: OS << "ExpVM"; break;
    case ImmTyHwreg: OS << "Hwreg"; break;
    case ImmTySendMsg: OS << "SendMsg"; break;
    case ImmTyInterpSlot: OS << "InterpSlot"; break;
    case ImmTyInterpAttr: OS << "InterpAttr"; break;
    case ImmTyAttrChan: OS << "AttrChan"; break;
    case ImmTyOpSel: OS << "OpSel"; break;
    case ImmTyOpSelHi: OS << "OpSelHi"; break;
    case ImmTyNegLo: OS << "NegLo"; break;
    case ImmTyNegHi: OS << "NegHi"; break;
    case ImmTySwizzle: OS << "Swizzle"; break;
    case ImmTyHigh: OS << "High"; break;
    }
  }

  void print(raw_ostream &OS) const override {
    switch (Kind) {
    case Register:
      OS << "<register " << getReg() << " mods: " << Reg.Mods << '>';
      break;
    case Immediate:
      OS << '<' << getImm();
      if (getImmTy() != ImmTyNone) {
        OS << " type: "; printImmTy(OS, getImmTy());
      }
      OS << " mods: " << Imm.Mods << '>';
      break;
    case Token:
      OS << '\'' << getToken() << '\'';
      break;
    case Expression:
      OS << "<expr " << *Expr << '>';
      break;
    }
  }

  static AMDGPUOperand::Ptr CreateImm(const AMDGPUAsmParser *AsmParser,
                                      int64_t Val, SMLoc Loc,
                                      ImmTy Type = ImmTyNone,
                                      bool IsFPImm = false) {
    auto Op = llvm::make_unique<AMDGPUOperand>(Immediate, AsmParser);
    Op->Imm.Val = Val;
    Op->Imm.IsFPImm = IsFPImm;
    Op->Imm.Type = Type;
    Op->Imm.Mods = Modifiers();
    Op->StartLoc = Loc;
    Op->EndLoc = Loc;
    return Op;
  }

  static AMDGPUOperand::Ptr CreateToken(const AMDGPUAsmParser *AsmParser,
                                        StringRef Str, SMLoc Loc,
                                        bool HasExplicitEncodingSize = true) {
    auto Res = llvm::make_unique<AMDGPUOperand>(Token, AsmParser);
    Res->Tok.Data = Str.data();
    Res->Tok.Length = Str.size();
    Res->StartLoc = Loc;
    Res->EndLoc = Loc;
    return Res;
  }

  static AMDGPUOperand::Ptr CreateReg(const AMDGPUAsmParser *AsmParser,
                                      unsigned RegNo, SMLoc S,
                                      SMLoc E,
                                      bool ForceVOP3) {
    auto Op = llvm::make_unique<AMDGPUOperand>(Register, AsmParser);
    Op->Reg.RegNo = RegNo;
    Op->Reg.Mods = Modifiers();
    Op->Reg.IsForcedVOP3 = ForceVOP3;
    Op->StartLoc = S;
    Op->EndLoc = E;
    return Op;
  }

  static AMDGPUOperand::Ptr CreateExpr(const AMDGPUAsmParser *AsmParser,
                                       const class MCExpr *Expr, SMLoc S) {
    auto Op = llvm::make_unique<AMDGPUOperand>(Expression, AsmParser);
    Op->Expr = Expr;
    Op->StartLoc = S;
    Op->EndLoc = S;
    return Op;
  }
};

raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods) {
  OS << "abs:" << Mods.Abs << " neg:" << Mods.Neg << " sext:" << Mods.Sext;
  return OS;
}

//===----------------------------------------------------------------------===//
// AsmParser
//===----------------------------------------------------------------------===//

// Holds info related to the current kernel, e.g. count of SGPRs used.
// Kernel scope begins at .amdgpu_hsa_kernel directive, ends at next
// .amdgpu_hsa_kernel or at EOF.
class KernelScopeInfo {
  int SgprIndexUnusedMin = -1;
  int VgprIndexUnusedMin = -1;
  MCContext *Ctx = nullptr;

  void usesSgprAt(int i) {
    if (i >= SgprIndexUnusedMin) {
      SgprIndexUnusedMin = ++i;
      if (Ctx) {
        MCSymbol * const Sym = Ctx->getOrCreateSymbol(Twine(".kernel.sgpr_count"));
        Sym->setVariableValue(MCConstantExpr::create(SgprIndexUnusedMin, *Ctx));
      }
    }
  }

  void usesVgprAt(int i) {
    if (i >= VgprIndexUnusedMin) {
      VgprIndexUnusedMin = ++i;
      if (Ctx) {
        MCSymbol * const Sym = Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count"));
        Sym->setVariableValue(MCConstantExpr::create(VgprIndexUnusedMin, *Ctx));
      }
    }
  }

public:
  KernelScopeInfo() = default;

  void initialize(MCContext &Context) {
    Ctx = &Context;
    usesSgprAt(SgprIndexUnusedMin = -1);
    usesVgprAt(VgprIndexUnusedMin = -1);
  }

  void usesRegister(RegisterKind RegKind, unsigned DwordRegIndex, unsigned RegWidth) {
    switch (RegKind) {
      case IS_SGPR: usesSgprAt(DwordRegIndex + RegWidth - 1); break;
      case IS_VGPR: usesVgprAt(DwordRegIndex + RegWidth - 1); break;
      default: break;
    }
  }
};
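
// Illustrative example (assumed assembly input) of the symbols this class
// maintains: after
//   .amdgpu_hsa_kernel my_kernel
//   ; ...code using s0..s3 and v0...
// .kernel.sgpr_count evaluates to 4 and .kernel.vgpr_count to 1, until the
// next .amdgpu_hsa_kernel directive (or EOF) starts a fresh scope.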

class AMDGPUAsmParser : public MCTargetAsmParser {
  const MCInstrInfo &MII;
  MCAsmParser &Parser;

  unsigned ForcedEncodingSize = 0;
  bool ForcedDPP = false;
  bool ForcedSDWA = false;
  KernelScopeInfo KernelScope;

  /// @name Auto-generated Match Functions
  /// {

#define GET_ASSEMBLER_HEADER
#include "AMDGPUGenAsmMatcher.inc"

  /// }

private:
  bool ParseAsAbsoluteExpression(uint32_t &Ret);
  bool ParseDirectiveMajorMinor(uint32_t &Major, uint32_t &Minor);
  bool ParseDirectiveHSACodeObjectVersion();
  bool ParseDirectiveHSACodeObjectISA();
  bool ParseDirectiveCodeObjectMetadata();
  bool ParseAMDKernelCodeTValue(StringRef ID, amd_kernel_code_t &Header);
  bool ParseDirectiveAMDKernelCodeT();
  bool subtargetHasRegister(const MCRegisterInfo &MRI, unsigned RegNo) const;
  bool ParseDirectiveAMDGPUHsaKernel();
  bool AddNextRegisterToList(unsigned& Reg, unsigned& RegWidth,
                             RegisterKind RegKind, unsigned Reg1,
                             unsigned RegNum);
  bool ParseAMDGPURegister(RegisterKind& RegKind, unsigned& Reg,
                           unsigned& RegNum, unsigned& RegWidth,
                           unsigned *DwordRegIndex);
  void cvtMubufImpl(MCInst &Inst, const OperandVector &Operands,
                    bool IsAtomic, bool IsAtomicReturn);
  void cvtDSImpl(MCInst &Inst, const OperandVector &Operands,
                 bool IsGdsHardcoded);

public:
  enum AMDGPUMatchResultTy {
    Match_PreferE32 = FIRST_TARGET_MATCH_RESULT_TY
  };

  using OptionalImmIndexMap = std::map<AMDGPUOperand::ImmTy, unsigned>;

  AMDGPUAsmParser(const MCSubtargetInfo &STI, MCAsmParser &_Parser,
               const MCInstrInfo &MII,
               const MCTargetOptions &Options)
      : MCTargetAsmParser(Options, STI), MII(MII), Parser(_Parser) {
    MCAsmParserExtension::Initialize(Parser);

    if (getFeatureBits().none()) {
      // Set default features.
      copySTI().ToggleFeature("SOUTHERN_ISLANDS");
    }

    setAvailableFeatures(ComputeAvailableFeatures(getFeatureBits()));

    {
      // TODO: make those pre-defined variables read-only.
      // Currently there is no suitable machinery in core llvm-mc for this.
      // MCSymbol::isRedefinable is intended for another purpose, and
      // AsmParser::parseDirectiveSet() cannot be specialized for a specific target.
      AMDGPU::IsaInfo::IsaVersion ISA =
          AMDGPU::IsaInfo::getIsaVersion(getFeatureBits());
      MCContext &Ctx = getContext();
      MCSymbol *Sym =
          Ctx.getOrCreateSymbol(Twine(".option.machine_version_major"));
      Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
      Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_minor"));
      Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
      Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_stepping"));
      Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
    }
    KernelScope.initialize(getContext());
  }
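
  // The pre-defined version symbols set above can then be used from assembly
  // source, e.g. (illustrative):
  //   .if .option.machine_version_major >= 8
  //     ; VI-specific code
  //   .endif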

  bool isSI() const {
    return AMDGPU::isSI(getSTI());
  }

  bool isCI() const {
    return AMDGPU::isCI(getSTI());
  }

  bool isVI() const {
    return AMDGPU::isVI(getSTI());
  }

  bool isGFX9() const {
    return AMDGPU::isGFX9(getSTI());
  }

  bool hasInv2PiInlineImm() const {
    return getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm];
  }

  bool hasFlatOffsets() const {
    return getFeatureBits()[AMDGPU::FeatureFlatInstOffsets];
  }

  bool hasSGPR102_SGPR103() const {
    return !isVI();
  }

  bool hasIntClamp() const {
    return getFeatureBits()[AMDGPU::FeatureIntClamp];
  }

  AMDGPUTargetStreamer &getTargetStreamer() {
    MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer();
    return static_cast<AMDGPUTargetStreamer &>(TS);
  }

  const MCRegisterInfo *getMRI() const {
    // We need this const_cast because for some reason getContext() is not const
    // in MCAsmParser.
    return const_cast<AMDGPUAsmParser*>(this)->getContext().getRegisterInfo();
  }

  const MCInstrInfo *getMII() const {
    return &MII;
  }

  const FeatureBitset &getFeatureBits() const {
    return getSTI().getFeatureBits();
  }

  void setForcedEncodingSize(unsigned Size) { ForcedEncodingSize = Size; }
  void setForcedDPP(bool ForceDPP_) { ForcedDPP = ForceDPP_; }
  void setForcedSDWA(bool ForceSDWA_) { ForcedSDWA = ForceSDWA_; }

  unsigned getForcedEncodingSize() const { return ForcedEncodingSize; }
  bool isForcedVOP3() const { return ForcedEncodingSize == 64; }
  bool isForcedDPP() const { return ForcedDPP; }
  bool isForcedSDWA() const { return ForcedSDWA; }
  ArrayRef<unsigned> getMatchedVariants() const;

  std::unique_ptr<AMDGPUOperand> parseRegister();
  bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override;
  unsigned checkTargetMatchPredicate(MCInst &Inst) override;
  unsigned validateTargetOperandClass(MCParsedAsmOperand &Op,
                                      unsigned Kind) override;
  bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
                               OperandVector &Operands, MCStreamer &Out,
                               uint64_t &ErrorInfo,
                               bool MatchingInlineAsm) override;
  bool ParseDirective(AsmToken DirectiveID) override;
  OperandMatchResultTy parseOperand(OperandVector &Operands, StringRef Mnemonic);
  StringRef parseMnemonicSuffix(StringRef Name);
  bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
                        SMLoc NameLoc, OperandVector &Operands) override;
  //bool ProcessInstruction(MCInst &Inst);

  OperandMatchResultTy parseIntWithPrefix(const char *Prefix, int64_t &Int);

  OperandMatchResultTy
  parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
                     AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
                     bool (*ConvertResult)(int64_t &) = nullptr);

  OperandMatchResultTy parseOperandArrayWithPrefix(
    const char *Prefix,
    OperandVector &Operands,
    AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
    bool (*ConvertResult)(int64_t&) = nullptr);

  OperandMatchResultTy
  parseNamedBit(const char *Name, OperandVector &Operands,
                AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone);
  OperandMatchResultTy parseStringWithPrefix(StringRef Prefix,
                                             StringRef &Value);

  bool parseAbsoluteExpr(int64_t &Val, bool AbsMod = false);
  OperandMatchResultTy parseImm(OperandVector &Operands, bool AbsMod = false);
  OperandMatchResultTy parseReg(OperandVector &Operands);
  OperandMatchResultTy parseRegOrImm(OperandVector &Operands, bool AbsMod = false);
  OperandMatchResultTy parseRegOrImmWithFPInputMods(OperandVector &Operands, bool AllowImm = true);
  OperandMatchResultTy parseRegOrImmWithIntInputMods(OperandVector &Operands, bool AllowImm = true);
  OperandMatchResultTy parseRegWithFPInputMods(OperandVector &Operands);
  OperandMatchResultTy parseRegWithIntInputMods(OperandVector &Operands);
  OperandMatchResultTy parseVReg32OrOff(OperandVector &Operands);

  void cvtDSOffset01(MCInst &Inst, const OperandVector &Operands);
  void cvtDS(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, false); }
  void cvtDSGds(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, true); }
  void cvtExp(MCInst &Inst, const OperandVector &Operands);

  bool parseCnt(int64_t &IntVal);
  OperandMatchResultTy parseSWaitCntOps(OperandVector &Operands);
  OperandMatchResultTy parseHwreg(OperandVector &Operands);

private:
  struct OperandInfoTy {
    int64_t Id;
    bool IsSymbolic = false;

    OperandInfoTy(int64_t Id_) : Id(Id_) {}
  };

  bool parseSendMsgConstruct(OperandInfoTy &Msg, OperandInfoTy &Operation, int64_t &StreamId);
  bool parseHwregConstruct(OperandInfoTy &HwReg, int64_t &Offset, int64_t &Width);

  void errorExpTgt();
  OperandMatchResultTy parseExpTgtImpl(StringRef Str, uint8_t &Val);

  bool validateInstruction(const MCInst &Inst, const SMLoc &IDLoc);
  bool validateConstantBusLimitations(const MCInst &Inst);
  bool validateEarlyClobberLimitations(const MCInst &Inst);
  bool validateIntClampSupported(const MCInst &Inst);
  bool usesConstantBus(const MCInst &Inst, unsigned OpIdx);
  bool isInlineConstant(const MCInst &Inst, unsigned OpIdx) const;
  unsigned findImplicitSGPRReadInVOP(const MCInst &Inst) const;

  bool trySkipId(const StringRef Id);
  bool trySkipToken(const AsmToken::TokenKind Kind);
  bool skipToken(const AsmToken::TokenKind Kind, const StringRef ErrMsg);
  bool parseString(StringRef &Val, const StringRef ErrMsg = "expected a string");
  bool parseExpr(int64_t &Imm);

public:
  OperandMatchResultTy parseOptionalOperand(OperandVector &Operands);

  OperandMatchResultTy parseExpTgt(OperandVector &Operands);
  OperandMatchResultTy parseSendMsgOp(OperandVector &Operands);
  OperandMatchResultTy parseInterpSlot(OperandVector &Operands);
  OperandMatchResultTy parseInterpAttr(OperandVector &Operands);
  OperandMatchResultTy parseSOppBrTarget(OperandVector &Operands);

  bool parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
                            const unsigned MinVal,
                            const unsigned MaxVal,
                            const StringRef ErrMsg);
  OperandMatchResultTy parseSwizzleOp(OperandVector &Operands);
  bool parseSwizzleOffset(int64_t &Imm);
  bool parseSwizzleMacro(int64_t &Imm);
  bool parseSwizzleQuadPerm(int64_t &Imm);
  bool parseSwizzleBitmaskPerm(int64_t &Imm);
  bool parseSwizzleBroadcast(int64_t &Imm);
  bool parseSwizzleSwap(int64_t &Imm);
  bool parseSwizzleReverse(int64_t &Imm);

  void cvtMubuf(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, false); }
  void cvtMubufAtomic(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true, false); }
  void cvtMubufAtomicReturn(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true, true); }
  void cvtMtbuf(MCInst &Inst, const OperandVector &Operands);

  AMDGPUOperand::Ptr defaultGLC() const;
  AMDGPUOperand::Ptr defaultSLC() const;
  AMDGPUOperand::Ptr defaultTFE() const;

  AMDGPUOperand::Ptr defaultDMask() const;
  AMDGPUOperand::Ptr defaultUNorm() const;
  AMDGPUOperand::Ptr defaultDA() const;
  AMDGPUOperand::Ptr defaultR128() const;
  AMDGPUOperand::Ptr defaultLWE() const;
  AMDGPUOperand::Ptr defaultSMRDOffset8() const;
  AMDGPUOperand::Ptr defaultSMRDOffset20() const;
  AMDGPUOperand::Ptr defaultSMRDLiteralOffset() const;
  AMDGPUOperand::Ptr defaultOffsetU12() const;
  AMDGPUOperand::Ptr defaultOffsetS13() const;

  OperandMatchResultTy parseOModOperand(OperandVector &Operands);

  void cvtVOP3(MCInst &Inst, const OperandVector &Operands,
               OptionalImmIndexMap &OptionalIdx);
  void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands);
  void cvtVOP3(MCInst &Inst, const OperandVector &Operands);
  void cvtVOP3P(MCInst &Inst, const OperandVector &Operands);

  void cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands);

  void cvtMIMG(MCInst &Inst, const OperandVector &Operands,
               bool IsAtomic = false);
  void cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands);

  OperandMatchResultTy parseDPPCtrl(OperandVector &Operands);
  AMDGPUOperand::Ptr defaultRowMask() const;
  AMDGPUOperand::Ptr defaultBankMask() const;
  AMDGPUOperand::Ptr defaultBoundCtrl() const;
  void cvtDPP(MCInst &Inst, const OperandVector &Operands);

  OperandMatchResultTy parseSDWASel(OperandVector &Operands, StringRef Prefix,
                                    AMDGPUOperand::ImmTy Type);
  OperandMatchResultTy parseSDWADstUnused(OperandVector &Operands);
  void cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands);
  void cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands);
  void cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands);
  void cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands);
  void cvtSDWA(MCInst &Inst, const OperandVector &Operands,
                uint64_t BasicInstType, bool skipVcc = false);
};

struct OptionalOperand {
  const char *Name;
  AMDGPUOperand::ImmTy Type;
  bool IsBit;
  bool (*ConvertResult)(int64_t&);
};

} // end anonymous namespace

// May be called with an integer type of equivalent bitwidth.
static const fltSemantics *getFltSemantics(unsigned Size) {
  switch (Size) {
  case 4:
    return &APFloat::IEEEsingle();
  case 8:
    return &APFloat::IEEEdouble();
  case 2:
    return &APFloat::IEEEhalf();
  default:
    llvm_unreachable("unsupported fp type");
  }
}

static const fltSemantics *getFltSemantics(MVT VT) {
  return getFltSemantics(VT.getSizeInBits() / 8);
}

static const fltSemantics *getOpFltSemantics(uint8_t OperandType) {
  switch (OperandType) {
  case AMDGPU::OPERAND_REG_IMM_INT32:
  case AMDGPU::OPERAND_REG_IMM_FP32:
  case AMDGPU::OPERAND_REG_INLINE_C_INT32:
  case AMDGPU::OPERAND_REG_INLINE_C_FP32:
    return &APFloat::IEEEsingle();
  case AMDGPU::OPERAND_REG_IMM_INT64:
  case AMDGPU::OPERAND_REG_IMM_FP64:
  case AMDGPU::OPERAND_REG_INLINE_C_INT64:
  case AMDGPU::OPERAND_REG_INLINE_C_FP64:
    return &APFloat::IEEEdouble();
  case AMDGPU::OPERAND_REG_IMM_INT16:
  case AMDGPU::OPERAND_REG_IMM_FP16:
  case AMDGPU::OPERAND_REG_INLINE_C_INT16:
  case AMDGPU::OPERAND_REG_INLINE_C_FP16:
  case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
  case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
    return &APFloat::IEEEhalf();
  default:
    llvm_unreachable("unsupported fp type");
  }
}

//===----------------------------------------------------------------------===//
// Operand
//===----------------------------------------------------------------------===//

static bool canLosslesslyConvertToFPType(APFloat &FPLiteral, MVT VT) {
  bool Lost;

  // Convert literal to the given floating-point semantics.
  APFloat::opStatus Status = FPLiteral.convert(*getFltSemantics(VT),
                                               APFloat::rmNearestTiesToEven,
                                               &Lost);
  // We allow precision loss but not overflow or underflow.
  if (Status != APFloat::opOK &&
      Lost &&
      ((Status & APFloat::opOverflow)  != 0 ||
       (Status & APFloat::opUnderflow) != 0)) {
    return false;
  }

  return true;
}
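
// Illustrative behavior for f16: 1.0 converts exactly (opOK); 0.1 converts
// with opInexact, which is accepted; 1.0e10 overflows the f16 range and is
// rejected.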

bool AMDGPUOperand::isInlinableImm(MVT type) const {
  if (!isImmTy(ImmTyNone)) {
    // Only plain immediates are inlinable (e.g. "clamp" attribute is not)
    return false;
  }
  // TODO: We should avoid using host float here. It would be better to
  // check the float bit values which is what a few other places do.
  // We've had bot failures before due to weird NaN support on mips hosts.

  APInt Literal(64, Imm.Val);

  if (Imm.IsFPImm) { // We got fp literal token
    if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
      return AMDGPU::isInlinableLiteral64(Imm.Val,
                                          AsmParser->hasInv2PiInlineImm());
    }

    APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
    if (!canLosslesslyConvertToFPType(FPLiteral, type))
      return false;

    if (type.getScalarSizeInBits() == 16) {
      return AMDGPU::isInlinableLiteral16(
        static_cast<int16_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
        AsmParser->hasInv2PiInlineImm());
    }

    // Check if single precision literal is inlinable
    return AMDGPU::isInlinableLiteral32(
      static_cast<int32_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
      AsmParser->hasInv2PiInlineImm());
  }

  // We got int literal token.
  if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
    return AMDGPU::isInlinableLiteral64(Imm.Val,
                                        AsmParser->hasInv2PiInlineImm());
  }

  if (type.getScalarSizeInBits() == 16) {
    return AMDGPU::isInlinableLiteral16(
      static_cast<int16_t>(Literal.getLoBits(16).getSExtValue()),
      AsmParser->hasInv2PiInlineImm());
  }

  return AMDGPU::isInlinableLiteral32(
    static_cast<int32_t>(Literal.getLoBits(32).getZExtValue()),
    AsmParser->hasInv2PiInlineImm());
}
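
// Illustrative: in "v_add_f32 v0, 0.5, v1" the literal 0.5 is an inline
// constant, so isInlinableImm(MVT::f32) is true; 1.1 has no inline encoding
// and would have to be emitted as a 32-bit literal instead.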

bool AMDGPUOperand::isLiteralImm(MVT type) const {
  // Check that this immediate can be added as literal
  if (!isImmTy(ImmTyNone)) {
    return false;
  }

  if (!Imm.IsFPImm) {
    // We got int literal token.

    if (type == MVT::f64 && hasFPModifiers()) {
      // Cannot apply fp modifiers to int literals preserving the same semantics
      // for VOP1/2/C and VOP3 because of integer truncation. To avoid ambiguity,
      // disable these cases.
      return false;
    }

    unsigned Size = type.getSizeInBits();
    if (Size == 64)
      Size = 32;

    // FIXME: 64-bit operands can zero extend, sign extend, or pad zeroes for FP
    // types.
    return isUIntN(Size, Imm.Val) || isIntN(Size, Imm.Val);
  }

  // We got fp literal token
  if (type == MVT::f64) { // Expected 64-bit fp operand
    // The low 32 bits of the literal would be set to zeroes, but we accept
    // such literals.
    return true;
  }

  if (type == MVT::i64) { // Expected 64-bit int operand
    // We don't allow fp literals in 64-bit integer instructions. It is
    // unclear how we should encode them.
    return false;
  }

  APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
  return canLosslesslyConvertToFPType(FPLiteral, type);
}

bool AMDGPUOperand::isRegClass(unsigned RCID) const {
  return isRegKind() && AsmParser->getMRI()->getRegClass(RCID).contains(getReg());
}

bool AMDGPUOperand::isSDWARegKind() const {
  if (AsmParser->isVI())
    return isVReg();
  else if (AsmParser->isGFX9())
    return isRegKind();
  else
    return false;
}

uint64_t AMDGPUOperand::applyInputFPModifiers(uint64_t Val, unsigned Size) const
{
  assert(isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
  assert(Size == 2 || Size == 4 || Size == 8);

  const uint64_t FpSignMask = (1ULL << (Size * 8 - 1));

  if (Imm.Mods.Abs) {
    Val &= ~FpSignMask;
  }
  if (Imm.Mods.Neg) {
    Val ^= FpSignMask;
  }

  return Val;
}
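
// Example (illustrative) for Size == 4: FpSignMask is 0x80000000, so with
// Abs set the bits of -1.0f (0xBF800000) become 1.0f (0x3F800000), and with
// Neg set the sign bit is flipped back.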

void AMDGPUOperand::addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers) const {
  if (AMDGPU::isSISrcOperand(AsmParser->getMII()->get(Inst.getOpcode()),
                             Inst.getNumOperands())) {
    addLiteralImmOperand(Inst, Imm.Val,
                         ApplyModifiers &&
                         isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
  } else {
    assert(!isImmTy(ImmTyNone) || !hasModifiers());
    Inst.addOperand(MCOperand::createImm(Imm.Val));
  }
}

void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const {
  const auto& InstDesc = AsmParser->getMII()->get(Inst.getOpcode());
  auto OpNum = Inst.getNumOperands();
  // Check that this operand accepts literals
  assert(AMDGPU::isSISrcOperand(InstDesc, OpNum));

  if (ApplyModifiers) {
    assert(AMDGPU::isSISrcFPOperand(InstDesc, OpNum));
    const unsigned Size = Imm.IsFPImm ? sizeof(double) : getOperandSize(InstDesc, OpNum);
    Val = applyInputFPModifiers(Val, Size);
  }

  APInt Literal(64, Val);
  uint8_t OpTy = InstDesc.OpInfo[OpNum].OperandType;

  if (Imm.IsFPImm) { // We got fp literal token
    switch (OpTy) {
    case AMDGPU::OPERAND_REG_IMM_INT64:
    case AMDGPU::OPERAND_REG_IMM_FP64:
    case AMDGPU::OPERAND_REG_INLINE_C_INT64:
    case AMDGPU::OPERAND_REG_INLINE_C_FP64:
      if (AMDGPU::isInlinableLiteral64(Literal.getZExtValue(),
                                       AsmParser->hasInv2PiInlineImm())) {
        Inst.addOperand(MCOperand::createImm(Literal.getZExtValue()));
        return;
      }

      // Non-inlineable
      if (AMDGPU::isSISrcFPOperand(InstDesc, OpNum)) { // Expected 64-bit fp operand
        // For fp operands we check if low 32 bits are zeros
        if (Literal.getLoBits(32) != 0) {
          const_cast<AMDGPUAsmParser *>(AsmParser)->Warning(Inst.getLoc(),
          "Can't encode literal as exact 64-bit floating-point operand. "
          "Low 32-bits will be set to zero");
        }

        Inst.addOperand(MCOperand::createImm(Literal.lshr(32).getZExtValue()));
        return;
      }

      // We don't allow fp literals in 64-bit integer instructions. It is
      // unclear how we should encode them. This case should be checked earlier
      // in predicate methods (isLiteralImm())
      llvm_unreachable("fp literal in 64-bit integer instruction.");

    case AMDGPU::OPERAND_REG_IMM_INT32:
    case AMDGPU::OPERAND_REG_IMM_FP32:
    case AMDGPU::OPERAND_REG_INLINE_C_INT32:
    case AMDGPU::OPERAND_REG_INLINE_C_FP32:
    case AMDGPU::OPERAND_REG_IMM_INT16:
    case AMDGPU::OPERAND_REG_IMM_FP16:
    case AMDGPU::OPERAND_REG_INLINE_C_INT16:
    case AMDGPU::OPERAND_REG_INLINE_C_FP16:
    case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
    case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: {
      bool lost;
      APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
      // Convert literal to the operand's floating-point semantics.
      FPLiteral.convert(*getOpFltSemantics(OpTy),
                        APFloat::rmNearestTiesToEven, &lost);
      // We allow precision loss but not overflow or underflow. This should be
      // checked earlier in isLiteralImm()

      uint64_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue();
      if (OpTy == AMDGPU::OPERAND_REG_INLINE_C_V2INT16 ||
          OpTy == AMDGPU::OPERAND_REG_INLINE_C_V2FP16) {
        ImmVal |= (ImmVal << 16);
      }

      Inst.addOperand(MCOperand::createImm(ImmVal));
      return;
    }
    default:
      llvm_unreachable("invalid operand size");
    }

    return;
  }

  // We got int literal token.
  // Only sign extend inline immediates.
  // FIXME: No errors on truncation
  switch (OpTy) {
  case AMDGPU::OPERAND_REG_IMM_INT32:
  case AMDGPU::OPERAND_REG_IMM_FP32:
  case AMDGPU::OPERAND_REG_INLINE_C_INT32:
  case AMDGPU::OPERAND_REG_INLINE_C_FP32:
    if (isInt<32>(Val) &&
        AMDGPU::isInlinableLiteral32(static_cast<int32_t>(Val),
                                     AsmParser->hasInv2PiInlineImm())) {
      Inst.addOperand(MCOperand::createImm(Val));
      return;
    }

    Inst.addOperand(MCOperand::createImm(Val & 0xffffffff));
    return;

  case AMDGPU::OPERAND_REG_IMM_INT64:
  case AMDGPU::OPERAND_REG_IMM_FP64:
  case AMDGPU::OPERAND_REG_INLINE_C_INT64:
  case AMDGPU::OPERAND_REG_INLINE_C_FP64:
    if (AMDGPU::isInlinableLiteral64(Val, AsmParser->hasInv2PiInlineImm())) {
      Inst.addOperand(MCOperand::createImm(Val));
      return;
    }

    Inst.addOperand(MCOperand::createImm(Lo_32(Val)));
    return;

  case AMDGPU::OPERAND_REG_IMM_INT16:
  case AMDGPU::OPERAND_REG_IMM_FP16:
  case AMDGPU::OPERAND_REG_INLINE_C_INT16:
  case AMDGPU::OPERAND_REG_INLINE_C_FP16:
    if (isInt<16>(Val) &&
        AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val),
                                     AsmParser->hasInv2PiInlineImm())) {
      Inst.addOperand(MCOperand::createImm(Val));
      return;
    }

    Inst.addOperand(MCOperand::createImm(Val & 0xffff));
    return;

  case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
  case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: {
    auto LiteralVal = static_cast<uint16_t>(Literal.getLoBits(16).getZExtValue());
    assert(AMDGPU::isInlinableLiteral16(LiteralVal,
                                        AsmParser->hasInv2PiInlineImm()));

    uint32_t ImmVal = static_cast<uint32_t>(LiteralVal) << 16 |
                      static_cast<uint32_t>(LiteralVal);
    Inst.addOperand(MCOperand::createImm(ImmVal));
    return;
  }
  default:
    llvm_unreachable("invalid operand size");
  }
}
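
// Illustrative: for an OPERAND_REG_INLINE_C_V2FP16 operand, the fp literal
// 1.0 converts to the f16 pattern 0x3C00 and is replicated into both halves,
// so the immediate actually encoded is 0x3C003C00.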

template <unsigned Bitwidth>
void AMDGPUOperand::addKImmFPOperands(MCInst &Inst, unsigned N) const {
  APInt Literal(64, Imm.Val);

  if (!Imm.IsFPImm) {
    // We got int literal token.
    Inst.addOperand(MCOperand::createImm(Literal.getLoBits(Bitwidth).getZExtValue()));
    return;
  }

  bool Lost;
  APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
  FPLiteral.convert(*getFltSemantics(Bitwidth / 8),
                    APFloat::rmNearestTiesToEven, &Lost);
  Inst.addOperand(MCOperand::createImm(FPLiteral.bitcastToAPInt().getZExtValue()));
}
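
// Illustrative: for a 16-bit KImm operand, the fp literal 1.0 is converted
// to IEEE half and encoded as 0x3C00, while an integer literal is simply
// truncated to its low 16 bits.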

void AMDGPUOperand::addRegOperands(MCInst &Inst, unsigned N) const {
  Inst.addOperand(MCOperand::createReg(AMDGPU::getMCReg(getReg(), AsmParser->getSTI())));
}

//===----------------------------------------------------------------------===//
// AsmParser
//===----------------------------------------------------------------------===//

static int getRegClass(RegisterKind Is, unsigned RegWidth) {
  if (Is == IS_VGPR) {
    switch (RegWidth) {
      default: return -1;
      case 1: return AMDGPU::VGPR_32RegClassID;
      case 2: return AMDGPU::VReg_64RegClassID;
      case 3: return AMDGPU::VReg_96RegClassID;
      case 4: return AMDGPU::VReg_128RegClassID;
      case 8: return AMDGPU::VReg_256RegClassID;
      case 16: return AMDGPU::VReg_512RegClassID;
    }
  } else if (Is == IS_TTMP) {
    switch (RegWidth) {
      default: return -1;
      case 1: return AMDGPU::TTMP_32RegClassID;
      case 2: return AMDGPU::TTMP_64RegClassID;
      case 4: return AMDGPU::TTMP_128RegClassID;
    }
  } else if (Is == IS_SGPR) {
    switch (RegWidth) {
      default: return -1;
      case 1: return AMDGPU::SGPR_32RegClassID;
      case 2: return AMDGPU::SGPR_64RegClassID;
      case 4: return AMDGPU::SGPR_128RegClassID;
      case 8: return AMDGPU::SReg_256RegClassID;
      case 16: return AMDGPU::SReg_512RegClassID;
    }
  }
  return -1;
}
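
// E.g. (illustrative) getRegClass(IS_VGPR, 2) yields VReg_64RegClassID,
// matching a two-dword range such as v[4:5]; widths with no matching class
// return -1.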

static unsigned getSpecialRegForName(StringRef RegName) {
  return StringSwitch<unsigned>(RegName)
    .Case("exec", AMDGPU::EXEC)
    .Case("vcc", AMDGPU::VCC)
    .Case("flat_scratch", AMDGPU::FLAT_SCR)
    .Case("m0", AMDGPU::M0)
    .Case("scc", AMDGPU::SCC)
    .Case("tba", AMDGPU::TBA)
    .Case("tma", AMDGPU::TMA)
    .Case("flat_scratch_lo", AMDGPU::FLAT_SCR_LO)
    .Case("flat_scratch_hi", AMDGPU::FLAT_SCR_HI)
    .Case("vcc_lo", AMDGPU::VCC_LO)
    .Case("vcc_hi", AMDGPU::VCC_HI)
    .Case("exec_lo", AMDGPU::EXEC_LO)
    .Case("exec_hi", AMDGPU::EXEC_HI)
    .Case("tma_lo", AMDGPU::TMA_LO)
    .Case("tma_hi", AMDGPU::TMA_HI)
    .Case("tba_lo", AMDGPU::TBA_LO)
    .Case("tba_hi", AMDGPU::TBA_HI)
    .Default(0);
}

bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
                                    SMLoc &EndLoc) {
  auto R = parseRegister();
  if (!R) return true;
  assert(R->isReg());
  RegNo = R->getReg();
  StartLoc = R->getStartLoc();
  EndLoc = R->getEndLoc();
  return false;
}

bool AMDGPUAsmParser::AddNextRegisterToList(unsigned &Reg, unsigned &RegWidth,
                                            RegisterKind RegKind, unsigned Reg1,
                                            unsigned RegNum) {
  switch (RegKind) {
  case IS_SPECIAL:
    if (Reg == AMDGPU::EXEC_LO && Reg1 == AMDGPU::EXEC_HI) {
      Reg = AMDGPU::EXEC;
      RegWidth = 2;
      return true;
    }
    if (Reg == AMDGPU::FLAT_SCR_LO && Reg1 == AMDGPU::FLAT_SCR_HI) {
      Reg = AMDGPU::FLAT_SCR;
      RegWidth = 2;
      return true;
    }
    if (Reg == AMDGPU::VCC_LO && Reg1 == AMDGPU::VCC_HI) {
      Reg = AMDGPU::VCC;
      RegWidth = 2;
      return true;
    }
    if (Reg == AMDGPU::TBA_LO && Reg1 == AMDGPU::TBA_HI) {
      Reg = AMDGPU::TBA;
      RegWidth = 2;
      return true;
    }
    if (Reg == AMDGPU::TMA_LO && Reg1 == AMDGPU::TMA_HI) {
      Reg = AMDGPU::TMA;
      RegWidth = 2;
      return true;
    }
    return false;
  case IS_VGPR:
  case IS_SGPR:
  case IS_TTMP:
    if (Reg1 != Reg + RegWidth) {
      return false;
    }
    RegWidth++;
    return true;
  default:
    llvm_unreachable("unexpected register kind");
  }
}
1579 
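// Illustrative register syntax accepted below (a sketch, not an exhaustive
// list): a single register such as "v0" or "s7"; a range such as "v[8:11]"
// or "s[2:3]"; trap temporaries such as "ttmp4"; special names such as
// "vcc" or "flat_scratch"; and a bracketed list of consecutive 32-bit
// registers such as "[s0,s1,s2,s3]".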
bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
                                          unsigned &RegNum, unsigned &RegWidth,
                                          unsigned *DwordRegIndex) {
  if (DwordRegIndex) { *DwordRegIndex = 0; }
  const MCRegisterInfo *TRI = getContext().getRegisterInfo();
  if (getLexer().is(AsmToken::Identifier)) {
    StringRef RegName = Parser.getTok().getString();
    if ((Reg = getSpecialRegForName(RegName))) {
      Parser.Lex();
      RegKind = IS_SPECIAL;
    } else {
      unsigned RegNumIndex = 0;
      if (RegName[0] == 'v') {
        RegNumIndex = 1;
        RegKind = IS_VGPR;
      } else if (RegName[0] == 's') {
        RegNumIndex = 1;
        RegKind = IS_SGPR;
      } else if (RegName.startswith("ttmp")) {
        RegNumIndex = strlen("ttmp");
        RegKind = IS_TTMP;
      } else {
        return false;
      }
      if (RegName.size() > RegNumIndex) {
        // Single 32-bit register: vXX.
        if (RegName.substr(RegNumIndex).getAsInteger(10, RegNum))
          return false;
        Parser.Lex();
        RegWidth = 1;
      } else {
        // Range of registers: v[XX:YY]. ":YY" is optional.
        Parser.Lex();
        int64_t RegLo, RegHi;
        if (getLexer().isNot(AsmToken::LBrac))
          return false;
        Parser.Lex();

        if (getParser().parseAbsoluteExpression(RegLo))
          return false;

        const bool isRBrace = getLexer().is(AsmToken::RBrac);
        if (!isRBrace && getLexer().isNot(AsmToken::Colon))
          return false;
        Parser.Lex();

        if (isRBrace) {
          RegHi = RegLo;
        } else {
          if (getParser().parseAbsoluteExpression(RegHi))
            return false;

          if (getLexer().isNot(AsmToken::RBrac))
            return false;
          Parser.Lex();
        }
        RegNum = (unsigned) RegLo;
        RegWidth = (RegHi - RegLo) + 1;
      }
    }
  } else if (getLexer().is(AsmToken::LBrac)) {
    // List of consecutive registers: [s0,s1,s2,s3]
    Parser.Lex();
    if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, nullptr))
      return false;
    if (RegWidth != 1)
      return false;
    RegisterKind RegKind1;
    unsigned Reg1, RegNum1, RegWidth1;
    do {
      if (getLexer().is(AsmToken::Comma)) {
        Parser.Lex();
      } else if (getLexer().is(AsmToken::RBrac)) {
        Parser.Lex();
        break;
      } else if (ParseAMDGPURegister(RegKind1, Reg1, RegNum1, RegWidth1, nullptr)) {
        if (RegWidth1 != 1) {
          return false;
        }
        if (RegKind1 != RegKind) {
          return false;
        }
        if (!AddNextRegisterToList(Reg, RegWidth, RegKind1, Reg1, RegNum1)) {
          return false;
        }
      } else {
        return false;
      }
    } while (true);
  } else {
    return false;
  }
  switch (RegKind) {
  case IS_SPECIAL:
    RegNum = 0;
    RegWidth = 1;
    break;
  case IS_VGPR:
  case IS_SGPR:
  case IS_TTMP:
  {
    unsigned Size = 1;
    if (RegKind == IS_SGPR || RegKind == IS_TTMP) {
      // SGPR and TTMP registers must be aligned. Max required alignment is 4 dwords.
      Size = std::min(RegWidth, 4u);
    }
    if (RegNum % Size != 0)
      return false;
    if (DwordRegIndex) { *DwordRegIndex = RegNum; }
    RegNum = RegNum / Size;
    int RCID = getRegClass(RegKind, RegWidth);
    if (RCID == -1)
      return false;
    const MCRegisterClass RC = TRI->getRegClass(RCID);
    if (RegNum >= RC.getNumRegs())
      return false;
    Reg = RC.getRegister(RegNum);
    break;
  }

  default:
    llvm_unreachable("unexpected register kind");
  }

  if (!subtargetHasRegister(*TRI, Reg))
    return false;
  return true;
}

std::unique_ptr<AMDGPUOperand> AMDGPUAsmParser::parseRegister() {
  const auto &Tok = Parser.getTok();
  SMLoc StartLoc = Tok.getLoc();
  SMLoc EndLoc = Tok.getEndLoc();
  RegisterKind RegKind;
  unsigned Reg, RegNum, RegWidth, DwordRegIndex;

  if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, &DwordRegIndex)) {
    return nullptr;
  }
  KernelScope.usesRegister(RegKind, DwordRegIndex, RegWidth);
  return AMDGPUOperand::CreateReg(this, Reg, StartLoc, EndLoc, false);
}

bool
AMDGPUAsmParser::parseAbsoluteExpr(int64_t &Val, bool AbsMod) {
  if (AbsMod && getLexer().peekTok().is(AsmToken::Pipe) &&
      (getLexer().getKind() == AsmToken::Integer ||
       getLexer().getKind() == AsmToken::Real)) {
    // This is a workaround for handling operands like these:
    //     |1.0|
    //     |-1|
    // This syntax is not compatible with syntax of standard
    // MC expressions (due to the trailing '|').

    SMLoc EndLoc;
    const MCExpr *Expr;

    if (getParser().parsePrimaryExpr(Expr, EndLoc)) {
      return true;
    }

    return !Expr->evaluateAsAbsolute(Val);
  }

  return getParser().parseAbsoluteExpression(Val);
}

OperandMatchResultTy
AMDGPUAsmParser::parseImm(OperandVector &Operands, bool AbsMod) {
  // TODO: add syntactic sugar for 1/(2*PI)
  bool Minus = false;
  if (getLexer().getKind() == AsmToken::Minus) {
    Minus = true;
    Parser.Lex();
  }

  SMLoc S = Parser.getTok().getLoc();
  switch(getLexer().getKind()) {
  case AsmToken::Integer: {
    int64_t IntVal;
    if (parseAbsoluteExpr(IntVal, AbsMod))
      return MatchOperand_ParseFail;
    if (Minus)
      IntVal *= -1;
    Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S));
    return MatchOperand_Success;
  }
  case AsmToken::Real: {
    int64_t IntVal;
    if (parseAbsoluteExpr(IntVal, AbsMod))
      return MatchOperand_ParseFail;

    APFloat F(BitsToDouble(IntVal));
    if (Minus)
      F.changeSign();
    Operands.push_back(
        AMDGPUOperand::CreateImm(this, F.bitcastToAPInt().getZExtValue(), S,
                                 AMDGPUOperand::ImmTyNone, true));
    return MatchOperand_Success;
  }
  default:
    return Minus ? MatchOperand_ParseFail : MatchOperand_NoMatch;
  }
}

OperandMatchResultTy
AMDGPUAsmParser::parseReg(OperandVector &Operands) {
  if (auto R = parseRegister()) {
    assert(R->isReg());
    R->Reg.IsForcedVOP3 = isForcedVOP3();
    Operands.push_back(std::move(R));
    return MatchOperand_Success;
  }
  return MatchOperand_NoMatch;
}

OperandMatchResultTy
AMDGPUAsmParser::parseRegOrImm(OperandVector &Operands, bool AbsMod) {
  auto res = parseImm(Operands, AbsMod);
  if (res != MatchOperand_NoMatch) {
    return res;
  }

  return parseReg(Operands);
}

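// Illustrative floating-point source-modifier forms handled below (the
// mnemonic is an example only):
//     v_add_f32 v0, -v1, |v2|
//     v_add_f32 v0, neg(v1), abs(v2)
//     v_add_f32 v0, -|v1|, v2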
OperandMatchResultTy
AMDGPUAsmParser::parseRegOrImmWithFPInputMods(OperandVector &Operands,
                                              bool AllowImm) {
  bool Negate = false, Negate2 = false, Abs = false, Abs2 = false;

  if (getLexer().getKind() == AsmToken::Minus) {
    const AsmToken NextToken = getLexer().peekTok();

    // Disable ambiguous constructs like '--1' etc. Should use neg(-1) instead.
    if (NextToken.is(AsmToken::Minus)) {
      Error(Parser.getTok().getLoc(), "invalid syntax, expected 'neg' modifier");
      return MatchOperand_ParseFail;
    }

    // '-' followed by an integer literal N should be interpreted as integer
    // negation rather than a floating-point NEG modifier applied to N.
    // Besides being counter-intuitive, such use of the floating-point NEG
    // modifier results in different meanings of integer literals used with
    // VOP1/2/C and VOP3, for example:
    //    v_exp_f32_e32 v5, -1 // VOP1: src0 = 0xFFFFFFFF
    //    v_exp_f32_e64 v5, -1 // VOP3: src0 = 0x80000001
    // Negative fp literals should be handled likewise for uniformity.
    if (!NextToken.is(AsmToken::Integer) && !NextToken.is(AsmToken::Real)) {
      Parser.Lex();
      Negate = true;
    }
  }

  if (getLexer().getKind() == AsmToken::Identifier &&
      Parser.getTok().getString() == "neg") {
    if (Negate) {
      Error(Parser.getTok().getLoc(), "expected register or immediate");
      return MatchOperand_ParseFail;
    }
    Parser.Lex();
    Negate2 = true;
    if (getLexer().isNot(AsmToken::LParen)) {
      Error(Parser.getTok().getLoc(), "expected left paren after neg");
      return MatchOperand_ParseFail;
    }
    Parser.Lex();
  }

  if (getLexer().getKind() == AsmToken::Identifier &&
      Parser.getTok().getString() == "abs") {
    Parser.Lex();
    Abs2 = true;
    if (getLexer().isNot(AsmToken::LParen)) {
      Error(Parser.getTok().getLoc(), "expected left paren after abs");
      return MatchOperand_ParseFail;
    }
    Parser.Lex();
  }

  if (getLexer().getKind() == AsmToken::Pipe) {
    if (Abs2) {
      Error(Parser.getTok().getLoc(), "expected register or immediate");
      return MatchOperand_ParseFail;
    }
    Parser.Lex();
    Abs = true;
  }

  OperandMatchResultTy Res;
  if (AllowImm) {
    Res = parseRegOrImm(Operands, Abs);
  } else {
    Res = parseReg(Operands);
  }
  if (Res != MatchOperand_Success) {
    return Res;
  }

  AMDGPUOperand::Modifiers Mods;
  if (Abs) {
    if (getLexer().getKind() != AsmToken::Pipe) {
      Error(Parser.getTok().getLoc(), "expected vertical bar");
      return MatchOperand_ParseFail;
    }
    Parser.Lex();
    Mods.Abs = true;
  }
  if (Abs2) {
    if (getLexer().isNot(AsmToken::RParen)) {
      Error(Parser.getTok().getLoc(), "expected closing parentheses");
      return MatchOperand_ParseFail;
    }
    Parser.Lex();
    Mods.Abs = true;
  }

  if (Negate) {
    Mods.Neg = true;
  } else if (Negate2) {
    if (getLexer().isNot(AsmToken::RParen)) {
      Error(Parser.getTok().getLoc(), "expected closing parentheses");
      return MatchOperand_ParseFail;
    }
    Parser.Lex();
    Mods.Neg = true;
  }

  if (Mods.hasFPModifiers()) {
    AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
    Op.setModifiers(Mods);
  }
  return MatchOperand_Success;
}

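// Illustrative integer source-modifier form handled below; only sext() is
// recognized (the mnemonic is an example only):
//     v_add_u16_sdwa v0, sext(v1), v2 ...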
OperandMatchResultTy
AMDGPUAsmParser::parseRegOrImmWithIntInputMods(OperandVector &Operands,
                                               bool AllowImm) {
  bool Sext = false;

  if (getLexer().getKind() == AsmToken::Identifier &&
      Parser.getTok().getString() == "sext") {
    Parser.Lex();
    Sext = true;
    if (getLexer().isNot(AsmToken::LParen)) {
      Error(Parser.getTok().getLoc(), "expected left paren after sext");
      return MatchOperand_ParseFail;
    }
    Parser.Lex();
  }

  OperandMatchResultTy Res;
  if (AllowImm) {
    Res = parseRegOrImm(Operands);
  } else {
    Res = parseReg(Operands);
  }
  if (Res != MatchOperand_Success) {
    return Res;
  }

  AMDGPUOperand::Modifiers Mods;
  if (Sext) {
    if (getLexer().isNot(AsmToken::RParen)) {
      Error(Parser.getTok().getLoc(), "expected closing parentheses");
      return MatchOperand_ParseFail;
    }
    Parser.Lex();
    Mods.Sext = true;
  }

  if (Mods.hasIntModifiers()) {
    AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
    Op.setModifiers(Mods);
  }

  return MatchOperand_Success;
}

OperandMatchResultTy
AMDGPUAsmParser::parseRegWithFPInputMods(OperandVector &Operands) {
  return parseRegOrImmWithFPInputMods(Operands, false);
}

OperandMatchResultTy
AMDGPUAsmParser::parseRegWithIntInputMods(OperandVector &Operands) {
  return parseRegOrImmWithIntInputMods(Operands, false);
}

OperandMatchResultTy AMDGPUAsmParser::parseVReg32OrOff(OperandVector &Operands) {
  std::unique_ptr<AMDGPUOperand> Reg = parseRegister();
  if (Reg) {
    Operands.push_back(std::move(Reg));
    return MatchOperand_Success;
  }

  const AsmToken &Tok = Parser.getTok();
  if (Tok.getString() == "off") {
    Operands.push_back(AMDGPUOperand::CreateImm(this, 0, Tok.getLoc(),
                                                AMDGPUOperand::ImmTyOff, false));
    Parser.Lex();
    return MatchOperand_Success;
  }

  return MatchOperand_NoMatch;
}

unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) {
  uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;

  if ((getForcedEncodingSize() == 32 && (TSFlags & SIInstrFlags::VOP3)) ||
      (getForcedEncodingSize() == 64 && !(TSFlags & SIInstrFlags::VOP3)) ||
      (isForcedDPP() && !(TSFlags & SIInstrFlags::DPP)) ||
      (isForcedSDWA() && !(TSFlags & SIInstrFlags::SDWA)))
    return Match_InvalidOperand;

  if ((TSFlags & SIInstrFlags::VOP3) &&
      (TSFlags & SIInstrFlags::VOPAsmPrefer32Bit) &&
      getForcedEncodingSize() != 64)
    return Match_PreferE32;

  if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
      Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) {
    // v_mac_f32/16 allow only dst_sel == DWORD;
    auto OpNum =
        AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::dst_sel);
    const auto &Op = Inst.getOperand(OpNum);
    if (!Op.isImm() || Op.getImm() != AMDGPU::SDWA::SdwaSel::DWORD) {
      return Match_InvalidOperand;
    }
  }

  if ((TSFlags & SIInstrFlags::FLAT) && !hasFlatOffsets()) {
    // FIXME: Produces error without correct column reported.
    auto OpNum =
        AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::offset);
    const auto &Op = Inst.getOperand(OpNum);
    if (Op.getImm() != 0)
      return Match_InvalidOperand;
  }

  return Match_Success;
}

// What asm variants we should check
ArrayRef<unsigned> AMDGPUAsmParser::getMatchedVariants() const {
  if (getForcedEncodingSize() == 32) {
    static const unsigned Variants[] = {AMDGPUAsmVariants::DEFAULT};
    return makeArrayRef(Variants);
  }

  if (isForcedVOP3()) {
    static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3};
    return makeArrayRef(Variants);
  }

  if (isForcedSDWA()) {
    static const unsigned Variants[] = {AMDGPUAsmVariants::SDWA,
                                        AMDGPUAsmVariants::SDWA9};
    return makeArrayRef(Variants);
  }

  if (isForcedDPP()) {
    static const unsigned Variants[] = {AMDGPUAsmVariants::DPP};
    return makeArrayRef(Variants);
  }

  static const unsigned Variants[] = {
    AMDGPUAsmVariants::DEFAULT, AMDGPUAsmVariants::VOP3,
    AMDGPUAsmVariants::SDWA, AMDGPUAsmVariants::SDWA9, AMDGPUAsmVariants::DPP
  };

  return makeArrayRef(Variants);
}

unsigned AMDGPUAsmParser::findImplicitSGPRReadInVOP(const MCInst &Inst) const {
  const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
  const unsigned Num = Desc.getNumImplicitUses();
  for (unsigned i = 0; i < Num; ++i) {
    unsigned Reg = Desc.ImplicitUses[i];
    switch (Reg) {
    case AMDGPU::FLAT_SCR:
    case AMDGPU::VCC:
    case AMDGPU::M0:
      return Reg;
    default:
      break;
    }
  }
  return AMDGPU::NoRegister;
}

// NB: This code is correct only when used to check constant
// bus limitations because GFX7 supports no f16 inline constants.
// Note that there are no cases when a GFX7 opcode violates
// constant bus limitations due to the use of an f16 constant.
bool AMDGPUAsmParser::isInlineConstant(const MCInst &Inst,
                                       unsigned OpIdx) const {
  const MCInstrDesc &Desc = MII.get(Inst.getOpcode());

  if (!AMDGPU::isSISrcOperand(Desc, OpIdx)) {
    return false;
  }

  const MCOperand &MO = Inst.getOperand(OpIdx);

  int64_t Val = MO.getImm();
  auto OpSize = AMDGPU::getOperandSize(Desc, OpIdx);

  switch (OpSize) { // expected operand size
  case 8:
    return AMDGPU::isInlinableLiteral64(Val, hasInv2PiInlineImm());
  case 4:
    return AMDGPU::isInlinableLiteral32(Val, hasInv2PiInlineImm());
  case 2: {
    const unsigned OperandType = Desc.OpInfo[OpIdx].OperandType;
    if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2INT16 ||
        OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2FP16) {
      return AMDGPU::isInlinableLiteralV216(Val, hasInv2PiInlineImm());
    } else {
      return AMDGPU::isInlinableLiteral16(Val, hasInv2PiInlineImm());
    }
  }
  default:
    llvm_unreachable("invalid operand size");
  }
}

bool AMDGPUAsmParser::usesConstantBus(const MCInst &Inst, unsigned OpIdx) {
  const MCOperand &MO = Inst.getOperand(OpIdx);
  if (MO.isImm()) {
    return !isInlineConstant(Inst, OpIdx);
  }
  return !MO.isReg() ||
         isSGPR(mc2PseudoReg(MO.getReg()), getContext().getRegisterInfo());
}

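// Sketch of what the constant-bus check below enforces: at most one
// constant-bus value (an SGPR, a literal, or an implicit SGPR read such as
// VCC) per VALU instruction. With an assumed VOP2 add:
//     v_add_f32 v0, s0, v1    // OK: one SGPR
//     v_add_f32 v0, s0, s1    // rejected: two distinct SGPRs
// Reusing the same SGPR counts only once.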
bool AMDGPUAsmParser::validateConstantBusLimitations(const MCInst &Inst) {
  const unsigned Opcode = Inst.getOpcode();
  const MCInstrDesc &Desc = MII.get(Opcode);
  unsigned ConstantBusUseCount = 0;

  if (Desc.TSFlags &
      (SIInstrFlags::VOPC |
       SIInstrFlags::VOP1 | SIInstrFlags::VOP2 |
       SIInstrFlags::VOP3 | SIInstrFlags::VOP3P |
       SIInstrFlags::SDWA)) {
    // Check special imm operands (used by madmk, etc)
    if (AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm) != -1) {
      ++ConstantBusUseCount;
    }

    unsigned SGPRUsed = findImplicitSGPRReadInVOP(Inst);
    if (SGPRUsed != AMDGPU::NoRegister) {
      ++ConstantBusUseCount;
    }

    const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
    const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
    const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);

    const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx };

    for (int OpIdx : OpIndices) {
      if (OpIdx == -1) break;

      const MCOperand &MO = Inst.getOperand(OpIdx);
      if (usesConstantBus(Inst, OpIdx)) {
        if (MO.isReg()) {
          const unsigned Reg = mc2PseudoReg(MO.getReg());
          // Pairs of registers with a partial intersection like these:
          //   s0, s[0:1]
          //   flat_scratch_lo, flat_scratch
          //   flat_scratch_lo, flat_scratch_hi
          // are theoretically valid but are disabled anyway.
          // Note that this code mimics SIInstrInfo::verifyInstruction
          if (Reg != SGPRUsed) {
            ++ConstantBusUseCount;
          }
          SGPRUsed = Reg;
        } else { // Expression or a literal
          ++ConstantBusUseCount;
        }
      }
    }
  }

  return ConstantBusUseCount <= 1;
}

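// Illustrative violation of the earlyclobber check below, assuming an
// opcode whose vdst carries the EARLY_CLOBBER constraint (e.g.
// v_qsad_pk_u16_u8):
//     v_qsad_pk_u16_u8 v[0:1], v[1:2], v3, v[4:5]   // vdst overlaps src0
// The destination range must not intersect any source range.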
bool AMDGPUAsmParser::validateEarlyClobberLimitations(const MCInst &Inst) {
  const unsigned Opcode = Inst.getOpcode();
  const MCInstrDesc &Desc = MII.get(Opcode);

  const int DstIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vdst);
  if (DstIdx == -1 ||
      Desc.getOperandConstraint(DstIdx, MCOI::EARLY_CLOBBER) == -1) {
    return true;
  }

  const MCRegisterInfo *TRI = getContext().getRegisterInfo();

  const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
  const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
  const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);

  assert(DstIdx != -1);
  const MCOperand &Dst = Inst.getOperand(DstIdx);
  assert(Dst.isReg());
  const unsigned DstReg = mc2PseudoReg(Dst.getReg());

  const int SrcIndices[] = { Src0Idx, Src1Idx, Src2Idx };

  for (int SrcIdx : SrcIndices) {
    if (SrcIdx == -1) break;
    const MCOperand &Src = Inst.getOperand(SrcIdx);
    if (Src.isReg()) {
      const unsigned SrcReg = mc2PseudoReg(Src.getReg());
      if (isRegIntersect(DstReg, SrcReg, TRI)) {
        return false;
      }
    }
  }

  return true;
}

bool AMDGPUAsmParser::validateIntClampSupported(const MCInst &Inst) {

  const unsigned Opc = Inst.getOpcode();
  const MCInstrDesc &Desc = MII.get(Opc);

  if ((Desc.TSFlags & SIInstrFlags::IntClamp) != 0 && !hasIntClamp()) {
    int ClampIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp);
    assert(ClampIdx != -1);
    return Inst.getOperand(ClampIdx).getImm() == 0;
  }

  return true;
}

bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst,
                                          const SMLoc &IDLoc) {
  if (!validateConstantBusLimitations(Inst)) {
    Error(IDLoc,
      "invalid operand (violates constant bus restrictions)");
    return false;
  }
  if (!validateEarlyClobberLimitations(Inst)) {
    Error(IDLoc,
      "destination must be different than all sources");
    return false;
  }
  if (!validateIntClampSupported(Inst)) {
    Error(IDLoc,
      "integer clamping is not supported on this GPU");
    return false;
  }

  return true;
}

bool AMDGPUAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
                                              OperandVector &Operands,
                                              MCStreamer &Out,
                                              uint64_t &ErrorInfo,
                                              bool MatchingInlineAsm) {
  MCInst Inst;
  unsigned Result = Match_Success;
  for (auto Variant : getMatchedVariants()) {
    uint64_t EI;
    auto R = MatchInstructionImpl(Operands, Inst, EI, MatchingInlineAsm,
                                  Variant);
    // We order match statuses from least to most specific, and we use the
    // most specific status as the result:
    // Match_MnemonicFail < Match_InvalidOperand < Match_MissingFeature < Match_PreferE32
    if ((R == Match_Success) ||
        (R == Match_PreferE32) ||
        (R == Match_MissingFeature && Result != Match_PreferE32) ||
        (R == Match_InvalidOperand && Result != Match_MissingFeature
                                   && Result != Match_PreferE32) ||
        (R == Match_MnemonicFail   && Result != Match_InvalidOperand
                                   && Result != Match_MissingFeature
                                   && Result != Match_PreferE32)) {
      Result = R;
      ErrorInfo = EI;
    }
    if (R == Match_Success)
      break;
  }

  switch (Result) {
  default: break;
  case Match_Success:
    if (!validateInstruction(Inst, IDLoc)) {
      return true;
    }
    Inst.setLoc(IDLoc);
    Out.EmitInstruction(Inst, getSTI());
    return false;

  case Match_MissingFeature:
    return Error(IDLoc, "instruction not supported on this GPU");

  case Match_MnemonicFail:
    return Error(IDLoc, "unrecognized instruction mnemonic");

  case Match_InvalidOperand: {
    SMLoc ErrorLoc = IDLoc;
    if (ErrorInfo != ~0ULL) {
      if (ErrorInfo >= Operands.size()) {
        return Error(IDLoc, "too few operands for instruction");
      }
      ErrorLoc = ((AMDGPUOperand &)*Operands[ErrorInfo]).getStartLoc();
      if (ErrorLoc == SMLoc())
        ErrorLoc = IDLoc;
    }
    return Error(ErrorLoc, "invalid operand for instruction");
  }

  case Match_PreferE32:
    return Error(IDLoc, "internal error: instruction without _e64 suffix "
                        "should be encoded as e32");
  }
  llvm_unreachable("Implement any new match types added!");
}

bool AMDGPUAsmParser::ParseAsAbsoluteExpression(uint32_t &Ret) {
  int64_t Tmp = -1;
  if (getLexer().isNot(AsmToken::Integer) && getLexer().isNot(AsmToken::Identifier)) {
    return true;
  }
  if (getParser().parseAbsoluteExpression(Tmp)) {
    return true;
  }
  Ret = static_cast<uint32_t>(Tmp);
  return false;
}

bool AMDGPUAsmParser::ParseDirectiveMajorMinor(uint32_t &Major,
                                               uint32_t &Minor) {
  if (ParseAsAbsoluteExpression(Major))
    return TokError("invalid major version");

  if (getLexer().isNot(AsmToken::Comma))
    return TokError("minor version number required, comma expected");
  Lex();

  if (ParseAsAbsoluteExpression(Minor))
    return TokError("invalid minor version");

  return false;
}

bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectVersion() {
  uint32_t Major;
  uint32_t Minor;

  if (ParseDirectiveMajorMinor(Major, Minor))
    return true;

  getTargetStreamer().EmitDirectiveHSACodeObjectVersion(Major, Minor);
  return false;
}

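// Illustrative forms of the directive parsed below (version numbers are
// examples only):
//     .hsa_code_object_isa                        // use the targeted GPU's ISA
//     .hsa_code_object_isa 8,0,3,"AMD","AMDGPU"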
bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectISA() {
  uint32_t Major;
  uint32_t Minor;
  uint32_t Stepping;
  StringRef VendorName;
  StringRef ArchName;

  // If this directive has no arguments, then use the ISA version for the
  // targeted GPU.
  if (getLexer().is(AsmToken::EndOfStatement)) {
    AMDGPU::IsaInfo::IsaVersion ISA =
        AMDGPU::IsaInfo::getIsaVersion(getFeatureBits());
    getTargetStreamer().EmitDirectiveHSACodeObjectISA(ISA.Major, ISA.Minor,
                                                      ISA.Stepping,
                                                      "AMD", "AMDGPU");
    return false;
  }

  if (ParseDirectiveMajorMinor(Major, Minor))
    return true;

  if (getLexer().isNot(AsmToken::Comma))
    return TokError("stepping version number required, comma expected");
  Lex();

  if (ParseAsAbsoluteExpression(Stepping))
    return TokError("invalid stepping version");

  if (getLexer().isNot(AsmToken::Comma))
    return TokError("vendor name required, comma expected");
  Lex();

  if (getLexer().isNot(AsmToken::String))
    return TokError("invalid vendor name");

  VendorName = getLexer().getTok().getStringContents();
  Lex();

  if (getLexer().isNot(AsmToken::Comma))
    return TokError("arch name required, comma expected");
  Lex();

  if (getLexer().isNot(AsmToken::String))
    return TokError("invalid arch name");

  ArchName = getLexer().getTok().getStringContents();
  Lex();

  getTargetStreamer().EmitDirectiveHSACodeObjectISA(Major, Minor, Stepping,
                                                    VendorName, ArchName);
  return false;
}

bool AMDGPUAsmParser::ParseDirectiveCodeObjectMetadata() {
  std::string YamlString;
  raw_string_ostream YamlStream(YamlString);

  getLexer().setSkipSpace(false);

  bool FoundEnd = false;
  while (!getLexer().is(AsmToken::Eof)) {
    while (getLexer().is(AsmToken::Space)) {
      YamlStream << getLexer().getTok().getString();
      Lex();
    }

    if (getLexer().is(AsmToken::Identifier)) {
      StringRef ID = getLexer().getTok().getIdentifier();
      if (ID == AMDGPU::CodeObject::MetadataAssemblerDirectiveEnd) {
        Lex();
        FoundEnd = true;
        break;
      }
    }

    YamlStream << Parser.parseStringToEndOfStatement()
               << getContext().getAsmInfo()->getSeparatorString();

    Parser.eatToEndOfStatement();
  }

  getLexer().setSkipSpace(true);

  if (getLexer().is(AsmToken::Eof) && !FoundEnd) {
    return TokError(
        "expected directive .end_amdgpu_code_object_metadata not found");
  }

  YamlStream.flush();

  if (!getTargetStreamer().EmitCodeObjectMetadata(YamlString))
    return Error(getParser().getTok().getLoc(), "invalid code object metadata");

  return false;
}

bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID,
                                               amd_kernel_code_t &Header) {
  SmallString<40> ErrStr;
  raw_svector_ostream Err(ErrStr);
  if (!parseAmdKernelCodeField(ID, getParser(), Header, Err)) {
    return TokError(Err.str());
  }
  Lex();
  return false;
}

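// A minimal sketch of the directive parsed below; the field shown is an
// example, and any amd_kernel_code_t field may appear, one per line:
//     .amd_kernel_code_t
//         wavefront_size = 6
//     .end_amd_kernel_code_t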
bool AMDGPUAsmParser::ParseDirectiveAMDKernelCodeT() {
  amd_kernel_code_t Header;
  AMDGPU::initDefaultAMDKernelCodeT(Header, getFeatureBits());

  while (true) {
    // Lex EndOfStatement.  This is in a while loop, because lexing a comment
    // will set the current token to EndOfStatement.
    while(getLexer().is(AsmToken::EndOfStatement))
      Lex();

    if (getLexer().isNot(AsmToken::Identifier))
      return TokError("expected value identifier or .end_amd_kernel_code_t");

    StringRef ID = getLexer().getTok().getIdentifier();
    Lex();

    if (ID == ".end_amd_kernel_code_t")
      break;

    if (ParseAMDKernelCodeTValue(ID, Header))
      return true;
  }

  getTargetStreamer().EmitAMDKernelCodeT(Header);

  return false;
}

bool AMDGPUAsmParser::ParseDirectiveAMDGPUHsaKernel() {
  if (getLexer().isNot(AsmToken::Identifier))
    return TokError("expected symbol name");

  StringRef KernelName = Parser.getTok().getString();

  getTargetStreamer().EmitAMDGPUSymbolType(KernelName,
                                           ELF::STT_AMDGPU_HSA_KERNEL);
  Lex();
  KernelScope.initialize(getContext());
  return false;
}

bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) {
  StringRef IDVal = DirectiveID.getString();

  if (IDVal == ".hsa_code_object_version")
    return ParseDirectiveHSACodeObjectVersion();

  if (IDVal == ".hsa_code_object_isa")
    return ParseDirectiveHSACodeObjectISA();

  if (IDVal == AMDGPU::CodeObject::MetadataAssemblerDirectiveBegin)
    return ParseDirectiveCodeObjectMetadata();

  if (IDVal == ".amd_kernel_code_t")
    return ParseDirectiveAMDKernelCodeT();

  if (IDVal == ".amdgpu_hsa_kernel")
    return ParseDirectiveAMDGPUHsaKernel();

  return true;
}

bool AMDGPUAsmParser::subtargetHasRegister(const MCRegisterInfo &MRI,
                                           unsigned RegNo) const {
  if (isCI())
    return true;

  if (isSI()) {
    // No flat_scr
    switch (RegNo) {
    case AMDGPU::FLAT_SCR:
    case AMDGPU::FLAT_SCR_LO:
    case AMDGPU::FLAT_SCR_HI:
      return false;
    default:
      return true;
    }
  }

  // VI only has 102 SGPRs, so make sure we aren't trying to use the 2 more that
  // SI/CI have.
  for (MCRegAliasIterator R(AMDGPU::SGPR102_SGPR103, &MRI, true);
       R.isValid(); ++R) {
    if (*R == RegNo)
      return false;
  }

  return true;
}

OperandMatchResultTy
AMDGPUAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic) {
  // Try to parse with a custom parser
  OperandMatchResultTy ResTy = MatchOperandParserImpl(Operands, Mnemonic);

  // If we successfully parsed the operand or if there was an error parsing,
  // we are done.
  //
  // If we are parsing after we reach EndOfStatement then this means we
  // are appending default values to the Operands list.  This is only done
  // by a custom parser, so we shouldn't continue on to the generic parsing.
  if (ResTy == MatchOperand_Success || ResTy == MatchOperand_ParseFail ||
      getLexer().is(AsmToken::EndOfStatement))
    return ResTy;

  ResTy = parseRegOrImm(Operands);

  if (ResTy == MatchOperand_Success)
    return ResTy;

  const auto &Tok = Parser.getTok();
  SMLoc S = Tok.getLoc();

  const MCExpr *Expr = nullptr;
  if (!Parser.parseExpression(Expr)) {
    Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S));
    return MatchOperand_Success;
  }

  // Possibly this is an instruction flag like 'gds'.
  if (Tok.getKind() == AsmToken::Identifier) {
    Operands.push_back(AMDGPUOperand::CreateToken(this, Tok.getString(), S));
    Parser.Lex();
    return MatchOperand_Success;
  }

  return MatchOperand_NoMatch;
}

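// Illustrative suffixes stripped below (the mnemonics are examples): _e32
// forces the 32-bit encoding, _e64 forces VOP3, _dpp forces DPP, and _sdwa
// forces SDWA, e.g. "v_add_f32_e64" or "v_mov_b32_sdwa".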
StringRef AMDGPUAsmParser::parseMnemonicSuffix(StringRef Name) {
  // Clear any forced encodings from the previous instruction.
  setForcedEncodingSize(0);
  setForcedDPP(false);
  setForcedSDWA(false);

  if (Name.endswith("_e64")) {
    setForcedEncodingSize(64);
    return Name.substr(0, Name.size() - 4);
  } else if (Name.endswith("_e32")) {
    setForcedEncodingSize(32);
    return Name.substr(0, Name.size() - 4);
  } else if (Name.endswith("_dpp")) {
    setForcedDPP(true);
    return Name.substr(0, Name.size() - 4);
  } else if (Name.endswith("_sdwa")) {
    setForcedSDWA(true);
    return Name.substr(0, Name.size() - 5);
  }
  return Name;
}

bool AMDGPUAsmParser::ParseInstruction(ParseInstructionInfo &Info,
                                       StringRef Name,
                                       SMLoc NameLoc, OperandVector &Operands) {
  // Add the instruction mnemonic
  Name = parseMnemonicSuffix(Name);
  Operands.push_back(AMDGPUOperand::CreateToken(this, Name, NameLoc));

  while (!getLexer().is(AsmToken::EndOfStatement)) {
    OperandMatchResultTy Res = parseOperand(Operands, Name);

    // Eat the comma or space if there is one.
    if (getLexer().is(AsmToken::Comma))
      Parser.Lex();

    switch (Res) {
      case MatchOperand_Success: break;
      case MatchOperand_ParseFail:
        Error(getLexer().getLoc(), "failed parsing operand.");
        while (!getLexer().is(AsmToken::EndOfStatement)) {
          Parser.Lex();
        }
        return true;
      case MatchOperand_NoMatch:
        Error(getLexer().getLoc(), "not a valid operand.");
        while (!getLexer().is(AsmToken::EndOfStatement)) {
          Parser.Lex();
        }
        return true;
    }
  }

  return false;
}

//===----------------------------------------------------------------------===//
// Utility functions
//===----------------------------------------------------------------------===//

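// Illustrative "prefix:value" operand handled by the helpers below; "offset"
// is one common prefix, and callers pass whichever prefix they expect:
//     ds_read_b32 v0, v1 offset:16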
OperandMatchResultTy
AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, int64_t &Int) {
  switch(getLexer().getKind()) {
    default: return MatchOperand_NoMatch;
    case AsmToken::Identifier: {
      StringRef Name = Parser.getTok().getString();
      if (!Name.equals(Prefix)) {
        return MatchOperand_NoMatch;
      }

      Parser.Lex();
      if (getLexer().isNot(AsmToken::Colon))
        return MatchOperand_ParseFail;

      Parser.Lex();

      bool IsMinus = false;
      if (getLexer().getKind() == AsmToken::Minus) {
        Parser.Lex();
        IsMinus = true;
      }

      if (getLexer().isNot(AsmToken::Integer))
        return MatchOperand_ParseFail;

      if (getParser().parseAbsoluteExpression(Int))
        return MatchOperand_ParseFail;

      if (IsMinus)
        Int = -Int;
      break;
    }
  }
  return MatchOperand_Success;
}

OperandMatchResultTy
AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
                                    AMDGPUOperand::ImmTy ImmTy,
                                    bool (*ConvertResult)(int64_t&)) {
  SMLoc S = Parser.getTok().getLoc();
  int64_t Value = 0;

  OperandMatchResultTy Res = parseIntWithPrefix(Prefix, Value);
  if (Res != MatchOperand_Success)
    return Res;

  if (ConvertResult && !ConvertResult(Value)) {
    return MatchOperand_ParseFail;
  }

  Operands.push_back(AMDGPUOperand::CreateImm(this, Value, S, ImmTy));
  return MatchOperand_Success;
}

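// Illustrative bracketed array form handled below; each element must be 0 or
// 1 and is packed into a bitmask. The prefix shown is an assumption (VOP3P
// modifiers use this syntax):
//     v_pk_add_f16 v0, v1, v2 op_sel:[0,1]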
OperandMatchResultTy AMDGPUAsmParser::parseOperandArrayWithPrefix(
  const char *Prefix,
  OperandVector &Operands,
  AMDGPUOperand::ImmTy ImmTy,
  bool (*ConvertResult)(int64_t&)) {
  StringRef Name = Parser.getTok().getString();
  if (!Name.equals(Prefix))
    return MatchOperand_NoMatch;

  Parser.Lex();
  if (getLexer().isNot(AsmToken::Colon))
    return MatchOperand_ParseFail;

  Parser.Lex();
  if (getLexer().isNot(AsmToken::LBrac))
    return MatchOperand_ParseFail;
  Parser.Lex();

  unsigned Val = 0;
  SMLoc S = Parser.getTok().getLoc();

  // FIXME: How to verify the number of elements matches the number of src
  // operands?
  for (int I = 0; I < 4; ++I) {
    if (I != 0) {
      if (getLexer().is(AsmToken::RBrac))
        break;

      if (getLexer().isNot(AsmToken::Comma))
        return MatchOperand_ParseFail;
      Parser.Lex();
    }

    if (getLexer().isNot(AsmToken::Integer))
      return MatchOperand_ParseFail;

    int64_t Op;
    if (getParser().parseAbsoluteExpression(Op))
      return MatchOperand_ParseFail;

    if (Op != 0 && Op != 1)
      return MatchOperand_ParseFail;
    Val |= (Op << I);
  }

  Parser.Lex();
  Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S, ImmTy));
  return MatchOperand_Success;
}

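// Illustrative named-bit operands handled below: the bare name sets the bit
// and a "no"-prefixed form clears it explicitly (the mnemonics are examples):
//     ds_write_b32 v0, v1 gds
//     buffer_load_dword v0, off, s[0:3], 0 glc slc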
OperandMatchResultTy
AMDGPUAsmParser::parseNamedBit(const char *Name, OperandVector &Operands,
                               AMDGPUOperand::ImmTy ImmTy) {
  int64_t Bit = 0;
  SMLoc S = Parser.getTok().getLoc();

  // We are at the end of the statement, and this is a default argument, so
  // use a default value.
  if (getLexer().isNot(AsmToken::EndOfStatement)) {
    switch(getLexer().getKind()) {
      case AsmToken::Identifier: {
        StringRef Tok = Parser.getTok().getString();
        if (Tok == Name) {
          Bit = 1;
          Parser.Lex();
        } else if (Tok.startswith("no") && Tok.endswith(Name)) {
          Bit = 0;
          Parser.Lex();
        } else {
          return MatchOperand_NoMatch;
        }
        break;
      }
      default:
        return MatchOperand_NoMatch;
    }
  }

  Operands.push_back(AMDGPUOperand::CreateImm(this, Bit, S, ImmTy));
  return MatchOperand_Success;
}

static void addOptionalImmOperand(
  MCInst& Inst, const OperandVector& Operands,
  AMDGPUAsmParser::OptionalImmIndexMap& OptionalIdx,
  AMDGPUOperand::ImmTy ImmT,
  int64_t Default = 0) {
  auto i = OptionalIdx.find(ImmT);
  if (i != OptionalIdx.end()) {
    unsigned Idx = i->second;
    ((AMDGPUOperand &)*Operands[Idx]).addImmOperands(Inst, 1);
  } else {
    Inst.addOperand(MCOperand::createImm(Default));
  }
}

OperandMatchResultTy
AMDGPUAsmParser::parseStringWithPrefix(StringRef Prefix, StringRef &Value) {
  if (getLexer().isNot(AsmToken::Identifier)) {
    return MatchOperand_NoMatch;
  }
  StringRef Tok = Parser.getTok().getString();
  if (Tok != Prefix) {
    return MatchOperand_NoMatch;
  }

  Parser.Lex();
  if (getLexer().isNot(AsmToken::Colon)) {
    return MatchOperand_ParseFail;
  }

  Parser.Lex();
  if (getLexer().isNot(AsmToken::Identifier)) {
    return MatchOperand_ParseFail;
  }

  Value = Parser.getTok().getString();
  return MatchOperand_Success;
}

//===----------------------------------------------------------------------===//
// ds
//===----------------------------------------------------------------------===//

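// Illustrative DS instruction forms converted below (registers and offsets
// are examples):
//     ds_write2_b32 v2, v4, v6 offset0:4 offset1:8
//     ds_add_u32 v1, v2 offset:16 gds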
void AMDGPUAsmParser::cvtDSOffset01(MCInst &Inst,
                                    const OperandVector &Operands) {
  OptionalImmIndexMap OptionalIdx;

  for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);

    // Add the register arguments
    if (Op.isReg()) {
      Op.addRegOperands(Inst, 1);
      continue;
    }

    // Handle optional arguments
    OptionalIdx[Op.getImmTy()] = i;
  }

  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset0);
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset1);
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS);

  Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0
}

void AMDGPUAsmParser::cvtDSImpl(MCInst &Inst, const OperandVector &Operands,
                                bool IsGdsHardcoded) {
  OptionalImmIndexMap OptionalIdx;

  for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);

    // Add the register arguments
    if (Op.isReg()) {
      Op.addRegOperands(Inst, 1);
      continue;
    }

    if (Op.isToken() && Op.getToken() == "gds") {
      IsGdsHardcoded = true;
      continue;
    }

    // Handle optional arguments
    OptionalIdx[Op.getImmTy()] = i;
  }

  AMDGPUOperand::ImmTy OffsetType =
    (Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_si ||
     Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_vi) ? AMDGPUOperand::ImmTySwizzle :
                                                      AMDGPUOperand::ImmTyOffset;

  addOptionalImmOperand(Inst, Operands, OptionalIdx, OffsetType);

  if (!IsGdsHardcoded) {
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS);
  }
  Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0
}

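// Illustrative export syntax converted below: "off" marks an unused source,
// and "done", "compr", and "vm" are optional flags (register choices are
// examples):
//     exp mrt0 v0, v0, v1, v1 done compr vm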
void AMDGPUAsmParser::cvtExp(MCInst &Inst, const OperandVector &Operands) {
  OptionalImmIndexMap OptionalIdx;

  unsigned OperandIdx[4];
  unsigned EnMask = 0;
  int SrcIdx = 0;

  for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);

    // Add the register arguments
    if (Op.isReg()) {
      assert(SrcIdx < 4);
      OperandIdx[SrcIdx] = Inst.size();
      Op.addRegOperands(Inst, 1);
      ++SrcIdx;
      continue;
    }

    if (Op.isOff()) {
      assert(SrcIdx < 4);
      OperandIdx[SrcIdx] = Inst.size();
      Inst.addOperand(MCOperand::createReg(AMDGPU::NoRegister));
      ++SrcIdx;
      continue;
    }

    if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyExpTgt) {
      Op.addImmOperands(Inst, 1);
      continue;
    }

    if (Op.isToken() && Op.getToken() == "done")
      continue;

    // Handle optional arguments
    OptionalIdx[Op.getImmTy()] = i;
  }

  assert(SrcIdx == 4);

  bool Compr = false;
  if (OptionalIdx.find(AMDGPUOperand::ImmTyExpCompr) != OptionalIdx.end()) {
    Compr = true;
    Inst.getOperand(OperandIdx[1]) = Inst.getOperand(OperandIdx[2]);
    Inst.getOperand(OperandIdx[2]).setReg(AMDGPU::NoRegister);
    Inst.getOperand(OperandIdx[3]).setReg(AMDGPU::NoRegister);
  }

  for (auto i = 0; i < SrcIdx; ++i) {
    if (Inst.getOperand(OperandIdx[i]).getReg() != AMDGPU::NoRegister) {
      EnMask |= Compr? (0x3 << i * 2) : (0x1 << i);
    }
  }

  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpVM);
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpCompr);

  Inst.addOperand(MCOperand::createImm(EnMask));
}

//===----------------------------------------------------------------------===//
// s_waitcnt
//===----------------------------------------------------------------------===//

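// Illustrative s_waitcnt forms parsed below; counters may be joined with '&'
// or ',', and a "_sat" suffix clamps an out-of-range value instead of
// raising an error:
//     s_waitcnt vmcnt(0) & expcnt(0) & lgkmcnt(0)
//     s_waitcnt lgkmcnt_sat(63)
//     s_waitcnt 0        // a raw 16-bit encoding is also accepted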
static bool
encodeCnt(
  const AMDGPU::IsaInfo::IsaVersion ISA,
  int64_t &IntVal,
  int64_t CntVal,
  bool Saturate,
  unsigned (*encode)(const IsaInfo::IsaVersion &Version, unsigned, unsigned),
  unsigned (*decode)(const IsaInfo::IsaVersion &Version, unsigned))
{
  bool Failed = false;

  IntVal = encode(ISA, IntVal, CntVal);
  if (CntVal != decode(ISA, IntVal)) {
    if (Saturate) {
      IntVal = encode(ISA, IntVal, -1);
    } else {
      Failed = true;
    }
  }
  return Failed;
}

bool AMDGPUAsmParser::parseCnt(int64_t &IntVal) {
  StringRef CntName = Parser.getTok().getString();
  int64_t CntVal;

  Parser.Lex();
  if (getLexer().isNot(AsmToken::LParen))
    return true;

  Parser.Lex();
  if (getLexer().isNot(AsmToken::Integer))
    return true;

  SMLoc ValLoc = Parser.getTok().getLoc();
  if (getParser().parseAbsoluteExpression(CntVal))
    return true;

  AMDGPU::IsaInfo::IsaVersion ISA =
      AMDGPU::IsaInfo::getIsaVersion(getFeatureBits());

  bool Failed = true;
  bool Sat = CntName.endswith("_sat");

  if (CntName == "vmcnt" || CntName == "vmcnt_sat") {
    Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeVmcnt, decodeVmcnt);
  } else if (CntName == "expcnt" || CntName == "expcnt_sat") {
    Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeExpcnt, decodeExpcnt);
  } else if (CntName == "lgkmcnt" || CntName == "lgkmcnt_sat") {
    Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeLgkmcnt, decodeLgkmcnt);
  }

  if (Failed) {
    Error(ValLoc, "too large value for " + CntName);
    return true;
  }

  if (getLexer().isNot(AsmToken::RParen)) {
    return true;
  }

  Parser.Lex();
  if (getLexer().is(AsmToken::Amp) || getLexer().is(AsmToken::Comma)) {
    const AsmToken NextToken = getLexer().peekTok();
    if (NextToken.is(AsmToken::Identifier)) {
      Parser.Lex();
    }
  }

  return false;
}

OperandMatchResultTy
AMDGPUAsmParser::parseSWaitCntOps(OperandVector &Operands) {
  AMDGPU::IsaInfo::IsaVersion ISA =
      AMDGPU::IsaInfo::getIsaVersion(getFeatureBits());
  int64_t Waitcnt = getWaitcntBitMask(ISA);
  SMLoc S = Parser.getTok().getLoc();

  switch(getLexer().getKind()) {
    default: return MatchOperand_ParseFail;
    case AsmToken::Integer:
      // The operand can be an integer value.
      if (getParser().parseAbsoluteExpression(Waitcnt))
        return MatchOperand_ParseFail;
      break;

    case AsmToken::Identifier:
      do {
        if (parseCnt(Waitcnt))
          return MatchOperand_ParseFail;
      } while(getLexer().isNot(AsmToken::EndOfStatement));
      break;
  }
  Operands.push_back(AMDGPUOperand::CreateImm(this, Waitcnt, S));
  return MatchOperand_Success;
}

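// Illustrative hwreg operand forms parsed below; symbolic names come from
// Hwreg::IdSymbolic (HW_REG_MODE is an assumed example), and the offset and
// width are optional:
//     s_getreg_b32 s2, hwreg(HW_REG_MODE)
//     s_setreg_b32 hwreg(HW_REG_MODE, 0, 32), s2
//     s_setreg_b32 hwreg(6), s2        // a raw register id is also accepted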
bool AMDGPUAsmParser::parseHwregConstruct(OperandInfoTy &HwReg, int64_t &Offset,
                                          int64_t &Width) {
  using namespace llvm::AMDGPU::Hwreg;

  if (Parser.getTok().getString() != "hwreg")
    return true;
  Parser.Lex();

  if (getLexer().isNot(AsmToken::LParen))
    return true;
  Parser.Lex();

  if (getLexer().is(AsmToken::Identifier)) {
    HwReg.IsSymbolic = true;
    HwReg.Id = ID_UNKNOWN_;
    const StringRef tok = Parser.getTok().getString();
    for (int i = ID_SYMBOLIC_FIRST_; i < ID_SYMBOLIC_LAST_; ++i) {
      if (tok == IdSymbolic[i]) {
        HwReg.Id = i;
        break;
      }
    }
    Parser.Lex();
  } else {
    HwReg.IsSymbolic = false;
    if (getLexer().isNot(AsmToken::Integer))
      return true;
    if (getParser().parseAbsoluteExpression(HwReg.Id))
      return true;
  }

  if (getLexer().is(AsmToken::RParen)) {
    Parser.Lex();
    return false;
  }

  // optional params
  if (getLexer().isNot(AsmToken::Comma))
    return true;
  Parser.Lex();

  if (getLexer().isNot(AsmToken::Integer))
    return true;
  if (getParser().parseAbsoluteExpression(Offset))
    return true;

  if (getLexer().isNot(AsmToken::Comma))
    return true;
  Parser.Lex();

  if (getLexer().isNot(AsmToken::Integer))
    return true;
  if (getParser().parseAbsoluteExpression(Width))
    return true;

  if (getLexer().isNot(AsmToken::RParen))
    return true;
  Parser.Lex();

  return false;
}

OperandMatchResultTy AMDGPUAsmParser::parseHwreg(OperandVector &Operands) {
  using namespace llvm::AMDGPU::Hwreg;

  int64_t Imm16Val = 0;
  SMLoc S = Parser.getTok().getLoc();

  switch(getLexer().getKind()) {
    default: return MatchOperand_NoMatch;
    case AsmToken::Integer:
      // The operand can be an integer value.
      if (getParser().parseAbsoluteExpression(Imm16Val))
        return MatchOperand_NoMatch;
      if (Imm16Val < 0 || !isUInt<16>(Imm16Val)) {
        Error(S, "invalid immediate: only 16-bit values are legal");
        // Do not return an error code; create an imm operand anyway and
        // proceed to the next operand, if any. That avoids unnecessary
        // error messages.
      }
      break;

    case AsmToken::Identifier: {
        OperandInfoTy HwReg(ID_UNKNOWN_);
        int64_t Offset = OFFSET_DEFAULT_;
        int64_t Width = WIDTH_M1_DEFAULT_ + 1;
        if (parseHwregConstruct(HwReg, Offset, Width))
          return MatchOperand_ParseFail;
        if (HwReg.Id < 0 || !isUInt<ID_WIDTH_>(HwReg.Id)) {
          if (HwReg.IsSymbolic)
            Error(S, "invalid symbolic name of hardware register");
          else
            Error(S, "invalid code of hardware register: only 6-bit values are legal");
        }
        if (Offset < 0 || !isUInt<OFFSET_WIDTH_>(Offset))
          Error(S, "invalid bit offset: only 5-bit values are legal");
        if ((Width-1) < 0 || !isUInt<WIDTH_M1_WIDTH_>(Width-1))
          Error(S, "invalid bitfield width: only values from 1 to 32 are legal");
        Imm16Val = (HwReg.Id << ID_SHIFT_) | (Offset << OFFSET_SHIFT_) | ((Width-1) << WIDTH_M1_SHIFT_);
      }
      break;
  }
  Operands.push_back(AMDGPUOperand::CreateImm(this, Imm16Val, S, AMDGPUOperand::ImmTyHwreg));
  return MatchOperand_Success;
}

bool AMDGPUOperand::isSWaitCnt() const {
  return isImm();
}

bool AMDGPUOperand::isHwreg() const {
  return isImmTy(ImmTyHwreg);
}

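// Illustrative sendmsg operand forms parsed below; message and operation
// names come from SendMsg::IdSymbolic, OpGsSymbolic, and OpSysSymbolic, and
// the GS stream id is optional:
//     s_sendmsg sendmsg(MSG_INTERRUPT)
//     s_sendmsg sendmsg(MSG_GS, GS_OP_EMIT, 0)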
3155 bool AMDGPUAsmParser::parseSendMsgConstruct(OperandInfoTy &Msg, OperandInfoTy &Operation, int64_t &StreamId) {
3156   using namespace llvm::AMDGPU::SendMsg;
3157 
3158   if (Parser.getTok().getString() != "sendmsg")
3159     return true;
3160   Parser.Lex();
3161 
3162   if (getLexer().isNot(AsmToken::LParen))
3163     return true;
3164   Parser.Lex();
3165 
3166   if (getLexer().is(AsmToken::Identifier)) {
3167     Msg.IsSymbolic = true;
3168     Msg.Id = ID_UNKNOWN_;
3169     const std::string tok = Parser.getTok().getString();
3170     for (int i = ID_GAPS_FIRST_; i < ID_GAPS_LAST_; ++i) {
3171       switch(i) {
3172         default: continue; // Omit gaps.
3173         case ID_INTERRUPT: case ID_GS: case ID_GS_DONE:  case ID_SYSMSG: break;
3174       }
3175       if (tok == IdSymbolic[i]) {
3176         Msg.Id = i;
3177         break;
3178       }
3179     }
3180     Parser.Lex();
3181   } else {
3182     Msg.IsSymbolic = false;
3183     if (getLexer().isNot(AsmToken::Integer))
3184       return true;
3185     if (getParser().parseAbsoluteExpression(Msg.Id))
3186       return true;
3187     if (getLexer().is(AsmToken::Integer))
3188       if (getParser().parseAbsoluteExpression(Msg.Id))
3189         Msg.Id = ID_UNKNOWN_;
3190   }
3191   if (Msg.Id == ID_UNKNOWN_) // Don't know how to parse the rest.
3192     return false;
3193 
3194   if (!(Msg.Id == ID_GS || Msg.Id == ID_GS_DONE || Msg.Id == ID_SYSMSG)) {
3195     if (getLexer().isNot(AsmToken::RParen))
3196       return true;
3197     Parser.Lex();
3198     return false;
3199   }
3200 
3201   if (getLexer().isNot(AsmToken::Comma))
3202     return true;
3203   Parser.Lex();
3204 
3205   assert(Msg.Id == ID_GS || Msg.Id == ID_GS_DONE || Msg.Id == ID_SYSMSG);
3206   Operation.Id = ID_UNKNOWN_;
3207   if (getLexer().is(AsmToken::Identifier)) {
3208     Operation.IsSymbolic = true;
3209     const char* const *S = (Msg.Id == ID_SYSMSG) ? OpSysSymbolic : OpGsSymbolic;
3210     const int F = (Msg.Id == ID_SYSMSG) ? OP_SYS_FIRST_ : OP_GS_FIRST_;
3211     const int L = (Msg.Id == ID_SYSMSG) ? OP_SYS_LAST_ : OP_GS_LAST_;
3212     const StringRef Tok = Parser.getTok().getString();
3213     for (int i = F; i < L; ++i) {
3214       if (Tok == S[i]) {
3215         Operation.Id = i;
3216         break;
3217       }
3218     }
3219     Parser.Lex();
3220   } else {
3221     Operation.IsSymbolic = false;
3222     if (getLexer().isNot(AsmToken::Integer))
3223       return true;
3224     if (getParser().parseAbsoluteExpression(Operation.Id))
3225       return true;
3226   }
3227 
3228   if ((Msg.Id == ID_GS || Msg.Id == ID_GS_DONE) && Operation.Id != OP_GS_NOP) {
3229     // Stream id is optional.
3230     if (getLexer().is(AsmToken::RParen)) {
3231       Parser.Lex();
3232       return false;
3233     }
3234 
3235     if (getLexer().isNot(AsmToken::Comma))
3236       return true;
3237     Parser.Lex();
3238 
3239     if (getLexer().isNot(AsmToken::Integer))
3240       return true;
3241     if (getParser().parseAbsoluteExpression(StreamId))
3242       return true;
3243   }
3244 
3245   if (getLexer().isNot(AsmToken::RParen))
3246     return true;
3247   Parser.Lex();
3248   return false;
3249 }
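
// For illustration, constructs this parser accepts (message and operation
// names are assumptions based on the IdSymbolic/OpGsSymbolic/OpSysSymbolic
// tables in AMDGPUAsmUtils; encodings are not re-verified here):
//
//   s_sendmsg sendmsg(MSG_INTERRUPT)
//   s_sendmsg sendmsg(MSG_GS, GS_OP_EMIT, 0)    // stream id is optional
//   s_sendmsg sendmsg(MSG_SYSMSG, SYSMSG_OP_ECC_ERR_INTERRUPT)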

OperandMatchResultTy AMDGPUAsmParser::parseInterpSlot(OperandVector &Operands) {
  if (getLexer().getKind() != AsmToken::Identifier)
    return MatchOperand_NoMatch;

  StringRef Str = Parser.getTok().getString();
  int Slot = StringSwitch<int>(Str)
    .Case("p10", 0)
    .Case("p20", 1)
    .Case("p0", 2)
    .Default(-1);

  SMLoc S = Parser.getTok().getLoc();
  if (Slot == -1)
    return MatchOperand_ParseFail;

  Parser.Lex();
  Operands.push_back(AMDGPUOperand::CreateImm(this, Slot, S,
                                              AMDGPUOperand::ImmTyInterpSlot));
  return MatchOperand_Success;
}

OperandMatchResultTy AMDGPUAsmParser::parseInterpAttr(OperandVector &Operands) {
  if (getLexer().getKind() != AsmToken::Identifier)
    return MatchOperand_NoMatch;

  StringRef Str = Parser.getTok().getString();
  if (!Str.startswith("attr"))
    return MatchOperand_NoMatch;

  StringRef Chan = Str.take_back(2);
  int AttrChan = StringSwitch<int>(Chan)
    .Case(".x", 0)
    .Case(".y", 1)
    .Case(".z", 2)
    .Case(".w", 3)
    .Default(-1);
  if (AttrChan == -1)
    return MatchOperand_ParseFail;

  Str = Str.drop_back(2).drop_front(4);

  uint8_t Attr;
  if (Str.getAsInteger(10, Attr))
    return MatchOperand_ParseFail;

  SMLoc S = Parser.getTok().getLoc();
  Parser.Lex();
  if (Attr > 63) {
    Error(S, "out of bounds attr");
    return MatchOperand_Success;
  }

  SMLoc SChan = SMLoc::getFromPointer(Chan.data());

  Operands.push_back(AMDGPUOperand::CreateImm(this, Attr, S,
                                              AMDGPUOperand::ImmTyInterpAttr));
  Operands.push_back(AMDGPUOperand::CreateImm(this, AttrChan, SChan,
                                              AMDGPUOperand::ImmTyAttrChan));
  return MatchOperand_Success;
}
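
// For illustration, the interpolation operands parsed above (instruction
// spelling is an assumption for exposition, not taken from this file):
//
//   v_interp_p1_f32 v0, v1, attr0.x
//
// "p10"/"p20"/"p0" name the slot; "attrN.c" selects attribute N (0..63) and
// channel c in {x, y, z, w}.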

void AMDGPUAsmParser::errorExpTgt() {
  Error(Parser.getTok().getLoc(), "invalid exp target");
}

OperandMatchResultTy AMDGPUAsmParser::parseExpTgtImpl(StringRef Str,
                                                      uint8_t &Val) {
  if (Str == "null") {
    Val = 9;
    return MatchOperand_Success;
  }

  if (Str.startswith("mrt")) {
    Str = Str.drop_front(3);
    if (Str == "z") { // == mrtz
      Val = 8;
      return MatchOperand_Success;
    }

    if (Str.getAsInteger(10, Val))
      return MatchOperand_ParseFail;

    if (Val > 7)
      errorExpTgt();

    return MatchOperand_Success;
  }

  if (Str.startswith("pos")) {
    Str = Str.drop_front(3);
    if (Str.getAsInteger(10, Val))
      return MatchOperand_ParseFail;

    if (Val > 3)
      errorExpTgt();

    Val += 12;
    return MatchOperand_Success;
  }

  if (Str.startswith("param")) {
    Str = Str.drop_front(5);
    if (Str.getAsInteger(10, Val))
      return MatchOperand_ParseFail;

    if (Val >= 32)
      errorExpTgt();

    Val += 32;
    return MatchOperand_Success;
  }

  if (Str.startswith("invalid_target_")) {
    Str = Str.drop_front(15);
    if (Str.getAsInteger(10, Val))
      return MatchOperand_ParseFail;

    errorExpTgt();
    return MatchOperand_Success;
  }

  return MatchOperand_NoMatch;
}
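
// The resulting target encoding, as implied by the code above:
//   mrt0..mrt7 -> 0..7, mrtz -> 8, null -> 9, pos0..pos3 -> 12..15,
//   param0..param31 -> 32..63.
// For illustration (operand spelling assumed): exp mrt0 v0, v0, v0, v0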

OperandMatchResultTy AMDGPUAsmParser::parseExpTgt(OperandVector &Operands) {
  uint8_t Val;
  StringRef Str = Parser.getTok().getString();

  auto Res = parseExpTgtImpl(Str, Val);
  if (Res != MatchOperand_Success)
    return Res;

  SMLoc S = Parser.getTok().getLoc();
  Parser.Lex();

  Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S,
                                              AMDGPUOperand::ImmTyExpTgt));
  return MatchOperand_Success;
}

OperandMatchResultTy
AMDGPUAsmParser::parseSendMsgOp(OperandVector &Operands) {
  using namespace llvm::AMDGPU::SendMsg;

  int64_t Imm16Val = 0;
  SMLoc S = Parser.getTok().getLoc();

  switch(getLexer().getKind()) {
  default:
    return MatchOperand_NoMatch;
  case AsmToken::Integer:
    // The operand can be an integer value.
    if (getParser().parseAbsoluteExpression(Imm16Val))
      return MatchOperand_NoMatch;
    if (Imm16Val < 0 || !isUInt<16>(Imm16Val)) {
      Error(S, "invalid immediate: only 16-bit values are legal");
      // Do not return an error code; create an imm operand anyway and proceed
      // to the next operand, if any. That avoids unnecessary error messages.
    }
    break;
  case AsmToken::Identifier: {
      OperandInfoTy Msg(ID_UNKNOWN_);
      OperandInfoTy Operation(OP_UNKNOWN_);
      int64_t StreamId = STREAM_ID_DEFAULT_;
      if (parseSendMsgConstruct(Msg, Operation, StreamId))
        return MatchOperand_ParseFail;
      do {
        // Validate and encode message ID.
        if (!((ID_INTERRUPT <= Msg.Id && Msg.Id <= ID_GS_DONE)
              || Msg.Id == ID_SYSMSG)) {
          if (Msg.IsSymbolic)
            Error(S, "invalid/unsupported symbolic name of message");
          else
            Error(S, "invalid/unsupported code of message");
          break;
        }
        Imm16Val = (Msg.Id << ID_SHIFT_);
        // Validate and encode operation ID.
        if (Msg.Id == ID_GS || Msg.Id == ID_GS_DONE) {
          if (!(OP_GS_FIRST_ <= Operation.Id && Operation.Id < OP_GS_LAST_)) {
            if (Operation.IsSymbolic)
              Error(S, "invalid symbolic name of GS_OP");
            else
              Error(S, "invalid code of GS_OP: only 2-bit values are legal");
            break;
          }
          if (Operation.Id == OP_GS_NOP
              && Msg.Id != ID_GS_DONE) {
            Error(S, "invalid GS_OP: NOP is for GS_DONE only");
            break;
          }
          Imm16Val |= (Operation.Id << OP_SHIFT_);
        }
        if (Msg.Id == ID_SYSMSG) {
          if (!(OP_SYS_FIRST_ <= Operation.Id && Operation.Id < OP_SYS_LAST_)) {
            if (Operation.IsSymbolic)
              Error(S, "invalid/unsupported symbolic name of SYSMSG_OP");
            else
              Error(S, "invalid/unsupported code of SYSMSG_OP");
            break;
          }
          Imm16Val |= (Operation.Id << OP_SHIFT_);
        }
        // Validate and encode stream ID.
        if ((Msg.Id == ID_GS || Msg.Id == ID_GS_DONE) && Operation.Id != OP_GS_NOP) {
          if (!(STREAM_ID_FIRST_ <= StreamId && StreamId < STREAM_ID_LAST_)) {
            Error(S, "invalid stream id: only 2-bit values are legal");
            break;
          }
          Imm16Val |= (StreamId << STREAM_ID_SHIFT_);
        }
      } while (false);
    }
    break;
  }
  Operands.push_back(AMDGPUOperand::CreateImm(this, Imm16Val, S, AMDGPUOperand::ImmTySendMsg));
  return MatchOperand_Success;
}

bool AMDGPUOperand::isSendMsg() const {
  return isImmTy(ImmTySendMsg);
}

//===----------------------------------------------------------------------===//
// parser helpers
//===----------------------------------------------------------------------===//

bool
AMDGPUAsmParser::trySkipId(const StringRef Id) {
  if (getLexer().getKind() == AsmToken::Identifier &&
      Parser.getTok().getString() == Id) {
    Parser.Lex();
    return true;
  }
  return false;
}

bool
AMDGPUAsmParser::trySkipToken(const AsmToken::TokenKind Kind) {
  if (getLexer().getKind() == Kind) {
    Parser.Lex();
    return true;
  }
  return false;
}

bool
AMDGPUAsmParser::skipToken(const AsmToken::TokenKind Kind,
                           const StringRef ErrMsg) {
  if (!trySkipToken(Kind)) {
    Error(Parser.getTok().getLoc(), ErrMsg);
    return false;
  }
  return true;
}

bool
AMDGPUAsmParser::parseExpr(int64_t &Imm) {
  return !getParser().parseAbsoluteExpression(Imm);
}

bool
AMDGPUAsmParser::parseString(StringRef &Val, const StringRef ErrMsg) {
  SMLoc S = Parser.getTok().getLoc();
  if (getLexer().getKind() == AsmToken::String) {
    Val = Parser.getTok().getStringContents();
    Parser.Lex();
    return true;
  } else {
    Error(S, ErrMsg);
    return false;
  }
}

//===----------------------------------------------------------------------===//
// swizzle
//===----------------------------------------------------------------------===//

LLVM_READNONE
static unsigned
encodeBitmaskPerm(const unsigned AndMask,
                  const unsigned OrMask,
                  const unsigned XorMask) {
  using namespace llvm::AMDGPU::Swizzle;

  return BITMASK_PERM_ENC |
         (AndMask << BITMASK_AND_SHIFT) |
         (OrMask  << BITMASK_OR_SHIFT)  |
         (XorMask << BITMASK_XOR_SHIFT);
}

bool
AMDGPUAsmParser::parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
                                      const unsigned MinVal,
                                      const unsigned MaxVal,
                                      const StringRef ErrMsg) {
  for (unsigned i = 0; i < OpNum; ++i) {
    if (!skipToken(AsmToken::Comma, "expected a comma")) {
      return false;
    }
    SMLoc ExprLoc = Parser.getTok().getLoc();
    if (!parseExpr(Op[i])) {
      return false;
    }
    if (Op[i] < MinVal || Op[i] > MaxVal) {
      Error(ExprLoc, ErrMsg);
      return false;
    }
  }

  return true;
}

bool
AMDGPUAsmParser::parseSwizzleQuadPerm(int64_t &Imm) {
  using namespace llvm::AMDGPU::Swizzle;

  int64_t Lane[LANE_NUM];
  if (parseSwizzleOperands(LANE_NUM, Lane, 0, LANE_MAX,
                           "expected a 2-bit lane id")) {
    Imm = QUAD_PERM_ENC;
    for (auto i = 0; i < LANE_NUM; ++i) {
      Imm |= Lane[i] << (LANE_SHIFT * i);
    }
    return true;
  }
  return false;
}

bool
AMDGPUAsmParser::parseSwizzleBroadcast(int64_t &Imm) {
  using namespace llvm::AMDGPU::Swizzle;

  SMLoc S = Parser.getTok().getLoc();
  int64_t GroupSize;
  int64_t LaneIdx;

  if (!parseSwizzleOperands(1, &GroupSize,
                            2, 32,
                            "group size must be in the interval [2,32]")) {
    return false;
  }
  if (!isPowerOf2_64(GroupSize)) {
    Error(S, "group size must be a power of two");
    return false;
  }
  if (parseSwizzleOperands(1, &LaneIdx,
                           0, GroupSize - 1,
                           "lane id must be in the interval [0,group size - 1]")) {
    Imm = encodeBitmaskPerm(BITMASK_MAX - GroupSize + 1, LaneIdx, 0);
    return true;
  }
  return false;
}
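
// Worked example for the broadcast encoding above (assuming BITMASK_MAX is
// 0x1f, i.e. a 5-bit lane id): swizzle(BROADCAST, 4, 1) yields
// AndMask = 0x1f - 4 + 1 = 0x1c, OrMask = 1, XorMask = 0, so every lane reads
// lane ((id & ~3) | 1), the second lane of its group of four.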

bool
AMDGPUAsmParser::parseSwizzleReverse(int64_t &Imm) {
  using namespace llvm::AMDGPU::Swizzle;

  SMLoc S = Parser.getTok().getLoc();
  int64_t GroupSize;

  if (!parseSwizzleOperands(1, &GroupSize,
      2, 32, "group size must be in the interval [2,32]")) {
    return false;
  }
  if (!isPowerOf2_64(GroupSize)) {
    Error(S, "group size must be a power of two");
    return false;
  }

  Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize - 1);
  return true;
}

bool
AMDGPUAsmParser::parseSwizzleSwap(int64_t &Imm) {
  using namespace llvm::AMDGPU::Swizzle;

  SMLoc S = Parser.getTok().getLoc();
  int64_t GroupSize;

  if (!parseSwizzleOperands(1, &GroupSize,
      1, 16, "group size must be in the interval [1,16]")) {
    return false;
  }
  if (!isPowerOf2_64(GroupSize)) {
    Error(S, "group size must be a power of two");
    return false;
  }

  Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize);
  return true;
}

bool
AMDGPUAsmParser::parseSwizzleBitmaskPerm(int64_t &Imm) {
  using namespace llvm::AMDGPU::Swizzle;

  if (!skipToken(AsmToken::Comma, "expected a comma")) {
    return false;
  }

  StringRef Ctl;
  SMLoc StrLoc = Parser.getTok().getLoc();
  if (!parseString(Ctl)) {
    return false;
  }
  if (Ctl.size() != BITMASK_WIDTH) {
    Error(StrLoc, "expected a 5-character mask");
    return false;
  }

  unsigned AndMask = 0;
  unsigned OrMask = 0;
  unsigned XorMask = 0;

  for (size_t i = 0; i < Ctl.size(); ++i) {
    unsigned Mask = 1 << (BITMASK_WIDTH - 1 - i);
    switch(Ctl[i]) {
    default:
      Error(StrLoc, "invalid mask");
      return false;
    case '0':
      break;
    case '1':
      OrMask |= Mask;
      break;
    case 'p':
      AndMask |= Mask;
      break;
    case 'i':
      AndMask |= Mask;
      XorMask |= Mask;
      break;
    }
  }

  Imm = encodeBitmaskPerm(AndMask, OrMask, XorMask);
  return true;
}
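
// The mask string above is read most-significant bit first; per lane-id bit,
// '0' forces the result bit to 0, '1' forces it to 1, 'p' preserves it, and
// 'i' inverts it (AndMask selects, OrMask sets, XorMask flips). For
// illustration (assumed spelling): swizzle(BITMASK_PERM, "00p11") keeps bit 2
// of the lane id and forces bits 1:0 to 1.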

bool
AMDGPUAsmParser::parseSwizzleOffset(int64_t &Imm) {
  SMLoc OffsetLoc = Parser.getTok().getLoc();

  if (!parseExpr(Imm)) {
    return false;
  }
  if (!isUInt<16>(Imm)) {
    Error(OffsetLoc, "expected a 16-bit offset");
    return false;
  }
  return true;
}

bool
AMDGPUAsmParser::parseSwizzleMacro(int64_t &Imm) {
  using namespace llvm::AMDGPU::Swizzle;

  if (skipToken(AsmToken::LParen, "expected a left parenthesis")) {

    SMLoc ModeLoc = Parser.getTok().getLoc();
    bool Ok = false;

    if (trySkipId(IdSymbolic[ID_QUAD_PERM])) {
      Ok = parseSwizzleQuadPerm(Imm);
    } else if (trySkipId(IdSymbolic[ID_BITMASK_PERM])) {
      Ok = parseSwizzleBitmaskPerm(Imm);
    } else if (trySkipId(IdSymbolic[ID_BROADCAST])) {
      Ok = parseSwizzleBroadcast(Imm);
    } else if (trySkipId(IdSymbolic[ID_SWAP])) {
      Ok = parseSwizzleSwap(Imm);
    } else if (trySkipId(IdSymbolic[ID_REVERSE])) {
      Ok = parseSwizzleReverse(Imm);
    } else {
      Error(ModeLoc, "expected a swizzle mode");
    }

    return Ok && skipToken(AsmToken::RParen, "expected a closing parenthesis");
  }

  return false;
}

OperandMatchResultTy
AMDGPUAsmParser::parseSwizzleOp(OperandVector &Operands) {
  SMLoc S = Parser.getTok().getLoc();
  int64_t Imm = 0;

  if (trySkipId("offset")) {

    bool Ok = false;
    if (skipToken(AsmToken::Colon, "expected a colon")) {
      if (trySkipId("swizzle")) {
        Ok = parseSwizzleMacro(Imm);
      } else {
        Ok = parseSwizzleOffset(Imm);
      }
    }

    Operands.push_back(AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTySwizzle));

    return Ok ? MatchOperand_Success : MatchOperand_ParseFail;
  } else {
    return MatchOperand_NoMatch;
  }
}
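
// For illustration, the two operand forms accepted above (instruction
// spelling assumed for exposition):
//
//   ds_swizzle_b32 v5, v1 offset:0xffff
//   ds_swizzle_b32 v5, v1 offset:swizzle(QUAD_PERM, 0, 1, 2, 3)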

bool
AMDGPUOperand::isSwizzle() const {
  return isImmTy(ImmTySwizzle);
}

//===----------------------------------------------------------------------===//
// sopp branch targets
//===----------------------------------------------------------------------===//

OperandMatchResultTy
AMDGPUAsmParser::parseSOppBrTarget(OperandVector &Operands) {
  SMLoc S = Parser.getTok().getLoc();

  switch (getLexer().getKind()) {
    default: return MatchOperand_ParseFail;
    case AsmToken::Integer: {
      int64_t Imm;
      if (getParser().parseAbsoluteExpression(Imm))
        return MatchOperand_ParseFail;
      Operands.push_back(AMDGPUOperand::CreateImm(this, Imm, S));
      return MatchOperand_Success;
    }

    case AsmToken::Identifier:
      Operands.push_back(AMDGPUOperand::CreateExpr(this,
          MCSymbolRefExpr::create(getContext().getOrCreateSymbol(
                                  Parser.getTok().getString()), getContext()), S));
      Parser.Lex();
      return MatchOperand_Success;
  }
}

//===----------------------------------------------------------------------===//
// mubuf
//===----------------------------------------------------------------------===//

AMDGPUOperand::Ptr AMDGPUAsmParser::defaultGLC() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyGLC);
}

AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSLC() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTySLC);
}

AMDGPUOperand::Ptr AMDGPUAsmParser::defaultTFE() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyTFE);
}

void AMDGPUAsmParser::cvtMubufImpl(MCInst &Inst,
                               const OperandVector &Operands,
                               bool IsAtomic, bool IsAtomicReturn) {
  OptionalImmIndexMap OptionalIdx;
  assert(IsAtomicReturn ? IsAtomic : true);

  for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);

    // Add the register arguments
    if (Op.isReg()) {
      Op.addRegOperands(Inst, 1);
      continue;
    }

    // Handle the case where soffset is an immediate
    if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
      Op.addImmOperands(Inst, 1);
      continue;
    }

    // Handle tokens like 'offen' which are sometimes hard-coded into the
    // asm string.  There are no MCInst operands for these.
    if (Op.isToken()) {
      continue;
    }
    assert(Op.isImm());

    // Handle optional arguments
    OptionalIdx[Op.getImmTy()] = i;
  }

  // Copy $vdata_in operand and insert as $vdata for MUBUF_Atomic RTN insns.
  if (IsAtomicReturn) {
    MCInst::iterator I = Inst.begin(); // $vdata_in is always at the beginning.
    Inst.insert(I, *I);
  }

  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset);
  if (!IsAtomic) { // glc is hard-coded.
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC);
  }
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC);
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
}
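
// For illustration, a MUBUF form whose optional operands flow through the
// OptionalIdx map above (spelling assumed for exposition):
//
//   buffer_load_dword v1, v2, s[4:7], s1 offen offset:4095 glc slc
//
// "offen" is a token with no MCInst operand; offset/glc/slc/tfe are appended
// in the fixed order at the end of cvtMubufImpl.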

void AMDGPUAsmParser::cvtMtbuf(MCInst &Inst, const OperandVector &Operands) {
  OptionalImmIndexMap OptionalIdx;

  for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);

    // Add the register arguments
    if (Op.isReg()) {
      Op.addRegOperands(Inst, 1);
      continue;
    }

    // Handle the case where soffset is an immediate
    if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
      Op.addImmOperands(Inst, 1);
      continue;
    }

    // Handle tokens like 'offen' which are sometimes hard-coded into the
    // asm string.  There are no MCInst operands for these.
    if (Op.isToken()) {
      continue;
    }
    assert(Op.isImm());

    // Handle optional arguments
    OptionalIdx[Op.getImmTy()] = i;
  }

  addOptionalImmOperand(Inst, Operands, OptionalIdx,
                        AMDGPUOperand::ImmTyOffset);
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDFMT);
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyNFMT);
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC);
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC);
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
}

//===----------------------------------------------------------------------===//
// mimg
//===----------------------------------------------------------------------===//

void AMDGPUAsmParser::cvtMIMG(MCInst &Inst, const OperandVector &Operands,
                              bool IsAtomic) {
  unsigned I = 1;
  const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
  for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
    ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
  }

  if (IsAtomic) {
    // Add src, same as dst
    ((AMDGPUOperand &)*Operands[I]).addRegOperands(Inst, 1);
  }

  OptionalImmIndexMap OptionalIdx;

  for (unsigned E = Operands.size(); I != E; ++I) {
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);

    // Add the register arguments
    if (Op.isRegOrImm()) {
      Op.addRegOrImmOperands(Inst, 1);
      continue;
    } else if (Op.isImmModifier()) {
      OptionalIdx[Op.getImmTy()] = I;
    } else {
      llvm_unreachable("unexpected operand type");
    }
  }

  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDMask);
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyUNorm);
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC);
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDA);
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyR128);
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyLWE);
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC);
}

void AMDGPUAsmParser::cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands) {
  cvtMIMG(Inst, Operands, true);
}

AMDGPUOperand::Ptr AMDGPUAsmParser::defaultDMask() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDMask);
}

AMDGPUOperand::Ptr AMDGPUAsmParser::defaultUNorm() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyUNorm);
}

AMDGPUOperand::Ptr AMDGPUAsmParser::defaultDA() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDA);
}

AMDGPUOperand::Ptr AMDGPUAsmParser::defaultR128() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyR128);
}

AMDGPUOperand::Ptr AMDGPUAsmParser::defaultLWE() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyLWE);
}

//===----------------------------------------------------------------------===//
// smrd
//===----------------------------------------------------------------------===//

bool AMDGPUOperand::isSMRDOffset8() const {
  return isImm() && isUInt<8>(getImm());
}

bool AMDGPUOperand::isSMRDOffset20() const {
  return isImm() && isUInt<20>(getImm());
}

bool AMDGPUOperand::isSMRDLiteralOffset() const {
  // 32-bit literals are only supported on CI, and we only want to use them
  // when the offset is wider than 8 bits.
  return isImm() && !isUInt<8>(getImm()) && isUInt<32>(getImm());
}

AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDOffset8() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
}

AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDOffset20() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
}

AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDLiteralOffset() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
}

AMDGPUOperand::Ptr AMDGPUAsmParser::defaultOffsetU12() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
}

AMDGPUOperand::Ptr AMDGPUAsmParser::defaultOffsetS13() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
}

//===----------------------------------------------------------------------===//
// vop3
//===----------------------------------------------------------------------===//

static bool ConvertOmodMul(int64_t &Mul) {
  if (Mul != 1 && Mul != 2 && Mul != 4)
    return false;

  Mul >>= 1;
  return true;
}

static bool ConvertOmodDiv(int64_t &Div) {
  if (Div == 1) {
    Div = 0;
    return true;
  }

  if (Div == 2) {
    Div = 3;
    return true;
  }

  return false;
}
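
// For illustration, the OMOD field values these conversions produce (assumed
// to match the hardware's output-modifier encoding): mul:1 -> 0 (none),
// mul:2 -> 1, mul:4 -> 2, div:2 -> 3; div:1 also encodes as 0.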

static bool ConvertBoundCtrl(int64_t &BoundCtrl) {
  if (BoundCtrl == 0) {
    BoundCtrl = 1;
    return true;
  }

  if (BoundCtrl == -1) {
    BoundCtrl = 0;
    return true;
  }

  return false;
}

// Note: the order in this table matches the order of operands in AsmString.
static const OptionalOperand AMDGPUOptionalOperandTable[] = {
  {"offen",   AMDGPUOperand::ImmTyOffen, true, nullptr},
  {"idxen",   AMDGPUOperand::ImmTyIdxen, true, nullptr},
  {"addr64",  AMDGPUOperand::ImmTyAddr64, true, nullptr},
  {"offset0", AMDGPUOperand::ImmTyOffset0, false, nullptr},
  {"offset1", AMDGPUOperand::ImmTyOffset1, false, nullptr},
  {"gds",     AMDGPUOperand::ImmTyGDS, true, nullptr},
  {"offset",  AMDGPUOperand::ImmTyOffset, false, nullptr},
  {"dfmt",    AMDGPUOperand::ImmTyDFMT, false, nullptr},
  {"nfmt",    AMDGPUOperand::ImmTyNFMT, false, nullptr},
  {"glc",     AMDGPUOperand::ImmTyGLC, true, nullptr},
  {"slc",     AMDGPUOperand::ImmTySLC, true, nullptr},
  {"tfe",     AMDGPUOperand::ImmTyTFE, true, nullptr},
  {"high",    AMDGPUOperand::ImmTyHigh, true, nullptr},
  {"clamp",   AMDGPUOperand::ImmTyClampSI, true, nullptr},
  {"omod",    AMDGPUOperand::ImmTyOModSI, false, ConvertOmodMul},
  {"unorm",   AMDGPUOperand::ImmTyUNorm, true, nullptr},
  {"da",      AMDGPUOperand::ImmTyDA,    true, nullptr},
  {"r128",    AMDGPUOperand::ImmTyR128,  true, nullptr},
  {"lwe",     AMDGPUOperand::ImmTyLWE,   true, nullptr},
  {"dmask",   AMDGPUOperand::ImmTyDMask, false, nullptr},
  {"row_mask",   AMDGPUOperand::ImmTyDppRowMask, false, nullptr},
  {"bank_mask",  AMDGPUOperand::ImmTyDppBankMask, false, nullptr},
  {"bound_ctrl", AMDGPUOperand::ImmTyDppBoundCtrl, false, ConvertBoundCtrl},
  {"dst_sel",    AMDGPUOperand::ImmTySdwaDstSel, false, nullptr},
  {"src0_sel",   AMDGPUOperand::ImmTySdwaSrc0Sel, false, nullptr},
  {"src1_sel",   AMDGPUOperand::ImmTySdwaSrc1Sel, false, nullptr},
  {"dst_unused", AMDGPUOperand::ImmTySdwaDstUnused, false, nullptr},
  {"compr",   AMDGPUOperand::ImmTyExpCompr, true, nullptr},
  {"vm",      AMDGPUOperand::ImmTyExpVM, true, nullptr},
  {"op_sel",    AMDGPUOperand::ImmTyOpSel, false, nullptr},
  {"op_sel_hi", AMDGPUOperand::ImmTyOpSelHi, false, nullptr},
  {"neg_lo",    AMDGPUOperand::ImmTyNegLo, false, nullptr},
  {"neg_hi",    AMDGPUOperand::ImmTyNegHi, false, nullptr}
};

OperandMatchResultTy AMDGPUAsmParser::parseOptionalOperand(OperandVector &Operands) {
  OperandMatchResultTy res;
  for (const OptionalOperand &Op : AMDGPUOptionalOperandTable) {
    // try to parse any optional operand here
    if (Op.IsBit) {
      res = parseNamedBit(Op.Name, Operands, Op.Type);
    } else if (Op.Type == AMDGPUOperand::ImmTyOModSI) {
      res = parseOModOperand(Operands);
    } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstSel ||
               Op.Type == AMDGPUOperand::ImmTySdwaSrc0Sel ||
               Op.Type == AMDGPUOperand::ImmTySdwaSrc1Sel) {
      res = parseSDWASel(Operands, Op.Name, Op.Type);
    } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstUnused) {
      res = parseSDWADstUnused(Operands);
    } else if (Op.Type == AMDGPUOperand::ImmTyOpSel ||
               Op.Type == AMDGPUOperand::ImmTyOpSelHi ||
               Op.Type == AMDGPUOperand::ImmTyNegLo ||
               Op.Type == AMDGPUOperand::ImmTyNegHi) {
      res = parseOperandArrayWithPrefix(Op.Name, Operands, Op.Type,
                                        Op.ConvertResult);
    } else {
      res = parseIntWithPrefix(Op.Name, Operands, Op.Type, Op.ConvertResult);
    }
    if (res != MatchOperand_NoMatch) {
      return res;
    }
  }
  return MatchOperand_NoMatch;
}

OperandMatchResultTy AMDGPUAsmParser::parseOModOperand(OperandVector &Operands) {
  StringRef Name = Parser.getTok().getString();
  if (Name == "mul") {
    return parseIntWithPrefix("mul", Operands,
                              AMDGPUOperand::ImmTyOModSI, ConvertOmodMul);
  }

  if (Name == "div") {
    return parseIntWithPrefix("div", Operands,
                              AMDGPUOperand::ImmTyOModSI, ConvertOmodDiv);
  }

  return MatchOperand_NoMatch;
}

void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands) {
  cvtVOP3P(Inst, Operands);

  int Opc = Inst.getOpcode();

  int SrcNum;
  const int Ops[] = { AMDGPU::OpName::src0,
                      AMDGPU::OpName::src1,
                      AMDGPU::OpName::src2 };
  for (SrcNum = 0;
       SrcNum < 3 && AMDGPU::getNamedOperandIdx(Opc, Ops[SrcNum]) != -1;
       ++SrcNum);
  assert(SrcNum > 0);

  int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
  unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();

  if ((OpSel & (1 << SrcNum)) != 0) {
    int ModIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers);
    uint32_t ModVal = Inst.getOperand(ModIdx).getImm();
    Inst.getOperand(ModIdx).setImm(ModVal | SISrcMods::DST_OP_SEL);
  }
}

static bool isRegOrImmWithInputMods(const MCInstrDesc &Desc, unsigned OpNum) {
      // 1. This operand is input modifiers
  return Desc.OpInfo[OpNum].OperandType == AMDGPU::OPERAND_INPUT_MODS
      // 2. This is not last operand
      && Desc.NumOperands > (OpNum + 1)
      // 3. Next operand is register class
      && Desc.OpInfo[OpNum + 1].RegClass != -1
      // 4. Next register is not tied to any other operand
      && Desc.getOperandConstraint(OpNum + 1, MCOI::OperandConstraint::TIED_TO) == -1;
}

void AMDGPUAsmParser::cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands)
{
  OptionalImmIndexMap OptionalIdx;
  unsigned Opc = Inst.getOpcode();

  unsigned I = 1;
  const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
  for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
    ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
  }

  for (unsigned E = Operands.size(); I != E; ++I) {
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
    if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
      Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
    } else if (Op.isInterpSlot() ||
               Op.isInterpAttr() ||
               Op.isAttrChan()) {
      Inst.addOperand(MCOperand::createImm(Op.Imm.Val));
    } else if (Op.isImmModifier()) {
      OptionalIdx[Op.getImmTy()] = I;
    } else {
      llvm_unreachable("unhandled operand type");
    }
  }

  if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::high) != -1) {
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyHigh);
  }

  if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) {
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI);
  }

  if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) {
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI);
  }
}

void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands,
                              OptionalImmIndexMap &OptionalIdx) {
  unsigned Opc = Inst.getOpcode();

  unsigned I = 1;
  const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
  for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
    ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
  }

  if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers) != -1) {
    // This instruction has src modifiers
    for (unsigned E = Operands.size(); I != E; ++I) {
      AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
      if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
        Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
      } else if (Op.isImmModifier()) {
        OptionalIdx[Op.getImmTy()] = I;
      } else if (Op.isRegOrImm()) {
        Op.addRegOrImmOperands(Inst, 1);
      } else {
        llvm_unreachable("unhandled operand type");
      }
    }
  } else {
    // No src modifiers
    for (unsigned E = Operands.size(); I != E; ++I) {
      AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
      if (Op.isMod()) {
        OptionalIdx[Op.getImmTy()] = I;
      } else {
        Op.addRegOrImmOperands(Inst, 1);
      }
    }
  }

  if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) {
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI);
  }

  if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) {
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI);
  }

  // Special case v_mac_{f16, f32}: it has a src2 register operand that is
  // tied to the dst operand. We do not allow modifiers for this operand in
  // the assembler, so src2_modifiers must be 0.
  if (Opc == AMDGPU::V_MAC_F32_e64_si || Opc == AMDGPU::V_MAC_F32_e64_vi ||
      Opc == AMDGPU::V_MAC_F16_e64_vi) {
    auto it = Inst.begin();
    std::advance(it, AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2_modifiers));
    it = Inst.insert(it, MCOperand::createImm(0)); // no modifiers for src2
    ++it;
    Inst.insert(it, Inst.getOperand(0)); // src2 = dst
  }
}

void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands) {
  OptionalImmIndexMap OptionalIdx;
  cvtVOP3(Inst, Operands, OptionalIdx);
}

void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands) {
  OptionalImmIndexMap OptIdx;

  cvtVOP3(Inst, Operands, OptIdx);

  // FIXME: This is messy. Parse the modifiers as if it was a normal VOP3
  // instruction, and then figure out where to actually put the modifiers
  int Opc = Inst.getOpcode();

  addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSel);

  int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi);
  if (OpSelHiIdx != -1) {
    addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSelHi, -1);
  }

  int NegLoIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_lo);
  if (NegLoIdx != -1) {
    addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegLo);
    addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegHi);
  }

  const int Ops[] = { AMDGPU::OpName::src0,
                      AMDGPU::OpName::src1,
                      AMDGPU::OpName::src2 };
  const int ModOps[] = { AMDGPU::OpName::src0_modifiers,
                         AMDGPU::OpName::src1_modifiers,
                         AMDGPU::OpName::src2_modifiers };

  int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);

  unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
  unsigned OpSelHi = 0;
  unsigned NegLo = 0;
  unsigned NegHi = 0;

  if (OpSelHiIdx != -1) {
    OpSelHi = Inst.getOperand(OpSelHiIdx).getImm();
  }

  if (NegLoIdx != -1) {
    int NegHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_hi);
    NegLo = Inst.getOperand(NegLoIdx).getImm();
    NegHi = Inst.getOperand(NegHiIdx).getImm();
  }

  for (int J = 0; J < 3; ++J) {
    int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]);
    if (OpIdx == -1)
      break;

    uint32_t ModVal = 0;

    if ((OpSel & (1 << J)) != 0)
      ModVal |= SISrcMods::OP_SEL_0;

    if ((OpSelHi & (1 << J)) != 0)
      ModVal |= SISrcMods::OP_SEL_1;

    if ((NegLo & (1 << J)) != 0)
      ModVal |= SISrcMods::NEG;

    if ((NegHi & (1 << J)) != 0)
      ModVal |= SISrcMods::NEG_HI;

    int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]);

    Inst.getOperand(ModIdx).setImm(Inst.getOperand(ModIdx).getImm() | ModVal);
  }
}
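
// For illustration, a packed-math form whose modifiers take the path above
// (instruction spelling assumed for exposition):
//
//   v_pk_add_f16 v0, v1, v2 op_sel:[1,0] op_sel_hi:[0,1] neg_lo:[0,1]
//
// Bit J of each array corresponds to srcJ and is folded into srcJ_modifiers
// as OP_SEL_0 / OP_SEL_1 / NEG / NEG_HI by the loop above.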

//===----------------------------------------------------------------------===//
// dpp
//===----------------------------------------------------------------------===//

bool AMDGPUOperand::isDPPCtrl() const {
  bool result = isImm() && getImmTy() == ImmTyDppCtrl && isUInt<9>(getImm());
  if (result) {
    int64_t Imm = getImm();
    return ((Imm >= 0x000) && (Imm <= 0x0ff)) ||
           ((Imm >= 0x101) && (Imm <= 0x10f)) ||
           ((Imm >= 0x111) && (Imm <= 0x11f)) ||
           ((Imm >= 0x121) && (Imm <= 0x12f)) ||
           (Imm == 0x130) ||
           (Imm == 0x134) ||
           (Imm == 0x138) ||
           (Imm == 0x13c) ||
           (Imm == 0x140) ||
           (Imm == 0x141) ||
           (Imm == 0x142) ||
           (Imm == 0x143);
  }
  return false;
}

bool AMDGPUOperand::isGPRIdxMode() const {
  return isImm() && isUInt<4>(getImm());
}

bool AMDGPUOperand::isS16Imm() const {
  return isImm() && (isInt<16>(getImm()) || isUInt<16>(getImm()));
}

bool AMDGPUOperand::isU16Imm() const {
  return isImm() && isUInt<16>(getImm());
}

OperandMatchResultTy
AMDGPUAsmParser::parseDPPCtrl(OperandVector &Operands) {
  SMLoc S = Parser.getTok().getLoc();
  StringRef Prefix;
  int64_t Int;

  if (getLexer().getKind() == AsmToken::Identifier) {
    Prefix = Parser.getTok().getString();
  } else {
    return MatchOperand_NoMatch;
  }

  if (Prefix == "row_mirror") {
    Int = 0x140;
    Parser.Lex();
  } else if (Prefix == "row_half_mirror") {
    Int = 0x141;
    Parser.Lex();
  } else {
    // Check to prevent parseDPPCtrlOps from eating invalid tokens
    if (Prefix != "quad_perm"
        && Prefix != "row_shl"
        && Prefix != "row_shr"
        && Prefix != "row_ror"
        && Prefix != "wave_shl"
        && Prefix != "wave_rol"
        && Prefix != "wave_shr"
        && Prefix != "wave_ror"
        && Prefix != "row_bcast") {
      return MatchOperand_NoMatch;
    }

    Parser.Lex();
    if (getLexer().isNot(AsmToken::Colon))
      return MatchOperand_ParseFail;

    if (Prefix == "quad_perm") {
      // quad_perm:[%d,%d,%d,%d]
      Parser.Lex();
      if (getLexer().isNot(AsmToken::LBrac))
        return MatchOperand_ParseFail;
      Parser.Lex();

      if (getParser().parseAbsoluteExpression(Int) || !(0 <= Int && Int <= 3))
        return MatchOperand_ParseFail;

      for (int i = 0; i < 3; ++i) {
        if (getLexer().isNot(AsmToken::Comma))
          return MatchOperand_ParseFail;
        Parser.Lex();

        int64_t Temp;
        if (getParser().parseAbsoluteExpression(Temp) || !(0 <= Temp && Temp <= 3))
          return MatchOperand_ParseFail;
        const int shift = i*2 + 2;
        Int += (Temp << shift);
      }

      if (getLexer().isNot(AsmToken::RBrac))
        return MatchOperand_ParseFail;
      Parser.Lex();
    } else {
      // sel:%d
      Parser.Lex();
      if (getParser().parseAbsoluteExpression(Int))
        return MatchOperand_ParseFail;

      if (Prefix == "row_shl" && 1 <= Int && Int <= 15) {
        Int |= 0x100;
      } else if (Prefix == "row_shr" && 1 <= Int && Int <= 15) {
        Int |= 0x110;
      } else if (Prefix == "row_ror" && 1 <= Int && Int <= 15) {
        Int |= 0x120;
      } else if (Prefix == "wave_shl" && 1 == Int) {
        Int = 0x130;
      } else if (Prefix == "wave_rol" && 1 == Int) {
        Int = 0x134;
      } else if (Prefix == "wave_shr" && 1 == Int) {
        Int = 0x138;
      } else if (Prefix == "wave_ror" && 1 == Int) {
        Int = 0x13C;
      } else if (Prefix == "row_bcast") {
        if (Int == 15) {
          Int = 0x142;
        } else if (Int == 31) {
          Int = 0x143;
        } else {
          return MatchOperand_ParseFail;
        }
      } else {
        return MatchOperand_ParseFail;
      }
    }
  }

  Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, AMDGPUOperand::ImmTyDppCtrl));
  return MatchOperand_Success;
}
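
// For illustration, dpp_ctrl spellings handled above and the encodings they
// produce (values taken from the code; instruction spelling assumed):
//
//   v_mov_b32_dpp v0, v1 quad_perm:[0,1,2,3]   // 0x000..0x0ff
//   v_mov_b32_dpp v0, v1 row_shl:1             // 0x101
//   v_mov_b32_dpp v0, v1 row_mirror            // 0x140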

AMDGPUOperand::Ptr AMDGPUAsmParser::defaultRowMask() const {
  return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppRowMask);
}

AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBankMask() const {
  return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppBankMask);
}

AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBoundCtrl() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppBoundCtrl);
}

void AMDGPUAsmParser::cvtDPP(MCInst &Inst, const OperandVector &Operands) {
  OptionalImmIndexMap OptionalIdx;

  unsigned I = 1;
  const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
  for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
    ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
  }

  // All DPP instructions with at least one source operand have a fake "old"
  // source at the beginning that's tied to the dst operand. Handle it here.
  if (Desc.getNumOperands() >= 2)
    Inst.addOperand(Inst.getOperand(0));

  for (unsigned E = Operands.size(); I != E; ++I) {
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
    // Add the register arguments
    if (Op.isReg() && Op.Reg.RegNo == AMDGPU::VCC) {
      // VOP2b (v_add_u32, v_sub_u32 ...) dpp use "vcc" token.
      // Skip it.
      continue;
    } else if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
      Op.addRegWithFPInputModsOperands(Inst, 2);
    } else if (Op.isDPPCtrl()) {
      Op.addImmOperands(Inst, 1);
    } else if (Op.isImm()) {
      // Handle optional arguments
      OptionalIdx[Op.getImmTy()] = I;
    } else {
      llvm_unreachable("Invalid operand type");
    }
  }

  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppRowMask, 0xf);
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBankMask, 0xf);
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBoundCtrl);
}

//===----------------------------------------------------------------------===//
// sdwa
//===----------------------------------------------------------------------===//

OperandMatchResultTy
AMDGPUAsmParser::parseSDWASel(OperandVector &Operands, StringRef Prefix,
                              AMDGPUOperand::ImmTy Type) {
  using namespace llvm::AMDGPU::SDWA;

  SMLoc S = Parser.getTok().getLoc();
  StringRef Value;
  OperandMatchResultTy res;

  res = parseStringWithPrefix(Prefix, Value);
  if (res != MatchOperand_Success) {
    return res;
  }

  int64_t Int;
  Int = StringSwitch<int64_t>(Value)
        .Case("BYTE_0", SdwaSel::BYTE_0)
        .Case("BYTE_1", SdwaSel::BYTE_1)
        .Case("BYTE_2", SdwaSel::BYTE_2)
        .Case("BYTE_3", SdwaSel::BYTE_3)
        .Case("WORD_0", SdwaSel::WORD_0)
        .Case("WORD_1", SdwaSel::WORD_1)
        .Case("DWORD", SdwaSel::DWORD)
        .Default(0xffffffff);
  Parser.Lex(); // eat last token

  if (Int == 0xffffffff) {
    return MatchOperand_ParseFail;
  }

  Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, Type));
  return MatchOperand_Success;
}

OperandMatchResultTy
AMDGPUAsmParser::parseSDWADstUnused(OperandVector &Operands) {
  using namespace llvm::AMDGPU::SDWA;

  SMLoc S = Parser.getTok().getLoc();
  StringRef Value;
  OperandMatchResultTy res;

  res = parseStringWithPrefix("dst_unused", Value);
  if (res != MatchOperand_Success) {
    return res;
  }

  int64_t Int;
  Int = StringSwitch<int64_t>(Value)
        .Case("UNUSED_PAD", DstUnused::UNUSED_PAD)
        .Case("UNUSED_SEXT", DstUnused::UNUSED_SEXT)
        .Case("UNUSED_PRESERVE", DstUnused::UNUSED_PRESERVE)
        .Default(0xffffffff);
  Parser.Lex(); // eat last token

  if (Int == 0xffffffff) {
    return MatchOperand_ParseFail;
  }

  Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, AMDGPUOperand::ImmTySdwaDstUnused));
  return MatchOperand_Success;
}
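
// For illustration, an SDWA form using the selectors parsed above
// (instruction spelling assumed for exposition):
//
//   v_mov_b32_sdwa v1, v2 dst_sel:BYTE_0 dst_unused:UNUSED_PRESERVE src0_sel:WORD_1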

void AMDGPUAsmParser::cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands) {
  cvtSDWA(Inst, Operands, SIInstrFlags::VOP1);
}

void AMDGPUAsmParser::cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands) {
  cvtSDWA(Inst, Operands, SIInstrFlags::VOP2);
}

void AMDGPUAsmParser::cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands) {
  cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, true);
}

void AMDGPUAsmParser::cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands) {
  cvtSDWA(Inst, Operands, SIInstrFlags::VOPC, isVI());
}

void AMDGPUAsmParser::cvtSDWA(MCInst &Inst, const OperandVector &Operands,
                              uint64_t BasicInstType, bool skipVcc) {
  using namespace llvm::AMDGPU::SDWA;

  OptionalImmIndexMap OptionalIdx;
  bool skippedVcc = false;

  unsigned I = 1;
  const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
  for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
    ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
  }

  for (unsigned E = Operands.size(); I != E; ++I) {
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
    if (skipVcc && !skippedVcc && Op.isReg() && Op.Reg.RegNo == AMDGPU::VCC) {
      // VOP2b (v_add_u32, v_sub_u32 ...) sdwa use "vcc" token as dst.
      // Skip it if it's 2nd (e.g. v_add_i32_sdwa v1, vcc, v2, v3)
      // or 4th (v_addc_u32_sdwa v1, vcc, v2, v3, vcc) operand.
      // Skip VCC only if we didn't skip it on previous iteration.
      if (BasicInstType == SIInstrFlags::VOP2 &&
          (Inst.getNumOperands() == 1 || Inst.getNumOperands() == 5)) {
        skippedVcc = true;
        continue;
      } else if (BasicInstType == SIInstrFlags::VOPC &&
                 Inst.getNumOperands() == 0) {
        skippedVcc = true;
        continue;
      }
    }
    if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
      Op.addRegWithInputModsOperands(Inst, 2);
    } else if (Op.isImm()) {
      // Handle optional arguments
      OptionalIdx[Op.getImmTy()] = I;
    } else {
      llvm_unreachable("Invalid operand type");
    }
    skippedVcc = false;
  }

  if (Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx9 &&
      Inst.getOpcode() != AMDGPU::V_NOP_sdwa_vi) {
    // V_NOP_sdwa_vi/V_NOP_sdwa_gfx9 have no optional sdwa arguments.
    switch (BasicInstType) {
    case SIInstrFlags::VOP1:
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
      if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) {
        addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
      }
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
      break;

    case SIInstrFlags::VOP2:
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
      if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) {
        addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
      }
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD);
      break;

    case SIInstrFlags::VOPC:
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD);
      break;

    default:
      llvm_unreachable("Invalid instruction type. Only VOP1, VOP2 and VOPC allowed");
    }
  }

  // Special case v_mac_{f16, f32}: it has a src2 register operand that is
  // tied to the dst operand.
  if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
      Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) {
    auto it = Inst.begin();
    std::advance(
      it, AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::src2));
    Inst.insert(it, Inst.getOperand(0)); // src2 = dst
  }
}

/// Force static initialization.
extern "C" void LLVMInitializeAMDGPUAsmParser() {
  RegisterMCAsmParser<AMDGPUAsmParser> A(getTheAMDGPUTarget());
  RegisterMCAsmParser<AMDGPUAsmParser> B(getTheGCNTarget());
}

#define GET_REGISTER_MATCHER
#define GET_MATCHER_IMPLEMENTATION
#include "AMDGPUGenAsmMatcher.inc"

// This function must be defined after the auto-generated include so that the
// MatchClassKind enum is available.
unsigned AMDGPUAsmParser::validateTargetOperandClass(MCParsedAsmOperand &Op,
                                                     unsigned Kind) {
  // Tokens like "glc" would be parsed as immediate operands in ParseOperand().
  // But MatchInstructionImpl() expects a token and fails to validate the
  // operand. This method checks whether we were given an immediate operand
  // where the matcher expects the corresponding token.
  AMDGPUOperand &Operand = (AMDGPUOperand&)Op;
  switch (Kind) {
  case MCK_addr64:
    return Operand.isAddr64() ? Match_Success : Match_InvalidOperand;
  case MCK_gds:
    return Operand.isGDS() ? Match_Success : Match_InvalidOperand;
  case MCK_glc:
    return Operand.isGLC() ? Match_Success : Match_InvalidOperand;
  case MCK_idxen:
    return Operand.isIdxen() ? Match_Success : Match_InvalidOperand;
  case MCK_offen:
    return Operand.isOffen() ? Match_Success : Match_InvalidOperand;
  case MCK_SSrcB32:
    // When operands have expression values, they will return true for isToken,
    // because it is not possible to distinguish between a token and an
    // expression at parse time. MatchInstructionImpl() will always try to
    // match an operand as a token, when isToken returns true, and when the
    // name of the expression is not a valid token, the match will fail,
    // so we need to handle it here.
    return Operand.isSSrcB32() ? Match_Success : Match_InvalidOperand;
  case MCK_SSrcF32:
    return Operand.isSSrcF32() ? Match_Success : Match_InvalidOperand;
  case MCK_SoppBrTarget:
    return Operand.isSoppBrTarget() ? Match_Success : Match_InvalidOperand;
  case MCK_VReg32OrOff:
    return Operand.isVReg32OrOff() ? Match_Success : Match_InvalidOperand;
  case MCK_InterpSlot:
    return Operand.isInterpSlot() ? Match_Success : Match_InvalidOperand;
  case MCK_Attr:
    return Operand.isInterpAttr() ? Match_Success : Match_InvalidOperand;
  case MCK_AttrChan:
    return Operand.isAttrChan() ? Match_Success : Match_InvalidOperand;
  default:
    return Match_InvalidOperand;
  }
}