1 //===- AMDGPUAsmParser.cpp - Parse SI asm to MCInst instructions ----------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 
10 #include "AMDGPU.h"
11 #include "AMDKernelCodeT.h"
12 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
13 #include "MCTargetDesc/AMDGPUTargetStreamer.h"
14 #include "SIDefines.h"
15 #include "Utils/AMDGPUAsmUtils.h"
16 #include "Utils/AMDGPUBaseInfo.h"
17 #include "Utils/AMDKernelCodeTUtils.h"
18 #include "llvm/ADT/APFloat.h"
19 #include "llvm/ADT/APInt.h"
20 #include "llvm/ADT/ArrayRef.h"
21 #include "llvm/ADT/STLExtras.h"
22 #include "llvm/ADT/SmallBitVector.h"
23 #include "llvm/ADT/SmallString.h"
24 #include "llvm/ADT/StringRef.h"
25 #include "llvm/ADT/StringSwitch.h"
26 #include "llvm/ADT/Twine.h"
27 #include "llvm/BinaryFormat/ELF.h"
28 #include "llvm/CodeGen/MachineValueType.h"
29 #include "llvm/MC/MCAsmInfo.h"
30 #include "llvm/MC/MCContext.h"
31 #include "llvm/MC/MCExpr.h"
32 #include "llvm/MC/MCInst.h"
33 #include "llvm/MC/MCInstrDesc.h"
34 #include "llvm/MC/MCInstrInfo.h"
35 #include "llvm/MC/MCParser/MCAsmLexer.h"
36 #include "llvm/MC/MCParser/MCAsmParser.h"
37 #include "llvm/MC/MCParser/MCAsmParserExtension.h"
38 #include "llvm/MC/MCParser/MCParsedAsmOperand.h"
39 #include "llvm/MC/MCParser/MCTargetAsmParser.h"
40 #include "llvm/MC/MCRegisterInfo.h"
41 #include "llvm/MC/MCStreamer.h"
42 #include "llvm/MC/MCSubtargetInfo.h"
43 #include "llvm/MC/MCSymbol.h"
44 #include "llvm/Support/AMDGPUMetadata.h"
45 #include "llvm/Support/Casting.h"
46 #include "llvm/Support/Compiler.h"
47 #include "llvm/Support/ErrorHandling.h"
48 #include "llvm/Support/MathExtras.h"
49 #include "llvm/Support/SMLoc.h"
50 #include "llvm/Support/TargetRegistry.h"
51 #include "llvm/Support/raw_ostream.h"
52 #include <algorithm>
53 #include <cassert>
54 #include <cstdint>
55 #include <cstring>
56 #include <iterator>
57 #include <map>
58 #include <memory>
59 #include <string>
60 
61 using namespace llvm;
62 using namespace llvm::AMDGPU;
63 
64 namespace {
65 
66 class AMDGPUAsmParser;
67 
68 enum RegisterKind { IS_UNKNOWN, IS_VGPR, IS_SGPR, IS_TTMP, IS_SPECIAL };
69 
70 //===----------------------------------------------------------------------===//
71 // Operand
72 //===----------------------------------------------------------------------===//
73 
74 class AMDGPUOperand : public MCParsedAsmOperand {
75   enum KindTy {
76     Token,
77     Immediate,
78     Register,
79     Expression
80   } Kind;
81 
82   SMLoc StartLoc, EndLoc;
83   const AMDGPUAsmParser *AsmParser;
84 
85 public:
86   AMDGPUOperand(KindTy Kind_, const AMDGPUAsmParser *AsmParser_)
87     : MCParsedAsmOperand(), Kind(Kind_), AsmParser(AsmParser_) {}
88 
89   using Ptr = std::unique_ptr<AMDGPUOperand>;
90 
91   struct Modifiers {
92     bool Abs = false;
93     bool Neg = false;
94     bool Sext = false;
95 
96     bool hasFPModifiers() const { return Abs || Neg; }
97     bool hasIntModifiers() const { return Sext; }
98     bool hasModifiers() const { return hasFPModifiers() || hasIntModifiers(); }
99 
100     int64_t getFPModifiersOperand() const {
101       int64_t Operand = 0;
102       Operand |= Abs ? SISrcMods::ABS : 0;
103       Operand |= Neg ? SISrcMods::NEG : 0;
104       return Operand;
105     }
106 
107     int64_t getIntModifiersOperand() const {
108       int64_t Operand = 0;
109       Operand |= Sext ? SISrcMods::SEXT : 0;
110       return Operand;
111     }
112 
113     int64_t getModifiersOperand() const {
114       assert(!(hasFPModifiers() && hasIntModifiers())
115            && "fp and int modifiers should not be used simultaneously");
116       if (hasFPModifiers()) {
117         return getFPModifiersOperand();
118       } else if (hasIntModifiers()) {
119         return getIntModifiersOperand();
120       } else {
121         return 0;
122       }
123     }
124 
125     friend raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods);
126   };
127 
128   enum ImmTy {
129     ImmTyNone,
130     ImmTyGDS,
131     ImmTyOffen,
132     ImmTyIdxen,
133     ImmTyAddr64,
134     ImmTyOffset,
135     ImmTyInstOffset,
136     ImmTyOffset0,
137     ImmTyOffset1,
138     ImmTyGLC,
139     ImmTySLC,
140     ImmTyTFE,
141     ImmTyClampSI,
142     ImmTyOModSI,
143     ImmTyDppCtrl,
144     ImmTyDppRowMask,
145     ImmTyDppBankMask,
146     ImmTyDppBoundCtrl,
147     ImmTySdwaDstSel,
148     ImmTySdwaSrc0Sel,
149     ImmTySdwaSrc1Sel,
150     ImmTySdwaDstUnused,
151     ImmTyDMask,
152     ImmTyUNorm,
153     ImmTyDA,
154     ImmTyR128,
155     ImmTyLWE,
156     ImmTyExpTgt,
157     ImmTyExpCompr,
158     ImmTyExpVM,
159     ImmTyDFMT,
160     ImmTyNFMT,
161     ImmTyHwreg,
162     ImmTyOff,
163     ImmTySendMsg,
164     ImmTyInterpSlot,
165     ImmTyInterpAttr,
166     ImmTyAttrChan,
167     ImmTyOpSel,
168     ImmTyOpSelHi,
169     ImmTyNegLo,
170     ImmTyNegHi,
171     ImmTySwizzle,
172     ImmTyHigh
173   };
174 
175   struct TokOp {
176     const char *Data;
177     unsigned Length;
178   };
179 
180   struct ImmOp {
181     int64_t Val;
182     ImmTy Type;
183     bool IsFPImm;
184     Modifiers Mods;
185   };
186 
187   struct RegOp {
188     unsigned RegNo;
189     bool IsForcedVOP3;
190     Modifiers Mods;
191   };
192 
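  // Operand payload; the active union member is selected by Kind.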
193   union {
194     TokOp Tok;
195     ImmOp Imm;
196     RegOp Reg;
197     const MCExpr *Expr;
198   };
199 
200   bool isToken() const override {
201     if (Kind == Token)
202       return true;
203 
204     if (Kind != Expression || !Expr)
205       return false;
206 
207     // When parsing operands, we can't always tell if something was meant to be
208     // a token, like 'gds', or an expression that references a global variable.
209     // In this case, we assume the string is an expression, and if we need to
210     // interpret it as a token, then we treat the symbol name as the token.
211     return isa<MCSymbolRefExpr>(Expr);
212   }
213 
214   bool isImm() const override {
215     return Kind == Immediate;
216   }
217 
218   bool isInlinableImm(MVT type) const;
219   bool isLiteralImm(MVT type) const;
220 
221   bool isRegKind() const {
222     return Kind == Register;
223   }
224 
225   bool isReg() const override {
226     return isRegKind() && !hasModifiers();
227   }
228 
229   bool isRegOrImmWithInputMods(MVT type) const {
230     return isRegKind() || isInlinableImm(type);
231   }
232 
233   bool isRegOrImmWithInt16InputMods() const {
234     return isRegOrImmWithInputMods(MVT::i16);
235   }
236 
237   bool isRegOrImmWithInt32InputMods() const {
238     return isRegOrImmWithInputMods(MVT::i32);
239   }
240 
241   bool isRegOrImmWithInt64InputMods() const {
242     return isRegOrImmWithInputMods(MVT::i64);
243   }
244 
245   bool isRegOrImmWithFP16InputMods() const {
246     return isRegOrImmWithInputMods(MVT::f16);
247   }
248 
249   bool isRegOrImmWithFP32InputMods() const {
250     return isRegOrImmWithInputMods(MVT::f32);
251   }
252 
253   bool isRegOrImmWithFP64InputMods() const {
254     return isRegOrImmWithInputMods(MVT::f64);
255   }
256 
257   bool isVReg() const {
258     return isRegClass(AMDGPU::VGPR_32RegClassID) ||
259            isRegClass(AMDGPU::VReg_64RegClassID) ||
260            isRegClass(AMDGPU::VReg_96RegClassID) ||
261            isRegClass(AMDGPU::VReg_128RegClassID) ||
262            isRegClass(AMDGPU::VReg_256RegClassID) ||
263            isRegClass(AMDGPU::VReg_512RegClassID);
264   }
265 
266   bool isVReg32OrOff() const {
267     return isOff() || isRegClass(AMDGPU::VGPR_32RegClassID);
268   }
269 
270   bool isSDWARegKind() const;
271 
272   bool isImmTy(ImmTy ImmT) const {
273     return isImm() && Imm.Type == ImmT;
274   }
275 
276   bool isImmModifier() const {
277     return isImm() && Imm.Type != ImmTyNone;
278   }
279 
280   bool isClampSI() const { return isImmTy(ImmTyClampSI); }
281   bool isOModSI() const { return isImmTy(ImmTyOModSI); }
282   bool isDMask() const { return isImmTy(ImmTyDMask); }
283   bool isUNorm() const { return isImmTy(ImmTyUNorm); }
284   bool isDA() const { return isImmTy(ImmTyDA); }
285   bool isR128() const { return isImmTy(ImmTyR128); }
286   bool isLWE() const { return isImmTy(ImmTyLWE); }
287   bool isOff() const { return isImmTy(ImmTyOff); }
288   bool isExpTgt() const { return isImmTy(ImmTyExpTgt); }
289   bool isExpVM() const { return isImmTy(ImmTyExpVM); }
290   bool isExpCompr() const { return isImmTy(ImmTyExpCompr); }
291   bool isOffen() const { return isImmTy(ImmTyOffen); }
292   bool isIdxen() const { return isImmTy(ImmTyIdxen); }
293   bool isAddr64() const { return isImmTy(ImmTyAddr64); }
294   bool isOffset() const { return isImmTy(ImmTyOffset) && isUInt<16>(getImm()); }
295   bool isOffset0() const { return isImmTy(ImmTyOffset0) && isUInt<16>(getImm()); }
296   bool isOffset1() const { return isImmTy(ImmTyOffset1) && isUInt<8>(getImm()); }
297 
298   bool isOffsetU12() const { return (isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset)) && isUInt<12>(getImm()); }
299   bool isOffsetS13() const { return (isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset)) && isInt<13>(getImm()); }
300   bool isGDS() const { return isImmTy(ImmTyGDS); }
301   bool isGLC() const { return isImmTy(ImmTyGLC); }
302   bool isSLC() const { return isImmTy(ImmTySLC); }
303   bool isTFE() const { return isImmTy(ImmTyTFE); }
304   bool isDFMT() const { return isImmTy(ImmTyDFMT) && isUInt<8>(getImm()); }
305   bool isNFMT() const { return isImmTy(ImmTyNFMT) && isUInt<8>(getImm()); }
306   bool isBankMask() const { return isImmTy(ImmTyDppBankMask); }
307   bool isRowMask() const { return isImmTy(ImmTyDppRowMask); }
308   bool isBoundCtrl() const { return isImmTy(ImmTyDppBoundCtrl); }
309   bool isSDWADstSel() const { return isImmTy(ImmTySdwaDstSel); }
310   bool isSDWASrc0Sel() const { return isImmTy(ImmTySdwaSrc0Sel); }
311   bool isSDWASrc1Sel() const { return isImmTy(ImmTySdwaSrc1Sel); }
312   bool isSDWADstUnused() const { return isImmTy(ImmTySdwaDstUnused); }
313   bool isInterpSlot() const { return isImmTy(ImmTyInterpSlot); }
314   bool isInterpAttr() const { return isImmTy(ImmTyInterpAttr); }
315   bool isAttrChan() const { return isImmTy(ImmTyAttrChan); }
316   bool isOpSel() const { return isImmTy(ImmTyOpSel); }
317   bool isOpSelHi() const { return isImmTy(ImmTyOpSelHi); }
318   bool isNegLo() const { return isImmTy(ImmTyNegLo); }
319   bool isNegHi() const { return isImmTy(ImmTyNegHi); }
320   bool isHigh() const { return isImmTy(ImmTyHigh); }
321 
322   bool isMod() const {
323     return isClampSI() || isOModSI();
324   }
325 
326   bool isRegOrImm() const {
327     return isReg() || isImm();
328   }
329 
330   bool isRegClass(unsigned RCID) const;
331 
332   bool isRegOrInlineNoMods(unsigned RCID, MVT type) const {
333     return (isRegClass(RCID) || isInlinableImm(type)) && !hasModifiers();
334   }
335 
336   bool isSCSrcB16() const {
337     return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i16);
338   }
339 
340   bool isSCSrcV2B16() const {
341     return isSCSrcB16();
342   }
343 
344   bool isSCSrcB32() const {
345     return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i32);
346   }
347 
348   bool isSCSrcB64() const {
349     return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::i64);
350   }
351 
352   bool isSCSrcF16() const {
353     return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f16);
354   }
355 
356   bool isSCSrcV2F16() const {
357     return isSCSrcF16();
358   }
359 
360   bool isSCSrcF32() const {
361     return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f32);
362   }
363 
364   bool isSCSrcF64() const {
365     return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::f64);
366   }
367 
368   bool isSSrcB32() const {
369     return isSCSrcB32() || isLiteralImm(MVT::i32) || isExpr();
370   }
371 
372   bool isSSrcB16() const {
373     return isSCSrcB16() || isLiteralImm(MVT::i16);
374   }
375 
376   bool isSSrcV2B16() const {
377     llvm_unreachable("cannot happen");
378     return isSSrcB16();
379   }
380 
381   bool isSSrcB64() const {
382     // TODO: Find out how SALU supports extension of 32-bit literals to 64 bits.
383     // See isVSrc64().
384     return isSCSrcB64() || isLiteralImm(MVT::i64);
385   }
386 
387   bool isSSrcF32() const {
388     return isSCSrcB32() || isLiteralImm(MVT::f32) || isExpr();
389   }
390 
391   bool isSSrcF64() const {
392     return isSCSrcB64() || isLiteralImm(MVT::f64);
393   }
394 
395   bool isSSrcF16() const {
396     return isSCSrcB16() || isLiteralImm(MVT::f16);
397   }
398 
399   bool isSSrcV2F16() const {
400     llvm_unreachable("cannot happen");
401     return isSSrcF16();
402   }
403 
404   bool isVCSrcB32() const {
405     return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i32);
406   }
407 
408   bool isVCSrcB64() const {
409     return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::i64);
410   }
411 
412   bool isVCSrcB16() const {
413     return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i16);
414   }
415 
416   bool isVCSrcV2B16() const {
417     return isVCSrcB16();
418   }
419 
420   bool isVCSrcF32() const {
421     return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f32);
422   }
423 
424   bool isVCSrcF64() const {
425     return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::f64);
426   }
427 
428   bool isVCSrcF16() const {
429     return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f16);
430   }
431 
432   bool isVCSrcV2F16() const {
433     return isVCSrcF16();
434   }
435 
436   bool isVSrcB32() const {
437     return isVCSrcF32() || isLiteralImm(MVT::i32);
438   }
439 
440   bool isVSrcB64() const {
441     return isVCSrcF64() || isLiteralImm(MVT::i64);
442   }
443 
444   bool isVSrcB16() const {
445     return isVCSrcF16() || isLiteralImm(MVT::i16);
446   }
447 
448   bool isVSrcV2B16() const {
449     llvm_unreachable("cannot happen");
450     return isVSrcB16();
451   }
452 
453   bool isVSrcF32() const {
454     return isVCSrcF32() || isLiteralImm(MVT::f32);
455   }
456 
457   bool isVSrcF64() const {
458     return isVCSrcF64() || isLiteralImm(MVT::f64);
459   }
460 
461   bool isVSrcF16() const {
462     return isVCSrcF16() || isLiteralImm(MVT::f16);
463   }
464 
465   bool isVSrcV2F16() const {
466     llvm_unreachable("cannot happen");
467     return isVSrcF16();
468   }
469 
470   bool isKImmFP32() const {
471     return isLiteralImm(MVT::f32);
472   }
473 
474   bool isKImmFP16() const {
475     return isLiteralImm(MVT::f16);
476   }
477 
478   bool isMem() const override {
479     return false;
480   }
481 
482   bool isExpr() const {
483     return Kind == Expression;
484   }
485 
486   bool isSoppBrTarget() const {
487     return isExpr() || isImm();
488   }
489 
490   bool isSWaitCnt() const;
491   bool isHwreg() const;
492   bool isSendMsg() const;
493   bool isSwizzle() const;
494   bool isSMRDOffset8() const;
495   bool isSMRDOffset20() const;
496   bool isSMRDLiteralOffset() const;
497   bool isDPPCtrl() const;
498   bool isGPRIdxMode() const;
499   bool isS16Imm() const;
500   bool isU16Imm() const;
501 
502   StringRef getExpressionAsToken() const {
503     assert(isExpr());
504     const MCSymbolRefExpr *S = cast<MCSymbolRefExpr>(Expr);
505     return S->getSymbol().getName();
506   }
507 
508   StringRef getToken() const {
509     assert(isToken());
510 
511     if (Kind == Expression)
512       return getExpressionAsToken();
513 
514     return StringRef(Tok.Data, Tok.Length);
515   }
516 
517   int64_t getImm() const {
518     assert(isImm());
519     return Imm.Val;
520   }
521 
522   ImmTy getImmTy() const {
523     assert(isImm());
524     return Imm.Type;
525   }
526 
527   unsigned getReg() const override {
528     return Reg.RegNo;
529   }
530 
531   SMLoc getStartLoc() const override {
532     return StartLoc;
533   }
534 
535   SMLoc getEndLoc() const override {
536     return EndLoc;
537   }
538 
539   SMRange getLocRange() const {
540     return SMRange(StartLoc, EndLoc);
541   }
542 
543   Modifiers getModifiers() const {
544     assert(isRegKind() || isImmTy(ImmTyNone));
545     return isRegKind() ? Reg.Mods : Imm.Mods;
546   }
547 
548   void setModifiers(Modifiers Mods) {
549     assert(isRegKind() || isImmTy(ImmTyNone));
550     if (isRegKind())
551       Reg.Mods = Mods;
552     else
553       Imm.Mods = Mods;
554   }
555 
556   bool hasModifiers() const {
557     return getModifiers().hasModifiers();
558   }
559 
560   bool hasFPModifiers() const {
561     return getModifiers().hasFPModifiers();
562   }
563 
564   bool hasIntModifiers() const {
565     return getModifiers().hasIntModifiers();
566   }
567 
568   uint64_t applyInputFPModifiers(uint64_t Val, unsigned Size) const;
569 
570   void addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers = true) const;
571 
572   void addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const;
573 
574   template <unsigned Bitwidth>
575   void addKImmFPOperands(MCInst &Inst, unsigned N) const;
576 
577   void addKImmFP16Operands(MCInst &Inst, unsigned N) const {
578     addKImmFPOperands<16>(Inst, N);
579   }
580 
581   void addKImmFP32Operands(MCInst &Inst, unsigned N) const {
582     addKImmFPOperands<32>(Inst, N);
583   }
584 
585   void addRegOperands(MCInst &Inst, unsigned N) const;
586 
587   void addRegOrImmOperands(MCInst &Inst, unsigned N) const {
588     if (isRegKind())
589       addRegOperands(Inst, N);
590     else if (isExpr())
591       Inst.addOperand(MCOperand::createExpr(Expr));
592     else
593       addImmOperands(Inst, N);
594   }
595 
596   void addRegOrImmWithInputModsOperands(MCInst &Inst, unsigned N) const {
597     Modifiers Mods = getModifiers();
598     Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
599     if (isRegKind()) {
600       addRegOperands(Inst, N);
601     } else {
602       addImmOperands(Inst, N, false);
603     }
604   }
605 
606   void addRegOrImmWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
607     assert(!hasIntModifiers());
608     addRegOrImmWithInputModsOperands(Inst, N);
609   }
610 
611   void addRegOrImmWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
612     assert(!hasFPModifiers());
613     addRegOrImmWithInputModsOperands(Inst, N);
614   }
615 
616   void addRegWithInputModsOperands(MCInst &Inst, unsigned N) const {
617     Modifiers Mods = getModifiers();
618     Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
619     assert(isRegKind());
620     addRegOperands(Inst, N);
621   }
622 
623   void addRegWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
624     assert(!hasIntModifiers());
625     addRegWithInputModsOperands(Inst, N);
626   }
627 
628   void addRegWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
629     assert(!hasFPModifiers());
630     addRegWithInputModsOperands(Inst, N);
631   }
632 
633   void addSoppBrTargetOperands(MCInst &Inst, unsigned N) const {
634     if (isImm())
635       addImmOperands(Inst, N);
636     else {
637       assert(isExpr());
638       Inst.addOperand(MCOperand::createExpr(Expr));
639     }
640   }
641 
642   static void printImmTy(raw_ostream& OS, ImmTy Type) {
643     switch (Type) {
644     case ImmTyNone: OS << "None"; break;
645     case ImmTyGDS: OS << "GDS"; break;
646     case ImmTyOffen: OS << "Offen"; break;
647     case ImmTyIdxen: OS << "Idxen"; break;
648     case ImmTyAddr64: OS << "Addr64"; break;
649     case ImmTyOffset: OS << "Offset"; break;
650     case ImmTyInstOffset: OS << "InstOffset"; break;
651     case ImmTyOffset0: OS << "Offset0"; break;
652     case ImmTyOffset1: OS << "Offset1"; break;
653     case ImmTyGLC: OS << "GLC"; break;
654     case ImmTySLC: OS << "SLC"; break;
655     case ImmTyTFE: OS << "TFE"; break;
656     case ImmTyDFMT: OS << "DFMT"; break;
657     case ImmTyNFMT: OS << "NFMT"; break;
658     case ImmTyClampSI: OS << "ClampSI"; break;
659     case ImmTyOModSI: OS << "OModSI"; break;
660     case ImmTyDppCtrl: OS << "DppCtrl"; break;
661     case ImmTyDppRowMask: OS << "DppRowMask"; break;
662     case ImmTyDppBankMask: OS << "DppBankMask"; break;
663     case ImmTyDppBoundCtrl: OS << "DppBoundCtrl"; break;
664     case ImmTySdwaDstSel: OS << "SdwaDstSel"; break;
665     case ImmTySdwaSrc0Sel: OS << "SdwaSrc0Sel"; break;
666     case ImmTySdwaSrc1Sel: OS << "SdwaSrc1Sel"; break;
667     case ImmTySdwaDstUnused: OS << "SdwaDstUnused"; break;
668     case ImmTyDMask: OS << "DMask"; break;
669     case ImmTyUNorm: OS << "UNorm"; break;
670     case ImmTyDA: OS << "DA"; break;
671     case ImmTyR128: OS << "R128"; break;
672     case ImmTyLWE: OS << "LWE"; break;
673     case ImmTyOff: OS << "Off"; break;
674     case ImmTyExpTgt: OS << "ExpTgt"; break;
675     case ImmTyExpCompr: OS << "ExpCompr"; break;
676     case ImmTyExpVM: OS << "ExpVM"; break;
677     case ImmTyHwreg: OS << "Hwreg"; break;
678     case ImmTySendMsg: OS << "SendMsg"; break;
679     case ImmTyInterpSlot: OS << "InterpSlot"; break;
680     case ImmTyInterpAttr: OS << "InterpAttr"; break;
681     case ImmTyAttrChan: OS << "AttrChan"; break;
682     case ImmTyOpSel: OS << "OpSel"; break;
683     case ImmTyOpSelHi: OS << "OpSelHi"; break;
684     case ImmTyNegLo: OS << "NegLo"; break;
685     case ImmTyNegHi: OS << "NegHi"; break;
686     case ImmTySwizzle: OS << "Swizzle"; break;
687     case ImmTyHigh: OS << "High"; break;
688     }
689   }
690 
691   void print(raw_ostream &OS) const override {
692     switch (Kind) {
693     case Register:
694       OS << "<register " << getReg() << " mods: " << Reg.Mods << '>';
695       break;
696     case Immediate:
697       OS << '<' << getImm();
698       if (getImmTy() != ImmTyNone) {
699         OS << " type: "; printImmTy(OS, getImmTy());
700       }
701       OS << " mods: " << Imm.Mods << '>';
702       break;
703     case Token:
704       OS << '\'' << getToken() << '\'';
705       break;
706     case Expression:
707       OS << "<expr " << *Expr << '>';
708       break;
709     }
710   }
711 
712   static AMDGPUOperand::Ptr CreateImm(const AMDGPUAsmParser *AsmParser,
713                                       int64_t Val, SMLoc Loc,
714                                       ImmTy Type = ImmTyNone,
715                                       bool IsFPImm = false) {
716     auto Op = llvm::make_unique<AMDGPUOperand>(Immediate, AsmParser);
717     Op->Imm.Val = Val;
718     Op->Imm.IsFPImm = IsFPImm;
719     Op->Imm.Type = Type;
720     Op->Imm.Mods = Modifiers();
721     Op->StartLoc = Loc;
722     Op->EndLoc = Loc;
723     return Op;
724   }
725 
726   static AMDGPUOperand::Ptr CreateToken(const AMDGPUAsmParser *AsmParser,
727                                         StringRef Str, SMLoc Loc,
728                                         bool HasExplicitEncodingSize = true) {
729     auto Res = llvm::make_unique<AMDGPUOperand>(Token, AsmParser);
730     Res->Tok.Data = Str.data();
731     Res->Tok.Length = Str.size();
732     Res->StartLoc = Loc;
733     Res->EndLoc = Loc;
734     return Res;
735   }
736 
737   static AMDGPUOperand::Ptr CreateReg(const AMDGPUAsmParser *AsmParser,
738                                       unsigned RegNo, SMLoc S,
739                                       SMLoc E,
740                                       bool ForceVOP3) {
741     auto Op = llvm::make_unique<AMDGPUOperand>(Register, AsmParser);
742     Op->Reg.RegNo = RegNo;
743     Op->Reg.Mods = Modifiers();
744     Op->Reg.IsForcedVOP3 = ForceVOP3;
745     Op->StartLoc = S;
746     Op->EndLoc = E;
747     return Op;
748   }
749 
750   static AMDGPUOperand::Ptr CreateExpr(const AMDGPUAsmParser *AsmParser,
751                                        const class MCExpr *Expr, SMLoc S) {
752     auto Op = llvm::make_unique<AMDGPUOperand>(Expression, AsmParser);
753     Op->Expr = Expr;
754     Op->StartLoc = S;
755     Op->EndLoc = S;
756     return Op;
757   }
758 };
759 
760 raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods) {
761   OS << "abs:" << Mods.Abs << " neg:" << Mods.Neg << " sext:" << Mods.Sext;
762   return OS;
763 }
764 
765 //===----------------------------------------------------------------------===//
766 // AsmParser
767 //===----------------------------------------------------------------------===//
768 
769 // Holds info related to the current kernel, e.g. count of SGPRs used.
770 // Kernel scope begins at the .amdgpu_hsa_kernel directive and ends at the next
771 // .amdgpu_hsa_kernel directive or at EOF.
772 class KernelScopeInfo {
773   int SgprIndexUnusedMin = -1;
774   int VgprIndexUnusedMin = -1;
775   MCContext *Ctx = nullptr;
776 
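  // Record that SGPR index 'i' is in use and publish the first-unused index
  // through the .kernel.sgpr_count symbol; usesVgprAt below does the same for
  // VGPRs via .kernel.vgpr_count.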
777   void usesSgprAt(int i) {
778     if (i >= SgprIndexUnusedMin) {
779       SgprIndexUnusedMin = ++i;
780       if (Ctx) {
781         MCSymbol * const Sym = Ctx->getOrCreateSymbol(Twine(".kernel.sgpr_count"));
782         Sym->setVariableValue(MCConstantExpr::create(SgprIndexUnusedMin, *Ctx));
783       }
784     }
785   }
786 
787   void usesVgprAt(int i) {
788     if (i >= VgprIndexUnusedMin) {
789       VgprIndexUnusedMin = ++i;
790       if (Ctx) {
791         MCSymbol * const Sym = Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count"));
792         Sym->setVariableValue(MCConstantExpr::create(VgprIndexUnusedMin, *Ctx));
793       }
794     }
795   }
796 
797 public:
798   KernelScopeInfo() = default;
799 
800   void initialize(MCContext &Context) {
801     Ctx = &Context;
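    // Reset the counters and (re)define the count symbols with a value of 0.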
802     usesSgprAt(SgprIndexUnusedMin = -1);
803     usesVgprAt(VgprIndexUnusedMin = -1);
804   }
805 
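  // Mark the highest 32-bit register index covered by this operand as used.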
806   void usesRegister(RegisterKind RegKind, unsigned DwordRegIndex, unsigned RegWidth) {
807     switch (RegKind) {
808       case IS_SGPR: usesSgprAt(DwordRegIndex + RegWidth - 1); break;
809       case IS_VGPR: usesVgprAt(DwordRegIndex + RegWidth - 1); break;
810       default: break;
811     }
812   }
813 };
814 
815 class AMDGPUAsmParser : public MCTargetAsmParser {
816   MCAsmParser &Parser;
817 
818   // Number of extra operands parsed after the first optional operand.
819   // This may be necessary to skip hardcoded mandatory operands.
820   static const unsigned MAX_OPR_LOOKAHEAD = 1;
821 
822   unsigned ForcedEncodingSize = 0;
823   bool ForcedDPP = false;
824   bool ForcedSDWA = false;
825   KernelScopeInfo KernelScope;
826 
827   /// @name Auto-generated Match Functions
828   /// {
829 
830 #define GET_ASSEMBLER_HEADER
831 #include "AMDGPUGenAsmMatcher.inc"
832 
833   /// }
834 
835 private:
836   bool ParseAsAbsoluteExpression(uint32_t &Ret);
837   bool ParseDirectiveMajorMinor(uint32_t &Major, uint32_t &Minor);
838   bool ParseDirectiveHSACodeObjectVersion();
839   bool ParseDirectiveHSACodeObjectISA();
840   bool ParseAMDKernelCodeTValue(StringRef ID, amd_kernel_code_t &Header);
841   bool ParseDirectiveAMDKernelCodeT();
842   bool subtargetHasRegister(const MCRegisterInfo &MRI, unsigned RegNo) const;
843   bool ParseDirectiveAMDGPUHsaKernel();
844 
845   bool ParseDirectiveISAVersion();
846   bool ParseDirectiveHSAMetadata();
847   bool ParseDirectivePALMetadata();
848 
849   bool AddNextRegisterToList(unsigned& Reg, unsigned& RegWidth,
850                              RegisterKind RegKind, unsigned Reg1,
851                              unsigned RegNum);
852   bool ParseAMDGPURegister(RegisterKind& RegKind, unsigned& Reg,
853                            unsigned& RegNum, unsigned& RegWidth,
854                            unsigned *DwordRegIndex);
855   void cvtMubufImpl(MCInst &Inst, const OperandVector &Operands,
856                     bool IsAtomic, bool IsAtomicReturn);
857   void cvtDSImpl(MCInst &Inst, const OperandVector &Operands,
858                  bool IsGdsHardcoded);
859 
860 public:
861   enum AMDGPUMatchResultTy {
862     Match_PreferE32 = FIRST_TARGET_MATCH_RESULT_TY
863   };
864 
865   using OptionalImmIndexMap = std::map<AMDGPUOperand::ImmTy, unsigned>;
866 
867   AMDGPUAsmParser(const MCSubtargetInfo &STI, MCAsmParser &_Parser,
868                const MCInstrInfo &MII,
869                const MCTargetOptions &Options)
870       : MCTargetAsmParser(Options, STI, MII), Parser(_Parser) {
871     MCAsmParserExtension::Initialize(Parser);
872 
873     if (getFeatureBits().none()) {
874       // Set default features.
875       copySTI().ToggleFeature("SOUTHERN_ISLANDS");
876     }
877 
878     setAvailableFeatures(ComputeAvailableFeatures(getFeatureBits()));
879 
880     {
881       // TODO: make these predefined variables read-only.
882       // Currently there is no suitable machinery in core llvm-mc for this.
883       // MCSymbol::isRedefinable is intended for another purpose, and
884       // AsmParser::parseDirectiveSet() cannot be specialized for a specific target.
885       AMDGPU::IsaInfo::IsaVersion ISA =
886           AMDGPU::IsaInfo::getIsaVersion(getFeatureBits());
887       MCContext &Ctx = getContext();
888       MCSymbol *Sym =
889           Ctx.getOrCreateSymbol(Twine(".option.machine_version_major"));
890       Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
891       Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_minor"));
892       Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
893       Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_stepping"));
894       Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
895     }
896     KernelScope.initialize(getContext());
897   }
898 
899   bool hasXNACK() const {
900     return AMDGPU::hasXNACK(getSTI());
901   }
902 
903   bool isSI() const {
904     return AMDGPU::isSI(getSTI());
905   }
906 
907   bool isCI() const {
908     return AMDGPU::isCI(getSTI());
909   }
910 
911   bool isVI() const {
912     return AMDGPU::isVI(getSTI());
913   }
914 
915   bool isGFX9() const {
916     return AMDGPU::isGFX9(getSTI());
917   }
918 
919   bool hasInv2PiInlineImm() const {
920     return getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm];
921   }
922 
923   bool hasFlatOffsets() const {
924     return getFeatureBits()[AMDGPU::FeatureFlatInstOffsets];
925   }
926 
927   bool hasSGPR102_SGPR103() const {
928     return !isVI();
929   }
930 
931   bool hasIntClamp() const {
932     return getFeatureBits()[AMDGPU::FeatureIntClamp];
933   }
934 
935   AMDGPUTargetStreamer &getTargetStreamer() {
936     MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer();
937     return static_cast<AMDGPUTargetStreamer &>(TS);
938   }
939 
940   const MCRegisterInfo *getMRI() const {
941     // We need this const_cast because for some reason getContext() is not const
942     // in MCAsmParser.
943     return const_cast<AMDGPUAsmParser*>(this)->getContext().getRegisterInfo();
944   }
945 
946   const MCInstrInfo *getMII() const {
947     return &MII;
948   }
949 
950   const FeatureBitset &getFeatureBits() const {
951     return getSTI().getFeatureBits();
952   }
953 
954   void setForcedEncodingSize(unsigned Size) { ForcedEncodingSize = Size; }
955   void setForcedDPP(bool ForceDPP_) { ForcedDPP = ForceDPP_; }
956   void setForcedSDWA(bool ForceSDWA_) { ForcedSDWA = ForceSDWA_; }
957 
958   unsigned getForcedEncodingSize() const { return ForcedEncodingSize; }
959   bool isForcedVOP3() const { return ForcedEncodingSize == 64; }
960   bool isForcedDPP() const { return ForcedDPP; }
961   bool isForcedSDWA() const { return ForcedSDWA; }
962   ArrayRef<unsigned> getMatchedVariants() const;
963 
964   std::unique_ptr<AMDGPUOperand> parseRegister();
965   bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override;
966   unsigned checkTargetMatchPredicate(MCInst &Inst) override;
967   unsigned validateTargetOperandClass(MCParsedAsmOperand &Op,
968                                       unsigned Kind) override;
969   bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
970                                OperandVector &Operands, MCStreamer &Out,
971                                uint64_t &ErrorInfo,
972                                bool MatchingInlineAsm) override;
973   bool ParseDirective(AsmToken DirectiveID) override;
974   OperandMatchResultTy parseOperand(OperandVector &Operands, StringRef Mnemonic);
975   StringRef parseMnemonicSuffix(StringRef Name);
976   bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
977                         SMLoc NameLoc, OperandVector &Operands) override;
978   //bool ProcessInstruction(MCInst &Inst);
979 
980   OperandMatchResultTy parseIntWithPrefix(const char *Prefix, int64_t &Int);
981 
982   OperandMatchResultTy
983   parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
984                      AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
985                      bool (*ConvertResult)(int64_t &) = nullptr);
986 
987   OperandMatchResultTy parseOperandArrayWithPrefix(
988     const char *Prefix,
989     OperandVector &Operands,
990     AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
991     bool (*ConvertResult)(int64_t&) = nullptr);
992 
993   OperandMatchResultTy
994   parseNamedBit(const char *Name, OperandVector &Operands,
995                 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone);
996   OperandMatchResultTy parseStringWithPrefix(StringRef Prefix,
997                                              StringRef &Value);
998 
999   bool parseAbsoluteExpr(int64_t &Val, bool AbsMod = false);
1000   OperandMatchResultTy parseImm(OperandVector &Operands, bool AbsMod = false);
1001   OperandMatchResultTy parseReg(OperandVector &Operands);
1002   OperandMatchResultTy parseRegOrImm(OperandVector &Operands, bool AbsMod = false);
1003   OperandMatchResultTy parseRegOrImmWithFPInputMods(OperandVector &Operands, bool AllowImm = true);
1004   OperandMatchResultTy parseRegOrImmWithIntInputMods(OperandVector &Operands, bool AllowImm = true);
1005   OperandMatchResultTy parseRegWithFPInputMods(OperandVector &Operands);
1006   OperandMatchResultTy parseRegWithIntInputMods(OperandVector &Operands);
1007   OperandMatchResultTy parseVReg32OrOff(OperandVector &Operands);
1008 
1009   void cvtDSOffset01(MCInst &Inst, const OperandVector &Operands);
1010   void cvtDS(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, false); }
1011   void cvtDSGds(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, true); }
1012   void cvtExp(MCInst &Inst, const OperandVector &Operands);
1013 
1014   bool parseCnt(int64_t &IntVal);
1015   OperandMatchResultTy parseSWaitCntOps(OperandVector &Operands);
1016   OperandMatchResultTy parseHwreg(OperandVector &Operands);
1017 
1018 private:
1019   struct OperandInfoTy {
1020     int64_t Id;
1021     bool IsSymbolic = false;
1022 
1023     OperandInfoTy(int64_t Id_) : Id(Id_) {}
1024   };
1025 
1026   bool parseSendMsgConstruct(OperandInfoTy &Msg, OperandInfoTy &Operation, int64_t &StreamId);
1027   bool parseHwregConstruct(OperandInfoTy &HwReg, int64_t &Offset, int64_t &Width);
1028 
1029   void errorExpTgt();
1030   OperandMatchResultTy parseExpTgtImpl(StringRef Str, uint8_t &Val);
1031 
1032   bool validateInstruction(const MCInst &Inst, const SMLoc &IDLoc);
1033   bool validateConstantBusLimitations(const MCInst &Inst);
1034   bool validateEarlyClobberLimitations(const MCInst &Inst);
1035   bool validateIntClampSupported(const MCInst &Inst);
1036   bool usesConstantBus(const MCInst &Inst, unsigned OpIdx);
1037   bool isInlineConstant(const MCInst &Inst, unsigned OpIdx) const;
1038   unsigned findImplicitSGPRReadInVOP(const MCInst &Inst) const;
1039 
1040   bool trySkipId(const StringRef Id);
1041   bool trySkipToken(const AsmToken::TokenKind Kind);
1042   bool skipToken(const AsmToken::TokenKind Kind, const StringRef ErrMsg);
1043   bool parseString(StringRef &Val, const StringRef ErrMsg = "expected a string");
1044   bool parseExpr(int64_t &Imm);
1045 
1046 public:
1047   OperandMatchResultTy parseOptionalOperand(OperandVector &Operands);
1048   OperandMatchResultTy parseOptionalOpr(OperandVector &Operands);
1049 
1050   OperandMatchResultTy parseExpTgt(OperandVector &Operands);
1051   OperandMatchResultTy parseSendMsgOp(OperandVector &Operands);
1052   OperandMatchResultTy parseInterpSlot(OperandVector &Operands);
1053   OperandMatchResultTy parseInterpAttr(OperandVector &Operands);
1054   OperandMatchResultTy parseSOppBrTarget(OperandVector &Operands);
1055 
1056   bool parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
1057                             const unsigned MinVal,
1058                             const unsigned MaxVal,
1059                             const StringRef ErrMsg);
1060   OperandMatchResultTy parseSwizzleOp(OperandVector &Operands);
1061   bool parseSwizzleOffset(int64_t &Imm);
1062   bool parseSwizzleMacro(int64_t &Imm);
1063   bool parseSwizzleQuadPerm(int64_t &Imm);
1064   bool parseSwizzleBitmaskPerm(int64_t &Imm);
1065   bool parseSwizzleBroadcast(int64_t &Imm);
1066   bool parseSwizzleSwap(int64_t &Imm);
1067   bool parseSwizzleReverse(int64_t &Imm);
1068 
1069   void cvtMubuf(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, false); }
1070   void cvtMubufAtomic(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true, false); }
1071   void cvtMubufAtomicReturn(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true, true); }
1072   void cvtMtbuf(MCInst &Inst, const OperandVector &Operands);
1073 
1074   AMDGPUOperand::Ptr defaultGLC() const;
1075   AMDGPUOperand::Ptr defaultSLC() const;
1076   AMDGPUOperand::Ptr defaultTFE() const;
1077 
1078   AMDGPUOperand::Ptr defaultDMask() const;
1079   AMDGPUOperand::Ptr defaultUNorm() const;
1080   AMDGPUOperand::Ptr defaultDA() const;
1081   AMDGPUOperand::Ptr defaultR128() const;
1082   AMDGPUOperand::Ptr defaultLWE() const;
1083   AMDGPUOperand::Ptr defaultSMRDOffset8() const;
1084   AMDGPUOperand::Ptr defaultSMRDOffset20() const;
1085   AMDGPUOperand::Ptr defaultSMRDLiteralOffset() const;
1086   AMDGPUOperand::Ptr defaultOffsetU12() const;
1087   AMDGPUOperand::Ptr defaultOffsetS13() const;
1088 
1089   OperandMatchResultTy parseOModOperand(OperandVector &Operands);
1090 
1091   void cvtVOP3(MCInst &Inst, const OperandVector &Operands,
1092                OptionalImmIndexMap &OptionalIdx);
1093   void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands);
1094   void cvtVOP3(MCInst &Inst, const OperandVector &Operands);
1095   void cvtVOP3P(MCInst &Inst, const OperandVector &Operands);
1096 
1097   void cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands);
1098 
1099   void cvtMIMG(MCInst &Inst, const OperandVector &Operands,
1100                bool IsAtomic = false);
1101   void cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands);
1102 
1103   OperandMatchResultTy parseDPPCtrl(OperandVector &Operands);
1104   AMDGPUOperand::Ptr defaultRowMask() const;
1105   AMDGPUOperand::Ptr defaultBankMask() const;
1106   AMDGPUOperand::Ptr defaultBoundCtrl() const;
1107   void cvtDPP(MCInst &Inst, const OperandVector &Operands);
1108 
1109   OperandMatchResultTy parseSDWASel(OperandVector &Operands, StringRef Prefix,
1110                                     AMDGPUOperand::ImmTy Type);
1111   OperandMatchResultTy parseSDWADstUnused(OperandVector &Operands);
1112   void cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands);
1113   void cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands);
1114   void cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands);
1115   void cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands);
1116   void cvtSDWA(MCInst &Inst, const OperandVector &Operands,
1117                 uint64_t BasicInstType, bool skipVcc = false);
1118 };
1119 
1120 struct OptionalOperand {
1121   const char *Name;
1122   AMDGPUOperand::ImmTy Type;
1123   bool IsBit;
1124   bool (*ConvertResult)(int64_t&);
1125 };
1126 
1127 } // end anonymous namespace
1128 
1129 // May be called with an integer type of equivalent bitwidth.
1130 static const fltSemantics *getFltSemantics(unsigned Size) {
1131   switch (Size) {
1132   case 4:
1133     return &APFloat::IEEEsingle();
1134   case 8:
1135     return &APFloat::IEEEdouble();
1136   case 2:
1137     return &APFloat::IEEEhalf();
1138   default:
1139     llvm_unreachable("unsupported fp type");
1140   }
1141 }
1142 
1143 static const fltSemantics *getFltSemantics(MVT VT) {
1144   return getFltSemantics(VT.getSizeInBits() / 8);
1145 }
1146 
1147 static const fltSemantics *getOpFltSemantics(uint8_t OperandType) {
1148   switch (OperandType) {
1149   case AMDGPU::OPERAND_REG_IMM_INT32:
1150   case AMDGPU::OPERAND_REG_IMM_FP32:
1151   case AMDGPU::OPERAND_REG_INLINE_C_INT32:
1152   case AMDGPU::OPERAND_REG_INLINE_C_FP32:
1153     return &APFloat::IEEEsingle();
1154   case AMDGPU::OPERAND_REG_IMM_INT64:
1155   case AMDGPU::OPERAND_REG_IMM_FP64:
1156   case AMDGPU::OPERAND_REG_INLINE_C_INT64:
1157   case AMDGPU::OPERAND_REG_INLINE_C_FP64:
1158     return &APFloat::IEEEdouble();
1159   case AMDGPU::OPERAND_REG_IMM_INT16:
1160   case AMDGPU::OPERAND_REG_IMM_FP16:
1161   case AMDGPU::OPERAND_REG_INLINE_C_INT16:
1162   case AMDGPU::OPERAND_REG_INLINE_C_FP16:
1163   case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
1164   case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
1165     return &APFloat::IEEEhalf();
1166   default:
1167     llvm_unreachable("unsupported fp type");
1168   }
1169 }
1170 
1171 //===----------------------------------------------------------------------===//
1172 // Operand
1173 //===----------------------------------------------------------------------===//
1174 
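// Note: despite the name, precision loss is tolerated here; only conversions
// that overflow or underflow the target type are rejected.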
1175 static bool canLosslesslyConvertToFPType(APFloat &FPLiteral, MVT VT) {
1176   bool Lost;
1177 
1178   // Convert the literal to the semantics of the requested type.
1179   APFloat::opStatus Status = FPLiteral.convert(*getFltSemantics(VT),
1180                                                APFloat::rmNearestTiesToEven,
1181                                                &Lost);
1182   // We allow precision loss but not overflow or underflow.
1183   if (Status != APFloat::opOK &&
1184       Lost &&
1185       ((Status & APFloat::opOverflow)  != 0 ||
1186        (Status & APFloat::opUnderflow) != 0)) {
1187     return false;
1188   }
1189 
1190   return true;
1191 }
1192 
1193 bool AMDGPUOperand::isInlinableImm(MVT type) const {
1194   if (!isImmTy(ImmTyNone)) {
1195     // Only plain immediates are inlinable (e.g. "clamp" attribute is not)
1196     return false;
1197   }
1198   // TODO: We should avoid using host float here. It would be better to
1199   // check the float bit values, which is what a few other places do.
1200   // We've had bot failures before due to weird NaN support on mips hosts.
1201 
1202   APInt Literal(64, Imm.Val);
1203 
1204   if (Imm.IsFPImm) { // We got fp literal token
1205     if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
1206       return AMDGPU::isInlinableLiteral64(Imm.Val,
1207                                           AsmParser->hasInv2PiInlineImm());
1208     }
1209 
1210     APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
1211     if (!canLosslesslyConvertToFPType(FPLiteral, type))
1212       return false;
1213 
1214     if (type.getScalarSizeInBits() == 16) {
1215       return AMDGPU::isInlinableLiteral16(
1216         static_cast<int16_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
1217         AsmParser->hasInv2PiInlineImm());
1218     }
1219 
1220     // Check if single precision literal is inlinable
1221     return AMDGPU::isInlinableLiteral32(
1222       static_cast<int32_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
1223       AsmParser->hasInv2PiInlineImm());
1224   }
1225 
1226   // We got int literal token.
1227   if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
1228     return AMDGPU::isInlinableLiteral64(Imm.Val,
1229                                         AsmParser->hasInv2PiInlineImm());
1230   }
1231 
1232   if (type.getScalarSizeInBits() == 16) {
1233     return AMDGPU::isInlinableLiteral16(
1234       static_cast<int16_t>(Literal.getLoBits(16).getSExtValue()),
1235       AsmParser->hasInv2PiInlineImm());
1236   }
1237 
1238   return AMDGPU::isInlinableLiteral32(
1239     static_cast<int32_t>(Literal.getLoBits(32).getZExtValue()),
1240     AsmParser->hasInv2PiInlineImm());
1241 }
1242 
1243 bool AMDGPUOperand::isLiteralImm(MVT type) const {
1244   // Check that this immediate can be added as a literal.
1245   if (!isImmTy(ImmTyNone)) {
1246     return false;
1247   }
1248 
1249   if (!Imm.IsFPImm) {
1250     // We got int literal token.
1251 
1252     if (type == MVT::f64 && hasFPModifiers()) {
1253       // FP modifiers cannot be applied to integer literals while preserving the
1254       // same semantics between VOP1/2/C and VOP3 encodings, because of integer
1255       // truncation. To avoid ambiguity, reject these cases.
1256       return false;
1257     }
1258 
1259     unsigned Size = type.getSizeInBits();
1260     if (Size == 64)
1261       Size = 32;
1262 
1263     // FIXME: 64-bit operands can zero extend, sign extend, or pad zeroes for FP
1264     // types.
1265     return isUIntN(Size, Imm.Val) || isIntN(Size, Imm.Val);
1266   }
1267 
1268   // We got fp literal token
1269   if (type == MVT::f64) { // Expected 64-bit fp operand
1270     // The low 32 bits will be set to zero, but we accept such literals.
1271     return true;
1272   }
1273 
1274   if (type == MVT::i64) { // Expected 64-bit int operand
1275     // We don't allow fp literals in 64-bit integer instructions. It is
1276     // unclear how we should encode them.
1277     return false;
1278   }
1279 
1280   APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
1281   return canLosslesslyConvertToFPType(FPLiteral, type);
1282 }
1283 
1284 bool AMDGPUOperand::isRegClass(unsigned RCID) const {
1285   return isRegKind() && AsmParser->getMRI()->getRegClass(RCID).contains(getReg());
1286 }
1287 
1288 bool AMDGPUOperand::isSDWARegKind() const {
1289   if (AsmParser->isVI())
1290     return isVReg();
1291   else if (AsmParser->isGFX9())
1292     return isRegKind();
1293   else
1294     return false;
1295 }
1296 
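// Apply the abs/neg source modifiers directly to the raw bit pattern of an FP
// literal: abs clears the sign bit, neg flips it.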
1297 uint64_t AMDGPUOperand::applyInputFPModifiers(uint64_t Val, unsigned Size) const
1298 {
1299   assert(isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
1300   assert(Size == 2 || Size == 4 || Size == 8);
1301 
1302   const uint64_t FpSignMask = (1ULL << (Size * 8 - 1));
1303 
1304   if (Imm.Mods.Abs) {
1305     Val &= ~FpSignMask;
1306   }
1307   if (Imm.Mods.Neg) {
1308     Val ^= FpSignMask;
1309   }
1310 
1311   return Val;
1312 }
1313 
1314 void AMDGPUOperand::addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers) const {
1315   if (AMDGPU::isSISrcOperand(AsmParser->getMII()->get(Inst.getOpcode()),
1316                              Inst.getNumOperands())) {
1317     addLiteralImmOperand(Inst, Imm.Val,
1318                          ApplyModifiers &&
1319                          isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
1320   } else {
1321     assert(!isImmTy(ImmTyNone) || !hasModifiers());
1322     Inst.addOperand(MCOperand::createImm(Imm.Val));
1323   }
1324 }
1325 
1326 void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const {
1327   const auto& InstDesc = AsmParser->getMII()->get(Inst.getOpcode());
1328   auto OpNum = Inst.getNumOperands();
1329   // Check that this operand accepts literals
1330   assert(AMDGPU::isSISrcOperand(InstDesc, OpNum));
1331 
1332   if (ApplyModifiers) {
1333     assert(AMDGPU::isSISrcFPOperand(InstDesc, OpNum));
1334     const unsigned Size = Imm.IsFPImm ? sizeof(double) : getOperandSize(InstDesc, OpNum);
1335     Val = applyInputFPModifiers(Val, Size);
1336   }
1337 
1338   APInt Literal(64, Val);
1339   uint8_t OpTy = InstDesc.OpInfo[OpNum].OperandType;
1340 
1341   if (Imm.IsFPImm) { // We got fp literal token
1342     switch (OpTy) {
1343     case AMDGPU::OPERAND_REG_IMM_INT64:
1344     case AMDGPU::OPERAND_REG_IMM_FP64:
1345     case AMDGPU::OPERAND_REG_INLINE_C_INT64:
1346     case AMDGPU::OPERAND_REG_INLINE_C_FP64:
1347       if (AMDGPU::isInlinableLiteral64(Literal.getZExtValue(),
1348                                        AsmParser->hasInv2PiInlineImm())) {
1349         Inst.addOperand(MCOperand::createImm(Literal.getZExtValue()));
1350         return;
1351       }
1352 
1353       // Non-inlineable
1354       if (AMDGPU::isSISrcFPOperand(InstDesc, OpNum)) { // Expected 64-bit fp operand
1355         // For fp operands we check if low 32 bits are zeros
1356         if (Literal.getLoBits(32) != 0) {
1357           const_cast<AMDGPUAsmParser *>(AsmParser)->Warning(Inst.getLoc(),
1358           "Can't encode literal as exact 64-bit floating-point operand. "
1359           "Low 32-bits will be set to zero");
1360         }
1361 
1362         Inst.addOperand(MCOperand::createImm(Literal.lshr(32).getZExtValue()));
1363         return;
1364       }
1365 
1366       // We don't allow fp literals in 64-bit integer instructions. It is
1367       // unclear how we should encode them. This case should be checked earlier
1368       // in predicate methods (isLiteralImm())
1369       llvm_unreachable("fp literal in 64-bit integer instruction.");
1370 
1371     case AMDGPU::OPERAND_REG_IMM_INT32:
1372     case AMDGPU::OPERAND_REG_IMM_FP32:
1373     case AMDGPU::OPERAND_REG_INLINE_C_INT32:
1374     case AMDGPU::OPERAND_REG_INLINE_C_FP32:
1375     case AMDGPU::OPERAND_REG_IMM_INT16:
1376     case AMDGPU::OPERAND_REG_IMM_FP16:
1377     case AMDGPU::OPERAND_REG_INLINE_C_INT16:
1378     case AMDGPU::OPERAND_REG_INLINE_C_FP16:
1379     case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
1380     case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: {
1381       bool lost;
1382       APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
1383       // Convert the literal to the operand's floating-point semantics.
1384       FPLiteral.convert(*getOpFltSemantics(OpTy),
1385                         APFloat::rmNearestTiesToEven, &lost);
1386       // We allow precision loss but not overflow or underflow. This should be
1387       // checked earlier in isLiteralImm().
1388 
1389       uint64_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue();
1390       if (OpTy == AMDGPU::OPERAND_REG_INLINE_C_V2INT16 ||
1391           OpTy == AMDGPU::OPERAND_REG_INLINE_C_V2FP16) {
1392         ImmVal |= (ImmVal << 16);
1393       }
1394 
1395       Inst.addOperand(MCOperand::createImm(ImmVal));
1396       return;
1397     }
1398     default:
1399       llvm_unreachable("invalid operand size");
1400     }
1401 
1402     return;
1403   }
1404 
1405    // We got int literal token.
1406   // Only sign extend inline immediates.
1407   // FIXME: No errors on truncation
1408   switch (OpTy) {
1409   case AMDGPU::OPERAND_REG_IMM_INT32:
1410   case AMDGPU::OPERAND_REG_IMM_FP32:
1411   case AMDGPU::OPERAND_REG_INLINE_C_INT32:
1412   case AMDGPU::OPERAND_REG_INLINE_C_FP32:
1413     if (isInt<32>(Val) &&
1414         AMDGPU::isInlinableLiteral32(static_cast<int32_t>(Val),
1415                                      AsmParser->hasInv2PiInlineImm())) {
1416       Inst.addOperand(MCOperand::createImm(Val));
1417       return;
1418     }
1419 
1420     Inst.addOperand(MCOperand::createImm(Val & 0xffffffff));
1421     return;
1422 
1423   case AMDGPU::OPERAND_REG_IMM_INT64:
1424   case AMDGPU::OPERAND_REG_IMM_FP64:
1425   case AMDGPU::OPERAND_REG_INLINE_C_INT64:
1426   case AMDGPU::OPERAND_REG_INLINE_C_FP64:
1427     if (AMDGPU::isInlinableLiteral64(Val, AsmParser->hasInv2PiInlineImm())) {
1428       Inst.addOperand(MCOperand::createImm(Val));
1429       return;
1430     }
1431 
1432     Inst.addOperand(MCOperand::createImm(Lo_32(Val)));
1433     return;
1434 
1435   case AMDGPU::OPERAND_REG_IMM_INT16:
1436   case AMDGPU::OPERAND_REG_IMM_FP16:
1437   case AMDGPU::OPERAND_REG_INLINE_C_INT16:
1438   case AMDGPU::OPERAND_REG_INLINE_C_FP16:
1439     if (isInt<16>(Val) &&
1440         AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val),
1441                                      AsmParser->hasInv2PiInlineImm())) {
1442       Inst.addOperand(MCOperand::createImm(Val));
1443       return;
1444     }
1445 
1446     Inst.addOperand(MCOperand::createImm(Val & 0xffff));
1447     return;
1448 
1449   case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
1450   case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: {
1451     auto LiteralVal = static_cast<uint16_t>(Literal.getLoBits(16).getZExtValue());
1452     assert(AMDGPU::isInlinableLiteral16(LiteralVal,
1453                                         AsmParser->hasInv2PiInlineImm()));
1454 
1455     uint32_t ImmVal = static_cast<uint32_t>(LiteralVal) << 16 |
1456                       static_cast<uint32_t>(LiteralVal);
1457     Inst.addOperand(MCOperand::createImm(ImmVal));
1458     return;
1459   }
1460   default:
1461     llvm_unreachable("invalid operand size");
1462   }
1463 }
1464 
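// Encode a 16- or 32-bit KImm literal operand: integer tokens are truncated to
// the requested bitwidth, FP tokens are converted to the matching FP format.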
1465 template <unsigned Bitwidth>
1466 void AMDGPUOperand::addKImmFPOperands(MCInst &Inst, unsigned N) const {
1467   APInt Literal(64, Imm.Val);
1468 
1469   if (!Imm.IsFPImm) {
1470     // We got int literal token.
1471     Inst.addOperand(MCOperand::createImm(Literal.getLoBits(Bitwidth).getZExtValue()));
1472     return;
1473   }
1474 
1475   bool Lost;
1476   APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
1477   FPLiteral.convert(*getFltSemantics(Bitwidth / 8),
1478                     APFloat::rmNearestTiesToEven, &Lost);
1479   Inst.addOperand(MCOperand::createImm(FPLiteral.bitcastToAPInt().getZExtValue()));
1480 }
1481 
1482 void AMDGPUOperand::addRegOperands(MCInst &Inst, unsigned N) const {
1483   Inst.addOperand(MCOperand::createReg(AMDGPU::getMCReg(getReg(), AsmParser->getSTI())));
1484 }
1485 
1486 //===----------------------------------------------------------------------===//
1487 // AsmParser
1488 //===----------------------------------------------------------------------===//
1489 
1490 static int getRegClass(RegisterKind Is, unsigned RegWidth) {
1491   if (Is == IS_VGPR) {
1492     switch (RegWidth) {
1493       default: return -1;
1494       case 1: return AMDGPU::VGPR_32RegClassID;
1495       case 2: return AMDGPU::VReg_64RegClassID;
1496       case 3: return AMDGPU::VReg_96RegClassID;
1497       case 4: return AMDGPU::VReg_128RegClassID;
1498       case 8: return AMDGPU::VReg_256RegClassID;
1499       case 16: return AMDGPU::VReg_512RegClassID;
1500     }
1501   } else if (Is == IS_TTMP) {
1502     switch (RegWidth) {
1503       default: return -1;
1504       case 1: return AMDGPU::TTMP_32RegClassID;
1505       case 2: return AMDGPU::TTMP_64RegClassID;
1506       case 4: return AMDGPU::TTMP_128RegClassID;
1507       case 8: return AMDGPU::TTMP_256RegClassID;
1508       case 16: return AMDGPU::TTMP_512RegClassID;
1509     }
1510   } else if (Is == IS_SGPR) {
1511     switch (RegWidth) {
1512       default: return -1;
1513       case 1: return AMDGPU::SGPR_32RegClassID;
1514       case 2: return AMDGPU::SGPR_64RegClassID;
1515       case 4: return AMDGPU::SGPR_128RegClassID;
1516       case 8: return AMDGPU::SGPR_256RegClassID;
1517       case 16: return AMDGPU::SGPR_512RegClassID;
1518     }
1519   }
1520   return -1;
1521 }
1522 
1523 static unsigned getSpecialRegForName(StringRef RegName) {
1524   return StringSwitch<unsigned>(RegName)
1525     .Case("exec", AMDGPU::EXEC)
1526     .Case("vcc", AMDGPU::VCC)
1527     .Case("flat_scratch", AMDGPU::FLAT_SCR)
1528     .Case("xnack_mask", AMDGPU::XNACK_MASK)
1529     .Case("m0", AMDGPU::M0)
1530     .Case("scc", AMDGPU::SCC)
1531     .Case("tba", AMDGPU::TBA)
1532     .Case("tma", AMDGPU::TMA)
1533     .Case("flat_scratch_lo", AMDGPU::FLAT_SCR_LO)
1534     .Case("flat_scratch_hi", AMDGPU::FLAT_SCR_HI)
1535     .Case("xnack_mask_lo", AMDGPU::XNACK_MASK_LO)
1536     .Case("xnack_mask_hi", AMDGPU::XNACK_MASK_HI)
1537     .Case("vcc_lo", AMDGPU::VCC_LO)
1538     .Case("vcc_hi", AMDGPU::VCC_HI)
1539     .Case("exec_lo", AMDGPU::EXEC_LO)
1540     .Case("exec_hi", AMDGPU::EXEC_HI)
1541     .Case("tma_lo", AMDGPU::TMA_LO)
1542     .Case("tma_hi", AMDGPU::TMA_HI)
1543     .Case("tba_lo", AMDGPU::TBA_LO)
1544     .Case("tba_hi", AMDGPU::TBA_HI)
1545     .Default(0);
1546 }
1547 
1548 bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
1549                                     SMLoc &EndLoc) {
1550   auto R = parseRegister();
1551   if (!R) return true;
1552   assert(R->isReg());
1553   RegNo = R->getReg();
1554   StartLoc = R->getStartLoc();
1555   EndLoc = R->getEndLoc();
1556   return false;
1557 }
1558 
1559 bool AMDGPUAsmParser::AddNextRegisterToList(unsigned &Reg, unsigned &RegWidth,
1560                                             RegisterKind RegKind, unsigned Reg1,
1561                                             unsigned RegNum) {
1562   switch (RegKind) {
1563   case IS_SPECIAL:
1564     if (Reg == AMDGPU::EXEC_LO && Reg1 == AMDGPU::EXEC_HI) {
1565       Reg = AMDGPU::EXEC;
1566       RegWidth = 2;
1567       return true;
1568     }
1569     if (Reg == AMDGPU::FLAT_SCR_LO && Reg1 == AMDGPU::FLAT_SCR_HI) {
1570       Reg = AMDGPU::FLAT_SCR;
1571       RegWidth = 2;
1572       return true;
1573     }
1574     if (Reg == AMDGPU::XNACK_MASK_LO && Reg1 == AMDGPU::XNACK_MASK_HI) {
1575       Reg = AMDGPU::XNACK_MASK;
1576       RegWidth = 2;
1577       return true;
1578     }
1579     if (Reg == AMDGPU::VCC_LO && Reg1 == AMDGPU::VCC_HI) {
1580       Reg = AMDGPU::VCC;
1581       RegWidth = 2;
1582       return true;
1583     }
1584     if (Reg == AMDGPU::TBA_LO && Reg1 == AMDGPU::TBA_HI) {
1585       Reg = AMDGPU::TBA;
1586       RegWidth = 2;
1587       return true;
1588     }
1589     if (Reg == AMDGPU::TMA_LO && Reg1 == AMDGPU::TMA_HI) {
1590       Reg = AMDGPU::TMA;
1591       RegWidth = 2;
1592       return true;
1593     }
1594     return false;
1595   case IS_VGPR:
1596   case IS_SGPR:
1597   case IS_TTMP:
1598     if (Reg1 != Reg + RegWidth) {
1599       return false;
1600     }
1601     RegWidth++;
1602     return true;
1603   default:
1604     llvm_unreachable("unexpected register kind");
1605   }
1606 }
1607 
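// Parse a single register reference. Accepted forms include special register
// names (e.g. vcc, exec, m0), a single register such as v0 or s7, a range
// such as v[4:7] or s[0:1], and a bracketed list of consecutive registers
// such as [s0, s1, s2, s3].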
1608 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
1609                                           unsigned &RegNum, unsigned &RegWidth,
1610                                           unsigned *DwordRegIndex) {
1611   if (DwordRegIndex) { *DwordRegIndex = 0; }
1612   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
1613   if (getLexer().is(AsmToken::Identifier)) {
1614     StringRef RegName = Parser.getTok().getString();
1615     if ((Reg = getSpecialRegForName(RegName))) {
1616       Parser.Lex();
1617       RegKind = IS_SPECIAL;
1618     } else {
1619       unsigned RegNumIndex = 0;
1620       if (RegName[0] == 'v') {
1621         RegNumIndex = 1;
1622         RegKind = IS_VGPR;
1623       } else if (RegName[0] == 's') {
1624         RegNumIndex = 1;
1625         RegKind = IS_SGPR;
1626       } else if (RegName.startswith("ttmp")) {
1627         RegNumIndex = strlen("ttmp");
1628         RegKind = IS_TTMP;
1629       } else {
1630         return false;
1631       }
1632       if (RegName.size() > RegNumIndex) {
1633         // Single 32-bit register: vXX.
1634         if (RegName.substr(RegNumIndex).getAsInteger(10, RegNum))
1635           return false;
1636         Parser.Lex();
1637         RegWidth = 1;
1638       } else {
1639         // Range of registers: v[XX:YY]. ":YY" is optional.
1640         Parser.Lex();
1641         int64_t RegLo, RegHi;
1642         if (getLexer().isNot(AsmToken::LBrac))
1643           return false;
1644         Parser.Lex();
1645 
1646         if (getParser().parseAbsoluteExpression(RegLo))
1647           return false;
1648 
1649         const bool isRBrace = getLexer().is(AsmToken::RBrac);
1650         if (!isRBrace && getLexer().isNot(AsmToken::Colon))
1651           return false;
1652         Parser.Lex();
1653 
1654         if (isRBrace) {
1655           RegHi = RegLo;
1656         } else {
1657           if (getParser().parseAbsoluteExpression(RegHi))
1658             return false;
1659 
1660           if (getLexer().isNot(AsmToken::RBrac))
1661             return false;
1662           Parser.Lex();
1663         }
1664         RegNum = (unsigned) RegLo;
1665         RegWidth = (RegHi - RegLo) + 1;
1666       }
1667     }
1668   } else if (getLexer().is(AsmToken::LBrac)) {
1669     // List of consecutive registers: [s0,s1,s2,s3]
1670     Parser.Lex();
1671     if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, nullptr))
1672       return false;
1673     if (RegWidth != 1)
1674       return false;
1675     RegisterKind RegKind1;
1676     unsigned Reg1, RegNum1, RegWidth1;
1677     do {
1678       if (getLexer().is(AsmToken::Comma)) {
1679         Parser.Lex();
1680       } else if (getLexer().is(AsmToken::RBrac)) {
1681         Parser.Lex();
1682         break;
1683       } else if (ParseAMDGPURegister(RegKind1, Reg1, RegNum1, RegWidth1, nullptr)) {
1684         if (RegWidth1 != 1) {
1685           return false;
1686         }
1687         if (RegKind1 != RegKind) {
1688           return false;
1689         }
1690         if (!AddNextRegisterToList(Reg, RegWidth, RegKind1, Reg1, RegNum1)) {
1691           return false;
1692         }
1693       } else {
1694         return false;
1695       }
1696     } while (true);
1697   } else {
1698     return false;
1699   }
1700   switch (RegKind) {
1701   case IS_SPECIAL:
1702     RegNum = 0;
1703     RegWidth = 1;
1704     break;
1705   case IS_VGPR:
1706   case IS_SGPR:
1707   case IS_TTMP:
1708   {
1709     unsigned Size = 1;
1710     if (RegKind == IS_SGPR || RegKind == IS_TTMP) {
1711       // SGPR and TTMP registers must be aligned. Max required alignment is 4 dwords.
1712       Size = std::min(RegWidth, 4u);
1713     }
1714     if (RegNum % Size != 0)
1715       return false;
1716     if (DwordRegIndex) { *DwordRegIndex = RegNum; }
1717     RegNum = RegNum / Size;
1718     int RCID = getRegClass(RegKind, RegWidth);
1719     if (RCID == -1)
1720       return false;
1721     const MCRegisterClass RC = TRI->getRegClass(RCID);
1722     if (RegNum >= RC.getNumRegs())
1723       return false;
1724     Reg = RC.getRegister(RegNum);
1725     break;
1726   }
1727 
1728   default:
1729     llvm_unreachable("unexpected register kind");
1730   }
1731 
1732   if (!subtargetHasRegister(*TRI, Reg))
1733     return false;
1734   return true;
1735 }
1736 
1737 std::unique_ptr<AMDGPUOperand> AMDGPUAsmParser::parseRegister() {
1738   const auto &Tok = Parser.getTok();
1739   SMLoc StartLoc = Tok.getLoc();
1740   SMLoc EndLoc = Tok.getEndLoc();
1741   RegisterKind RegKind;
1742   unsigned Reg, RegNum, RegWidth, DwordRegIndex;
1743 
1744   if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, &DwordRegIndex)) {
1745     return nullptr;
1746   }
1747   KernelScope.usesRegister(RegKind, DwordRegIndex, RegWidth);
1748   return AMDGPUOperand::CreateReg(this, Reg, StartLoc, EndLoc, false);
1749 }
1750 
1751 bool
1752 AMDGPUAsmParser::parseAbsoluteExpr(int64_t &Val, bool AbsMod) {
1753   if (AbsMod && getLexer().peekTok().is(AsmToken::Pipe) &&
1754       (getLexer().getKind() == AsmToken::Integer ||
1755        getLexer().getKind() == AsmToken::Real)) {
1756     // This is a workaround for handling operands like these:
1757     //     |1.0|
1758     //     |-1|
1759     // This syntax is not compatible with the syntax of standard
1760     // MC expressions (due to the trailing '|').
1761 
1762     SMLoc EndLoc;
1763     const MCExpr *Expr;
1764 
1765     if (getParser().parsePrimaryExpr(Expr, EndLoc)) {
1766       return true;
1767     }
1768 
1769     return !Expr->evaluateAsAbsolute(Val);
1770   }
1771 
1772   return getParser().parseAbsoluteExpression(Val);
1773 }
1774 
1775 OperandMatchResultTy
1776 AMDGPUAsmParser::parseImm(OperandVector &Operands, bool AbsMod) {
1777   // TODO: add syntactic sugar for 1/(2*PI)
1778   bool Minus = false;
1779   if (getLexer().getKind() == AsmToken::Minus) {
1780     const AsmToken NextToken = getLexer().peekTok();
1781     if (!NextToken.is(AsmToken::Integer) &&
1782         !NextToken.is(AsmToken::Real)) {
1783         return MatchOperand_NoMatch;
1784     }
1785     Minus = true;
1786     Parser.Lex();
1787   }
1788 
1789   SMLoc S = Parser.getTok().getLoc();
1790   switch(getLexer().getKind()) {
1791   case AsmToken::Integer: {
1792     int64_t IntVal;
1793     if (parseAbsoluteExpr(IntVal, AbsMod))
1794       return MatchOperand_ParseFail;
1795     if (Minus)
1796       IntVal *= -1;
1797     Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S));
1798     return MatchOperand_Success;
1799   }
1800   case AsmToken::Real: {
1801     int64_t IntVal;
1802     if (parseAbsoluteExpr(IntVal, AbsMod))
1803       return MatchOperand_ParseFail;
1804 
1805     APFloat F(BitsToDouble(IntVal));
1806     if (Minus)
1807       F.changeSign();
1808     Operands.push_back(
1809         AMDGPUOperand::CreateImm(this, F.bitcastToAPInt().getZExtValue(), S,
1810                                  AMDGPUOperand::ImmTyNone, true));
1811     return MatchOperand_Success;
1812   }
1813   default:
1814     return MatchOperand_NoMatch;
1815   }
1816 }
1817 
1818 OperandMatchResultTy
1819 AMDGPUAsmParser::parseReg(OperandVector &Operands) {
1820   if (auto R = parseRegister()) {
1821     assert(R->isReg());
1822     R->Reg.IsForcedVOP3 = isForcedVOP3();
1823     Operands.push_back(std::move(R));
1824     return MatchOperand_Success;
1825   }
1826   return MatchOperand_NoMatch;
1827 }
1828 
1829 OperandMatchResultTy
1830 AMDGPUAsmParser::parseRegOrImm(OperandVector &Operands, bool AbsMod) {
1831   auto res = parseImm(Operands, AbsMod);
1832   if (res != MatchOperand_NoMatch) {
1833     return res;
1834   }
1835 
1836   return parseReg(Operands);
1837 }
1838 
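// Parse a register or immediate together with optional floating-point input
// modifiers, e.g. -v0, |v1|, neg(v2) or abs(v3).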
1839 OperandMatchResultTy
1840 AMDGPUAsmParser::parseRegOrImmWithFPInputMods(OperandVector &Operands,
1841                                               bool AllowImm) {
1842   bool Negate = false, Negate2 = false, Abs = false, Abs2 = false;
1843 
1844   if (getLexer().getKind() == AsmToken::Minus) {
1845     const AsmToken NextToken = getLexer().peekTok();
1846 
1847     // Disallow ambiguous constructs like '--1' etc.; 'neg(-1)' should be used instead.
1848     if (NextToken.is(AsmToken::Minus)) {
1849       Error(Parser.getTok().getLoc(), "invalid syntax, expected 'neg' modifier");
1850       return MatchOperand_ParseFail;
1851     }
1852 
1853     // '-' followed by an integer literal N should be interpreted as integer
1854     // negation rather than a floating-point NEG modifier applied to N.
1855     // Besides being counter-intuitive, such use of the floating-point NEG
1856     // modifier results in different meanings of integer literals used with
1857     // VOP1/2/C and VOP3, for example:
1858     //    v_exp_f32_e32 v5, -1 // VOP1: src0 = 0xFFFFFFFF
1859     //    v_exp_f32_e64 v5, -1 // VOP3: src0 = 0x80000001
1860     // Negative fp literals should be handled likewise for uniformity.
1861     if (!NextToken.is(AsmToken::Integer) && !NextToken.is(AsmToken::Real)) {
1862       Parser.Lex();
1863       Negate = true;
1864     }
1865   }
1866 
1867   if (getLexer().getKind() == AsmToken::Identifier &&
1868       Parser.getTok().getString() == "neg") {
1869     if (Negate) {
1870       Error(Parser.getTok().getLoc(), "expected register or immediate");
1871       return MatchOperand_ParseFail;
1872     }
1873     Parser.Lex();
1874     Negate2 = true;
1875     if (getLexer().isNot(AsmToken::LParen)) {
1876       Error(Parser.getTok().getLoc(), "expected left paren after neg");
1877       return MatchOperand_ParseFail;
1878     }
1879     Parser.Lex();
1880   }
1881 
1882   if (getLexer().getKind() == AsmToken::Identifier &&
1883       Parser.getTok().getString() == "abs") {
1884     Parser.Lex();
1885     Abs2 = true;
1886     if (getLexer().isNot(AsmToken::LParen)) {
1887       Error(Parser.getTok().getLoc(), "expected left paren after abs");
1888       return MatchOperand_ParseFail;
1889     }
1890     Parser.Lex();
1891   }
1892 
1893   if (getLexer().getKind() == AsmToken::Pipe) {
1894     if (Abs2) {
1895       Error(Parser.getTok().getLoc(), "expected register or immediate");
1896       return MatchOperand_ParseFail;
1897     }
1898     Parser.Lex();
1899     Abs = true;
1900   }
1901 
1902   OperandMatchResultTy Res;
1903   if (AllowImm) {
1904     Res = parseRegOrImm(Operands, Abs);
1905   } else {
1906     Res = parseReg(Operands);
1907   }
1908   if (Res != MatchOperand_Success) {
1909     return Res;
1910   }
1911 
1912   AMDGPUOperand::Modifiers Mods;
1913   if (Abs) {
1914     if (getLexer().getKind() != AsmToken::Pipe) {
1915       Error(Parser.getTok().getLoc(), "expected vertical bar");
1916       return MatchOperand_ParseFail;
1917     }
1918     Parser.Lex();
1919     Mods.Abs = true;
1920   }
1921   if (Abs2) {
1922     if (getLexer().isNot(AsmToken::RParen)) {
1923       Error(Parser.getTok().getLoc(), "expected closing parentheses");
1924       return MatchOperand_ParseFail;
1925     }
1926     Parser.Lex();
1927     Mods.Abs = true;
1928   }
1929 
1930   if (Negate) {
1931     Mods.Neg = true;
1932   } else if (Negate2) {
1933     if (getLexer().isNot(AsmToken::RParen)) {
1934       Error(Parser.getTok().getLoc(), "expected closing parentheses");
1935       return MatchOperand_ParseFail;
1936     }
1937     Parser.Lex();
1938     Mods.Neg = true;
1939   }
1940 
1941   if (Mods.hasFPModifiers()) {
1942     AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
1943     Op.setModifiers(Mods);
1944   }
1945   return MatchOperand_Success;
1946 }
1947 
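// Parse a register or immediate together with the optional integer input
// modifier, e.g. sext(v0).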
1948 OperandMatchResultTy
1949 AMDGPUAsmParser::parseRegOrImmWithIntInputMods(OperandVector &Operands,
1950                                                bool AllowImm) {
1951   bool Sext = false;
1952 
1953   if (getLexer().getKind() == AsmToken::Identifier &&
1954       Parser.getTok().getString() == "sext") {
1955     Parser.Lex();
1956     Sext = true;
1957     if (getLexer().isNot(AsmToken::LParen)) {
1958       Error(Parser.getTok().getLoc(), "expected left paren after sext");
1959       return MatchOperand_ParseFail;
1960     }
1961     Parser.Lex();
1962   }
1963 
1964   OperandMatchResultTy Res;
1965   if (AllowImm) {
1966     Res = parseRegOrImm(Operands);
1967   } else {
1968     Res = parseReg(Operands);
1969   }
1970   if (Res != MatchOperand_Success) {
1971     return Res;
1972   }
1973 
1974   AMDGPUOperand::Modifiers Mods;
1975   if (Sext) {
1976     if (getLexer().isNot(AsmToken::RParen)) {
1977       Error(Parser.getTok().getLoc(), "expected closing parentheses");
1978       return MatchOperand_ParseFail;
1979     }
1980     Parser.Lex();
1981     Mods.Sext = true;
1982   }
1983 
1984   if (Mods.hasIntModifiers()) {
1985     AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
1986     Op.setModifiers(Mods);
1987   }
1988 
1989   return MatchOperand_Success;
1990 }
1991 
1992 OperandMatchResultTy
1993 AMDGPUAsmParser::parseRegWithFPInputMods(OperandVector &Operands) {
1994   return parseRegOrImmWithFPInputMods(Operands, false);
1995 }
1996 
1997 OperandMatchResultTy
1998 AMDGPUAsmParser::parseRegWithIntInputMods(OperandVector &Operands) {
1999   return parseRegOrImmWithIntInputMods(Operands, false);
2000 }
2001 
2002 OperandMatchResultTy AMDGPUAsmParser::parseVReg32OrOff(OperandVector &Operands) {
2003   std::unique_ptr<AMDGPUOperand> Reg = parseRegister();
2004   if (Reg) {
2005     Operands.push_back(std::move(Reg));
2006     return MatchOperand_Success;
2007   }
2008 
2009   const AsmToken &Tok = Parser.getTok();
2010   if (Tok.getString() == "off") {
2011     Operands.push_back(AMDGPUOperand::CreateImm(this, 0, Tok.getLoc(),
2012                                                 AMDGPUOperand::ImmTyOff, false));
2013     Parser.Lex();
2014     return MatchOperand_Success;
2015   }
2016 
2017   return MatchOperand_NoMatch;
2018 }
2019 
2020 unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) {
2021   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
2022 
2023   if ((getForcedEncodingSize() == 32 && (TSFlags & SIInstrFlags::VOP3)) ||
2024       (getForcedEncodingSize() == 64 && !(TSFlags & SIInstrFlags::VOP3)) ||
2025       (isForcedDPP() && !(TSFlags & SIInstrFlags::DPP)) ||
2026       (isForcedSDWA() && !(TSFlags & SIInstrFlags::SDWA)) )
2027     return Match_InvalidOperand;
2028 
2029   if ((TSFlags & SIInstrFlags::VOP3) &&
2030       (TSFlags & SIInstrFlags::VOPAsmPrefer32Bit) &&
2031       getForcedEncodingSize() != 64)
2032     return Match_PreferE32;
2033 
2034   if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
2035       Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) {
2036     // v_mac_f32/f16 allow only dst_sel == DWORD.
2037     auto OpNum =
2038         AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::dst_sel);
2039     const auto &Op = Inst.getOperand(OpNum);
2040     if (!Op.isImm() || Op.getImm() != AMDGPU::SDWA::SdwaSel::DWORD) {
2041       return Match_InvalidOperand;
2042     }
2043   }
2044 
2045   if ((TSFlags & SIInstrFlags::FLAT) && !hasFlatOffsets()) {
2046     // FIXME: Produces an error without reporting the correct column.
2047     auto OpNum =
2048         AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::offset);
2049     const auto &Op = Inst.getOperand(OpNum);
2050     if (Op.getImm() != 0)
2051       return Match_InvalidOperand;
2052   }
2053 
2054   return Match_Success;
2055 }
2056 
2057 // Determine which asm variants we should check.
2058 ArrayRef<unsigned> AMDGPUAsmParser::getMatchedVariants() const {
2059   if (getForcedEncodingSize() == 32) {
2060     static const unsigned Variants[] = {AMDGPUAsmVariants::DEFAULT};
2061     return makeArrayRef(Variants);
2062   }
2063 
2064   if (isForcedVOP3()) {
2065     static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3};
2066     return makeArrayRef(Variants);
2067   }
2068 
2069   if (isForcedSDWA()) {
2070     static const unsigned Variants[] = {AMDGPUAsmVariants::SDWA,
2071                                         AMDGPUAsmVariants::SDWA9};
2072     return makeArrayRef(Variants);
2073   }
2074 
2075   if (isForcedDPP()) {
2076     static const unsigned Variants[] = {AMDGPUAsmVariants::DPP};
2077     return makeArrayRef(Variants);
2078   }
2079 
2080   static const unsigned Variants[] = {
2081     AMDGPUAsmVariants::DEFAULT, AMDGPUAsmVariants::VOP3,
2082     AMDGPUAsmVariants::SDWA, AMDGPUAsmVariants::SDWA9, AMDGPUAsmVariants::DPP
2083   };
2084 
2085   return makeArrayRef(Variants);
2086 }
2087 
2088 unsigned AMDGPUAsmParser::findImplicitSGPRReadInVOP(const MCInst &Inst) const {
2089   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
2090   const unsigned Num = Desc.getNumImplicitUses();
2091   for (unsigned i = 0; i < Num; ++i) {
2092     unsigned Reg = Desc.ImplicitUses[i];
2093     switch (Reg) {
2094     case AMDGPU::FLAT_SCR:
2095     case AMDGPU::VCC:
2096     case AMDGPU::M0:
2097       return Reg;
2098     default:
2099       break;
2100     }
2101   }
2102   return AMDGPU::NoRegister;
2103 }
2104 
2105 // NB: This code is correct only when used to check constant
2106 // bus limitations because GFX7 supports no f16 inline constants.
2107 // Note that there are no cases when a GFX7 opcode violates
2108 // constant bus limitations due to the use of an f16 constant.
2109 bool AMDGPUAsmParser::isInlineConstant(const MCInst &Inst,
2110                                        unsigned OpIdx) const {
2111   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
2112 
2113   if (!AMDGPU::isSISrcOperand(Desc, OpIdx)) {
2114     return false;
2115   }
2116 
2117   const MCOperand &MO = Inst.getOperand(OpIdx);
2118 
2119   int64_t Val = MO.getImm();
2120   auto OpSize = AMDGPU::getOperandSize(Desc, OpIdx);
2121 
2122   switch (OpSize) { // expected operand size
2123   case 8:
2124     return AMDGPU::isInlinableLiteral64(Val, hasInv2PiInlineImm());
2125   case 4:
2126     return AMDGPU::isInlinableLiteral32(Val, hasInv2PiInlineImm());
2127   case 2: {
2128     const unsigned OperandType = Desc.OpInfo[OpIdx].OperandType;
2129     if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2INT16 ||
2130         OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2FP16) {
2131       return AMDGPU::isInlinableLiteralV216(Val, hasInv2PiInlineImm());
2132     } else {
2133       return AMDGPU::isInlinableLiteral16(Val, hasInv2PiInlineImm());
2134     }
2135   }
2136   default:
2137     llvm_unreachable("invalid operand size");
2138   }
2139 }
2140 
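// An operand uses the constant bus if it is a literal (non-inline) immediate,
// an expression, or an SGPR.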
2141 bool AMDGPUAsmParser::usesConstantBus(const MCInst &Inst, unsigned OpIdx) {
2142   const MCOperand &MO = Inst.getOperand(OpIdx);
2143   if (MO.isImm()) {
2144     return !isInlineConstant(Inst, OpIdx);
2145   }
2146   return !MO.isReg() ||
2147          isSGPR(mc2PseudoReg(MO.getReg()), getContext().getRegisterInfo());
2148 }
2149 
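// VOP* instructions may read at most one SGPR or literal constant via the
// constant bus. Count such reads (including implicit SGPR uses and the
// special 'imm' operand of madmk-like opcodes) and reject the instruction if
// the limit is exceeded.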
2150 bool AMDGPUAsmParser::validateConstantBusLimitations(const MCInst &Inst) {
2151   const unsigned Opcode = Inst.getOpcode();
2152   const MCInstrDesc &Desc = MII.get(Opcode);
2153   unsigned ConstantBusUseCount = 0;
2154 
2155   if (Desc.TSFlags &
2156       (SIInstrFlags::VOPC |
2157        SIInstrFlags::VOP1 | SIInstrFlags::VOP2 |
2158        SIInstrFlags::VOP3 | SIInstrFlags::VOP3P |
2159        SIInstrFlags::SDWA)) {
2160     // Check special imm operands (used by madmk, etc.)
2161     if (AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm) != -1) {
2162       ++ConstantBusUseCount;
2163     }
2164 
2165     unsigned SGPRUsed = findImplicitSGPRReadInVOP(Inst);
2166     if (SGPRUsed != AMDGPU::NoRegister) {
2167       ++ConstantBusUseCount;
2168     }
2169 
2170     const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
2171     const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
2172     const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
2173 
2174     const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx };
2175 
2176     for (int OpIdx : OpIndices) {
2177       if (OpIdx == -1) break;
2178 
2179       const MCOperand &MO = Inst.getOperand(OpIdx);
2180       if (usesConstantBus(Inst, OpIdx)) {
2181         if (MO.isReg()) {
2182           const unsigned Reg = mc2PseudoReg(MO.getReg());
2183           // Pairs of registers with a partial intersection like these
2184           //   s0, s[0:1]
2185           //   flat_scratch_lo, flat_scratch
2186           //   flat_scratch_lo, flat_scratch_hi
2187           // are theoretically valid but are rejected here anyway.
2188           // Note that this code mimics SIInstrInfo::verifyInstruction
2189           if (Reg != SGPRUsed) {
2190             ++ConstantBusUseCount;
2191           }
2192           SGPRUsed = Reg;
2193         } else { // Expression or a literal
2194           ++ConstantBusUseCount;
2195         }
2196       }
2197     }
2198   }
2199 
2200   return ConstantBusUseCount <= 1;
2201 }
2202 
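// For instructions whose destination operand is marked EARLY_CLOBBER, the
// destination register must not overlap any of the source registers.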
2203 bool AMDGPUAsmParser::validateEarlyClobberLimitations(const MCInst &Inst) {
2204   const unsigned Opcode = Inst.getOpcode();
2205   const MCInstrDesc &Desc = MII.get(Opcode);
2206 
2207   const int DstIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vdst);
2208   if (DstIdx == -1 ||
2209       Desc.getOperandConstraint(DstIdx, MCOI::EARLY_CLOBBER) == -1) {
2210     return true;
2211   }
2212 
2213   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
2214 
2215   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
2216   const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
2217   const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
2218 
2219   assert(DstIdx != -1);
2220   const MCOperand &Dst = Inst.getOperand(DstIdx);
2221   assert(Dst.isReg());
2222   const unsigned DstReg = mc2PseudoReg(Dst.getReg());
2223 
2224   const int SrcIndices[] = { Src0Idx, Src1Idx, Src2Idx };
2225 
2226   for (int SrcIdx : SrcIndices) {
2227     if (SrcIdx == -1) break;
2228     const MCOperand &Src = Inst.getOperand(SrcIdx);
2229     if (Src.isReg()) {
2230       const unsigned SrcReg = mc2PseudoReg(Src.getReg());
2231       if (isRegIntersect(DstReg, SrcReg, TRI)) {
2232         return false;
2233       }
2234     }
2235   }
2236 
2237   return true;
2238 }
2239 
2240 bool AMDGPUAsmParser::validateIntClampSupported(const MCInst &Inst) {
2241 
2242   const unsigned Opc = Inst.getOpcode();
2243   const MCInstrDesc &Desc = MII.get(Opc);
2244 
2245   if ((Desc.TSFlags & SIInstrFlags::IntClamp) != 0 && !hasIntClamp()) {
2246     int ClampIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp);
2247     assert(ClampIdx != -1);
2248     return Inst.getOperand(ClampIdx).getImm() == 0;
2249   }
2250 
2251   return true;
2252 }
2253 
2254 bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst,
2255                                           const SMLoc &IDLoc) {
2256   if (!validateConstantBusLimitations(Inst)) {
2257     Error(IDLoc,
2258       "invalid operand (violates constant bus restrictions)");
2259     return false;
2260   }
2261   if (!validateEarlyClobberLimitations(Inst)) {
2262     Error(IDLoc,
2263       "destination must be different than all sources");
2264     return false;
2265   }
2266   if (!validateIntClampSupported(Inst)) {
2267     Error(IDLoc,
2268       "integer clamping is not supported on this GPU");
2269     return false;
2270   }
2271 
2272   return true;
2273 }
2274 
2275 static std::string AMDGPUMnemonicSpellCheck(StringRef S, uint64_t FBS,
2276                                             unsigned VariantID = 0);
2277 
2278 bool AMDGPUAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
2279                                               OperandVector &Operands,
2280                                               MCStreamer &Out,
2281                                               uint64_t &ErrorInfo,
2282                                               bool MatchingInlineAsm) {
2283   MCInst Inst;
2284   unsigned Result = Match_Success;
2285   for (auto Variant : getMatchedVariants()) {
2286     uint64_t EI;
2287     auto R = MatchInstructionImpl(Operands, Inst, EI, MatchingInlineAsm,
2288                                   Variant);
2289     // We order match statuses from least to most specific, and we use the
2290     // most specific status as the result:
2291     // Match_MnemonicFail < Match_InvalidOperand < Match_MissingFeature < Match_PreferE32
2292     if ((R == Match_Success) ||
2293         (R == Match_PreferE32) ||
2294         (R == Match_MissingFeature && Result != Match_PreferE32) ||
2295         (R == Match_InvalidOperand && Result != Match_MissingFeature
2296                                    && Result != Match_PreferE32) ||
2297         (R == Match_MnemonicFail   && Result != Match_InvalidOperand
2298                                    && Result != Match_MissingFeature
2299                                    && Result != Match_PreferE32)) {
2300       Result = R;
2301       ErrorInfo = EI;
2302     }
2303     if (R == Match_Success)
2304       break;
2305   }
2306 
2307   switch (Result) {
2308   default: break;
2309   case Match_Success:
2310     if (!validateInstruction(Inst, IDLoc)) {
2311       return true;
2312     }
2313     Inst.setLoc(IDLoc);
2314     Out.EmitInstruction(Inst, getSTI());
2315     return false;
2316 
2317   case Match_MissingFeature:
2318     return Error(IDLoc, "instruction not supported on this GPU");
2319 
2320   case Match_MnemonicFail: {
2321     uint64_t FBS = ComputeAvailableFeatures(getSTI().getFeatureBits());
2322     std::string Suggestion = AMDGPUMnemonicSpellCheck(
2323         ((AMDGPUOperand &)*Operands[0]).getToken(), FBS);
2324     return Error(IDLoc, "invalid instruction" + Suggestion,
2325                  ((AMDGPUOperand &)*Operands[0]).getLocRange());
2326   }
2327 
2328   case Match_InvalidOperand: {
2329     SMLoc ErrorLoc = IDLoc;
2330     if (ErrorInfo != ~0ULL) {
2331       if (ErrorInfo >= Operands.size()) {
2332         return Error(IDLoc, "too few operands for instruction");
2333       }
2334       ErrorLoc = ((AMDGPUOperand &)*Operands[ErrorInfo]).getStartLoc();
2335       if (ErrorLoc == SMLoc())
2336         ErrorLoc = IDLoc;
2337     }
2338     return Error(ErrorLoc, "invalid operand for instruction");
2339   }
2340 
2341   case Match_PreferE32:
2342     return Error(IDLoc, "internal error: instruction without _e64 suffix "
2343                         "should be encoded as e32");
2344   }
2345   llvm_unreachable("Implement any new match types added!");
2346 }
2347 
2348 bool AMDGPUAsmParser::ParseAsAbsoluteExpression(uint32_t &Ret) {
2349   int64_t Tmp = -1;
2350   if (getLexer().isNot(AsmToken::Integer) && getLexer().isNot(AsmToken::Identifier)) {
2351     return true;
2352   }
2353   if (getParser().parseAbsoluteExpression(Tmp)) {
2354     return true;
2355   }
2356   Ret = static_cast<uint32_t>(Tmp);
2357   return false;
2358 }
2359 
2360 bool AMDGPUAsmParser::ParseDirectiveMajorMinor(uint32_t &Major,
2361                                                uint32_t &Minor) {
2362   if (ParseAsAbsoluteExpression(Major))
2363     return TokError("invalid major version");
2364 
2365   if (getLexer().isNot(AsmToken::Comma))
2366     return TokError("minor version number required, comma expected");
2367   Lex();
2368 
2369   if (ParseAsAbsoluteExpression(Minor))
2370     return TokError("invalid minor version");
2371 
2372   return false;
2373 }
2374 
2375 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectVersion() {
2376   uint32_t Major;
2377   uint32_t Minor;
2378 
2379   if (ParseDirectiveMajorMinor(Major, Minor))
2380     return true;
2381 
2382   getTargetStreamer().EmitDirectiveHSACodeObjectVersion(Major, Minor);
2383   return false;
2384 }
2385 
2386 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectISA() {
2387   uint32_t Major;
2388   uint32_t Minor;
2389   uint32_t Stepping;
2390   StringRef VendorName;
2391   StringRef ArchName;
2392 
2393   // If this directive has no arguments, then use the ISA version for the
2394   // targeted GPU.
2395   if (getLexer().is(AsmToken::EndOfStatement)) {
2396     AMDGPU::IsaInfo::IsaVersion ISA =
2397         AMDGPU::IsaInfo::getIsaVersion(getFeatureBits());
2398     getTargetStreamer().EmitDirectiveHSACodeObjectISA(ISA.Major, ISA.Minor,
2399                                                       ISA.Stepping,
2400                                                       "AMD", "AMDGPU");
2401     return false;
2402   }
2403 
2404   if (ParseDirectiveMajorMinor(Major, Minor))
2405     return true;
2406 
2407   if (getLexer().isNot(AsmToken::Comma))
2408     return TokError("stepping version number required, comma expected");
2409   Lex();
2410 
2411   if (ParseAsAbsoluteExpression(Stepping))
2412     return TokError("invalid stepping version");
2413 
2414   if (getLexer().isNot(AsmToken::Comma))
2415     return TokError("vendor name required, comma expected");
2416   Lex();
2417 
2418   if (getLexer().isNot(AsmToken::String))
2419     return TokError("invalid vendor name");
2420 
2421   VendorName = getLexer().getTok().getStringContents();
2422   Lex();
2423 
2424   if (getLexer().isNot(AsmToken::Comma))
2425     return TokError("arch name required, comma expected");
2426   Lex();
2427 
2428   if (getLexer().isNot(AsmToken::String))
2429     return TokError("invalid arch name");
2430 
2431   ArchName = getLexer().getTok().getStringContents();
2432   Lex();
2433 
2434   getTargetStreamer().EmitDirectiveHSACodeObjectISA(Major, Minor, Stepping,
2435                                                     VendorName, ArchName);
2436   return false;
2437 }
2438 
2439 bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID,
2440                                                amd_kernel_code_t &Header) {
2441   SmallString<40> ErrStr;
2442   raw_svector_ostream Err(ErrStr);
2443   if (!parseAmdKernelCodeField(ID, getParser(), Header, Err)) {
2444     return TokError(Err.str());
2445   }
2446   Lex();
2447   return false;
2448 }
2449 
2450 bool AMDGPUAsmParser::ParseDirectiveAMDKernelCodeT() {
2451   amd_kernel_code_t Header;
2452   AMDGPU::initDefaultAMDKernelCodeT(Header, getFeatureBits());
2453 
2454   while (true) {
2455     // Lex EndOfStatement.  This is in a while loop, because lexing a comment
2456     // will set the current token to EndOfStatement.
2457     while(getLexer().is(AsmToken::EndOfStatement))
2458       Lex();
2459 
2460     if (getLexer().isNot(AsmToken::Identifier))
2461       return TokError("expected value identifier or .end_amd_kernel_code_t");
2462 
2463     StringRef ID = getLexer().getTok().getIdentifier();
2464     Lex();
2465 
2466     if (ID == ".end_amd_kernel_code_t")
2467       break;
2468 
2469     if (ParseAMDKernelCodeTValue(ID, Header))
2470       return true;
2471   }
2472 
2473   getTargetStreamer().EmitAMDKernelCodeT(Header);
2474 
2475   return false;
2476 }
2477 
2478 bool AMDGPUAsmParser::ParseDirectiveAMDGPUHsaKernel() {
2479   if (getLexer().isNot(AsmToken::Identifier))
2480     return TokError("expected symbol name");
2481 
2482   StringRef KernelName = Parser.getTok().getString();
2483 
2484   getTargetStreamer().EmitAMDGPUSymbolType(KernelName,
2485                                            ELF::STT_AMDGPU_HSA_KERNEL);
2486   Lex();
2487   KernelScope.initialize(getContext());
2488   return false;
2489 }
2490 
2491 bool AMDGPUAsmParser::ParseDirectiveISAVersion() {
2492   if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) {
2493     return Error(getParser().getTok().getLoc(),
2494                  ".amd_amdgpu_isa directive is not available on non-amdgcn "
2495                  "architectures");
2496   }
2497 
2498   auto ISAVersionStringFromASM = getLexer().getTok().getStringContents();
2499 
2500   std::string ISAVersionStringFromSTI;
2501   raw_string_ostream ISAVersionStreamFromSTI(ISAVersionStringFromSTI);
2502   IsaInfo::streamIsaVersion(&getSTI(), ISAVersionStreamFromSTI);
2503 
2504   if (ISAVersionStringFromASM != ISAVersionStreamFromSTI.str()) {
2505     return Error(getParser().getTok().getLoc(),
2506                  ".amd_amdgpu_isa directive does not match triple and/or mcpu "
2507                  "arguments specified through the command line");
2508   }
2509 
2510   getTargetStreamer().EmitISAVersion(ISAVersionStreamFromSTI.str());
2511   Lex();
2512 
2513   return false;
2514 }
2515 
2516 bool AMDGPUAsmParser::ParseDirectiveHSAMetadata() {
2517   if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA) {
2518     return Error(getParser().getTok().getLoc(),
2519                  (Twine(HSAMD::AssemblerDirectiveBegin) + Twine(" directive is "
2520                  "not available on non-amdhsa OSes")).str());
2521   }
2522 
2523   std::string HSAMetadataString;
2524   raw_string_ostream YamlStream(HSAMetadataString);
2525 
2526   getLexer().setSkipSpace(false);
2527 
2528   bool FoundEnd = false;
2529   while (!getLexer().is(AsmToken::Eof)) {
2530     while (getLexer().is(AsmToken::Space)) {
2531       YamlStream << getLexer().getTok().getString();
2532       Lex();
2533     }
2534 
2535     if (getLexer().is(AsmToken::Identifier)) {
2536       StringRef ID = getLexer().getTok().getIdentifier();
2537       if (ID == AMDGPU::HSAMD::AssemblerDirectiveEnd) {
2538         Lex();
2539         FoundEnd = true;
2540         break;
2541       }
2542     }
2543 
2544     YamlStream << Parser.parseStringToEndOfStatement()
2545                << getContext().getAsmInfo()->getSeparatorString();
2546 
2547     Parser.eatToEndOfStatement();
2548   }
2549 
2550   getLexer().setSkipSpace(true);
2551 
2552   if (getLexer().is(AsmToken::Eof) && !FoundEnd) {
2553     return TokError(Twine("expected directive ") +
2554                     Twine(HSAMD::AssemblerDirectiveEnd) + Twine(" not found"));
2555   }
2556 
2557   YamlStream.flush();
2558 
2559   if (!getTargetStreamer().EmitHSAMetadata(HSAMetadataString))
2560     return Error(getParser().getTok().getLoc(), "invalid HSA metadata");
2561 
2562   return false;
2563 }
2564 
2565 bool AMDGPUAsmParser::ParseDirectivePALMetadata() {
2566   if (getSTI().getTargetTriple().getOS() != Triple::AMDPAL) {
2567     return Error(getParser().getTok().getLoc(),
2568                  (Twine(PALMD::AssemblerDirective) + Twine(" directive is "
2569                  "not available on non-amdpal OSes")).str());
2570   }
2571 
2572   PALMD::Metadata PALMetadata;
2573   for (;;) {
2574     uint32_t Value;
2575     if (ParseAsAbsoluteExpression(Value)) {
2576       return TokError(Twine("invalid value in ") +
2577                       Twine(PALMD::AssemblerDirective));
2578     }
2579     PALMetadata.push_back(Value);
2580     if (getLexer().isNot(AsmToken::Comma))
2581       break;
2582     Lex();
2583   }
2584   getTargetStreamer().EmitPALMetadata(PALMetadata);
2585   return false;
2586 }
2587 
2588 bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) {
2589   StringRef IDVal = DirectiveID.getString();
2590 
2591   if (IDVal == ".hsa_code_object_version")
2592     return ParseDirectiveHSACodeObjectVersion();
2593 
2594   if (IDVal == ".hsa_code_object_isa")
2595     return ParseDirectiveHSACodeObjectISA();
2596 
2597   if (IDVal == ".amd_kernel_code_t")
2598     return ParseDirectiveAMDKernelCodeT();
2599 
2600   if (IDVal == ".amdgpu_hsa_kernel")
2601     return ParseDirectiveAMDGPUHsaKernel();
2602 
2603   if (IDVal == ".amd_amdgpu_isa")
2604     return ParseDirectiveISAVersion();
2605 
2606   if (IDVal == AMDGPU::HSAMD::AssemblerDirectiveBegin)
2607     return ParseDirectiveHSAMetadata();
2608 
2609   if (IDVal == PALMD::AssemblerDirective)
2610     return ParseDirectivePALMetadata();
2611 
2612   return true;
2613 }
2614 
2615 bool AMDGPUAsmParser::subtargetHasRegister(const MCRegisterInfo &MRI,
2616                                            unsigned RegNo) const {
2617 
2618   for (MCRegAliasIterator R(AMDGPU::TTMP12_TTMP13_TTMP14_TTMP15, &MRI, true);
2619        R.isValid(); ++R) {
2620     if (*R == RegNo)
2621       return isGFX9();
2622   }
2623 
2624   switch (RegNo) {
2625   case AMDGPU::TBA:
2626   case AMDGPU::TBA_LO:
2627   case AMDGPU::TBA_HI:
2628   case AMDGPU::TMA:
2629   case AMDGPU::TMA_LO:
2630   case AMDGPU::TMA_HI:
2631     return !isGFX9();
2632   case AMDGPU::XNACK_MASK:
2633   case AMDGPU::XNACK_MASK_LO:
2634   case AMDGPU::XNACK_MASK_HI:
2635     return !isCI() && !isSI() && hasXNACK();
2636   default:
2637     break;
2638   }
2639 
2640   if (isCI())
2641     return true;
2642 
2643   if (isSI()) {
2644     // No flat_scr
2645     switch (RegNo) {
2646     case AMDGPU::FLAT_SCR:
2647     case AMDGPU::FLAT_SCR_LO:
2648     case AMDGPU::FLAT_SCR_HI:
2649       return false;
2650     default:
2651       return true;
2652     }
2653   }
2654 
2655   // VI only has 102 SGPRs, so make sure we aren't trying to use the 2 more that
2656   // SI/CI have.
2657   for (MCRegAliasIterator R(AMDGPU::SGPR102_SGPR103, &MRI, true);
2658        R.isValid(); ++R) {
2659     if (*R == RegNo)
2660       return false;
2661   }
2662 
2663   return true;
2664 }
2665 
2666 OperandMatchResultTy
2667 AMDGPUAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic) {
2668   // Try to parse with a custom parser
2669   OperandMatchResultTy ResTy = MatchOperandParserImpl(Operands, Mnemonic);
2670 
2671   // If we successfully parsed the operand or if there was an error parsing,
2672   // we are done.
2673   //
2674   // If we are parsing after we reach EndOfStatement then this means we
2675   // are appending default values to the Operands list.  This is only done
2676   // by a custom parser, so we shouldn't continue on to the generic parsing.
2677   if (ResTy == MatchOperand_Success || ResTy == MatchOperand_ParseFail ||
2678       getLexer().is(AsmToken::EndOfStatement))
2679     return ResTy;
2680 
2681   ResTy = parseRegOrImm(Operands);
2682 
2683   if (ResTy == MatchOperand_Success)
2684     return ResTy;
2685 
2686   const auto &Tok = Parser.getTok();
2687   SMLoc S = Tok.getLoc();
2688 
2689   const MCExpr *Expr = nullptr;
2690   if (!Parser.parseExpression(Expr)) {
2691     Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S));
2692     return MatchOperand_Success;
2693   }
2694 
2695   // Possibly this is an instruction flag like 'gds'.
2696   if (Tok.getKind() == AsmToken::Identifier) {
2697     Operands.push_back(AMDGPUOperand::CreateToken(this, Tok.getString(), S));
2698     Parser.Lex();
2699     return MatchOperand_Success;
2700   }
2701 
2702   return MatchOperand_NoMatch;
2703 }
2704 
2705 StringRef AMDGPUAsmParser::parseMnemonicSuffix(StringRef Name) {
2706   // Clear any forced encodings from the previous instruction.
2707   setForcedEncodingSize(0);
2708   setForcedDPP(false);
2709   setForcedSDWA(false);
2710 
2711   if (Name.endswith("_e64")) {
2712     setForcedEncodingSize(64);
2713     return Name.substr(0, Name.size() - 4);
2714   } else if (Name.endswith("_e32")) {
2715     setForcedEncodingSize(32);
2716     return Name.substr(0, Name.size() - 4);
2717   } else if (Name.endswith("_dpp")) {
2718     setForcedDPP(true);
2719     return Name.substr(0, Name.size() - 4);
2720   } else if (Name.endswith("_sdwa")) {
2721     setForcedSDWA(true);
2722     return Name.substr(0, Name.size() - 5);
2723   }
2724   return Name;
2725 }
2726 
2727 bool AMDGPUAsmParser::ParseInstruction(ParseInstructionInfo &Info,
2728                                        StringRef Name,
2729                                        SMLoc NameLoc, OperandVector &Operands) {
2730   // Add the instruction mnemonic
2731   Name = parseMnemonicSuffix(Name);
2732   Operands.push_back(AMDGPUOperand::CreateToken(this, Name, NameLoc));
2733 
2734   while (!getLexer().is(AsmToken::EndOfStatement)) {
2735     OperandMatchResultTy Res = parseOperand(Operands, Name);
2736 
2737     // Eat the comma or space if there is one.
2738     if (getLexer().is(AsmToken::Comma))
2739       Parser.Lex();
2740 
2741     switch (Res) {
2742       case MatchOperand_Success: break;
2743       case MatchOperand_ParseFail:
2744         Error(getLexer().getLoc(), "failed parsing operand.");
2745         while (!getLexer().is(AsmToken::EndOfStatement)) {
2746           Parser.Lex();
2747         }
2748         return true;
2749       case MatchOperand_NoMatch:
2750         Error(getLexer().getLoc(), "not a valid operand.");
2751         while (!getLexer().is(AsmToken::EndOfStatement)) {
2752           Parser.Lex();
2753         }
2754         return true;
2755     }
2756   }
2757 
2758   return false;
2759 }
2760 
2761 //===----------------------------------------------------------------------===//
2762 // Utility functions
2763 //===----------------------------------------------------------------------===//
2764 
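// Parse an integer value introduced by a named prefix, e.g. offset:4096;
// an optional leading '-' negates the value.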
2765 OperandMatchResultTy
2766 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, int64_t &Int) {
2767   switch(getLexer().getKind()) {
2768     default: return MatchOperand_NoMatch;
2769     case AsmToken::Identifier: {
2770       StringRef Name = Parser.getTok().getString();
2771       if (!Name.equals(Prefix)) {
2772         return MatchOperand_NoMatch;
2773       }
2774 
2775       Parser.Lex();
2776       if (getLexer().isNot(AsmToken::Colon))
2777         return MatchOperand_ParseFail;
2778 
2779       Parser.Lex();
2780 
2781       bool IsMinus = false;
2782       if (getLexer().getKind() == AsmToken::Minus) {
2783         Parser.Lex();
2784         IsMinus = true;
2785       }
2786 
2787       if (getLexer().isNot(AsmToken::Integer))
2788         return MatchOperand_ParseFail;
2789 
2790       if (getParser().parseAbsoluteExpression(Int))
2791         return MatchOperand_ParseFail;
2792 
2793       if (IsMinus)
2794         Int = -Int;
2795       break;
2796     }
2797   }
2798   return MatchOperand_Success;
2799 }
2800 
2801 OperandMatchResultTy
2802 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
2803                                     AMDGPUOperand::ImmTy ImmTy,
2804                                     bool (*ConvertResult)(int64_t&)) {
2805   SMLoc S = Parser.getTok().getLoc();
2806   int64_t Value = 0;
2807 
2808   OperandMatchResultTy Res = parseIntWithPrefix(Prefix, Value);
2809   if (Res != MatchOperand_Success)
2810     return Res;
2811 
2812   if (ConvertResult && !ConvertResult(Value)) {
2813     return MatchOperand_ParseFail;
2814   }
2815 
2816   Operands.push_back(AMDGPUOperand::CreateImm(this, Value, S, ImmTy));
2817   return MatchOperand_Success;
2818 }
2819 
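// Parse a prefixed array of up to four 0/1 flags, e.g. op_sel:[0,1,0,1], and
// pack the bits into a single immediate operand.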
2820 OperandMatchResultTy AMDGPUAsmParser::parseOperandArrayWithPrefix(
2821   const char *Prefix,
2822   OperandVector &Operands,
2823   AMDGPUOperand::ImmTy ImmTy,
2824   bool (*ConvertResult)(int64_t&)) {
2825   StringRef Name = Parser.getTok().getString();
2826   if (!Name.equals(Prefix))
2827     return MatchOperand_NoMatch;
2828 
2829   Parser.Lex();
2830   if (getLexer().isNot(AsmToken::Colon))
2831     return MatchOperand_ParseFail;
2832 
2833   Parser.Lex();
2834   if (getLexer().isNot(AsmToken::LBrac))
2835     return MatchOperand_ParseFail;
2836   Parser.Lex();
2837 
2838   unsigned Val = 0;
2839   SMLoc S = Parser.getTok().getLoc();
2840 
2841   // FIXME: How to verify the number of elements matches the number of src
2842   // operands?
2843   for (int I = 0; I < 4; ++I) {
2844     if (I != 0) {
2845       if (getLexer().is(AsmToken::RBrac))
2846         break;
2847 
2848       if (getLexer().isNot(AsmToken::Comma))
2849         return MatchOperand_ParseFail;
2850       Parser.Lex();
2851     }
2852 
2853     if (getLexer().isNot(AsmToken::Integer))
2854       return MatchOperand_ParseFail;
2855 
2856     int64_t Op;
2857     if (getParser().parseAbsoluteExpression(Op))
2858       return MatchOperand_ParseFail;
2859 
2860     if (Op != 0 && Op != 1)
2861       return MatchOperand_ParseFail;
2862     Val |= (Op << I);
2863   }
2864 
2865   Parser.Lex();
2866   Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S, ImmTy));
2867   return MatchOperand_Success;
2868 }
2869 
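// Parse a named single-bit flag: the bare name (e.g. 'gds') sets the bit, the
// 'no'-prefixed form (e.g. 'nogds') clears it, and an absent flag at the end
// of the statement defaults to 0.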
2870 OperandMatchResultTy
2871 AMDGPUAsmParser::parseNamedBit(const char *Name, OperandVector &Operands,
2872                                AMDGPUOperand::ImmTy ImmTy) {
2873   int64_t Bit = 0;
2874   SMLoc S = Parser.getTok().getLoc();
2875 
2876   // If we are at the end of the statement, this is a default argument, so
2877   // use the default value.
2878   if (getLexer().isNot(AsmToken::EndOfStatement)) {
2879     switch(getLexer().getKind()) {
2880       case AsmToken::Identifier: {
2881         StringRef Tok = Parser.getTok().getString();
2882         if (Tok == Name) {
2883           Bit = 1;
2884           Parser.Lex();
2885         } else if (Tok.startswith("no") && Tok.endswith(Name)) {
2886           Bit = 0;
2887           Parser.Lex();
2888         } else {
2889           return MatchOperand_NoMatch;
2890         }
2891         break;
2892       }
2893       default:
2894         return MatchOperand_NoMatch;
2895     }
2896   }
2897 
2898   Operands.push_back(AMDGPUOperand::CreateImm(this, Bit, S, ImmTy));
2899   return MatchOperand_Success;
2900 }
2901 
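// Append an optional immediate operand to Inst, using the parsed value if the
// operand was present and the supplied default otherwise.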
2902 static void addOptionalImmOperand(
2903   MCInst& Inst, const OperandVector& Operands,
2904   AMDGPUAsmParser::OptionalImmIndexMap& OptionalIdx,
2905   AMDGPUOperand::ImmTy ImmT,
2906   int64_t Default = 0) {
2907   auto i = OptionalIdx.find(ImmT);
2908   if (i != OptionalIdx.end()) {
2909     unsigned Idx = i->second;
2910     ((AMDGPUOperand &)*Operands[Idx]).addImmOperands(Inst, 1);
2911   } else {
2912     Inst.addOperand(MCOperand::createImm(Default));
2913   }
2914 }
2915 
2916 OperandMatchResultTy
2917 AMDGPUAsmParser::parseStringWithPrefix(StringRef Prefix, StringRef &Value) {
2918   if (getLexer().isNot(AsmToken::Identifier)) {
2919     return MatchOperand_NoMatch;
2920   }
2921   StringRef Tok = Parser.getTok().getString();
2922   if (Tok != Prefix) {
2923     return MatchOperand_NoMatch;
2924   }
2925 
2926   Parser.Lex();
2927   if (getLexer().isNot(AsmToken::Colon)) {
2928     return MatchOperand_ParseFail;
2929   }
2930 
2931   Parser.Lex();
2932   if (getLexer().isNot(AsmToken::Identifier)) {
2933     return MatchOperand_ParseFail;
2934   }
2935 
2936   Value = Parser.getTok().getString();
2937   return MatchOperand_Success;
2938 }
2939 
2940 //===----------------------------------------------------------------------===//
2941 // ds
2942 //===----------------------------------------------------------------------===//
2943 
2944 void AMDGPUAsmParser::cvtDSOffset01(MCInst &Inst,
2945                                     const OperandVector &Operands) {
2946   OptionalImmIndexMap OptionalIdx;
2947 
2948   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
2949     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
2950 
2951     // Add the register arguments
2952     if (Op.isReg()) {
2953       Op.addRegOperands(Inst, 1);
2954       continue;
2955     }
2956 
2957     // Handle optional arguments
2958     OptionalIdx[Op.getImmTy()] = i;
2959   }
2960 
2961   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset0);
2962   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset1);
2963   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS);
2964 
2965   Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0
2966 }
2967 
2968 void AMDGPUAsmParser::cvtDSImpl(MCInst &Inst, const OperandVector &Operands,
2969                                 bool IsGdsHardcoded) {
2970   OptionalImmIndexMap OptionalIdx;
2971 
2972   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
2973     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
2974 
2975     // Add the register arguments
2976     if (Op.isReg()) {
2977       Op.addRegOperands(Inst, 1);
2978       continue;
2979     }
2980 
2981     if (Op.isToken() && Op.getToken() == "gds") {
2982       IsGdsHardcoded = true;
2983       continue;
2984     }
2985 
2986     // Handle optional arguments
2987     OptionalIdx[Op.getImmTy()] = i;
2988   }
2989 
2990   AMDGPUOperand::ImmTy OffsetType =
2991     (Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_si ||
2992      Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_vi) ? AMDGPUOperand::ImmTySwizzle :
2993                                                       AMDGPUOperand::ImmTyOffset;
2994 
2995   addOptionalImmOperand(Inst, Operands, OptionalIdx, OffsetType);
2996 
2997   if (!IsGdsHardcoded) {
2998     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS);
2999   }
3000   Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0
3001 }
3002 
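// Convert parsed 'exp' operands into an MCInst. The enable mask is computed
// from which of the four sources are real registers rather than 'off'; with
// compr the sources are enabled in pairs.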
3003 void AMDGPUAsmParser::cvtExp(MCInst &Inst, const OperandVector &Operands) {
3004   OptionalImmIndexMap OptionalIdx;
3005 
3006   unsigned OperandIdx[4];
3007   unsigned EnMask = 0;
3008   int SrcIdx = 0;
3009 
3010   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
3011     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
3012 
3013     // Add the register arguments
3014     if (Op.isReg()) {
3015       assert(SrcIdx < 4);
3016       OperandIdx[SrcIdx] = Inst.size();
3017       Op.addRegOperands(Inst, 1);
3018       ++SrcIdx;
3019       continue;
3020     }
3021 
3022     if (Op.isOff()) {
3023       assert(SrcIdx < 4);
3024       OperandIdx[SrcIdx] = Inst.size();
3025       Inst.addOperand(MCOperand::createReg(AMDGPU::NoRegister));
3026       ++SrcIdx;
3027       continue;
3028     }
3029 
3030     if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyExpTgt) {
3031       Op.addImmOperands(Inst, 1);
3032       continue;
3033     }
3034 
3035     if (Op.isToken() && Op.getToken() == "done")
3036       continue;
3037 
3038     // Handle optional arguments
3039     OptionalIdx[Op.getImmTy()] = i;
3040   }
3041 
3042   assert(SrcIdx == 4);
3043 
3044   bool Compr = false;
3045   if (OptionalIdx.find(AMDGPUOperand::ImmTyExpCompr) != OptionalIdx.end()) {
3046     Compr = true;
3047     Inst.getOperand(OperandIdx[1]) = Inst.getOperand(OperandIdx[2]);
3048     Inst.getOperand(OperandIdx[2]).setReg(AMDGPU::NoRegister);
3049     Inst.getOperand(OperandIdx[3]).setReg(AMDGPU::NoRegister);
3050   }
3051 
3052   for (auto i = 0; i < SrcIdx; ++i) {
3053     if (Inst.getOperand(OperandIdx[i]).getReg() != AMDGPU::NoRegister) {
3054       EnMask |= Compr? (0x3 << i * 2) : (0x1 << i);
3055     }
3056   }
3057 
3058   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpVM);
3059   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpCompr);
3060 
3061   Inst.addOperand(MCOperand::createImm(EnMask));
3062 }
3063 
3064 //===----------------------------------------------------------------------===//
3065 // s_waitcnt
3066 //===----------------------------------------------------------------------===//
3067 
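// Encode one counter value into the packed s_waitcnt immediate. Returns true
// on failure, i.e. when the value does not fit into its field and the
// saturating '_sat' form was not requested.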
3068 static bool
3069 encodeCnt(
3070   const AMDGPU::IsaInfo::IsaVersion ISA,
3071   int64_t &IntVal,
3072   int64_t CntVal,
3073   bool Saturate,
3074   unsigned (*encode)(const IsaInfo::IsaVersion &Version, unsigned, unsigned),
3075   unsigned (*decode)(const IsaInfo::IsaVersion &Version, unsigned))
3076 {
3077   bool Failed = false;
3078 
3079   IntVal = encode(ISA, IntVal, CntVal);
3080   if (CntVal != decode(ISA, IntVal)) {
3081     if (Saturate) {
3082       IntVal = encode(ISA, IntVal, -1);
3083     } else {
3084       Failed = true;
3085     }
3086   }
3087   return Failed;
3088 }
3089 
3090 bool AMDGPUAsmParser::parseCnt(int64_t &IntVal) {
3091   StringRef CntName = Parser.getTok().getString();
3092   int64_t CntVal;
3093 
3094   Parser.Lex();
3095   if (getLexer().isNot(AsmToken::LParen))
3096     return true;
3097 
3098   Parser.Lex();
3099   if (getLexer().isNot(AsmToken::Integer))
3100     return true;
3101 
3102   SMLoc ValLoc = Parser.getTok().getLoc();
3103   if (getParser().parseAbsoluteExpression(CntVal))
3104     return true;
3105 
3106   AMDGPU::IsaInfo::IsaVersion ISA =
3107       AMDGPU::IsaInfo::getIsaVersion(getFeatureBits());
3108 
3109   bool Failed = true;
3110   bool Sat = CntName.endswith("_sat");
3111 
3112   if (CntName == "vmcnt" || CntName == "vmcnt_sat") {
3113     Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeVmcnt, decodeVmcnt);
3114   } else if (CntName == "expcnt" || CntName == "expcnt_sat") {
3115     Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeExpcnt, decodeExpcnt);
3116   } else if (CntName == "lgkmcnt" || CntName == "lgkmcnt_sat") {
3117     Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeLgkmcnt, decodeLgkmcnt);
3118   }
3119 
3120   if (Failed) {
3121     Error(ValLoc, "too large value for " + CntName);
3122     return true;
3123   }
3124 
3125   if (getLexer().isNot(AsmToken::RParen)) {
3126     return true;
3127   }
3128 
3129   Parser.Lex();
3130   if (getLexer().is(AsmToken::Amp) || getLexer().is(AsmToken::Comma)) {
3131     const AsmToken NextToken = getLexer().peekTok();
3132     if (NextToken.is(AsmToken::Identifier)) {
3133       Parser.Lex();
3134     }
3135   }
3136 
3137   return false;
3138 }
3139 
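// Parse the s_waitcnt operand: either a raw integer or a list of named
// counters such as vmcnt(0), expcnt(0) and lgkmcnt(0), optionally joined by
// '&' or ','.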
3140 OperandMatchResultTy
3141 AMDGPUAsmParser::parseSWaitCntOps(OperandVector &Operands) {
3142   AMDGPU::IsaInfo::IsaVersion ISA =
3143       AMDGPU::IsaInfo::getIsaVersion(getFeatureBits());
3144   int64_t Waitcnt = getWaitcntBitMask(ISA);
3145   SMLoc S = Parser.getTok().getLoc();
3146 
3147   switch(getLexer().getKind()) {
3148     default: return MatchOperand_ParseFail;
3149     case AsmToken::Integer:
3150       // The operand can be an integer value.
3151       if (getParser().parseAbsoluteExpression(Waitcnt))
3152         return MatchOperand_ParseFail;
3153       break;
3154 
3155     case AsmToken::Identifier:
3156       do {
3157         if (parseCnt(Waitcnt))
3158           return MatchOperand_ParseFail;
3159       } while(getLexer().isNot(AsmToken::EndOfStatement));
3160       break;
3161   }
3162   Operands.push_back(AMDGPUOperand::CreateImm(this, Waitcnt, S));
3163   return MatchOperand_Success;
3164 }
3165 
3166 bool AMDGPUAsmParser::parseHwregConstruct(OperandInfoTy &HwReg, int64_t &Offset,
3167                                           int64_t &Width) {
3168   using namespace llvm::AMDGPU::Hwreg;
3169 
3170   if (Parser.getTok().getString() != "hwreg")
3171     return true;
3172   Parser.Lex();
3173 
3174   if (getLexer().isNot(AsmToken::LParen))
3175     return true;
3176   Parser.Lex();
3177 
3178   if (getLexer().is(AsmToken::Identifier)) {
3179     HwReg.IsSymbolic = true;
3180     HwReg.Id = ID_UNKNOWN_;
3181     const StringRef tok = Parser.getTok().getString();
3182     int Last = ID_SYMBOLIC_LAST_;
3183     if (isSI() || isCI() || isVI())
3184       Last = ID_SYMBOLIC_FIRST_GFX9_;
3185     for (int i = ID_SYMBOLIC_FIRST_; i < Last; ++i) {
3186       if (tok == IdSymbolic[i]) {
3187         HwReg.Id = i;
3188         break;
3189       }
3190     }
3191     Parser.Lex();
3192   } else {
3193     HwReg.IsSymbolic = false;
3194     if (getLexer().isNot(AsmToken::Integer))
3195       return true;
3196     if (getParser().parseAbsoluteExpression(HwReg.Id))
3197       return true;
3198   }
3199 
3200   if (getLexer().is(AsmToken::RParen)) {
3201     Parser.Lex();
3202     return false;
3203   }
3204 
3205   // optional params
3206   if (getLexer().isNot(AsmToken::Comma))
3207     return true;
3208   Parser.Lex();
3209 
3210   if (getLexer().isNot(AsmToken::Integer))
3211     return true;
3212   if (getParser().parseAbsoluteExpression(Offset))
3213     return true;
3214 
3215   if (getLexer().isNot(AsmToken::Comma))
3216     return true;
3217   Parser.Lex();
3218 
3219   if (getLexer().isNot(AsmToken::Integer))
3220     return true;
3221   if (getParser().parseAbsoluteExpression(Width))
3222     return true;
3223 
3224   if (getLexer().isNot(AsmToken::RParen))
3225     return true;
3226   Parser.Lex();
3227 
3228   return false;
3229 }
3230 
3231 OperandMatchResultTy AMDGPUAsmParser::parseHwreg(OperandVector &Operands) {
3232   using namespace llvm::AMDGPU::Hwreg;
3233 
3234   int64_t Imm16Val = 0;
3235   SMLoc S = Parser.getTok().getLoc();
3236 
3237   switch(getLexer().getKind()) {
3238     default: return MatchOperand_NoMatch;
3239     case AsmToken::Integer:
3240       // The operand can be an integer value.
3241       if (getParser().parseAbsoluteExpression(Imm16Val))
3242         return MatchOperand_NoMatch;
3243       if (Imm16Val < 0 || !isUInt<16>(Imm16Val)) {
3244         Error(S, "invalid immediate: only 16-bit values are legal");
3245         // Do not return an error code, but create an imm operand anyway and proceed
3246         // to the next operand, if any. That avoids unnecessary error messages.
3247       }
3248       break;
3249 
3250     case AsmToken::Identifier: {
3251         OperandInfoTy HwReg(ID_UNKNOWN_);
3252         int64_t Offset = OFFSET_DEFAULT_;
3253         int64_t Width = WIDTH_M1_DEFAULT_ + 1;
3254         if (parseHwregConstruct(HwReg, Offset, Width))
3255           return MatchOperand_ParseFail;
3256         if (HwReg.Id < 0 || !isUInt<ID_WIDTH_>(HwReg.Id)) {
3257           if (HwReg.IsSymbolic)
3258             Error(S, "invalid symbolic name of hardware register");
3259           else
3260             Error(S, "invalid code of hardware register: only 6-bit values are legal");
3261         }
3262         if (Offset < 0 || !isUInt<OFFSET_WIDTH_>(Offset))
3263           Error(S, "invalid bit offset: only 5-bit values are legal");
3264         if ((Width-1) < 0 || !isUInt<WIDTH_M1_WIDTH_>(Width-1))
3265           Error(S, "invalid bitfield width: only values from 1 to 32 are legal");
3266         Imm16Val = (HwReg.Id << ID_SHIFT_) | (Offset << OFFSET_SHIFT_) | ((Width-1) << WIDTH_M1_SHIFT_);
3267       }
3268       break;
3269   }
3270   Operands.push_back(AMDGPUOperand::CreateImm(this, Imm16Val, S, AMDGPUOperand::ImmTyHwreg));
3271   return MatchOperand_Success;
3272 }
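
// Typical hwreg operand forms handled above (illustrative syntax; symbolic
// register names come from the Hwreg::IdSymbolic table):
//   s_getreg_b32 s0, hwreg(HW_REG_MODE)              ; whole register
//   s_getreg_b32 s0, hwreg(HW_REG_GPR_ALLOC, 0, 6)   ; bit offset 0, width 6
//   s_getreg_b32 s0, 0x1234                          ; raw 16-bit immediate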
3273 
3274 bool AMDGPUOperand::isSWaitCnt() const {
3275   return isImm();
3276 }
3277 
3278 bool AMDGPUOperand::isHwreg() const {
3279   return isImmTy(ImmTyHwreg);
3280 }
3281 
3282 bool AMDGPUAsmParser::parseSendMsgConstruct(OperandInfoTy &Msg, OperandInfoTy &Operation, int64_t &StreamId) {
3283   using namespace llvm::AMDGPU::SendMsg;
3284 
3285   if (Parser.getTok().getString() != "sendmsg")
3286     return true;
3287   Parser.Lex();
3288 
3289   if (getLexer().isNot(AsmToken::LParen))
3290     return true;
3291   Parser.Lex();
3292 
3293   if (getLexer().is(AsmToken::Identifier)) {
3294     Msg.IsSymbolic = true;
3295     Msg.Id = ID_UNKNOWN_;
3296     const StringRef tok = Parser.getTok().getString();
3297     for (int i = ID_GAPS_FIRST_; i < ID_GAPS_LAST_; ++i) {
3298       switch(i) {
3299         default: continue; // Omit gaps.
3300         case ID_INTERRUPT: case ID_GS: case ID_GS_DONE:  case ID_SYSMSG: break;
3301       }
3302       if (tok == IdSymbolic[i]) {
3303         Msg.Id = i;
3304         break;
3305       }
3306     }
3307     Parser.Lex();
3308   } else {
3309     Msg.IsSymbolic = false;
3310     if (getLexer().isNot(AsmToken::Integer))
3311       return true;
3312     if (getParser().parseAbsoluteExpression(Msg.Id))
3313       return true;
3314     if (getLexer().is(AsmToken::Integer))
3315       if (getParser().parseAbsoluteExpression(Msg.Id))
3316         Msg.Id = ID_UNKNOWN_;
3317   }
3318   if (Msg.Id == ID_UNKNOWN_) // Don't know how to parse the rest.
3319     return false;
3320 
3321   if (!(Msg.Id == ID_GS || Msg.Id == ID_GS_DONE || Msg.Id == ID_SYSMSG)) {
3322     if (getLexer().isNot(AsmToken::RParen))
3323       return true;
3324     Parser.Lex();
3325     return false;
3326   }
3327 
3328   if (getLexer().isNot(AsmToken::Comma))
3329     return true;
3330   Parser.Lex();
3331 
3332   assert(Msg.Id == ID_GS || Msg.Id == ID_GS_DONE || Msg.Id == ID_SYSMSG);
3333   Operation.Id = ID_UNKNOWN_;
3334   if (getLexer().is(AsmToken::Identifier)) {
3335     Operation.IsSymbolic = true;
3336     const char* const *S = (Msg.Id == ID_SYSMSG) ? OpSysSymbolic : OpGsSymbolic;
3337     const int F = (Msg.Id == ID_SYSMSG) ? OP_SYS_FIRST_ : OP_GS_FIRST_;
3338     const int L = (Msg.Id == ID_SYSMSG) ? OP_SYS_LAST_ : OP_GS_LAST_;
3339     const StringRef Tok = Parser.getTok().getString();
3340     for (int i = F; i < L; ++i) {
3341       if (Tok == S[i]) {
3342         Operation.Id = i;
3343         break;
3344       }
3345     }
3346     Parser.Lex();
3347   } else {
3348     Operation.IsSymbolic = false;
3349     if (getLexer().isNot(AsmToken::Integer))
3350       return true;
3351     if (getParser().parseAbsoluteExpression(Operation.Id))
3352       return true;
3353   }
3354 
3355   if ((Msg.Id == ID_GS || Msg.Id == ID_GS_DONE) && Operation.Id != OP_GS_NOP) {
3356     // Stream id is optional.
3357     if (getLexer().is(AsmToken::RParen)) {
3358       Parser.Lex();
3359       return false;
3360     }
3361 
3362     if (getLexer().isNot(AsmToken::Comma))
3363       return true;
3364     Parser.Lex();
3365 
3366     if (getLexer().isNot(AsmToken::Integer))
3367       return true;
3368     if (getParser().parseAbsoluteExpression(StreamId))
3369       return true;
3370   }
3371 
3372   if (getLexer().isNot(AsmToken::RParen))
3373     return true;
3374   Parser.Lex();
3375   return false;
3376 }
3377 
3378 OperandMatchResultTy AMDGPUAsmParser::parseInterpSlot(OperandVector &Operands) {
3379   if (getLexer().getKind() != AsmToken::Identifier)
3380     return MatchOperand_NoMatch;
3381 
3382   StringRef Str = Parser.getTok().getString();
3383   int Slot = StringSwitch<int>(Str)
3384     .Case("p10", 0)
3385     .Case("p20", 1)
3386     .Case("p0", 2)
3387     .Default(-1);
3388 
3389   SMLoc S = Parser.getTok().getLoc();
3390   if (Slot == -1)
3391     return MatchOperand_ParseFail;
3392 
3393   Parser.Lex();
3394   Operands.push_back(AMDGPUOperand::CreateImm(this, Slot, S,
3395                                               AMDGPUOperand::ImmTyInterpSlot));
3396   return MatchOperand_Success;
3397 }
3398 
3399 OperandMatchResultTy AMDGPUAsmParser::parseInterpAttr(OperandVector &Operands) {
3400   if (getLexer().getKind() != AsmToken::Identifier)
3401     return MatchOperand_NoMatch;
3402 
3403   StringRef Str = Parser.getTok().getString();
3404   if (!Str.startswith("attr"))
3405     return MatchOperand_NoMatch;
3406 
3407   StringRef Chan = Str.take_back(2);
3408   int AttrChan = StringSwitch<int>(Chan)
3409     .Case(".x", 0)
3410     .Case(".y", 1)
3411     .Case(".z", 2)
3412     .Case(".w", 3)
3413     .Default(-1);
3414   if (AttrChan == -1)
3415     return MatchOperand_ParseFail;
3416 
3417   Str = Str.drop_back(2).drop_front(4);
3418 
3419   uint8_t Attr;
3420   if (Str.getAsInteger(10, Attr))
3421     return MatchOperand_ParseFail;
3422 
3423   SMLoc S = Parser.getTok().getLoc();
3424   Parser.Lex();
3425   if (Attr > 63) {
3426     Error(S, "out of bounds attr");
3427     return MatchOperand_Success;
3428   }
3429 
3430   SMLoc SChan = SMLoc::getFromPointer(Chan.data());
3431 
3432   Operands.push_back(AMDGPUOperand::CreateImm(this, Attr, S,
3433                                               AMDGPUOperand::ImmTyInterpAttr));
3434   Operands.push_back(AMDGPUOperand::CreateImm(this, AttrChan, SChan,
3435                                               AMDGPUOperand::ImmTyAttrChan));
3436   return MatchOperand_Success;
3437 }
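
// Typical interpolation operands handled by parseInterpSlot/parseInterpAttr
// above (illustrative syntax; attribute numbers must be in [0, 63]):
//   v_interp_mov_f32 v0, p10, attr0.x
//   v_interp_p1_f32  v1, v2,  attr31.w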
3438 
3439 void AMDGPUAsmParser::errorExpTgt() {
3440   Error(Parser.getTok().getLoc(), "invalid exp target");
3441 }
3442 
3443 OperandMatchResultTy AMDGPUAsmParser::parseExpTgtImpl(StringRef Str,
3444                                                       uint8_t &Val) {
3445   if (Str == "null") {
3446     Val = 9;
3447     return MatchOperand_Success;
3448   }
3449 
3450   if (Str.startswith("mrt")) {
3451     Str = Str.drop_front(3);
3452     if (Str == "z") { // == mrtz
3453       Val = 8;
3454       return MatchOperand_Success;
3455     }
3456 
3457     if (Str.getAsInteger(10, Val))
3458       return MatchOperand_ParseFail;
3459 
3460     if (Val > 7)
3461       errorExpTgt();
3462 
3463     return MatchOperand_Success;
3464   }
3465 
3466   if (Str.startswith("pos")) {
3467     Str = Str.drop_front(3);
3468     if (Str.getAsInteger(10, Val))
3469       return MatchOperand_ParseFail;
3470 
3471     if (Val > 3)
3472       errorExpTgt();
3473 
3474     Val += 12;
3475     return MatchOperand_Success;
3476   }
3477 
3478   if (Str.startswith("param")) {
3479     Str = Str.drop_front(5);
3480     if (Str.getAsInteger(10, Val))
3481       return MatchOperand_ParseFail;
3482 
3483     if (Val >= 32)
3484       errorExpTgt();
3485 
3486     Val += 32;
3487     return MatchOperand_Success;
3488   }
3489 
3490   if (Str.startswith("invalid_target_")) {
3491     Str = Str.drop_front(15);
3492     if (Str.getAsInteger(10, Val))
3493       return MatchOperand_ParseFail;
3494 
3495     errorExpTgt();
3496     return MatchOperand_Success;
3497   }
3498 
3499   return MatchOperand_NoMatch;
3500 }
3501 
3502 OperandMatchResultTy AMDGPUAsmParser::parseExpTgt(OperandVector &Operands) {
3503   uint8_t Val;
3504   StringRef Str = Parser.getTok().getString();
3505 
3506   auto Res = parseExpTgtImpl(Str, Val);
3507   if (Res != MatchOperand_Success)
3508     return Res;
3509 
3510   SMLoc S = Parser.getTok().getLoc();
3511   Parser.Lex();
3512 
3513   Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S,
3514                                               AMDGPUOperand::ImmTyExpTgt));
3515   return MatchOperand_Success;
3516 }
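
// Summary of the exp target encodings produced by parseExpTgtImpl above:
//   mrt0..mrt7 -> 0..7    mrtz -> 8    null -> 9
//   pos0..pos3 -> 12..15  param0..param31 -> 32..63
// e.g. (illustrative syntax) "exp mrt0 v0, v0, v0, v0" or "exp pos0 v3, v4, v5, v6 done".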
3517 
3518 OperandMatchResultTy
3519 AMDGPUAsmParser::parseSendMsgOp(OperandVector &Operands) {
3520   using namespace llvm::AMDGPU::SendMsg;
3521 
3522   int64_t Imm16Val = 0;
3523   SMLoc S = Parser.getTok().getLoc();
3524 
3525   switch(getLexer().getKind()) {
3526   default:
3527     return MatchOperand_NoMatch;
3528   case AsmToken::Integer:
3529     // The operand can be an integer value.
3530     if (getParser().parseAbsoluteExpression(Imm16Val))
3531       return MatchOperand_NoMatch;
3532     if (Imm16Val < 0 || !isUInt<16>(Imm16Val)) {
3533       Error(S, "invalid immediate: only 16-bit values are legal");
3534     // Do not return an error code, but create an imm operand anyway and proceed
3535     // to the next operand, if any. That avoids unnecessary error messages.
3536     }
3537     break;
3538   case AsmToken::Identifier: {
3539       OperandInfoTy Msg(ID_UNKNOWN_);
3540       OperandInfoTy Operation(OP_UNKNOWN_);
3541       int64_t StreamId = STREAM_ID_DEFAULT_;
3542       if (parseSendMsgConstruct(Msg, Operation, StreamId))
3543         return MatchOperand_ParseFail;
3544       do {
3545         // Validate and encode message ID.
3546         if (! ((ID_INTERRUPT <= Msg.Id && Msg.Id <= ID_GS_DONE)
3547                 || Msg.Id == ID_SYSMSG)) {
3548           if (Msg.IsSymbolic)
3549             Error(S, "invalid/unsupported symbolic name of message");
3550           else
3551             Error(S, "invalid/unsupported code of message");
3552           break;
3553         }
3554         Imm16Val = (Msg.Id << ID_SHIFT_);
3555         // Validate and encode operation ID.
3556         if (Msg.Id == ID_GS || Msg.Id == ID_GS_DONE) {
3557           if (! (OP_GS_FIRST_ <= Operation.Id && Operation.Id < OP_GS_LAST_)) {
3558             if (Operation.IsSymbolic)
3559               Error(S, "invalid symbolic name of GS_OP");
3560             else
3561               Error(S, "invalid code of GS_OP: only 2-bit values are legal");
3562             break;
3563           }
3564           if (Operation.Id == OP_GS_NOP
3565               && Msg.Id != ID_GS_DONE) {
3566             Error(S, "invalid GS_OP: NOP is for GS_DONE only");
3567             break;
3568           }
3569           Imm16Val |= (Operation.Id << OP_SHIFT_);
3570         }
3571         if (Msg.Id == ID_SYSMSG) {
3572           if (! (OP_SYS_FIRST_ <= Operation.Id && Operation.Id < OP_SYS_LAST_)) {
3573             if (Operation.IsSymbolic)
3574               Error(S, "invalid/unsupported symbolic name of SYSMSG_OP");
3575             else
3576               Error(S, "invalid/unsupported code of SYSMSG_OP");
3577             break;
3578           }
3579           Imm16Val |= (Operation.Id << OP_SHIFT_);
3580         }
3581         // Validate and encode stream ID.
3582         if ((Msg.Id == ID_GS || Msg.Id == ID_GS_DONE) && Operation.Id != OP_GS_NOP) {
3583           if (! (STREAM_ID_FIRST_ <= StreamId && StreamId < STREAM_ID_LAST_)) {
3584             Error(S, "invalid stream id: only 2-bit values are legal");
3585             break;
3586           }
3587           Imm16Val |= (StreamId << STREAM_ID_SHIFT_);
3588         }
3589       } while (false);
3590     }
3591     break;
3592   }
3593   Operands.push_back(AMDGPUOperand::CreateImm(this, Imm16Val, S, AMDGPUOperand::ImmTySendMsg));
3594   return MatchOperand_Success;
3595 }
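
// Typical sendmsg operand forms accepted above (illustrative syntax; symbolic
// names come from the SendMsg::IdSymbolic/OpGsSymbolic/OpSysSymbolic tables):
//   s_sendmsg sendmsg(MSG_INTERRUPT)
//   s_sendmsg sendmsg(MSG_GS, GS_OP_EMIT, 0)
//   s_sendmsg sendmsg(MSG_SYSMSG, SYSMSG_OP_ECC_ERR_INTERRUPT)
//   s_sendmsg 0x22                  ; a raw 16-bit immediate is also accepted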
3596 
3597 bool AMDGPUOperand::isSendMsg() const {
3598   return isImmTy(ImmTySendMsg);
3599 }
3600 
3601 //===----------------------------------------------------------------------===//
3602 // parser helpers
3603 //===----------------------------------------------------------------------===//
3604 
3605 bool
3606 AMDGPUAsmParser::trySkipId(const StringRef Id) {
3607   if (getLexer().getKind() == AsmToken::Identifier &&
3608       Parser.getTok().getString() == Id) {
3609     Parser.Lex();
3610     return true;
3611   }
3612   return false;
3613 }
3614 
3615 bool
3616 AMDGPUAsmParser::trySkipToken(const AsmToken::TokenKind Kind) {
3617   if (getLexer().getKind() == Kind) {
3618     Parser.Lex();
3619     return true;
3620   }
3621   return false;
3622 }
3623 
3624 bool
3625 AMDGPUAsmParser::skipToken(const AsmToken::TokenKind Kind,
3626                            const StringRef ErrMsg) {
3627   if (!trySkipToken(Kind)) {
3628     Error(Parser.getTok().getLoc(), ErrMsg);
3629     return false;
3630   }
3631   return true;
3632 }
3633 
3634 bool
3635 AMDGPUAsmParser::parseExpr(int64_t &Imm) {
3636   return !getParser().parseAbsoluteExpression(Imm);
3637 }
3638 
3639 bool
3640 AMDGPUAsmParser::parseString(StringRef &Val, const StringRef ErrMsg) {
3641   SMLoc S = Parser.getTok().getLoc();
3642   if (getLexer().getKind() == AsmToken::String) {
3643     Val = Parser.getTok().getStringContents();
3644     Parser.Lex();
3645     return true;
3646   } else {
3647     Error(S, ErrMsg);
3648     return false;
3649   }
3650 }
3651 
3652 //===----------------------------------------------------------------------===//
3653 // swizzle
3654 //===----------------------------------------------------------------------===//
3655 
3656 LLVM_READNONE
3657 static unsigned
3658 encodeBitmaskPerm(const unsigned AndMask,
3659                   const unsigned OrMask,
3660                   const unsigned XorMask) {
3661   using namespace llvm::AMDGPU::Swizzle;
3662 
3663   return BITMASK_PERM_ENC |
3664          (AndMask << BITMASK_AND_SHIFT) |
3665          (OrMask  << BITMASK_OR_SHIFT)  |
3666          (XorMask << BITMASK_XOR_SHIFT);
3667 }
3668 
3669 bool
3670 AMDGPUAsmParser::parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
3671                                       const unsigned MinVal,
3672                                       const unsigned MaxVal,
3673                                       const StringRef ErrMsg) {
3674   for (unsigned i = 0; i < OpNum; ++i) {
3675     if (!skipToken(AsmToken::Comma, "expected a comma")){
3676       return false;
3677     }
3678     SMLoc ExprLoc = Parser.getTok().getLoc();
3679     if (!parseExpr(Op[i])) {
3680       return false;
3681     }
3682     if (Op[i] < MinVal || Op[i] > MaxVal) {
3683       Error(ExprLoc, ErrMsg);
3684       return false;
3685     }
3686   }
3687 
3688   return true;
3689 }
3690 
3691 bool
3692 AMDGPUAsmParser::parseSwizzleQuadPerm(int64_t &Imm) {
3693   using namespace llvm::AMDGPU::Swizzle;
3694 
3695   int64_t Lane[LANE_NUM];
3696   if (parseSwizzleOperands(LANE_NUM, Lane, 0, LANE_MAX,
3697                            "expected a 2-bit lane id")) {
3698     Imm = QUAD_PERM_ENC;
3699     for (auto i = 0; i < LANE_NUM; ++i) {
3700       Imm |= Lane[i] << (LANE_SHIFT * i);
3701     }
3702     return true;
3703   }
3704   return false;
3705 }
3706 
3707 bool
3708 AMDGPUAsmParser::parseSwizzleBroadcast(int64_t &Imm) {
3709   using namespace llvm::AMDGPU::Swizzle;
3710 
3711   SMLoc S = Parser.getTok().getLoc();
3712   int64_t GroupSize;
3713   int64_t LaneIdx;
3714 
3715   if (!parseSwizzleOperands(1, &GroupSize,
3716                             2, 32,
3717                             "group size must be in the interval [2,32]")) {
3718     return false;
3719   }
3720   if (!isPowerOf2_64(GroupSize)) {
3721     Error(S, "group size must be a power of two");
3722     return false;
3723   }
3724   if (parseSwizzleOperands(1, &LaneIdx,
3725                            0, GroupSize - 1,
3726                            "lane id must be in the interval [0,group size - 1]")) {
3727     Imm = encodeBitmaskPerm(BITMASK_MAX - GroupSize + 1, LaneIdx, 0);
3728     return true;
3729   }
3730   return false;
3731 }
3732 
3733 bool
3734 AMDGPUAsmParser::parseSwizzleReverse(int64_t &Imm) {
3735   using namespace llvm::AMDGPU::Swizzle;
3736 
3737   SMLoc S = Parser.getTok().getLoc();
3738   int64_t GroupSize;
3739 
3740   if (!parseSwizzleOperands(1, &GroupSize,
3741       2, 32, "group size must be in the interval [2,32]")) {
3742     return false;
3743   }
3744   if (!isPowerOf2_64(GroupSize)) {
3745     Error(S, "group size must be a power of two");
3746     return false;
3747   }
3748 
3749   Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize - 1);
3750   return true;
3751 }
3752 
3753 bool
3754 AMDGPUAsmParser::parseSwizzleSwap(int64_t &Imm) {
3755   using namespace llvm::AMDGPU::Swizzle;
3756 
3757   SMLoc S = Parser.getTok().getLoc();
3758   int64_t GroupSize;
3759 
3760   if (!parseSwizzleOperands(1, &GroupSize,
3761       1, 16, "group size must be in the interval [1,16]")) {
3762     return false;
3763   }
3764   if (!isPowerOf2_64(GroupSize)) {
3765     Error(S, "group size must be a power of two");
3766     return false;
3767   }
3768 
3769   Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize);
3770   return true;
3771 }
3772 
3773 bool
3774 AMDGPUAsmParser::parseSwizzleBitmaskPerm(int64_t &Imm) {
3775   using namespace llvm::AMDGPU::Swizzle;
3776 
3777   if (!skipToken(AsmToken::Comma, "expected a comma")) {
3778     return false;
3779   }
3780 
3781   StringRef Ctl;
3782   SMLoc StrLoc = Parser.getTok().getLoc();
3783   if (!parseString(Ctl)) {
3784     return false;
3785   }
3786   if (Ctl.size() != BITMASK_WIDTH) {
3787     Error(StrLoc, "expected a 5-character mask");
3788     return false;
3789   }
3790 
3791   unsigned AndMask = 0;
3792   unsigned OrMask = 0;
3793   unsigned XorMask = 0;
3794 
3795   for (size_t i = 0; i < Ctl.size(); ++i) {
3796     unsigned Mask = 1 << (BITMASK_WIDTH - 1 - i);
3797     switch(Ctl[i]) {
3798     default:
3799       Error(StrLoc, "invalid mask");
3800       return false;
3801     case '0':
3802       break;
3803     case '1':
3804       OrMask |= Mask;
3805       break;
3806     case 'p':
3807       AndMask |= Mask;
3808       break;
3809     case 'i':
3810       AndMask |= Mask;
3811       XorMask |= Mask;
3812       break;
3813     }
3814   }
3815 
3816   Imm = encodeBitmaskPerm(AndMask, OrMask, XorMask);
3817   return true;
3818 }
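
// The 5-character control string parsed above maps each lane-id bit (most
// significant bit first) to: '0' = force to 0, '1' = force to 1 (OrMask),
// 'p' = preserve (AndMask), 'i' = invert (AndMask | XorMask).
// Illustrative syntax: offset:swizzle(BITMASK_PERM, "00p11").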
3819 
3820 bool
3821 AMDGPUAsmParser::parseSwizzleOffset(int64_t &Imm) {
3822 
3823   SMLoc OffsetLoc = Parser.getTok().getLoc();
3824 
3825   if (!parseExpr(Imm)) {
3826     return false;
3827   }
3828   if (!isUInt<16>(Imm)) {
3829     Error(OffsetLoc, "expected a 16-bit offset");
3830     return false;
3831   }
3832   return true;
3833 }
3834 
3835 bool
3836 AMDGPUAsmParser::parseSwizzleMacro(int64_t &Imm) {
3837   using namespace llvm::AMDGPU::Swizzle;
3838 
3839   if (skipToken(AsmToken::LParen, "expected a left parenthesis")) {
3840 
3841     SMLoc ModeLoc = Parser.getTok().getLoc();
3842     bool Ok = false;
3843 
3844     if (trySkipId(IdSymbolic[ID_QUAD_PERM])) {
3845       Ok = parseSwizzleQuadPerm(Imm);
3846     } else if (trySkipId(IdSymbolic[ID_BITMASK_PERM])) {
3847       Ok = parseSwizzleBitmaskPerm(Imm);
3848     } else if (trySkipId(IdSymbolic[ID_BROADCAST])) {
3849       Ok = parseSwizzleBroadcast(Imm);
3850     } else if (trySkipId(IdSymbolic[ID_SWAP])) {
3851       Ok = parseSwizzleSwap(Imm);
3852     } else if (trySkipId(IdSymbolic[ID_REVERSE])) {
3853       Ok = parseSwizzleReverse(Imm);
3854     } else {
3855       Error(ModeLoc, "expected a swizzle mode");
3856     }
3857 
3858     return Ok && skipToken(AsmToken::RParen, "expected a closing parenthesis");
3859   }
3860 
3861   return false;
3862 }
3863 
3864 OperandMatchResultTy
3865 AMDGPUAsmParser::parseSwizzleOp(OperandVector &Operands) {
3866   SMLoc S = Parser.getTok().getLoc();
3867   int64_t Imm = 0;
3868 
3869   if (trySkipId("offset")) {
3870 
3871     bool Ok = false;
3872     if (skipToken(AsmToken::Colon, "expected a colon")) {
3873       if (trySkipId("swizzle")) {
3874         Ok = parseSwizzleMacro(Imm);
3875       } else {
3876         Ok = parseSwizzleOffset(Imm);
3877       }
3878     }
3879 
3880     Operands.push_back(AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTySwizzle));
3881 
3882     return Ok ? MatchOperand_Success : MatchOperand_ParseFail;
3883   } else {
3884     // Swizzle "offset" operand is optional.
3885     // If it is omitted, try parsing other optional operands.
3886     return parseOptionalOpr(Operands);
3887   }
3888 }
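
// Typical swizzle operands for ds_swizzle_b32 handled above (illustrative
// syntax; macro names come from the Swizzle::IdSymbolic table):
//   ds_swizzle_b32 v0, v1 offset:swizzle(QUAD_PERM, 0, 1, 2, 3)
//   ds_swizzle_b32 v0, v1 offset:swizzle(BROADCAST, 8, 0)
//   ds_swizzle_b32 v0, v1 offset:swizzle(SWAP, 2)
//   ds_swizzle_b32 v0, v1 offset:swizzle(REVERSE, 4)
//   ds_swizzle_b32 v0, v1 offset:0x8000        ; raw 16-bit offset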
3889 
3890 bool
3891 AMDGPUOperand::isSwizzle() const {
3892   return isImmTy(ImmTySwizzle);
3893 }
3894 
3895 //===----------------------------------------------------------------------===//
3896 // sopp branch targets
3897 //===----------------------------------------------------------------------===//
3898 
3899 OperandMatchResultTy
3900 AMDGPUAsmParser::parseSOppBrTarget(OperandVector &Operands) {
3901   SMLoc S = Parser.getTok().getLoc();
3902 
3903   switch (getLexer().getKind()) {
3904     default: return MatchOperand_ParseFail;
3905     case AsmToken::Integer: {
3906       int64_t Imm;
3907       if (getParser().parseAbsoluteExpression(Imm))
3908         return MatchOperand_ParseFail;
3909       Operands.push_back(AMDGPUOperand::CreateImm(this, Imm, S));
3910       return MatchOperand_Success;
3911     }
3912 
3913     case AsmToken::Identifier:
3914       Operands.push_back(AMDGPUOperand::CreateExpr(this,
3915           MCSymbolRefExpr::create(getContext().getOrCreateSymbol(
3916                                   Parser.getTok().getString()), getContext()), S));
3917       Parser.Lex();
3918       return MatchOperand_Success;
3919   }
3920 }
3921 
3922 //===----------------------------------------------------------------------===//
3923 // mubuf
3924 //===----------------------------------------------------------------------===//
3925 
3926 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultGLC() const {
3927   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyGLC);
3928 }
3929 
3930 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSLC() const {
3931   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTySLC);
3932 }
3933 
3934 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultTFE() const {
3935   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyTFE);
3936 }
3937 
3938 void AMDGPUAsmParser::cvtMubufImpl(MCInst &Inst,
3939                                const OperandVector &Operands,
3940                                bool IsAtomic, bool IsAtomicReturn) {
3941   OptionalImmIndexMap OptionalIdx;
3942   assert(IsAtomicReturn ? IsAtomic : true);
3943 
3944   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
3945     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
3946 
3947     // Add the register arguments
3948     if (Op.isReg()) {
3949       Op.addRegOperands(Inst, 1);
3950       continue;
3951     }
3952 
3953     // Handle the case where soffset is an immediate
3954     if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
3955       Op.addImmOperands(Inst, 1);
3956       continue;
3957     }
3958 
3959     // Handle tokens like 'offen' which are sometimes hard-coded into the
3960     // asm string.  There are no MCInst operands for these.
3961     if (Op.isToken()) {
3962       continue;
3963     }
3964     assert(Op.isImm());
3965 
3966     // Handle optional arguments
3967     OptionalIdx[Op.getImmTy()] = i;
3968   }
3969 
3970   // Copy $vdata_in operand and insert as $vdata for MUBUF_Atomic RTN insns.
3971   if (IsAtomicReturn) {
3972     MCInst::iterator I = Inst.begin(); // $vdata_in is always at the beginning.
3973     Inst.insert(I, *I);
3974   }
3975 
3976   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset);
3977   if (!IsAtomic) { // glc is hard-coded.
3978     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC);
3979   }
3980   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC);
3981   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
3982 }
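
// Typical MUBUF forms handled by cvtMubufImpl above (illustrative syntax); the
// optional offset/glc/slc/tfe operands may appear in any subset after the
// fixed operands:
//   buffer_load_dword v0, v1, s[4:7], s1 offen offset:16 glc slc
//   buffer_atomic_add v0, v1, s[4:7], s1 idxen glc    ; atomic "return" form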
3983 
3984 void AMDGPUAsmParser::cvtMtbuf(MCInst &Inst, const OperandVector &Operands) {
3985   OptionalImmIndexMap OptionalIdx;
3986 
3987   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
3988     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
3989 
3990     // Add the register arguments
3991     if (Op.isReg()) {
3992       Op.addRegOperands(Inst, 1);
3993       continue;
3994     }
3995 
3996     // Handle the case where soffset is an immediate
3997     if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
3998       Op.addImmOperands(Inst, 1);
3999       continue;
4000     }
4001 
4002     // Handle tokens like 'offen' which are sometimes hard-coded into the
4003     // asm string.  There are no MCInst operands for these.
4004     if (Op.isToken()) {
4005       continue;
4006     }
4007     assert(Op.isImm());
4008 
4009     // Handle optional arguments
4010     OptionalIdx[Op.getImmTy()] = i;
4011   }
4012 
4013   addOptionalImmOperand(Inst, Operands, OptionalIdx,
4014                         AMDGPUOperand::ImmTyOffset);
4015   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDFMT);
4016   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyNFMT);
4017   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC);
4018   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC);
4019   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
4020 }
4021 
4022 //===----------------------------------------------------------------------===//
4023 // mimg
4024 //===----------------------------------------------------------------------===//
4025 
4026 void AMDGPUAsmParser::cvtMIMG(MCInst &Inst, const OperandVector &Operands,
4027                               bool IsAtomic) {
4028   unsigned I = 1;
4029   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
4030   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
4031     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
4032   }
4033 
4034   if (IsAtomic) {
4035     // Add src, same as dst
4036     ((AMDGPUOperand &)*Operands[I]).addRegOperands(Inst, 1);
4037   }
4038 
4039   OptionalImmIndexMap OptionalIdx;
4040 
4041   for (unsigned E = Operands.size(); I != E; ++I) {
4042     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
4043 
4044     // Add the register arguments
4045     if (Op.isRegOrImm()) {
4046       Op.addRegOrImmOperands(Inst, 1);
4047       continue;
4048     } else if (Op.isImmModifier()) {
4049       OptionalIdx[Op.getImmTy()] = I;
4050     } else {
4051       llvm_unreachable("unexpected operand type");
4052     }
4053   }
4054 
4055   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDMask);
4056   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyUNorm);
4057   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC);
4058   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDA);
4059   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyR128);
4060   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
4061   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyLWE);
4062   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC);
4063 }
4064 
4065 void AMDGPUAsmParser::cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands) {
4066   cvtMIMG(Inst, Operands, true);
4067 }
4068 
4069 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultDMask() const {
4070   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDMask);
4071 }
4072 
4073 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultUNorm() const {
4074   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyUNorm);
4075 }
4076 
4077 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultDA() const {
4078   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDA);
4079 }
4080 
4081 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultR128() const {
4082   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyR128);
4083 }
4084 
4085 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultLWE() const {
4086   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyLWE);
4087 }
4088 
4089 //===----------------------------------------------------------------------===//
4090 // smrd
4091 //===----------------------------------------------------------------------===//
4092 
4093 bool AMDGPUOperand::isSMRDOffset8() const {
4094   return isImm() && isUInt<8>(getImm());
4095 }
4096 
4097 bool AMDGPUOperand::isSMRDOffset20() const {
4098   return isImm() && isUInt<20>(getImm());
4099 }
4100 
4101 bool AMDGPUOperand::isSMRDLiteralOffset() const {
4102   // 32-bit literals are only supported on CI, and we only want to use them
4103   // when the offset does not fit in 8 bits.
4104   return isImm() && !isUInt<8>(getImm()) && isUInt<32>(getImm());
4105 }
4106 
4107 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDOffset8() const {
4108   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
4109 }
4110 
4111 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDOffset20() const {
4112   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
4113 }
4114 
4115 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDLiteralOffset() const {
4116   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
4117 }
4118 
4119 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultOffsetU12() const {
4120   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
4121 }
4122 
4123 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultOffsetS13() const {
4124   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
4125 }
4126 
4127 //===----------------------------------------------------------------------===//
4128 // vop3
4129 //===----------------------------------------------------------------------===//
4130 
4131 static bool ConvertOmodMul(int64_t &Mul) {
4132   if (Mul != 1 && Mul != 2 && Mul != 4)
4133     return false;
4134 
4135   Mul >>= 1;
4136   return true;
4137 }
4138 
4139 static bool ConvertOmodDiv(int64_t &Div) {
4140   if (Div == 1) {
4141     Div = 0;
4142     return true;
4143   }
4144 
4145   if (Div == 2) {
4146     Div = 3;
4147     return true;
4148   }
4149 
4150   return false;
4151 }
4152 
4153 static bool ConvertBoundCtrl(int64_t &BoundCtrl) {
4154   if (BoundCtrl == 0) {
4155     BoundCtrl = 1;
4156     return true;
4157   }
4158 
4159   if (BoundCtrl == -1) {
4160     BoundCtrl = 0;
4161     return true;
4162   }
4163 
4164   return false;
4165 }
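
// The converters above map assembler values to encoded field values:
//   omod:       mul:2 -> 1, mul:4 -> 2, div:2 -> 3 (mul:1/div:1 -> 0)
//   bound_ctrl: bound_ctrl:0 -> 1 (a value of -1 is also accepted and maps to 0)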
4166 
4167 // Note: the order in this table matches the order of operands in AsmString.
4168 static const OptionalOperand AMDGPUOptionalOperandTable[] = {
4169   {"offen",   AMDGPUOperand::ImmTyOffen, true, nullptr},
4170   {"idxen",   AMDGPUOperand::ImmTyIdxen, true, nullptr},
4171   {"addr64",  AMDGPUOperand::ImmTyAddr64, true, nullptr},
4172   {"offset0", AMDGPUOperand::ImmTyOffset0, false, nullptr},
4173   {"offset1", AMDGPUOperand::ImmTyOffset1, false, nullptr},
4174   {"gds",     AMDGPUOperand::ImmTyGDS, true, nullptr},
4175   {"offset",  AMDGPUOperand::ImmTyOffset, false, nullptr},
4176   {"inst_offset", AMDGPUOperand::ImmTyInstOffset, false, nullptr},
4177   {"dfmt",    AMDGPUOperand::ImmTyDFMT, false, nullptr},
4178   {"nfmt",    AMDGPUOperand::ImmTyNFMT, false, nullptr},
4179   {"glc",     AMDGPUOperand::ImmTyGLC, true, nullptr},
4180   {"slc",     AMDGPUOperand::ImmTySLC, true, nullptr},
4181   {"tfe",     AMDGPUOperand::ImmTyTFE, true, nullptr},
4182   {"high",    AMDGPUOperand::ImmTyHigh, true, nullptr},
4183   {"clamp",   AMDGPUOperand::ImmTyClampSI, true, nullptr},
4184   {"omod",    AMDGPUOperand::ImmTyOModSI, false, ConvertOmodMul},
4185   {"unorm",   AMDGPUOperand::ImmTyUNorm, true, nullptr},
4186   {"da",      AMDGPUOperand::ImmTyDA,    true, nullptr},
4187   {"r128",    AMDGPUOperand::ImmTyR128,  true, nullptr},
4188   {"lwe",     AMDGPUOperand::ImmTyLWE,   true, nullptr},
4189   {"dmask",   AMDGPUOperand::ImmTyDMask, false, nullptr},
4190   {"row_mask",   AMDGPUOperand::ImmTyDppRowMask, false, nullptr},
4191   {"bank_mask",  AMDGPUOperand::ImmTyDppBankMask, false, nullptr},
4192   {"bound_ctrl", AMDGPUOperand::ImmTyDppBoundCtrl, false, ConvertBoundCtrl},
4193   {"dst_sel",    AMDGPUOperand::ImmTySdwaDstSel, false, nullptr},
4194   {"src0_sel",   AMDGPUOperand::ImmTySdwaSrc0Sel, false, nullptr},
4195   {"src1_sel",   AMDGPUOperand::ImmTySdwaSrc1Sel, false, nullptr},
4196   {"dst_unused", AMDGPUOperand::ImmTySdwaDstUnused, false, nullptr},
4197   {"compr", AMDGPUOperand::ImmTyExpCompr, true, nullptr },
4198   {"vm", AMDGPUOperand::ImmTyExpVM, true, nullptr},
4199   {"op_sel", AMDGPUOperand::ImmTyOpSel, false, nullptr},
4200   {"op_sel_hi", AMDGPUOperand::ImmTyOpSelHi, false, nullptr},
4201   {"neg_lo", AMDGPUOperand::ImmTyNegLo, false, nullptr},
4202   {"neg_hi", AMDGPUOperand::ImmTyNegHi, false, nullptr}
4203 };
4204 
4205 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOperand(OperandVector &Operands) {
4206   unsigned size = Operands.size();
4207   assert(size > 0);
4208 
4209   OperandMatchResultTy res = parseOptionalOpr(Operands);
4210 
4211   // This is a hack to enable hardcoded mandatory operands which follow
4212   // optional operands.
4213   //
4214   // The current design assumes that all operands after the first optional operand
4215   // are also optional. However, the implementation of some instructions violates
4216   // this rule (see e.g. flat/global atomics, which have hardcoded 'glc' operands).
4217   //
4218   // To alleviate this problem, we have to (implicitly) parse extra operands
4219   // to make sure the autogenerated parser of custom operands never hits hardcoded
4220   // mandatory operands.
4221 
4222   if (size == 1 || ((AMDGPUOperand &)*Operands[size - 1]).isRegKind()) {
4223 
4224     // We have parsed the first optional operand.
4225     // Parse as many operands as necessary to skip all mandatory operands.
4226 
4227     for (unsigned i = 0; i < MAX_OPR_LOOKAHEAD; ++i) {
4228       if (res != MatchOperand_Success ||
4229           getLexer().is(AsmToken::EndOfStatement)) break;
4230       if (getLexer().is(AsmToken::Comma)) Parser.Lex();
4231       res = parseOptionalOpr(Operands);
4232     }
4233   }
4234 
4235   return res;
4236 }
4237 
4238 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOpr(OperandVector &Operands) {
4239   OperandMatchResultTy res;
4240   for (const OptionalOperand &Op : AMDGPUOptionalOperandTable) {
4241     // try to parse any optional operand here
4242     if (Op.IsBit) {
4243       res = parseNamedBit(Op.Name, Operands, Op.Type);
4244     } else if (Op.Type == AMDGPUOperand::ImmTyOModSI) {
4245       res = parseOModOperand(Operands);
4246     } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstSel ||
4247                Op.Type == AMDGPUOperand::ImmTySdwaSrc0Sel ||
4248                Op.Type == AMDGPUOperand::ImmTySdwaSrc1Sel) {
4249       res = parseSDWASel(Operands, Op.Name, Op.Type);
4250     } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstUnused) {
4251       res = parseSDWADstUnused(Operands);
4252     } else if (Op.Type == AMDGPUOperand::ImmTyOpSel ||
4253                Op.Type == AMDGPUOperand::ImmTyOpSelHi ||
4254                Op.Type == AMDGPUOperand::ImmTyNegLo ||
4255                Op.Type == AMDGPUOperand::ImmTyNegHi) {
4256       res = parseOperandArrayWithPrefix(Op.Name, Operands, Op.Type,
4257                                         Op.ConvertResult);
4258     } else {
4259       res = parseIntWithPrefix(Op.Name, Operands, Op.Type, Op.ConvertResult);
4260     }
4261     if (res != MatchOperand_NoMatch) {
4262       return res;
4263     }
4264   }
4265   return MatchOperand_NoMatch;
4266 }
4267 
4268 OperandMatchResultTy AMDGPUAsmParser::parseOModOperand(OperandVector &Operands) {
4269   StringRef Name = Parser.getTok().getString();
4270   if (Name == "mul") {
4271     return parseIntWithPrefix("mul", Operands,
4272                               AMDGPUOperand::ImmTyOModSI, ConvertOmodMul);
4273   }
4274 
4275   if (Name == "div") {
4276     return parseIntWithPrefix("div", Operands,
4277                               AMDGPUOperand::ImmTyOModSI, ConvertOmodDiv);
4278   }
4279 
4280   return MatchOperand_NoMatch;
4281 }
4282 
4283 void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands) {
4284   cvtVOP3P(Inst, Operands);
4285 
4286   int Opc = Inst.getOpcode();
4287 
4288   int SrcNum;
4289   const int Ops[] = { AMDGPU::OpName::src0,
4290                       AMDGPU::OpName::src1,
4291                       AMDGPU::OpName::src2 };
4292   for (SrcNum = 0;
4293        SrcNum < 3 && AMDGPU::getNamedOperandIdx(Opc, Ops[SrcNum]) != -1;
4294        ++SrcNum);
4295   assert(SrcNum > 0);
4296 
4297   int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
4298   unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
4299 
4300   if ((OpSel & (1 << SrcNum)) != 0) {
4301     int ModIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers);
4302     uint32_t ModVal = Inst.getOperand(ModIdx).getImm();
4303     Inst.getOperand(ModIdx).setImm(ModVal | SISrcMods::DST_OP_SEL);
4304   }
4305 }
4306 
4307 static bool isRegOrImmWithInputMods(const MCInstrDesc &Desc, unsigned OpNum) {
4308       // 1. This operand is an input-modifiers operand,
4309   return Desc.OpInfo[OpNum].OperandType == AMDGPU::OPERAND_INPUT_MODS
4310       // 2. this is not the last operand,
4311       && Desc.NumOperands > (OpNum + 1)
4312       // 3. the next operand has a register class, and
4313       && Desc.OpInfo[OpNum + 1].RegClass != -1
4314       // 4. the next register is not tied to any other operand.
4315       && Desc.getOperandConstraint(OpNum + 1, MCOI::OperandConstraint::TIED_TO) == -1;
4316 }
4317 
4318 void AMDGPUAsmParser::cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands)
4319 {
4320   OptionalImmIndexMap OptionalIdx;
4321   unsigned Opc = Inst.getOpcode();
4322 
4323   unsigned I = 1;
4324   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
4325   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
4326     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
4327   }
4328 
4329   for (unsigned E = Operands.size(); I != E; ++I) {
4330     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
4331     if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
4332       Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
4333     } else if (Op.isInterpSlot() ||
4334                Op.isInterpAttr() ||
4335                Op.isAttrChan()) {
4336       Inst.addOperand(MCOperand::createImm(Op.Imm.Val));
4337     } else if (Op.isImmModifier()) {
4338       OptionalIdx[Op.getImmTy()] = I;
4339     } else {
4340       llvm_unreachable("unhandled operand type");
4341     }
4342   }
4343 
4344   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::high) != -1) {
4345     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyHigh);
4346   }
4347 
4348   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) {
4349     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI);
4350   }
4351 
4352   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) {
4353     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI);
4354   }
4355 }
4356 
4357 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands,
4358                               OptionalImmIndexMap &OptionalIdx) {
4359   unsigned Opc = Inst.getOpcode();
4360 
4361   unsigned I = 1;
4362   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
4363   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
4364     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
4365   }
4366 
4367   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers) != -1) {
4368     // This instruction has src modifiers
4369     for (unsigned E = Operands.size(); I != E; ++I) {
4370       AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
4371       if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
4372         Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
4373       } else if (Op.isImmModifier()) {
4374         OptionalIdx[Op.getImmTy()] = I;
4375       } else if (Op.isRegOrImm()) {
4376         Op.addRegOrImmOperands(Inst, 1);
4377       } else {
4378         llvm_unreachable("unhandled operand type");
4379       }
4380     }
4381   } else {
4382     // No src modifiers
4383     for (unsigned E = Operands.size(); I != E; ++I) {
4384       AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
4385       if (Op.isMod()) {
4386         OptionalIdx[Op.getImmTy()] = I;
4387       } else {
4388         Op.addRegOrImmOperands(Inst, 1);
4389       }
4390     }
4391   }
4392 
4393   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) {
4394     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI);
4395   }
4396 
4397   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) {
4398     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI);
4399   }
4400 
4401   // Special case v_mac_{f16, f32}:
4402   // it has a src2 register operand that is tied to the dst operand.
4403   // The assembler does not allow modifiers for this operand, so src2_modifiers
4404   // must be 0.
4405   if (Opc == AMDGPU::V_MAC_F32_e64_si || Opc == AMDGPU::V_MAC_F32_e64_vi ||
4406       Opc == AMDGPU::V_MAC_F16_e64_vi) {
4407     auto it = Inst.begin();
4408     std::advance(it, AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2_modifiers));
4409     it = Inst.insert(it, MCOperand::createImm(0)); // no modifiers for src2
4410     ++it;
4411     Inst.insert(it, Inst.getOperand(0)); // src2 = dst
4412   }
4413 }
4414 
4415 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands) {
4416   OptionalImmIndexMap OptionalIdx;
4417   cvtVOP3(Inst, Operands, OptionalIdx);
4418 }
4419 
4420 void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst,
4421                                const OperandVector &Operands) {
4422   OptionalImmIndexMap OptIdx;
4423   const int Opc = Inst.getOpcode();
4424   const MCInstrDesc &Desc = MII.get(Opc);
4425 
4426   const bool IsPacked = (Desc.TSFlags & SIInstrFlags::IsPacked) != 0;
4427 
4428   cvtVOP3(Inst, Operands, OptIdx);
4429 
4430   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst_in) != -1) {
4431     assert(!IsPacked);
4432     Inst.addOperand(Inst.getOperand(0));
4433   }
4434 
4435   // FIXME: This is messy. Parse the modifiers as if it were a normal VOP3
4436   // instruction, and then figure out where to actually put the modifiers.
4437 
4438   addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSel);
4439 
4440   int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi);
4441   if (OpSelHiIdx != -1) {
4442     int DefaultVal = IsPacked ? -1 : 0;
4443     addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSelHi,
4444                           DefaultVal);
4445   }
4446 
4447   int NegLoIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_lo);
4448   if (NegLoIdx != -1) {
4449     assert(IsPacked);
4450     addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegLo);
4451     addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegHi);
4452   }
4453 
4454   const int Ops[] = { AMDGPU::OpName::src0,
4455                       AMDGPU::OpName::src1,
4456                       AMDGPU::OpName::src2 };
4457   const int ModOps[] = { AMDGPU::OpName::src0_modifiers,
4458                          AMDGPU::OpName::src1_modifiers,
4459                          AMDGPU::OpName::src2_modifiers };
4460 
4461   int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
4462 
4463   unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
4464   unsigned OpSelHi = 0;
4465   unsigned NegLo = 0;
4466   unsigned NegHi = 0;
4467 
4468   if (OpSelHiIdx != -1) {
4469     OpSelHi = Inst.getOperand(OpSelHiIdx).getImm();
4470   }
4471 
4472   if (NegLoIdx != -1) {
4473     int NegHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_hi);
4474     NegLo = Inst.getOperand(NegLoIdx).getImm();
4475     NegHi = Inst.getOperand(NegHiIdx).getImm();
4476   }
4477 
4478   for (int J = 0; J < 3; ++J) {
4479     int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]);
4480     if (OpIdx == -1)
4481       break;
4482 
4483     uint32_t ModVal = 0;
4484 
4485     if ((OpSel & (1 << J)) != 0)
4486       ModVal |= SISrcMods::OP_SEL_0;
4487 
4488     if ((OpSelHi & (1 << J)) != 0)
4489       ModVal |= SISrcMods::OP_SEL_1;
4490 
4491     if ((NegLo & (1 << J)) != 0)
4492       ModVal |= SISrcMods::NEG;
4493 
4494     if ((NegHi & (1 << J)) != 0)
4495       ModVal |= SISrcMods::NEG_HI;
4496 
4497     int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]);
4498 
4499     Inst.getOperand(ModIdx).setImm(Inst.getOperand(ModIdx).getImm() | ModVal);
4500   }
4501 }
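
// Illustrative VOP3P source with the packed-math modifiers handled above:
//   v_pk_add_f16 v0, v1, v2 op_sel:[0,1] op_sel_hi:[1,0] neg_lo:[1,0] neg_hi:[0,1]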
4502 
4503 //===----------------------------------------------------------------------===//
4504 // dpp
4505 //===----------------------------------------------------------------------===//
4506 
4507 bool AMDGPUOperand::isDPPCtrl() const {
4508   bool result = isImm() && getImmTy() == ImmTyDppCtrl && isUInt<9>(getImm());
4509   if (result) {
4510     int64_t Imm = getImm();
4511     return ((Imm >= 0x000) && (Imm <= 0x0ff)) ||
4512            ((Imm >= 0x101) && (Imm <= 0x10f)) ||
4513            ((Imm >= 0x111) && (Imm <= 0x11f)) ||
4514            ((Imm >= 0x121) && (Imm <= 0x12f)) ||
4515            (Imm == 0x130) ||
4516            (Imm == 0x134) ||
4517            (Imm == 0x138) ||
4518            (Imm == 0x13c) ||
4519            (Imm == 0x140) ||
4520            (Imm == 0x141) ||
4521            (Imm == 0x142) ||
4522            (Imm == 0x143);
4523   }
4524   return false;
4525 }
4526 
4527 bool AMDGPUOperand::isGPRIdxMode() const {
4528   return isImm() && isUInt<4>(getImm());
4529 }
4530 
4531 bool AMDGPUOperand::isS16Imm() const {
4532   return isImm() && (isInt<16>(getImm()) || isUInt<16>(getImm()));
4533 }
4534 
4535 bool AMDGPUOperand::isU16Imm() const {
4536   return isImm() && isUInt<16>(getImm());
4537 }
4538 
4539 OperandMatchResultTy
4540 AMDGPUAsmParser::parseDPPCtrl(OperandVector &Operands) {
4541   SMLoc S = Parser.getTok().getLoc();
4542   StringRef Prefix;
4543   int64_t Int;
4544 
4545   if (getLexer().getKind() == AsmToken::Identifier) {
4546     Prefix = Parser.getTok().getString();
4547   } else {
4548     return MatchOperand_NoMatch;
4549   }
4550 
4551   if (Prefix == "row_mirror") {
4552     Int = 0x140;
4553     Parser.Lex();
4554   } else if (Prefix == "row_half_mirror") {
4555     Int = 0x141;
4556     Parser.Lex();
4557   } else {
4558     // Check to prevent parseDPPCtrl from eating invalid tokens
4559     if (Prefix != "quad_perm"
4560         && Prefix != "row_shl"
4561         && Prefix != "row_shr"
4562         && Prefix != "row_ror"
4563         && Prefix != "wave_shl"
4564         && Prefix != "wave_rol"
4565         && Prefix != "wave_shr"
4566         && Prefix != "wave_ror"
4567         && Prefix != "row_bcast") {
4568       return MatchOperand_NoMatch;
4569     }
4570 
4571     Parser.Lex();
4572     if (getLexer().isNot(AsmToken::Colon))
4573       return MatchOperand_ParseFail;
4574 
4575     if (Prefix == "quad_perm") {
4576       // quad_perm:[%d,%d,%d,%d]
4577       Parser.Lex();
4578       if (getLexer().isNot(AsmToken::LBrac))
4579         return MatchOperand_ParseFail;
4580       Parser.Lex();
4581 
4582       if (getParser().parseAbsoluteExpression(Int) || !(0 <= Int && Int <= 3))
4583         return MatchOperand_ParseFail;
4584 
4585       for (int i = 0; i < 3; ++i) {
4586         if (getLexer().isNot(AsmToken::Comma))
4587           return MatchOperand_ParseFail;
4588         Parser.Lex();
4589 
4590         int64_t Temp;
4591         if (getParser().parseAbsoluteExpression(Temp) || !(0 <= Temp && Temp <= 3))
4592           return MatchOperand_ParseFail;
4593         const int shift = i*2 + 2;
4594         Int += (Temp << shift);
4595       }
4596 
4597       if (getLexer().isNot(AsmToken::RBrac))
4598         return MatchOperand_ParseFail;
4599       Parser.Lex();
4600     } else {
4601       // sel:%d
4602       Parser.Lex();
4603       if (getParser().parseAbsoluteExpression(Int))
4604         return MatchOperand_ParseFail;
4605 
4606       if (Prefix == "row_shl" && 1 <= Int && Int <= 15) {
4607         Int |= 0x100;
4608       } else if (Prefix == "row_shr" && 1 <= Int && Int <= 15) {
4609         Int |= 0x110;
4610       } else if (Prefix == "row_ror" && 1 <= Int && Int <= 15) {
4611         Int |= 0x120;
4612       } else if (Prefix == "wave_shl" && 1 == Int) {
4613         Int = 0x130;
4614       } else if (Prefix == "wave_rol" && 1 == Int) {
4615         Int = 0x134;
4616       } else if (Prefix == "wave_shr" && 1 == Int) {
4617         Int = 0x138;
4618       } else if (Prefix == "wave_ror" && 1 == Int) {
4619         Int = 0x13C;
4620       } else if (Prefix == "row_bcast") {
4621         if (Int == 15) {
4622           Int = 0x142;
4623         } else if (Int == 31) {
4624           Int = 0x143;
4625         } else {
4626           return MatchOperand_ParseFail;
4627         }
4628       } else {
4629         return MatchOperand_ParseFail;
4630       }
4631     }
4632   }
4633 
4634   Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, AMDGPUOperand::ImmTyDppCtrl));
4635   return MatchOperand_Success;
4636 }
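
// dpp_ctrl encodings produced by parseDPPCtrl above (see the ranges checked in
// AMDGPUOperand::isDPPCtrl):
//   quad_perm:[a,b,c,d] -> 0x000..0x0ff    row_shl:1..15 -> 0x101..0x10f
//   row_shr:1..15 -> 0x111..0x11f          row_ror:1..15 -> 0x121..0x12f
//   wave_shl:1 -> 0x130   wave_rol:1 -> 0x134   wave_shr:1 -> 0x138   wave_ror:1 -> 0x13c
//   row_mirror -> 0x140   row_half_mirror -> 0x141   row_bcast:15 -> 0x142   row_bcast:31 -> 0x143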
4637 
4638 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultRowMask() const {
4639   return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppRowMask);
4640 }
4641 
4642 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBankMask() const {
4643   return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppBankMask);
4644 }
4645 
4646 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBoundCtrl() const {
4647   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppBoundCtrl);
4648 }
4649 
4650 void AMDGPUAsmParser::cvtDPP(MCInst &Inst, const OperandVector &Operands) {
4651   OptionalImmIndexMap OptionalIdx;
4652 
4653   unsigned I = 1;
4654   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
4655   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
4656     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
4657   }
4658 
4659   // All DPP instructions with at least one source operand have a fake "old"
4660   // source at the beginning that's tied to the dst operand. Handle it here.
4661   if (Desc.getNumOperands() >= 2)
4662     Inst.addOperand(Inst.getOperand(0));
4663 
4664   for (unsigned E = Operands.size(); I != E; ++I) {
4665     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
4666     // Add the register arguments
4667     if (Op.isReg() && Op.Reg.RegNo == AMDGPU::VCC) {
4668       // VOP2b (v_add_u32, v_sub_u32, ...) dpp instructions use the "vcc" token.
4669       // Skip it.
4670       continue;
4671     } else if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
4672       Op.addRegWithFPInputModsOperands(Inst, 2);
4673     } else if (Op.isDPPCtrl()) {
4674       Op.addImmOperands(Inst, 1);
4675     } else if (Op.isImm()) {
4676       // Handle optional arguments
4677       OptionalIdx[Op.getImmTy()] = I;
4678     } else {
4679       llvm_unreachable("Invalid operand type");
4680     }
4681   }
4682 
4683   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppRowMask, 0xf);
4684   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBankMask, 0xf);
4685   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBoundCtrl);
4686 }
4687 
4688 //===----------------------------------------------------------------------===//
4689 // sdwa
4690 //===----------------------------------------------------------------------===//
4691 
4692 OperandMatchResultTy
4693 AMDGPUAsmParser::parseSDWASel(OperandVector &Operands, StringRef Prefix,
4694                               AMDGPUOperand::ImmTy Type) {
4695   using namespace llvm::AMDGPU::SDWA;
4696 
4697   SMLoc S = Parser.getTok().getLoc();
4698   StringRef Value;
4699   OperandMatchResultTy res;
4700 
4701   res = parseStringWithPrefix(Prefix, Value);
4702   if (res != MatchOperand_Success) {
4703     return res;
4704   }
4705 
4706   int64_t Int;
4707   Int = StringSwitch<int64_t>(Value)
4708         .Case("BYTE_0", SdwaSel::BYTE_0)
4709         .Case("BYTE_1", SdwaSel::BYTE_1)
4710         .Case("BYTE_2", SdwaSel::BYTE_2)
4711         .Case("BYTE_3", SdwaSel::BYTE_3)
4712         .Case("WORD_0", SdwaSel::WORD_0)
4713         .Case("WORD_1", SdwaSel::WORD_1)
4714         .Case("DWORD", SdwaSel::DWORD)
4715         .Default(0xffffffff);
4716   Parser.Lex(); // eat last token
4717 
4718   if (Int == 0xffffffff) {
4719     return MatchOperand_ParseFail;
4720   }
4721 
4722   Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, Type));
4723   return MatchOperand_Success;
4724 }
4725 
4726 OperandMatchResultTy
4727 AMDGPUAsmParser::parseSDWADstUnused(OperandVector &Operands) {
4728   using namespace llvm::AMDGPU::SDWA;
4729 
4730   SMLoc S = Parser.getTok().getLoc();
4731   StringRef Value;
4732   OperandMatchResultTy res;
4733 
4734   res = parseStringWithPrefix("dst_unused", Value);
4735   if (res != MatchOperand_Success) {
4736     return res;
4737   }
4738 
4739   int64_t Int;
4740   Int = StringSwitch<int64_t>(Value)
4741         .Case("UNUSED_PAD", DstUnused::UNUSED_PAD)
4742         .Case("UNUSED_SEXT", DstUnused::UNUSED_SEXT)
4743         .Case("UNUSED_PRESERVE", DstUnused::UNUSED_PRESERVE)
4744         .Default(0xffffffff);
4745   Parser.Lex(); // eat last token
4746 
4747   if (Int == 0xffffffff) {
4748     return MatchOperand_ParseFail;
4749   }
4750 
4751   Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, AMDGPUOperand::ImmTySdwaDstUnused));
4752   return MatchOperand_Success;
4753 }
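
// Typical SDWA operands parsed by parseSDWASel/parseSDWADstUnused above
// (illustrative syntax):
//   v_mov_b32_sdwa v1, v2 dst_sel:BYTE_0 dst_unused:UNUSED_PRESERVE src0_sel:WORD_1
//   v_add_f32_sdwa v0, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_2 src1_sel:DWORD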
4754 
4755 void AMDGPUAsmParser::cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands) {
4756   cvtSDWA(Inst, Operands, SIInstrFlags::VOP1);
4757 }
4758 
4759 void AMDGPUAsmParser::cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands) {
4760   cvtSDWA(Inst, Operands, SIInstrFlags::VOP2);
4761 }
4762 
4763 void AMDGPUAsmParser::cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands) {
4764   cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, true);
4765 }
4766 
4767 void AMDGPUAsmParser::cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands) {
4768   cvtSDWA(Inst, Operands, SIInstrFlags::VOPC, isVI());
4769 }
4770 
4771 void AMDGPUAsmParser::cvtSDWA(MCInst &Inst, const OperandVector &Operands,
4772                               uint64_t BasicInstType, bool skipVcc) {
4773   using namespace llvm::AMDGPU::SDWA;
4774 
4775   OptionalImmIndexMap OptionalIdx;
4776   bool skippedVcc = false;
4777 
4778   unsigned I = 1;
4779   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
4780   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
4781     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
4782   }
4783 
4784   for (unsigned E = Operands.size(); I != E; ++I) {
4785     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
4786     if (skipVcc && !skippedVcc && Op.isReg() && Op.Reg.RegNo == AMDGPU::VCC) {
4787       // VOP2b (v_add_u32, v_sub_u32, ...) sdwa instructions use the "vcc" token as dst.
4788       // Skip it if it's 2nd (e.g. v_add_i32_sdwa v1, vcc, v2, v3)
4789       // or 4th (v_addc_u32_sdwa v1, vcc, v2, v3, vcc) operand.
4790       // Skip VCC only if we didn't skip it on previous iteration.
4791       if (BasicInstType == SIInstrFlags::VOP2 &&
4792           (Inst.getNumOperands() == 1 || Inst.getNumOperands() == 5)) {
4793         skippedVcc = true;
4794         continue;
4795       } else if (BasicInstType == SIInstrFlags::VOPC &&
4796                  Inst.getNumOperands() == 0) {
4797         skippedVcc = true;
4798         continue;
4799       }
4800     }
4801     if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
4802       Op.addRegWithInputModsOperands(Inst, 2);
4803     } else if (Op.isImm()) {
4804       // Handle optional arguments
4805       OptionalIdx[Op.getImmTy()] = I;
4806     } else {
4807       llvm_unreachable("Invalid operand type");
4808     }
4809     skippedVcc = false;
4810   }
4811 
4812   if (Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx9 &&
4813       Inst.getOpcode() != AMDGPU::V_NOP_sdwa_vi) {
    // v_nop_sdwa_vi/v_nop_sdwa_gfx9 have no optional sdwa arguments
4815     switch (BasicInstType) {
4816     case SIInstrFlags::VOP1:
4817       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
4818       if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) {
4819         addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
4820       }
4821       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD);
4822       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE);
4823       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
4824       break;
4825 
4826     case SIInstrFlags::VOP2:
4827       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
4828       if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) {
4829         addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
4830       }
4831       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD);
4832       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE);
4833       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
4834       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD);
4835       break;
4836 
4837     case SIInstrFlags::VOPC:
4838       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
4839       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
4840       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD);
4841       break;
4842 
4843     default:
4844       llvm_unreachable("Invalid instruction type. Only VOP1, VOP2 and VOPC allowed");
4845     }
4846   }
4847 
  // Special case for v_mac_{f16, f32}:
  // it has a src2 register operand that is tied to the dst operand.
4850   if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
4851       Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi)  {
4852     auto it = Inst.begin();
4853     std::advance(
4854       it, AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::src2));
4855     Inst.insert(it, Inst.getOperand(0)); // src2 = dst
4856   }
4857 }
4858 
4859 /// Force static initialization.
4860 extern "C" void LLVMInitializeAMDGPUAsmParser() {
4861   RegisterMCAsmParser<AMDGPUAsmParser> A(getTheAMDGPUTarget());
4862   RegisterMCAsmParser<AMDGPUAsmParser> B(getTheGCNTarget());
4863 }
4864 
4865 #define GET_REGISTER_MATCHER
4866 #define GET_MATCHER_IMPLEMENTATION
4867 #define GET_MNEMONIC_SPELL_CHECKER
4868 #include "AMDGPUGenAsmMatcher.inc"
4869 
// This function should be defined after the auto-generated include so that
// the MatchClassKind enum is defined.
4872 unsigned AMDGPUAsmParser::validateTargetOperandClass(MCParsedAsmOperand &Op,
4873                                                      unsigned Kind) {
  // Tokens like "glc" would be parsed as immediate operands in ParseOperand().
  // But MatchInstructionImpl() expects to see a token there and fails to
  // validate the operand. This method checks whether we were given an
  // immediate operand where the corresponding token was expected.
4878   AMDGPUOperand &Operand = (AMDGPUOperand&)Op;
4879   switch (Kind) {
4880   case MCK_addr64:
4881     return Operand.isAddr64() ? Match_Success : Match_InvalidOperand;
4882   case MCK_gds:
4883     return Operand.isGDS() ? Match_Success : Match_InvalidOperand;
4884   case MCK_glc:
4885     return Operand.isGLC() ? Match_Success : Match_InvalidOperand;
4886   case MCK_idxen:
4887     return Operand.isIdxen() ? Match_Success : Match_InvalidOperand;
4888   case MCK_offen:
4889     return Operand.isOffen() ? Match_Success : Match_InvalidOperand;
4890   case MCK_SSrcB32:
    // When operands have expression values, they will return true for isToken,
    // because it is not possible to distinguish between a token and an
    // expression at parse time. MatchInstructionImpl() will always try to
    // match an operand as a token when isToken returns true, and the match
    // will fail when the name of the expression is not a valid token, so we
    // need to handle that case here.
4897     return Operand.isSSrcB32() ? Match_Success : Match_InvalidOperand;
4898   case MCK_SSrcF32:
4899     return Operand.isSSrcF32() ? Match_Success : Match_InvalidOperand;
4900   case MCK_SoppBrTarget:
4901     return Operand.isSoppBrTarget() ? Match_Success : Match_InvalidOperand;
4902   case MCK_VReg32OrOff:
4903     return Operand.isVReg32OrOff() ? Match_Success : Match_InvalidOperand;
4904   case MCK_InterpSlot:
4905     return Operand.isInterpSlot() ? Match_Success : Match_InvalidOperand;
4906   case MCK_Attr:
4907     return Operand.isInterpAttr() ? Match_Success : Match_InvalidOperand;
4908   case MCK_AttrChan:
4909     return Operand.isAttrChan() ? Match_Success : Match_InvalidOperand;
4910   default:
4911     return Match_InvalidOperand;
4912   }
4913 }
4914