1 //===- AMDGPUAsmParser.cpp - Parse SI asm to MCInst instructions ----------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 
10 #include "AMDGPU.h"
11 #include "AMDKernelCodeT.h"
12 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
13 #include "MCTargetDesc/AMDGPUTargetStreamer.h"
14 #include "SIDefines.h"
15 #include "Utils/AMDGPUAsmUtils.h"
16 #include "Utils/AMDGPUBaseInfo.h"
17 #include "Utils/AMDKernelCodeTUtils.h"
18 #include "llvm/ADT/APFloat.h"
19 #include "llvm/ADT/APInt.h"
20 #include "llvm/ADT/ArrayRef.h"
21 #include "llvm/ADT/STLExtras.h"
22 #include "llvm/ADT/SmallBitVector.h"
23 #include "llvm/ADT/SmallString.h"
24 #include "llvm/ADT/StringRef.h"
25 #include "llvm/ADT/StringSwitch.h"
26 #include "llvm/ADT/Twine.h"
27 #include "llvm/BinaryFormat/ELF.h"
28 #include "llvm/CodeGen/MachineValueType.h"
29 #include "llvm/MC/MCAsmInfo.h"
30 #include "llvm/MC/MCContext.h"
31 #include "llvm/MC/MCExpr.h"
32 #include "llvm/MC/MCInst.h"
33 #include "llvm/MC/MCInstrDesc.h"
34 #include "llvm/MC/MCInstrInfo.h"
35 #include "llvm/MC/MCParser/MCAsmLexer.h"
36 #include "llvm/MC/MCParser/MCAsmParser.h"
37 #include "llvm/MC/MCParser/MCAsmParserExtension.h"
38 #include "llvm/MC/MCParser/MCParsedAsmOperand.h"
39 #include "llvm/MC/MCParser/MCTargetAsmParser.h"
40 #include "llvm/MC/MCRegisterInfo.h"
41 #include "llvm/MC/MCStreamer.h"
42 #include "llvm/MC/MCSubtargetInfo.h"
43 #include "llvm/MC/MCSymbol.h"
44 #include "llvm/Support/AMDGPUMetadata.h"
45 #include "llvm/Support/Casting.h"
46 #include "llvm/Support/Compiler.h"
47 #include "llvm/Support/ErrorHandling.h"
48 #include "llvm/Support/MathExtras.h"
49 #include "llvm/Support/SMLoc.h"
50 #include "llvm/Support/TargetRegistry.h"
51 #include "llvm/Support/raw_ostream.h"
52 #include <algorithm>
53 #include <cassert>
54 #include <cstdint>
55 #include <cstring>
56 #include <iterator>
57 #include <map>
58 #include <memory>
59 #include <string>
60 
61 using namespace llvm;
62 using namespace llvm::AMDGPU;
63 
64 namespace {
65 
66 class AMDGPUAsmParser;
67 
68 enum RegisterKind { IS_UNKNOWN, IS_VGPR, IS_SGPR, IS_TTMP, IS_SPECIAL };
69 
70 //===----------------------------------------------------------------------===//
71 // Operand
72 //===----------------------------------------------------------------------===//
73 
74 class AMDGPUOperand : public MCParsedAsmOperand {
75   enum KindTy {
76     Token,
77     Immediate,
78     Register,
79     Expression
80   } Kind;
81 
82   SMLoc StartLoc, EndLoc;
83   const AMDGPUAsmParser *AsmParser;
84 
85 public:
86   AMDGPUOperand(KindTy Kind_, const AMDGPUAsmParser *AsmParser_)
87     : MCParsedAsmOperand(), Kind(Kind_), AsmParser(AsmParser_) {}
88 
89   using Ptr = std::unique_ptr<AMDGPUOperand>;
90 
91   struct Modifiers {
92     bool Abs = false;
93     bool Neg = false;
94     bool Sext = false;
95 
96     bool hasFPModifiers() const { return Abs || Neg; }
97     bool hasIntModifiers() const { return Sext; }
98     bool hasModifiers() const { return hasFPModifiers() || hasIntModifiers(); }
99 
100     int64_t getFPModifiersOperand() const {
101       int64_t Operand = 0;
102       Operand |= Abs ? SISrcMods::ABS : 0;
103       Operand |= Neg ? SISrcMods::NEG : 0;
104       return Operand;
105     }
106 
107     int64_t getIntModifiersOperand() const {
108       int64_t Operand = 0;
109       Operand |= Sext ? SISrcMods::SEXT : 0;
110       return Operand;
111     }
112 
113     int64_t getModifiersOperand() const {
114       assert(!(hasFPModifiers() && hasIntModifiers())
115            && "fp and int modifiers should not be used simultaneously");
116       if (hasFPModifiers()) {
117         return getFPModifiersOperand();
118       } else if (hasIntModifiers()) {
119         return getIntModifiersOperand();
120       } else {
121         return 0;
122       }
123     }
124 
125     friend raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods);
126   };
127 
128   enum ImmTy {
129     ImmTyNone,
130     ImmTyGDS,
131     ImmTyOffen,
132     ImmTyIdxen,
133     ImmTyAddr64,
134     ImmTyOffset,
135     ImmTyInstOffset,
136     ImmTyOffset0,
137     ImmTyOffset1,
138     ImmTyGLC,
139     ImmTySLC,
140     ImmTyTFE,
141     ImmTyClampSI,
142     ImmTyOModSI,
143     ImmTyDppCtrl,
144     ImmTyDppRowMask,
145     ImmTyDppBankMask,
146     ImmTyDppBoundCtrl,
147     ImmTySdwaDstSel,
148     ImmTySdwaSrc0Sel,
149     ImmTySdwaSrc1Sel,
150     ImmTySdwaDstUnused,
151     ImmTyDMask,
152     ImmTyUNorm,
153     ImmTyDA,
154     ImmTyR128,
155     ImmTyLWE,
156     ImmTyExpTgt,
157     ImmTyExpCompr,
158     ImmTyExpVM,
159     ImmTyDFMT,
160     ImmTyNFMT,
161     ImmTyHwreg,
162     ImmTyOff,
163     ImmTySendMsg,
164     ImmTyInterpSlot,
165     ImmTyInterpAttr,
166     ImmTyAttrChan,
167     ImmTyOpSel,
168     ImmTyOpSelHi,
169     ImmTyNegLo,
170     ImmTyNegHi,
171     ImmTySwizzle,
172     ImmTyHigh
173   };
174 
175   struct TokOp {
176     const char *Data;
177     unsigned Length;
178   };
179 
180   struct ImmOp {
181     int64_t Val;
182     ImmTy Type;
183     bool IsFPImm;
184     Modifiers Mods;
185   };
186 
187   struct RegOp {
188     unsigned RegNo;
189     bool IsForcedVOP3;
190     Modifiers Mods;
191   };
192 
193   union {
194     TokOp Tok;
195     ImmOp Imm;
196     RegOp Reg;
197     const MCExpr *Expr;
198   };
199 
200   bool isToken() const override {
201     if (Kind == Token)
202       return true;
203 
204     if (Kind != Expression || !Expr)
205       return false;
206 
207     // When parsing operands, we can't always tell if something was meant to be
208     // a token, like 'gds', or an expression that references a global variable.
209     // In this case, we assume the string is an expression, and if we need to
210     // interpret is a token, then we treat the symbol name as the token.
211     return isa<MCSymbolRefExpr>(Expr);
212   }
213 
214   bool isImm() const override {
215     return Kind == Immediate;
216   }
217 
218   bool isInlinableImm(MVT type) const;
219   bool isLiteralImm(MVT type) const;
220 
221   bool isRegKind() const {
222     return Kind == Register;
223   }
224 
225   bool isReg() const override {
226     return isRegKind() && !hasModifiers();
227   }
228 
229   bool isRegOrImmWithInputMods(MVT type) const {
230     return isRegKind() || isInlinableImm(type);
231   }
232 
233   bool isRegOrImmWithInt16InputMods() const {
234     return isRegOrImmWithInputMods(MVT::i16);
235   }
236 
237   bool isRegOrImmWithInt32InputMods() const {
238     return isRegOrImmWithInputMods(MVT::i32);
239   }
240 
241   bool isRegOrImmWithInt64InputMods() const {
242     return isRegOrImmWithInputMods(MVT::i64);
243   }
244 
245   bool isRegOrImmWithFP16InputMods() const {
246     return isRegOrImmWithInputMods(MVT::f16);
247   }
248 
249   bool isRegOrImmWithFP32InputMods() const {
250     return isRegOrImmWithInputMods(MVT::f32);
251   }
252 
253   bool isRegOrImmWithFP64InputMods() const {
254     return isRegOrImmWithInputMods(MVT::f64);
255   }
256 
257   bool isVReg() const {
258     return isRegClass(AMDGPU::VGPR_32RegClassID) ||
259            isRegClass(AMDGPU::VReg_64RegClassID) ||
260            isRegClass(AMDGPU::VReg_96RegClassID) ||
261            isRegClass(AMDGPU::VReg_128RegClassID) ||
262            isRegClass(AMDGPU::VReg_256RegClassID) ||
263            isRegClass(AMDGPU::VReg_512RegClassID);
264   }
265 
266   bool isVReg32OrOff() const {
267     return isOff() || isRegClass(AMDGPU::VGPR_32RegClassID);
268   }
269 
270   bool isSDWARegKind() const;
271 
272   bool isImmTy(ImmTy ImmT) const {
273     return isImm() && Imm.Type == ImmT;
274   }
275 
276   bool isImmModifier() const {
277     return isImm() && Imm.Type != ImmTyNone;
278   }
279 
280   bool isClampSI() const { return isImmTy(ImmTyClampSI); }
281   bool isOModSI() const { return isImmTy(ImmTyOModSI); }
282   bool isDMask() const { return isImmTy(ImmTyDMask); }
283   bool isUNorm() const { return isImmTy(ImmTyUNorm); }
284   bool isDA() const { return isImmTy(ImmTyDA); }
285   bool isR128() const { return isImmTy(ImmTyUNorm); }
286   bool isLWE() const { return isImmTy(ImmTyLWE); }
287   bool isOff() const { return isImmTy(ImmTyOff); }
288   bool isExpTgt() const { return isImmTy(ImmTyExpTgt); }
289   bool isExpVM() const { return isImmTy(ImmTyExpVM); }
290   bool isExpCompr() const { return isImmTy(ImmTyExpCompr); }
291   bool isOffen() const { return isImmTy(ImmTyOffen); }
292   bool isIdxen() const { return isImmTy(ImmTyIdxen); }
293   bool isAddr64() const { return isImmTy(ImmTyAddr64); }
294   bool isOffset() const { return isImmTy(ImmTyOffset) && isUInt<16>(getImm()); }
295   bool isOffset0() const { return isImmTy(ImmTyOffset0) && isUInt<16>(getImm()); }
296   bool isOffset1() const { return isImmTy(ImmTyOffset1) && isUInt<8>(getImm()); }
297 
298   bool isOffsetU12() const { return (isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset)) && isUInt<12>(getImm()); }
299   bool isOffsetS13() const { return (isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset)) && isInt<13>(getImm()); }
300   bool isGDS() const { return isImmTy(ImmTyGDS); }
301   bool isGLC() const { return isImmTy(ImmTyGLC); }
302   bool isSLC() const { return isImmTy(ImmTySLC); }
303   bool isTFE() const { return isImmTy(ImmTyTFE); }
304   bool isDFMT() const { return isImmTy(ImmTyDFMT) && isUInt<8>(getImm()); }
305   bool isNFMT() const { return isImmTy(ImmTyNFMT) && isUInt<8>(getImm()); }
306   bool isBankMask() const { return isImmTy(ImmTyDppBankMask); }
307   bool isRowMask() const { return isImmTy(ImmTyDppRowMask); }
308   bool isBoundCtrl() const { return isImmTy(ImmTyDppBoundCtrl); }
309   bool isSDWADstSel() const { return isImmTy(ImmTySdwaDstSel); }
310   bool isSDWASrc0Sel() const { return isImmTy(ImmTySdwaSrc0Sel); }
311   bool isSDWASrc1Sel() const { return isImmTy(ImmTySdwaSrc1Sel); }
312   bool isSDWADstUnused() const { return isImmTy(ImmTySdwaDstUnused); }
313   bool isInterpSlot() const { return isImmTy(ImmTyInterpSlot); }
314   bool isInterpAttr() const { return isImmTy(ImmTyInterpAttr); }
315   bool isAttrChan() const { return isImmTy(ImmTyAttrChan); }
316   bool isOpSel() const { return isImmTy(ImmTyOpSel); }
317   bool isOpSelHi() const { return isImmTy(ImmTyOpSelHi); }
318   bool isNegLo() const { return isImmTy(ImmTyNegLo); }
319   bool isNegHi() const { return isImmTy(ImmTyNegHi); }
320   bool isHigh() const { return isImmTy(ImmTyHigh); }
321 
322   bool isMod() const {
323     return isClampSI() || isOModSI();
324   }
325 
326   bool isRegOrImm() const {
327     return isReg() || isImm();
328   }
329 
330   bool isRegClass(unsigned RCID) const;
331 
332   bool isRegOrInlineNoMods(unsigned RCID, MVT type) const {
333     return (isRegClass(RCID) || isInlinableImm(type)) && !hasModifiers();
334   }
335 
336   bool isSCSrcB16() const {
337     return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i16);
338   }
339 
340   bool isSCSrcV2B16() const {
341     return isSCSrcB16();
342   }
343 
344   bool isSCSrcB32() const {
345     return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i32);
346   }
347 
348   bool isSCSrcB64() const {
349     return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::i64);
350   }
351 
352   bool isSCSrcF16() const {
353     return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f16);
354   }
355 
356   bool isSCSrcV2F16() const {
357     return isSCSrcF16();
358   }
359 
360   bool isSCSrcF32() const {
361     return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f32);
362   }
363 
364   bool isSCSrcF64() const {
365     return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::f64);
366   }
367 
368   bool isSSrcB32() const {
369     return isSCSrcB32() || isLiteralImm(MVT::i32) || isExpr();
370   }
371 
372   bool isSSrcB16() const {
373     return isSCSrcB16() || isLiteralImm(MVT::i16);
374   }
375 
376   bool isSSrcV2B16() const {
377     llvm_unreachable("cannot happen");
378     return isSSrcB16();
379   }
380 
381   bool isSSrcB64() const {
382     // TODO: Find out how SALU supports extension of 32-bit literals to 64 bits.
383     // See isVSrc64().
384     return isSCSrcB64() || isLiteralImm(MVT::i64);
385   }
386 
387   bool isSSrcF32() const {
388     return isSCSrcB32() || isLiteralImm(MVT::f32) || isExpr();
389   }
390 
391   bool isSSrcF64() const {
392     return isSCSrcB64() || isLiteralImm(MVT::f64);
393   }
394 
395   bool isSSrcF16() const {
396     return isSCSrcB16() || isLiteralImm(MVT::f16);
397   }
398 
399   bool isSSrcV2F16() const {
400     llvm_unreachable("cannot happen");
401     return isSSrcF16();
402   }
403 
404   bool isVCSrcB32() const {
405     return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i32);
406   }
407 
408   bool isVCSrcB64() const {
409     return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::i64);
410   }
411 
412   bool isVCSrcB16() const {
413     return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i16);
414   }
415 
416   bool isVCSrcV2B16() const {
417     return isVCSrcB16();
418   }
419 
420   bool isVCSrcF32() const {
421     return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f32);
422   }
423 
424   bool isVCSrcF64() const {
425     return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::f64);
426   }
427 
428   bool isVCSrcF16() const {
429     return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f16);
430   }
431 
432   bool isVCSrcV2F16() const {
433     return isVCSrcF16();
434   }
435 
436   bool isVSrcB32() const {
437     return isVCSrcF32() || isLiteralImm(MVT::i32);
438   }
439 
440   bool isVSrcB64() const {
441     return isVCSrcF64() || isLiteralImm(MVT::i64);
442   }
443 
444   bool isVSrcB16() const {
445     return isVCSrcF16() || isLiteralImm(MVT::i16);
446   }
447 
448   bool isVSrcV2B16() const {
449     llvm_unreachable("cannot happen");
450     return isVSrcB16();
451   }
452 
453   bool isVSrcF32() const {
454     return isVCSrcF32() || isLiteralImm(MVT::f32);
455   }
456 
457   bool isVSrcF64() const {
458     return isVCSrcF64() || isLiteralImm(MVT::f64);
459   }
460 
461   bool isVSrcF16() const {
462     return isVCSrcF16() || isLiteralImm(MVT::f16);
463   }
464 
465   bool isVSrcV2F16() const {
466     llvm_unreachable("cannot happen");
467     return isVSrcF16();
468   }
469 
470   bool isKImmFP32() const {
471     return isLiteralImm(MVT::f32);
472   }
473 
474   bool isKImmFP16() const {
475     return isLiteralImm(MVT::f16);
476   }
477 
478   bool isMem() const override {
479     return false;
480   }
481 
482   bool isExpr() const {
483     return Kind == Expression;
484   }
485 
486   bool isSoppBrTarget() const {
487     return isExpr() || isImm();
488   }
489 
490   bool isSWaitCnt() const;
491   bool isHwreg() const;
492   bool isSendMsg() const;
493   bool isSwizzle() const;
494   bool isSMRDOffset8() const;
495   bool isSMRDOffset20() const;
496   bool isSMRDLiteralOffset() const;
497   bool isDPPCtrl() const;
498   bool isGPRIdxMode() const;
499   bool isS16Imm() const;
500   bool isU16Imm() const;
501 
502   StringRef getExpressionAsToken() const {
503     assert(isExpr());
504     const MCSymbolRefExpr *S = cast<MCSymbolRefExpr>(Expr);
505     return S->getSymbol().getName();
506   }
507 
508   StringRef getToken() const {
509     assert(isToken());
510 
511     if (Kind == Expression)
512       return getExpressionAsToken();
513 
514     return StringRef(Tok.Data, Tok.Length);
515   }
516 
517   int64_t getImm() const {
518     assert(isImm());
519     return Imm.Val;
520   }
521 
522   ImmTy getImmTy() const {
523     assert(isImm());
524     return Imm.Type;
525   }
526 
527   unsigned getReg() const override {
528     return Reg.RegNo;
529   }
530 
531   SMLoc getStartLoc() const override {
532     return StartLoc;
533   }
534 
535   SMLoc getEndLoc() const override {
536     return EndLoc;
537   }
538 
539   Modifiers getModifiers() const {
540     assert(isRegKind() || isImmTy(ImmTyNone));
541     return isRegKind() ? Reg.Mods : Imm.Mods;
542   }
543 
544   void setModifiers(Modifiers Mods) {
545     assert(isRegKind() || isImmTy(ImmTyNone));
546     if (isRegKind())
547       Reg.Mods = Mods;
548     else
549       Imm.Mods = Mods;
550   }
551 
552   bool hasModifiers() const {
553     return getModifiers().hasModifiers();
554   }
555 
556   bool hasFPModifiers() const {
557     return getModifiers().hasFPModifiers();
558   }
559 
560   bool hasIntModifiers() const {
561     return getModifiers().hasIntModifiers();
562   }
563 
564   uint64_t applyInputFPModifiers(uint64_t Val, unsigned Size) const;
565 
566   void addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers = true) const;
567 
568   void addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const;
569 
570   template <unsigned Bitwidth>
571   void addKImmFPOperands(MCInst &Inst, unsigned N) const;
572 
573   void addKImmFP16Operands(MCInst &Inst, unsigned N) const {
574     addKImmFPOperands<16>(Inst, N);
575   }
576 
577   void addKImmFP32Operands(MCInst &Inst, unsigned N) const {
578     addKImmFPOperands<32>(Inst, N);
579   }
580 
581   void addRegOperands(MCInst &Inst, unsigned N) const;
582 
583   void addRegOrImmOperands(MCInst &Inst, unsigned N) const {
584     if (isRegKind())
585       addRegOperands(Inst, N);
586     else if (isExpr())
587       Inst.addOperand(MCOperand::createExpr(Expr));
588     else
589       addImmOperands(Inst, N);
590   }
591 
592   void addRegOrImmWithInputModsOperands(MCInst &Inst, unsigned N) const {
593     Modifiers Mods = getModifiers();
594     Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
595     if (isRegKind()) {
596       addRegOperands(Inst, N);
597     } else {
598       addImmOperands(Inst, N, false);
599     }
600   }
601 
602   void addRegOrImmWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
603     assert(!hasIntModifiers());
604     addRegOrImmWithInputModsOperands(Inst, N);
605   }
606 
607   void addRegOrImmWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
608     assert(!hasFPModifiers());
609     addRegOrImmWithInputModsOperands(Inst, N);
610   }
611 
612   void addRegWithInputModsOperands(MCInst &Inst, unsigned N) const {
613     Modifiers Mods = getModifiers();
614     Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
615     assert(isRegKind());
616     addRegOperands(Inst, N);
617   }
618 
619   void addRegWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
620     assert(!hasIntModifiers());
621     addRegWithInputModsOperands(Inst, N);
622   }
623 
624   void addRegWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
625     assert(!hasFPModifiers());
626     addRegWithInputModsOperands(Inst, N);
627   }
628 
629   void addSoppBrTargetOperands(MCInst &Inst, unsigned N) const {
630     if (isImm())
631       addImmOperands(Inst, N);
632     else {
633       assert(isExpr());
634       Inst.addOperand(MCOperand::createExpr(Expr));
635     }
636   }
637 
638   static void printImmTy(raw_ostream& OS, ImmTy Type) {
639     switch (Type) {
640     case ImmTyNone: OS << "None"; break;
641     case ImmTyGDS: OS << "GDS"; break;
642     case ImmTyOffen: OS << "Offen"; break;
643     case ImmTyIdxen: OS << "Idxen"; break;
644     case ImmTyAddr64: OS << "Addr64"; break;
645     case ImmTyOffset: OS << "Offset"; break;
646     case ImmTyInstOffset: OS << "InstOffset"; break;
647     case ImmTyOffset0: OS << "Offset0"; break;
648     case ImmTyOffset1: OS << "Offset1"; break;
649     case ImmTyGLC: OS << "GLC"; break;
650     case ImmTySLC: OS << "SLC"; break;
651     case ImmTyTFE: OS << "TFE"; break;
652     case ImmTyDFMT: OS << "DFMT"; break;
653     case ImmTyNFMT: OS << "NFMT"; break;
654     case ImmTyClampSI: OS << "ClampSI"; break;
655     case ImmTyOModSI: OS << "OModSI"; break;
656     case ImmTyDppCtrl: OS << "DppCtrl"; break;
657     case ImmTyDppRowMask: OS << "DppRowMask"; break;
658     case ImmTyDppBankMask: OS << "DppBankMask"; break;
659     case ImmTyDppBoundCtrl: OS << "DppBoundCtrl"; break;
660     case ImmTySdwaDstSel: OS << "SdwaDstSel"; break;
661     case ImmTySdwaSrc0Sel: OS << "SdwaSrc0Sel"; break;
662     case ImmTySdwaSrc1Sel: OS << "SdwaSrc1Sel"; break;
663     case ImmTySdwaDstUnused: OS << "SdwaDstUnused"; break;
664     case ImmTyDMask: OS << "DMask"; break;
665     case ImmTyUNorm: OS << "UNorm"; break;
666     case ImmTyDA: OS << "DA"; break;
667     case ImmTyR128: OS << "R128"; break;
668     case ImmTyLWE: OS << "LWE"; break;
669     case ImmTyOff: OS << "Off"; break;
670     case ImmTyExpTgt: OS << "ExpTgt"; break;
671     case ImmTyExpCompr: OS << "ExpCompr"; break;
672     case ImmTyExpVM: OS << "ExpVM"; break;
673     case ImmTyHwreg: OS << "Hwreg"; break;
674     case ImmTySendMsg: OS << "SendMsg"; break;
675     case ImmTyInterpSlot: OS << "InterpSlot"; break;
676     case ImmTyInterpAttr: OS << "InterpAttr"; break;
677     case ImmTyAttrChan: OS << "AttrChan"; break;
678     case ImmTyOpSel: OS << "OpSel"; break;
679     case ImmTyOpSelHi: OS << "OpSelHi"; break;
680     case ImmTyNegLo: OS << "NegLo"; break;
681     case ImmTyNegHi: OS << "NegHi"; break;
682     case ImmTySwizzle: OS << "Swizzle"; break;
683     case ImmTyHigh: OS << "High"; break;
684     }
685   }
686 
687   void print(raw_ostream &OS) const override {
688     switch (Kind) {
689     case Register:
690       OS << "<register " << getReg() << " mods: " << Reg.Mods << '>';
691       break;
692     case Immediate:
693       OS << '<' << getImm();
694       if (getImmTy() != ImmTyNone) {
695         OS << " type: "; printImmTy(OS, getImmTy());
696       }
697       OS << " mods: " << Imm.Mods << '>';
698       break;
699     case Token:
700       OS << '\'' << getToken() << '\'';
701       break;
702     case Expression:
703       OS << "<expr " << *Expr << '>';
704       break;
705     }
706   }
707 
708   static AMDGPUOperand::Ptr CreateImm(const AMDGPUAsmParser *AsmParser,
709                                       int64_t Val, SMLoc Loc,
710                                       ImmTy Type = ImmTyNone,
711                                       bool IsFPImm = false) {
712     auto Op = llvm::make_unique<AMDGPUOperand>(Immediate, AsmParser);
713     Op->Imm.Val = Val;
714     Op->Imm.IsFPImm = IsFPImm;
715     Op->Imm.Type = Type;
716     Op->Imm.Mods = Modifiers();
717     Op->StartLoc = Loc;
718     Op->EndLoc = Loc;
719     return Op;
720   }
721 
722   static AMDGPUOperand::Ptr CreateToken(const AMDGPUAsmParser *AsmParser,
723                                         StringRef Str, SMLoc Loc,
724                                         bool HasExplicitEncodingSize = true) {
725     auto Res = llvm::make_unique<AMDGPUOperand>(Token, AsmParser);
726     Res->Tok.Data = Str.data();
727     Res->Tok.Length = Str.size();
728     Res->StartLoc = Loc;
729     Res->EndLoc = Loc;
730     return Res;
731   }
732 
733   static AMDGPUOperand::Ptr CreateReg(const AMDGPUAsmParser *AsmParser,
734                                       unsigned RegNo, SMLoc S,
735                                       SMLoc E,
736                                       bool ForceVOP3) {
737     auto Op = llvm::make_unique<AMDGPUOperand>(Register, AsmParser);
738     Op->Reg.RegNo = RegNo;
739     Op->Reg.Mods = Modifiers();
740     Op->Reg.IsForcedVOP3 = ForceVOP3;
741     Op->StartLoc = S;
742     Op->EndLoc = E;
743     return Op;
744   }
745 
746   static AMDGPUOperand::Ptr CreateExpr(const AMDGPUAsmParser *AsmParser,
747                                        const class MCExpr *Expr, SMLoc S) {
748     auto Op = llvm::make_unique<AMDGPUOperand>(Expression, AsmParser);
749     Op->Expr = Expr;
750     Op->StartLoc = S;
751     Op->EndLoc = S;
752     return Op;
753   }
754 };
755 
756 raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods) {
757   OS << "abs:" << Mods.Abs << " neg: " << Mods.Neg << " sext:" << Mods.Sext;
758   return OS;
759 }
760 
761 //===----------------------------------------------------------------------===//
762 // AsmParser
763 //===----------------------------------------------------------------------===//
764 
765 // Holds info related to the current kernel, e.g. count of SGPRs used.
766 // Kernel scope begins at .amdgpu_hsa_kernel directive, ends at next
767 // .amdgpu_hsa_kernel or at EOF.
768 class KernelScopeInfo {
769   int SgprIndexUnusedMin = -1;
770   int VgprIndexUnusedMin = -1;
771   MCContext *Ctx = nullptr;
772 
773   void usesSgprAt(int i) {
774     if (i >= SgprIndexUnusedMin) {
775       SgprIndexUnusedMin = ++i;
776       if (Ctx) {
777         MCSymbol * const Sym = Ctx->getOrCreateSymbol(Twine(".kernel.sgpr_count"));
778         Sym->setVariableValue(MCConstantExpr::create(SgprIndexUnusedMin, *Ctx));
779       }
780     }
781   }
782 
783   void usesVgprAt(int i) {
784     if (i >= VgprIndexUnusedMin) {
785       VgprIndexUnusedMin = ++i;
786       if (Ctx) {
787         MCSymbol * const Sym = Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count"));
788         Sym->setVariableValue(MCConstantExpr::create(VgprIndexUnusedMin, *Ctx));
789       }
790     }
791   }
792 
793 public:
794   KernelScopeInfo() = default;
795 
796   void initialize(MCContext &Context) {
797     Ctx = &Context;
798     usesSgprAt(SgprIndexUnusedMin = -1);
799     usesVgprAt(VgprIndexUnusedMin = -1);
800   }
801 
802   void usesRegister(RegisterKind RegKind, unsigned DwordRegIndex, unsigned RegWidth) {
803     switch (RegKind) {
804       case IS_SGPR: usesSgprAt(DwordRegIndex + RegWidth - 1); break;
805       case IS_VGPR: usesVgprAt(DwordRegIndex + RegWidth - 1); break;
806       default: break;
807     }
808   }
809 };
810 
811 class AMDGPUAsmParser : public MCTargetAsmParser {
812   MCAsmParser &Parser;
813 
814   unsigned ForcedEncodingSize = 0;
815   bool ForcedDPP = false;
816   bool ForcedSDWA = false;
817   KernelScopeInfo KernelScope;
818 
819   /// @name Auto-generated Match Functions
820   /// {
821 
822 #define GET_ASSEMBLER_HEADER
823 #include "AMDGPUGenAsmMatcher.inc"
824 
825   /// }
826 
827 private:
828   bool ParseAsAbsoluteExpression(uint32_t &Ret);
829   bool ParseDirectiveMajorMinor(uint32_t &Major, uint32_t &Minor);
830   bool ParseDirectiveHSACodeObjectVersion();
831   bool ParseDirectiveHSACodeObjectISA();
832   bool ParseAMDKernelCodeTValue(StringRef ID, amd_kernel_code_t &Header);
833   bool ParseDirectiveAMDKernelCodeT();
834   bool subtargetHasRegister(const MCRegisterInfo &MRI, unsigned RegNo) const;
835   bool ParseDirectiveAMDGPUHsaKernel();
836 
837   bool ParseDirectiveISAVersion();
838   bool ParseDirectiveHSAMetadata();
839   bool ParseDirectivePALMetadata();
840 
841   bool AddNextRegisterToList(unsigned& Reg, unsigned& RegWidth,
842                              RegisterKind RegKind, unsigned Reg1,
843                              unsigned RegNum);
844   bool ParseAMDGPURegister(RegisterKind& RegKind, unsigned& Reg,
845                            unsigned& RegNum, unsigned& RegWidth,
846                            unsigned *DwordRegIndex);
847   void cvtMubufImpl(MCInst &Inst, const OperandVector &Operands,
848                     bool IsAtomic, bool IsAtomicReturn);
849   void cvtDSImpl(MCInst &Inst, const OperandVector &Operands,
850                  bool IsGdsHardcoded);
851 
852 public:
853   enum AMDGPUMatchResultTy {
854     Match_PreferE32 = FIRST_TARGET_MATCH_RESULT_TY
855   };
856 
857   using OptionalImmIndexMap = std::map<AMDGPUOperand::ImmTy, unsigned>;
858 
859   AMDGPUAsmParser(const MCSubtargetInfo &STI, MCAsmParser &_Parser,
860                const MCInstrInfo &MII,
861                const MCTargetOptions &Options)
862       : MCTargetAsmParser(Options, STI, MII), Parser(_Parser) {
863     MCAsmParserExtension::Initialize(Parser);
864 
865     if (getFeatureBits().none()) {
866       // Set default features.
867       copySTI().ToggleFeature("SOUTHERN_ISLANDS");
868     }
869 
870     setAvailableFeatures(ComputeAvailableFeatures(getFeatureBits()));
871 
872     {
873       // TODO: make those pre-defined variables read-only.
874       // Currently there is none suitable machinery in the core llvm-mc for this.
875       // MCSymbol::isRedefinable is intended for another purpose, and
876       // AsmParser::parseDirectiveSet() cannot be specialized for specific target.
877       AMDGPU::IsaInfo::IsaVersion ISA =
878           AMDGPU::IsaInfo::getIsaVersion(getFeatureBits());
879       MCContext &Ctx = getContext();
880       MCSymbol *Sym =
881           Ctx.getOrCreateSymbol(Twine(".option.machine_version_major"));
882       Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
883       Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_minor"));
884       Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
885       Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_stepping"));
886       Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
887     }
888     KernelScope.initialize(getContext());
889   }
890 
891   bool isSI() const {
892     return AMDGPU::isSI(getSTI());
893   }
894 
895   bool isCI() const {
896     return AMDGPU::isCI(getSTI());
897   }
898 
899   bool isVI() const {
900     return AMDGPU::isVI(getSTI());
901   }
902 
903   bool isGFX9() const {
904     return AMDGPU::isGFX9(getSTI());
905   }
906 
907   bool hasInv2PiInlineImm() const {
908     return getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm];
909   }
910 
911   bool hasFlatOffsets() const {
912     return getFeatureBits()[AMDGPU::FeatureFlatInstOffsets];
913   }
914 
915   bool hasSGPR102_SGPR103() const {
916     return !isVI();
917   }
918 
919   bool hasIntClamp() const {
920     return getFeatureBits()[AMDGPU::FeatureIntClamp];
921   }
922 
923   AMDGPUTargetStreamer &getTargetStreamer() {
924     MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer();
925     return static_cast<AMDGPUTargetStreamer &>(TS);
926   }
927 
928   const MCRegisterInfo *getMRI() const {
929     // We need this const_cast because for some reason getContext() is not const
930     // in MCAsmParser.
931     return const_cast<AMDGPUAsmParser*>(this)->getContext().getRegisterInfo();
932   }
933 
934   const MCInstrInfo *getMII() const {
935     return &MII;
936   }
937 
938   const FeatureBitset &getFeatureBits() const {
939     return getSTI().getFeatureBits();
940   }
941 
942   void setForcedEncodingSize(unsigned Size) { ForcedEncodingSize = Size; }
943   void setForcedDPP(bool ForceDPP_) { ForcedDPP = ForceDPP_; }
944   void setForcedSDWA(bool ForceSDWA_) { ForcedSDWA = ForceSDWA_; }
945 
946   unsigned getForcedEncodingSize() const { return ForcedEncodingSize; }
947   bool isForcedVOP3() const { return ForcedEncodingSize == 64; }
948   bool isForcedDPP() const { return ForcedDPP; }
949   bool isForcedSDWA() const { return ForcedSDWA; }
950   ArrayRef<unsigned> getMatchedVariants() const;
951 
952   std::unique_ptr<AMDGPUOperand> parseRegister();
953   bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override;
954   unsigned checkTargetMatchPredicate(MCInst &Inst) override;
955   unsigned validateTargetOperandClass(MCParsedAsmOperand &Op,
956                                       unsigned Kind) override;
957   bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
958                                OperandVector &Operands, MCStreamer &Out,
959                                uint64_t &ErrorInfo,
960                                bool MatchingInlineAsm) override;
961   bool ParseDirective(AsmToken DirectiveID) override;
962   OperandMatchResultTy parseOperand(OperandVector &Operands, StringRef Mnemonic);
963   StringRef parseMnemonicSuffix(StringRef Name);
964   bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
965                         SMLoc NameLoc, OperandVector &Operands) override;
966   //bool ProcessInstruction(MCInst &Inst);
967 
968   OperandMatchResultTy parseIntWithPrefix(const char *Prefix, int64_t &Int);
969 
970   OperandMatchResultTy
971   parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
972                      AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
973                      bool (*ConvertResult)(int64_t &) = nullptr);
974 
975   OperandMatchResultTy parseOperandArrayWithPrefix(
976     const char *Prefix,
977     OperandVector &Operands,
978     AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
979     bool (*ConvertResult)(int64_t&) = nullptr);
980 
981   OperandMatchResultTy
982   parseNamedBit(const char *Name, OperandVector &Operands,
983                 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone);
984   OperandMatchResultTy parseStringWithPrefix(StringRef Prefix,
985                                              StringRef &Value);
986 
987   bool parseAbsoluteExpr(int64_t &Val, bool AbsMod = false);
988   OperandMatchResultTy parseImm(OperandVector &Operands, bool AbsMod = false);
989   OperandMatchResultTy parseReg(OperandVector &Operands);
990   OperandMatchResultTy parseRegOrImm(OperandVector &Operands, bool AbsMod = false);
991   OperandMatchResultTy parseRegOrImmWithFPInputMods(OperandVector &Operands, bool AllowImm = true);
992   OperandMatchResultTy parseRegOrImmWithIntInputMods(OperandVector &Operands, bool AllowImm = true);
993   OperandMatchResultTy parseRegWithFPInputMods(OperandVector &Operands);
994   OperandMatchResultTy parseRegWithIntInputMods(OperandVector &Operands);
995   OperandMatchResultTy parseVReg32OrOff(OperandVector &Operands);
996 
997   void cvtDSOffset01(MCInst &Inst, const OperandVector &Operands);
998   void cvtDS(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, false); }
999   void cvtDSGds(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, true); }
1000   void cvtExp(MCInst &Inst, const OperandVector &Operands);
1001 
1002   bool parseCnt(int64_t &IntVal);
1003   OperandMatchResultTy parseSWaitCntOps(OperandVector &Operands);
1004   OperandMatchResultTy parseHwreg(OperandVector &Operands);
1005 
1006 private:
1007   struct OperandInfoTy {
1008     int64_t Id;
1009     bool IsSymbolic = false;
1010 
1011     OperandInfoTy(int64_t Id_) : Id(Id_) {}
1012   };
1013 
1014   bool parseSendMsgConstruct(OperandInfoTy &Msg, OperandInfoTy &Operation, int64_t &StreamId);
1015   bool parseHwregConstruct(OperandInfoTy &HwReg, int64_t &Offset, int64_t &Width);
1016 
1017   void errorExpTgt();
1018   OperandMatchResultTy parseExpTgtImpl(StringRef Str, uint8_t &Val);
1019 
1020   bool validateInstruction(const MCInst &Inst, const SMLoc &IDLoc);
1021   bool validateConstantBusLimitations(const MCInst &Inst);
1022   bool validateEarlyClobberLimitations(const MCInst &Inst);
1023   bool validateIntClampSupported(const MCInst &Inst);
1024   bool usesConstantBus(const MCInst &Inst, unsigned OpIdx);
1025   bool isInlineConstant(const MCInst &Inst, unsigned OpIdx) const;
1026   unsigned findImplicitSGPRReadInVOP(const MCInst &Inst) const;
1027 
1028   bool trySkipId(const StringRef Id);
1029   bool trySkipToken(const AsmToken::TokenKind Kind);
1030   bool skipToken(const AsmToken::TokenKind Kind, const StringRef ErrMsg);
1031   bool parseString(StringRef &Val, const StringRef ErrMsg = "expected a string");
1032   bool parseExpr(int64_t &Imm);
1033 
1034 public:
1035   OperandMatchResultTy parseOptionalOperand(OperandVector &Operands);
1036 
1037   OperandMatchResultTy parseExpTgt(OperandVector &Operands);
1038   OperandMatchResultTy parseSendMsgOp(OperandVector &Operands);
1039   OperandMatchResultTy parseInterpSlot(OperandVector &Operands);
1040   OperandMatchResultTy parseInterpAttr(OperandVector &Operands);
1041   OperandMatchResultTy parseSOppBrTarget(OperandVector &Operands);
1042 
1043   bool parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
1044                             const unsigned MinVal,
1045                             const unsigned MaxVal,
1046                             const StringRef ErrMsg);
1047   OperandMatchResultTy parseSwizzleOp(OperandVector &Operands);
1048   bool parseSwizzleOffset(int64_t &Imm);
1049   bool parseSwizzleMacro(int64_t &Imm);
1050   bool parseSwizzleQuadPerm(int64_t &Imm);
1051   bool parseSwizzleBitmaskPerm(int64_t &Imm);
1052   bool parseSwizzleBroadcast(int64_t &Imm);
1053   bool parseSwizzleSwap(int64_t &Imm);
1054   bool parseSwizzleReverse(int64_t &Imm);
1055 
1056   void cvtMubuf(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, false); }
1057   void cvtMubufAtomic(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true, false); }
1058   void cvtMubufAtomicReturn(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true, true); }
1059   void cvtMtbuf(MCInst &Inst, const OperandVector &Operands);
1060 
1061   AMDGPUOperand::Ptr defaultGLC() const;
1062   AMDGPUOperand::Ptr defaultSLC() const;
1063   AMDGPUOperand::Ptr defaultTFE() const;
1064 
1065   AMDGPUOperand::Ptr defaultDMask() const;
1066   AMDGPUOperand::Ptr defaultUNorm() const;
1067   AMDGPUOperand::Ptr defaultDA() const;
1068   AMDGPUOperand::Ptr defaultR128() const;
1069   AMDGPUOperand::Ptr defaultLWE() const;
1070   AMDGPUOperand::Ptr defaultSMRDOffset8() const;
1071   AMDGPUOperand::Ptr defaultSMRDOffset20() const;
1072   AMDGPUOperand::Ptr defaultSMRDLiteralOffset() const;
1073   AMDGPUOperand::Ptr defaultOffsetU12() const;
1074   AMDGPUOperand::Ptr defaultOffsetS13() const;
1075 
1076   OperandMatchResultTy parseOModOperand(OperandVector &Operands);
1077 
1078   void cvtVOP3(MCInst &Inst, const OperandVector &Operands,
1079                OptionalImmIndexMap &OptionalIdx);
1080   void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands);
1081   void cvtVOP3(MCInst &Inst, const OperandVector &Operands);
1082   void cvtVOP3P(MCInst &Inst, const OperandVector &Operands);
1083 
1084   void cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands);
1085 
1086   void cvtMIMG(MCInst &Inst, const OperandVector &Operands,
1087                bool IsAtomic = false);
1088   void cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands);
1089 
1090   OperandMatchResultTy parseDPPCtrl(OperandVector &Operands);
1091   AMDGPUOperand::Ptr defaultRowMask() const;
1092   AMDGPUOperand::Ptr defaultBankMask() const;
1093   AMDGPUOperand::Ptr defaultBoundCtrl() const;
1094   void cvtDPP(MCInst &Inst, const OperandVector &Operands);
1095 
1096   OperandMatchResultTy parseSDWASel(OperandVector &Operands, StringRef Prefix,
1097                                     AMDGPUOperand::ImmTy Type);
1098   OperandMatchResultTy parseSDWADstUnused(OperandVector &Operands);
1099   void cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands);
1100   void cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands);
1101   void cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands);
1102   void cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands);
1103   void cvtSDWA(MCInst &Inst, const OperandVector &Operands,
1104                 uint64_t BasicInstType, bool skipVcc = false);
1105 };
1106 
1107 struct OptionalOperand {
1108   const char *Name;
1109   AMDGPUOperand::ImmTy Type;
1110   bool IsBit;
1111   bool (*ConvertResult)(int64_t&);
1112 };
1113 
1114 } // end anonymous namespace
1115 
1116 // May be called with integer type with equivalent bitwidth.
1117 static const fltSemantics *getFltSemantics(unsigned Size) {
1118   switch (Size) {
1119   case 4:
1120     return &APFloat::IEEEsingle();
1121   case 8:
1122     return &APFloat::IEEEdouble();
1123   case 2:
1124     return &APFloat::IEEEhalf();
1125   default:
1126     llvm_unreachable("unsupported fp type");
1127   }
1128 }
1129 
1130 static const fltSemantics *getFltSemantics(MVT VT) {
1131   return getFltSemantics(VT.getSizeInBits() / 8);
1132 }
1133 
1134 static const fltSemantics *getOpFltSemantics(uint8_t OperandType) {
1135   switch (OperandType) {
1136   case AMDGPU::OPERAND_REG_IMM_INT32:
1137   case AMDGPU::OPERAND_REG_IMM_FP32:
1138   case AMDGPU::OPERAND_REG_INLINE_C_INT32:
1139   case AMDGPU::OPERAND_REG_INLINE_C_FP32:
1140     return &APFloat::IEEEsingle();
1141   case AMDGPU::OPERAND_REG_IMM_INT64:
1142   case AMDGPU::OPERAND_REG_IMM_FP64:
1143   case AMDGPU::OPERAND_REG_INLINE_C_INT64:
1144   case AMDGPU::OPERAND_REG_INLINE_C_FP64:
1145     return &APFloat::IEEEdouble();
1146   case AMDGPU::OPERAND_REG_IMM_INT16:
1147   case AMDGPU::OPERAND_REG_IMM_FP16:
1148   case AMDGPU::OPERAND_REG_INLINE_C_INT16:
1149   case AMDGPU::OPERAND_REG_INLINE_C_FP16:
1150   case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
1151   case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
1152     return &APFloat::IEEEhalf();
1153   default:
1154     llvm_unreachable("unsupported fp type");
1155   }
1156 }
1157 
1158 //===----------------------------------------------------------------------===//
1159 // Operand
1160 //===----------------------------------------------------------------------===//
1161 
1162 static bool canLosslesslyConvertToFPType(APFloat &FPLiteral, MVT VT) {
1163   bool Lost;
1164 
1165   // Convert literal to single precision
1166   APFloat::opStatus Status = FPLiteral.convert(*getFltSemantics(VT),
1167                                                APFloat::rmNearestTiesToEven,
1168                                                &Lost);
1169   // We allow precision lost but not overflow or underflow
1170   if (Status != APFloat::opOK &&
1171       Lost &&
1172       ((Status & APFloat::opOverflow)  != 0 ||
1173        (Status & APFloat::opUnderflow) != 0)) {
1174     return false;
1175   }
1176 
1177   return true;
1178 }
1179 
1180 bool AMDGPUOperand::isInlinableImm(MVT type) const {
1181   if (!isImmTy(ImmTyNone)) {
1182     // Only plain immediates are inlinable (e.g. "clamp" attribute is not)
1183     return false;
1184   }
1185   // TODO: We should avoid using host float here. It would be better to
1186   // check the float bit values which is what a few other places do.
1187   // We've had bot failures before due to weird NaN support on mips hosts.
1188 
1189   APInt Literal(64, Imm.Val);
1190 
1191   if (Imm.IsFPImm) { // We got fp literal token
1192     if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
1193       return AMDGPU::isInlinableLiteral64(Imm.Val,
1194                                           AsmParser->hasInv2PiInlineImm());
1195     }
1196 
1197     APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
1198     if (!canLosslesslyConvertToFPType(FPLiteral, type))
1199       return false;
1200 
1201     if (type.getScalarSizeInBits() == 16) {
1202       return AMDGPU::isInlinableLiteral16(
1203         static_cast<int16_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
1204         AsmParser->hasInv2PiInlineImm());
1205     }
1206 
1207     // Check if single precision literal is inlinable
1208     return AMDGPU::isInlinableLiteral32(
1209       static_cast<int32_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
1210       AsmParser->hasInv2PiInlineImm());
1211   }
1212 
1213   // We got int literal token.
1214   if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
1215     return AMDGPU::isInlinableLiteral64(Imm.Val,
1216                                         AsmParser->hasInv2PiInlineImm());
1217   }
1218 
1219   if (type.getScalarSizeInBits() == 16) {
1220     return AMDGPU::isInlinableLiteral16(
1221       static_cast<int16_t>(Literal.getLoBits(16).getSExtValue()),
1222       AsmParser->hasInv2PiInlineImm());
1223   }
1224 
1225   return AMDGPU::isInlinableLiteral32(
1226     static_cast<int32_t>(Literal.getLoBits(32).getZExtValue()),
1227     AsmParser->hasInv2PiInlineImm());
1228 }
1229 
1230 bool AMDGPUOperand::isLiteralImm(MVT type) const {
1231   // Check that this immediate can be added as literal
1232   if (!isImmTy(ImmTyNone)) {
1233     return false;
1234   }
1235 
1236   if (!Imm.IsFPImm) {
1237     // We got int literal token.
1238 
1239     if (type == MVT::f64 && hasFPModifiers()) {
1240       // Cannot apply fp modifiers to int literals preserving the same semantics
1241       // for VOP1/2/C and VOP3 because of integer truncation. To avoid ambiguity,
1242       // disable these cases.
1243       return false;
1244     }
1245 
1246     unsigned Size = type.getSizeInBits();
1247     if (Size == 64)
1248       Size = 32;
1249 
1250     // FIXME: 64-bit operands can zero extend, sign extend, or pad zeroes for FP
1251     // types.
1252     return isUIntN(Size, Imm.Val) || isIntN(Size, Imm.Val);
1253   }
1254 
1255   // We got fp literal token
1256   if (type == MVT::f64) { // Expected 64-bit fp operand
1257     // We would set low 64-bits of literal to zeroes but we accept this literals
1258     return true;
1259   }
1260 
1261   if (type == MVT::i64) { // Expected 64-bit int operand
1262     // We don't allow fp literals in 64-bit integer instructions. It is
1263     // unclear how we should encode them.
1264     return false;
1265   }
1266 
1267   APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
1268   return canLosslesslyConvertToFPType(FPLiteral, type);
1269 }
1270 
1271 bool AMDGPUOperand::isRegClass(unsigned RCID) const {
1272   return isRegKind() && AsmParser->getMRI()->getRegClass(RCID).contains(getReg());
1273 }
1274 
1275 bool AMDGPUOperand::isSDWARegKind() const {
1276   if (AsmParser->isVI())
1277     return isVReg();
1278   else if (AsmParser->isGFX9())
1279     return isRegKind();
1280   else
1281     return false;
1282 }
1283 
1284 uint64_t AMDGPUOperand::applyInputFPModifiers(uint64_t Val, unsigned Size) const
1285 {
1286   assert(isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
1287   assert(Size == 2 || Size == 4 || Size == 8);
1288 
1289   const uint64_t FpSignMask = (1ULL << (Size * 8 - 1));
1290 
1291   if (Imm.Mods.Abs) {
1292     Val &= ~FpSignMask;
1293   }
1294   if (Imm.Mods.Neg) {
1295     Val ^= FpSignMask;
1296   }
1297 
1298   return Val;
1299 }
1300 
1301 void AMDGPUOperand::addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers) const {
1302   if (AMDGPU::isSISrcOperand(AsmParser->getMII()->get(Inst.getOpcode()),
1303                              Inst.getNumOperands())) {
1304     addLiteralImmOperand(Inst, Imm.Val,
1305                          ApplyModifiers &
1306                          isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
1307   } else {
1308     assert(!isImmTy(ImmTyNone) || !hasModifiers());
1309     Inst.addOperand(MCOperand::createImm(Imm.Val));
1310   }
1311 }
1312 
1313 void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const {
1314   const auto& InstDesc = AsmParser->getMII()->get(Inst.getOpcode());
1315   auto OpNum = Inst.getNumOperands();
1316   // Check that this operand accepts literals
1317   assert(AMDGPU::isSISrcOperand(InstDesc, OpNum));
1318 
1319   if (ApplyModifiers) {
1320     assert(AMDGPU::isSISrcFPOperand(InstDesc, OpNum));
1321     const unsigned Size = Imm.IsFPImm ? sizeof(double) : getOperandSize(InstDesc, OpNum);
1322     Val = applyInputFPModifiers(Val, Size);
1323   }
1324 
1325   APInt Literal(64, Val);
1326   uint8_t OpTy = InstDesc.OpInfo[OpNum].OperandType;
1327 
1328   if (Imm.IsFPImm) { // We got fp literal token
1329     switch (OpTy) {
1330     case AMDGPU::OPERAND_REG_IMM_INT64:
1331     case AMDGPU::OPERAND_REG_IMM_FP64:
1332     case AMDGPU::OPERAND_REG_INLINE_C_INT64:
1333     case AMDGPU::OPERAND_REG_INLINE_C_FP64:
1334       if (AMDGPU::isInlinableLiteral64(Literal.getZExtValue(),
1335                                        AsmParser->hasInv2PiInlineImm())) {
1336         Inst.addOperand(MCOperand::createImm(Literal.getZExtValue()));
1337         return;
1338       }
1339 
1340       // Non-inlineable
1341       if (AMDGPU::isSISrcFPOperand(InstDesc, OpNum)) { // Expected 64-bit fp operand
1342         // For fp operands we check if low 32 bits are zeros
1343         if (Literal.getLoBits(32) != 0) {
1344           const_cast<AMDGPUAsmParser *>(AsmParser)->Warning(Inst.getLoc(),
1345           "Can't encode literal as exact 64-bit floating-point operand. "
1346           "Low 32-bits will be set to zero");
1347         }
1348 
1349         Inst.addOperand(MCOperand::createImm(Literal.lshr(32).getZExtValue()));
1350         return;
1351       }
1352 
1353       // We don't allow fp literals in 64-bit integer instructions. It is
1354       // unclear how we should encode them. This case should be checked earlier
1355       // in predicate methods (isLiteralImm())
1356       llvm_unreachable("fp literal in 64-bit integer instruction.");
1357 
1358     case AMDGPU::OPERAND_REG_IMM_INT32:
1359     case AMDGPU::OPERAND_REG_IMM_FP32:
1360     case AMDGPU::OPERAND_REG_INLINE_C_INT32:
1361     case AMDGPU::OPERAND_REG_INLINE_C_FP32:
1362     case AMDGPU::OPERAND_REG_IMM_INT16:
1363     case AMDGPU::OPERAND_REG_IMM_FP16:
1364     case AMDGPU::OPERAND_REG_INLINE_C_INT16:
1365     case AMDGPU::OPERAND_REG_INLINE_C_FP16:
1366     case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
1367     case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: {
1368       bool lost;
1369       APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
1370       // Convert literal to single precision
1371       FPLiteral.convert(*getOpFltSemantics(OpTy),
1372                         APFloat::rmNearestTiesToEven, &lost);
1373       // We allow precision lost but not overflow or underflow. This should be
1374       // checked earlier in isLiteralImm()
1375 
1376       uint64_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue();
1377       if (OpTy == AMDGPU::OPERAND_REG_INLINE_C_V2INT16 ||
1378           OpTy == AMDGPU::OPERAND_REG_INLINE_C_V2FP16) {
1379         ImmVal |= (ImmVal << 16);
1380       }
1381 
1382       Inst.addOperand(MCOperand::createImm(ImmVal));
1383       return;
1384     }
1385     default:
1386       llvm_unreachable("invalid operand size");
1387     }
1388 
1389     return;
1390   }
1391 
1392    // We got int literal token.
1393   // Only sign extend inline immediates.
1394   // FIXME: No errors on truncation
1395   switch (OpTy) {
1396   case AMDGPU::OPERAND_REG_IMM_INT32:
1397   case AMDGPU::OPERAND_REG_IMM_FP32:
1398   case AMDGPU::OPERAND_REG_INLINE_C_INT32:
1399   case AMDGPU::OPERAND_REG_INLINE_C_FP32:
1400     if (isInt<32>(Val) &&
1401         AMDGPU::isInlinableLiteral32(static_cast<int32_t>(Val),
1402                                      AsmParser->hasInv2PiInlineImm())) {
1403       Inst.addOperand(MCOperand::createImm(Val));
1404       return;
1405     }
1406 
1407     Inst.addOperand(MCOperand::createImm(Val & 0xffffffff));
1408     return;
1409 
1410   case AMDGPU::OPERAND_REG_IMM_INT64:
1411   case AMDGPU::OPERAND_REG_IMM_FP64:
1412   case AMDGPU::OPERAND_REG_INLINE_C_INT64:
1413   case AMDGPU::OPERAND_REG_INLINE_C_FP64:
1414     if (AMDGPU::isInlinableLiteral64(Val, AsmParser->hasInv2PiInlineImm())) {
1415       Inst.addOperand(MCOperand::createImm(Val));
1416       return;
1417     }
1418 
1419     Inst.addOperand(MCOperand::createImm(Lo_32(Val)));
1420     return;
1421 
1422   case AMDGPU::OPERAND_REG_IMM_INT16:
1423   case AMDGPU::OPERAND_REG_IMM_FP16:
1424   case AMDGPU::OPERAND_REG_INLINE_C_INT16:
1425   case AMDGPU::OPERAND_REG_INLINE_C_FP16:
1426     if (isInt<16>(Val) &&
1427         AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val),
1428                                      AsmParser->hasInv2PiInlineImm())) {
1429       Inst.addOperand(MCOperand::createImm(Val));
1430       return;
1431     }
1432 
1433     Inst.addOperand(MCOperand::createImm(Val & 0xffff));
1434     return;
1435 
1436   case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
1437   case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: {
1438     auto LiteralVal = static_cast<uint16_t>(Literal.getLoBits(16).getZExtValue());
1439     assert(AMDGPU::isInlinableLiteral16(LiteralVal,
1440                                         AsmParser->hasInv2PiInlineImm()));
1441 
1442     uint32_t ImmVal = static_cast<uint32_t>(LiteralVal) << 16 |
1443                       static_cast<uint32_t>(LiteralVal);
1444     Inst.addOperand(MCOperand::createImm(ImmVal));
1445     return;
1446   }
1447   default:
1448     llvm_unreachable("invalid operand size");
1449   }
1450 }
1451 
1452 template <unsigned Bitwidth>
1453 void AMDGPUOperand::addKImmFPOperands(MCInst &Inst, unsigned N) const {
1454   APInt Literal(64, Imm.Val);
1455 
1456   if (!Imm.IsFPImm) {
1457     // We got int literal token.
1458     Inst.addOperand(MCOperand::createImm(Literal.getLoBits(Bitwidth).getZExtValue()));
1459     return;
1460   }
1461 
1462   bool Lost;
1463   APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
1464   FPLiteral.convert(*getFltSemantics(Bitwidth / 8),
1465                     APFloat::rmNearestTiesToEven, &Lost);
1466   Inst.addOperand(MCOperand::createImm(FPLiteral.bitcastToAPInt().getZExtValue()));
1467 }
1468 
1469 void AMDGPUOperand::addRegOperands(MCInst &Inst, unsigned N) const {
1470   Inst.addOperand(MCOperand::createReg(AMDGPU::getMCReg(getReg(), AsmParser->getSTI())));
1471 }
1472 
1473 //===----------------------------------------------------------------------===//
1474 // AsmParser
1475 //===----------------------------------------------------------------------===//
1476 
1477 static int getRegClass(RegisterKind Is, unsigned RegWidth) {
1478   if (Is == IS_VGPR) {
1479     switch (RegWidth) {
1480       default: return -1;
1481       case 1: return AMDGPU::VGPR_32RegClassID;
1482       case 2: return AMDGPU::VReg_64RegClassID;
1483       case 3: return AMDGPU::VReg_96RegClassID;
1484       case 4: return AMDGPU::VReg_128RegClassID;
1485       case 8: return AMDGPU::VReg_256RegClassID;
1486       case 16: return AMDGPU::VReg_512RegClassID;
1487     }
1488   } else if (Is == IS_TTMP) {
1489     switch (RegWidth) {
1490       default: return -1;
1491       case 1: return AMDGPU::TTMP_32RegClassID;
1492       case 2: return AMDGPU::TTMP_64RegClassID;
1493       case 4: return AMDGPU::TTMP_128RegClassID;
1494     }
1495   } else if (Is == IS_SGPR) {
1496     switch (RegWidth) {
1497       default: return -1;
1498       case 1: return AMDGPU::SGPR_32RegClassID;
1499       case 2: return AMDGPU::SGPR_64RegClassID;
1500       case 4: return AMDGPU::SGPR_128RegClassID;
1501       case 8: return AMDGPU::SReg_256RegClassID;
1502       case 16: return AMDGPU::SReg_512RegClassID;
1503     }
1504   }
1505   return -1;
1506 }
1507 
1508 static unsigned getSpecialRegForName(StringRef RegName) {
1509   return StringSwitch<unsigned>(RegName)
1510     .Case("exec", AMDGPU::EXEC)
1511     .Case("vcc", AMDGPU::VCC)
1512     .Case("flat_scratch", AMDGPU::FLAT_SCR)
1513     .Case("m0", AMDGPU::M0)
1514     .Case("scc", AMDGPU::SCC)
1515     .Case("tba", AMDGPU::TBA)
1516     .Case("tma", AMDGPU::TMA)
1517     .Case("flat_scratch_lo", AMDGPU::FLAT_SCR_LO)
1518     .Case("flat_scratch_hi", AMDGPU::FLAT_SCR_HI)
1519     .Case("vcc_lo", AMDGPU::VCC_LO)
1520     .Case("vcc_hi", AMDGPU::VCC_HI)
1521     .Case("exec_lo", AMDGPU::EXEC_LO)
1522     .Case("exec_hi", AMDGPU::EXEC_HI)
1523     .Case("tma_lo", AMDGPU::TMA_LO)
1524     .Case("tma_hi", AMDGPU::TMA_HI)
1525     .Case("tba_lo", AMDGPU::TBA_LO)
1526     .Case("tba_hi", AMDGPU::TBA_HI)
1527     .Default(0);
1528 }
1529 
1530 bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
1531                                     SMLoc &EndLoc) {
1532   auto R = parseRegister();
1533   if (!R) return true;
1534   assert(R->isReg());
1535   RegNo = R->getReg();
1536   StartLoc = R->getStartLoc();
1537   EndLoc = R->getEndLoc();
1538   return false;
1539 }
1540 
1541 bool AMDGPUAsmParser::AddNextRegisterToList(unsigned &Reg, unsigned &RegWidth,
1542                                             RegisterKind RegKind, unsigned Reg1,
1543                                             unsigned RegNum) {
1544   switch (RegKind) {
1545   case IS_SPECIAL:
1546     if (Reg == AMDGPU::EXEC_LO && Reg1 == AMDGPU::EXEC_HI) {
1547       Reg = AMDGPU::EXEC;
1548       RegWidth = 2;
1549       return true;
1550     }
1551     if (Reg == AMDGPU::FLAT_SCR_LO && Reg1 == AMDGPU::FLAT_SCR_HI) {
1552       Reg = AMDGPU::FLAT_SCR;
1553       RegWidth = 2;
1554       return true;
1555     }
1556     if (Reg == AMDGPU::VCC_LO && Reg1 == AMDGPU::VCC_HI) {
1557       Reg = AMDGPU::VCC;
1558       RegWidth = 2;
1559       return true;
1560     }
1561     if (Reg == AMDGPU::TBA_LO && Reg1 == AMDGPU::TBA_HI) {
1562       Reg = AMDGPU::TBA;
1563       RegWidth = 2;
1564       return true;
1565     }
1566     if (Reg == AMDGPU::TMA_LO && Reg1 == AMDGPU::TMA_HI) {
1567       Reg = AMDGPU::TMA;
1568       RegWidth = 2;
1569       return true;
1570     }
1571     return false;
1572   case IS_VGPR:
1573   case IS_SGPR:
1574   case IS_TTMP:
1575     if (Reg1 != Reg + RegWidth) {
1576       return false;
1577     }
1578     RegWidth++;
1579     return true;
1580   default:
1581     llvm_unreachable("unexpected register kind");
1582   }
1583 }
1584 
1585 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
1586                                           unsigned &RegNum, unsigned &RegWidth,
1587                                           unsigned *DwordRegIndex) {
1588   if (DwordRegIndex) { *DwordRegIndex = 0; }
1589   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
1590   if (getLexer().is(AsmToken::Identifier)) {
1591     StringRef RegName = Parser.getTok().getString();
1592     if ((Reg = getSpecialRegForName(RegName))) {
1593       Parser.Lex();
1594       RegKind = IS_SPECIAL;
1595     } else {
1596       unsigned RegNumIndex = 0;
1597       if (RegName[0] == 'v') {
1598         RegNumIndex = 1;
1599         RegKind = IS_VGPR;
1600       } else if (RegName[0] == 's') {
1601         RegNumIndex = 1;
1602         RegKind = IS_SGPR;
1603       } else if (RegName.startswith("ttmp")) {
1604         RegNumIndex = strlen("ttmp");
1605         RegKind = IS_TTMP;
1606       } else {
1607         return false;
1608       }
1609       if (RegName.size() > RegNumIndex) {
1610         // Single 32-bit register: vXX.
1611         if (RegName.substr(RegNumIndex).getAsInteger(10, RegNum))
1612           return false;
1613         Parser.Lex();
1614         RegWidth = 1;
1615       } else {
1616         // Range of registers: v[XX:YY]. ":YY" is optional.
1617         Parser.Lex();
1618         int64_t RegLo, RegHi;
1619         if (getLexer().isNot(AsmToken::LBrac))
1620           return false;
1621         Parser.Lex();
1622 
1623         if (getParser().parseAbsoluteExpression(RegLo))
1624           return false;
1625 
1626         const bool isRBrace = getLexer().is(AsmToken::RBrac);
1627         if (!isRBrace && getLexer().isNot(AsmToken::Colon))
1628           return false;
1629         Parser.Lex();
1630 
1631         if (isRBrace) {
1632           RegHi = RegLo;
1633         } else {
1634           if (getParser().parseAbsoluteExpression(RegHi))
1635             return false;
1636 
1637           if (getLexer().isNot(AsmToken::RBrac))
1638             return false;
1639           Parser.Lex();
1640         }
1641         RegNum = (unsigned) RegLo;
1642         RegWidth = (RegHi - RegLo) + 1;
1643       }
1644     }
1645   } else if (getLexer().is(AsmToken::LBrac)) {
1646     // List of consecutive registers: [s0,s1,s2,s3]
1647     Parser.Lex();
1648     if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, nullptr))
1649       return false;
1650     if (RegWidth != 1)
1651       return false;
1652     RegisterKind RegKind1;
1653     unsigned Reg1, RegNum1, RegWidth1;
1654     do {
1655       if (getLexer().is(AsmToken::Comma)) {
1656         Parser.Lex();
1657       } else if (getLexer().is(AsmToken::RBrac)) {
1658         Parser.Lex();
1659         break;
1660       } else if (ParseAMDGPURegister(RegKind1, Reg1, RegNum1, RegWidth1, nullptr)) {
1661         if (RegWidth1 != 1) {
1662           return false;
1663         }
1664         if (RegKind1 != RegKind) {
1665           return false;
1666         }
1667         if (!AddNextRegisterToList(Reg, RegWidth, RegKind1, Reg1, RegNum1)) {
1668           return false;
1669         }
1670       } else {
1671         return false;
1672       }
1673     } while (true);
1674   } else {
1675     return false;
1676   }
1677   switch (RegKind) {
1678   case IS_SPECIAL:
1679     RegNum = 0;
1680     RegWidth = 1;
1681     break;
1682   case IS_VGPR:
1683   case IS_SGPR:
1684   case IS_TTMP:
1685   {
1686     unsigned Size = 1;
1687     if (RegKind == IS_SGPR || RegKind == IS_TTMP) {
1688       // SGPR and TTMP registers must be aligned. Max required alignment is 4 dwords.
1689       Size = std::min(RegWidth, 4u);
1690     }
1691     if (RegNum % Size != 0)
1692       return false;
1693     if (DwordRegIndex) { *DwordRegIndex = RegNum; }
1694     RegNum = RegNum / Size;
1695     int RCID = getRegClass(RegKind, RegWidth);
1696     if (RCID == -1)
1697       return false;
1698     const MCRegisterClass RC = TRI->getRegClass(RCID);
1699     if (RegNum >= RC.getNumRegs())
1700       return false;
1701     Reg = RC.getRegister(RegNum);
1702     break;
1703   }
1704 
1705   default:
1706     llvm_unreachable("unexpected register kind");
1707   }
1708 
1709   if (!subtargetHasRegister(*TRI, Reg))
1710     return false;
1711   return true;
1712 }
1713 
1714 std::unique_ptr<AMDGPUOperand> AMDGPUAsmParser::parseRegister() {
1715   const auto &Tok = Parser.getTok();
1716   SMLoc StartLoc = Tok.getLoc();
1717   SMLoc EndLoc = Tok.getEndLoc();
1718   RegisterKind RegKind;
1719   unsigned Reg, RegNum, RegWidth, DwordRegIndex;
1720 
1721   if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, &DwordRegIndex)) {
1722     return nullptr;
1723   }
1724   KernelScope.usesRegister(RegKind, DwordRegIndex, RegWidth);
1725   return AMDGPUOperand::CreateReg(this, Reg, StartLoc, EndLoc, false);
1726 }
1727 
1728 bool
1729 AMDGPUAsmParser::parseAbsoluteExpr(int64_t &Val, bool AbsMod) {
1730   if (AbsMod && getLexer().peekTok().is(AsmToken::Pipe) &&
1731       (getLexer().getKind() == AsmToken::Integer ||
1732        getLexer().getKind() == AsmToken::Real)) {
1733     // This is a workaround for handling operands like these:
1734     //     |1.0|
1735     //     |-1|
1736     // This syntax is not compatible with syntax of standard
1737     // MC expressions (due to the trailing '|').
1738 
1739     SMLoc EndLoc;
1740     const MCExpr *Expr;
1741 
1742     if (getParser().parsePrimaryExpr(Expr, EndLoc)) {
1743       return true;
1744     }
1745 
1746     return !Expr->evaluateAsAbsolute(Val);
1747   }
1748 
1749   return getParser().parseAbsoluteExpression(Val);
1750 }
1751 
1752 OperandMatchResultTy
1753 AMDGPUAsmParser::parseImm(OperandVector &Operands, bool AbsMod) {
1754   // TODO: add syntactic sugar for 1/(2*PI)
1755   bool Minus = false;
1756   if (getLexer().getKind() == AsmToken::Minus) {
1757     Minus = true;
1758     Parser.Lex();
1759   }
1760 
1761   SMLoc S = Parser.getTok().getLoc();
1762   switch(getLexer().getKind()) {
1763   case AsmToken::Integer: {
1764     int64_t IntVal;
1765     if (parseAbsoluteExpr(IntVal, AbsMod))
1766       return MatchOperand_ParseFail;
1767     if (Minus)
1768       IntVal *= -1;
1769     Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S));
1770     return MatchOperand_Success;
1771   }
1772   case AsmToken::Real: {
1773     int64_t IntVal;
1774     if (parseAbsoluteExpr(IntVal, AbsMod))
1775       return MatchOperand_ParseFail;
1776 
1777     APFloat F(BitsToDouble(IntVal));
1778     if (Minus)
1779       F.changeSign();
1780     Operands.push_back(
1781         AMDGPUOperand::CreateImm(this, F.bitcastToAPInt().getZExtValue(), S,
1782                                  AMDGPUOperand::ImmTyNone, true));
1783     return MatchOperand_Success;
1784   }
1785   default:
1786     return Minus ? MatchOperand_ParseFail : MatchOperand_NoMatch;
1787   }
1788 }
1789 
1790 OperandMatchResultTy
1791 AMDGPUAsmParser::parseReg(OperandVector &Operands) {
1792   if (auto R = parseRegister()) {
1793     assert(R->isReg());
1794     R->Reg.IsForcedVOP3 = isForcedVOP3();
1795     Operands.push_back(std::move(R));
1796     return MatchOperand_Success;
1797   }
1798   return MatchOperand_NoMatch;
1799 }
1800 
1801 OperandMatchResultTy
1802 AMDGPUAsmParser::parseRegOrImm(OperandVector &Operands, bool AbsMod) {
1803   auto res = parseImm(Operands, AbsMod);
1804   if (res != MatchOperand_NoMatch) {
1805     return res;
1806   }
1807 
1808   return parseReg(Operands);
1809 }
1810 
1811 OperandMatchResultTy
1812 AMDGPUAsmParser::parseRegOrImmWithFPInputMods(OperandVector &Operands,
1813                                               bool AllowImm) {
1814   bool Negate = false, Negate2 = false, Abs = false, Abs2 = false;
1815 
1816   if (getLexer().getKind()== AsmToken::Minus) {
1817     const AsmToken NextToken = getLexer().peekTok();
1818 
1819     // Disable ambiguous constructs like '--1' etc. Should use neg(-1) instead.
1820     if (NextToken.is(AsmToken::Minus)) {
1821       Error(Parser.getTok().getLoc(), "invalid syntax, expected 'neg' modifier");
1822       return MatchOperand_ParseFail;
1823     }
1824 
1825     // '-' followed by an integer literal N should be interpreted as integer
1826     // negation rather than a floating-point NEG modifier applied to N.
1827     // Beside being contr-intuitive, such use of floating-point NEG modifier
1828     // results in different meaning of integer literals used with VOP1/2/C
1829     // and VOP3, for example:
1830     //    v_exp_f32_e32 v5, -1 // VOP1: src0 = 0xFFFFFFFF
1831     //    v_exp_f32_e64 v5, -1 // VOP3: src0 = 0x80000001
1832     // Negative fp literals should be handled likewise for unifomtity
1833     if (!NextToken.is(AsmToken::Integer) && !NextToken.is(AsmToken::Real)) {
1834       Parser.Lex();
1835       Negate = true;
1836     }
1837   }
1838 
1839   if (getLexer().getKind() == AsmToken::Identifier &&
1840       Parser.getTok().getString() == "neg") {
1841     if (Negate) {
1842       Error(Parser.getTok().getLoc(), "expected register or immediate");
1843       return MatchOperand_ParseFail;
1844     }
1845     Parser.Lex();
1846     Negate2 = true;
1847     if (getLexer().isNot(AsmToken::LParen)) {
1848       Error(Parser.getTok().getLoc(), "expected left paren after neg");
1849       return MatchOperand_ParseFail;
1850     }
1851     Parser.Lex();
1852   }
1853 
1854   if (getLexer().getKind() == AsmToken::Identifier &&
1855       Parser.getTok().getString() == "abs") {
1856     Parser.Lex();
1857     Abs2 = true;
1858     if (getLexer().isNot(AsmToken::LParen)) {
1859       Error(Parser.getTok().getLoc(), "expected left paren after abs");
1860       return MatchOperand_ParseFail;
1861     }
1862     Parser.Lex();
1863   }
1864 
1865   if (getLexer().getKind() == AsmToken::Pipe) {
1866     if (Abs2) {
1867       Error(Parser.getTok().getLoc(), "expected register or immediate");
1868       return MatchOperand_ParseFail;
1869     }
1870     Parser.Lex();
1871     Abs = true;
1872   }
1873 
1874   OperandMatchResultTy Res;
1875   if (AllowImm) {
1876     Res = parseRegOrImm(Operands, Abs);
1877   } else {
1878     Res = parseReg(Operands);
1879   }
1880   if (Res != MatchOperand_Success) {
1881     return Res;
1882   }
1883 
1884   AMDGPUOperand::Modifiers Mods;
1885   if (Abs) {
1886     if (getLexer().getKind() != AsmToken::Pipe) {
1887       Error(Parser.getTok().getLoc(), "expected vertical bar");
1888       return MatchOperand_ParseFail;
1889     }
1890     Parser.Lex();
1891     Mods.Abs = true;
1892   }
1893   if (Abs2) {
1894     if (getLexer().isNot(AsmToken::RParen)) {
1895       Error(Parser.getTok().getLoc(), "expected closing parentheses");
1896       return MatchOperand_ParseFail;
1897     }
1898     Parser.Lex();
1899     Mods.Abs = true;
1900   }
1901 
1902   if (Negate) {
1903     Mods.Neg = true;
1904   } else if (Negate2) {
1905     if (getLexer().isNot(AsmToken::RParen)) {
1906       Error(Parser.getTok().getLoc(), "expected closing parentheses");
1907       return MatchOperand_ParseFail;
1908     }
1909     Parser.Lex();
1910     Mods.Neg = true;
1911   }
1912 
1913   if (Mods.hasFPModifiers()) {
1914     AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
1915     Op.setModifiers(Mods);
1916   }
1917   return MatchOperand_Success;
1918 }
1919 
1920 OperandMatchResultTy
1921 AMDGPUAsmParser::parseRegOrImmWithIntInputMods(OperandVector &Operands,
1922                                                bool AllowImm) {
1923   bool Sext = false;
1924 
1925   if (getLexer().getKind() == AsmToken::Identifier &&
1926       Parser.getTok().getString() == "sext") {
1927     Parser.Lex();
1928     Sext = true;
1929     if (getLexer().isNot(AsmToken::LParen)) {
1930       Error(Parser.getTok().getLoc(), "expected left paren after sext");
1931       return MatchOperand_ParseFail;
1932     }
1933     Parser.Lex();
1934   }
1935 
1936   OperandMatchResultTy Res;
1937   if (AllowImm) {
1938     Res = parseRegOrImm(Operands);
1939   } else {
1940     Res = parseReg(Operands);
1941   }
1942   if (Res != MatchOperand_Success) {
1943     return Res;
1944   }
1945 
1946   AMDGPUOperand::Modifiers Mods;
1947   if (Sext) {
1948     if (getLexer().isNot(AsmToken::RParen)) {
1949       Error(Parser.getTok().getLoc(), "expected closing parentheses");
1950       return MatchOperand_ParseFail;
1951     }
1952     Parser.Lex();
1953     Mods.Sext = true;
1954   }
1955 
1956   if (Mods.hasIntModifiers()) {
1957     AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
1958     Op.setModifiers(Mods);
1959   }
1960 
1961   return MatchOperand_Success;
1962 }
1963 
1964 OperandMatchResultTy
1965 AMDGPUAsmParser::parseRegWithFPInputMods(OperandVector &Operands) {
1966   return parseRegOrImmWithFPInputMods(Operands, false);
1967 }
1968 
1969 OperandMatchResultTy
1970 AMDGPUAsmParser::parseRegWithIntInputMods(OperandVector &Operands) {
1971   return parseRegOrImmWithIntInputMods(Operands, false);
1972 }
1973 
1974 OperandMatchResultTy AMDGPUAsmParser::parseVReg32OrOff(OperandVector &Operands) {
1975   std::unique_ptr<AMDGPUOperand> Reg = parseRegister();
1976   if (Reg) {
1977     Operands.push_back(std::move(Reg));
1978     return MatchOperand_Success;
1979   }
1980 
1981   const AsmToken &Tok = Parser.getTok();
1982   if (Tok.getString() == "off") {
1983     Operands.push_back(AMDGPUOperand::CreateImm(this, 0, Tok.getLoc(),
1984                                                 AMDGPUOperand::ImmTyOff, false));
1985     Parser.Lex();
1986     return MatchOperand_Success;
1987   }
1988 
1989   return MatchOperand_NoMatch;
1990 }
1991 
1992 unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) {
1993   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
1994 
1995   if ((getForcedEncodingSize() == 32 && (TSFlags & SIInstrFlags::VOP3)) ||
1996       (getForcedEncodingSize() == 64 && !(TSFlags & SIInstrFlags::VOP3)) ||
1997       (isForcedDPP() && !(TSFlags & SIInstrFlags::DPP)) ||
1998       (isForcedSDWA() && !(TSFlags & SIInstrFlags::SDWA)) )
1999     return Match_InvalidOperand;
2000 
2001   if ((TSFlags & SIInstrFlags::VOP3) &&
2002       (TSFlags & SIInstrFlags::VOPAsmPrefer32Bit) &&
2003       getForcedEncodingSize() != 64)
2004     return Match_PreferE32;
2005 
2006   if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
2007       Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) {
2008     // v_mac_f32/16 allow only dst_sel == DWORD;
2009     auto OpNum =
2010         AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::dst_sel);
2011     const auto &Op = Inst.getOperand(OpNum);
2012     if (!Op.isImm() || Op.getImm() != AMDGPU::SDWA::SdwaSel::DWORD) {
2013       return Match_InvalidOperand;
2014     }
2015   }
2016 
2017   if ((TSFlags & SIInstrFlags::FLAT) && !hasFlatOffsets()) {
2018     // FIXME: Produces error without correct column reported.
2019     auto OpNum =
2020         AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::offset);
2021     const auto &Op = Inst.getOperand(OpNum);
2022     if (Op.getImm() != 0)
2023       return Match_InvalidOperand;
2024   }
2025 
2026   return Match_Success;
2027 }
2028 
2029 // What asm variants we should check
2030 ArrayRef<unsigned> AMDGPUAsmParser::getMatchedVariants() const {
2031   if (getForcedEncodingSize() == 32) {
2032     static const unsigned Variants[] = {AMDGPUAsmVariants::DEFAULT};
2033     return makeArrayRef(Variants);
2034   }
2035 
2036   if (isForcedVOP3()) {
2037     static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3};
2038     return makeArrayRef(Variants);
2039   }
2040 
2041   if (isForcedSDWA()) {
2042     static const unsigned Variants[] = {AMDGPUAsmVariants::SDWA,
2043                                         AMDGPUAsmVariants::SDWA9};
2044     return makeArrayRef(Variants);
2045   }
2046 
2047   if (isForcedDPP()) {
2048     static const unsigned Variants[] = {AMDGPUAsmVariants::DPP};
2049     return makeArrayRef(Variants);
2050   }
2051 
2052   static const unsigned Variants[] = {
2053     AMDGPUAsmVariants::DEFAULT, AMDGPUAsmVariants::VOP3,
2054     AMDGPUAsmVariants::SDWA, AMDGPUAsmVariants::SDWA9, AMDGPUAsmVariants::DPP
2055   };
2056 
2057   return makeArrayRef(Variants);
2058 }
2059 
2060 unsigned AMDGPUAsmParser::findImplicitSGPRReadInVOP(const MCInst &Inst) const {
2061   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
2062   const unsigned Num = Desc.getNumImplicitUses();
2063   for (unsigned i = 0; i < Num; ++i) {
2064     unsigned Reg = Desc.ImplicitUses[i];
2065     switch (Reg) {
2066     case AMDGPU::FLAT_SCR:
2067     case AMDGPU::VCC:
2068     case AMDGPU::M0:
2069       return Reg;
2070     default:
2071       break;
2072     }
2073   }
2074   return AMDGPU::NoRegister;
2075 }
2076 
2077 // NB: This code is correct only when used to check constant
2078 // bus limitations because GFX7 support no f16 inline constants.
2079 // Note that there are no cases when a GFX7 opcode violates
2080 // constant bus limitations due to the use of an f16 constant.
2081 bool AMDGPUAsmParser::isInlineConstant(const MCInst &Inst,
2082                                        unsigned OpIdx) const {
2083   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
2084 
2085   if (!AMDGPU::isSISrcOperand(Desc, OpIdx)) {
2086     return false;
2087   }
2088 
2089   const MCOperand &MO = Inst.getOperand(OpIdx);
2090 
2091   int64_t Val = MO.getImm();
2092   auto OpSize = AMDGPU::getOperandSize(Desc, OpIdx);
2093 
2094   switch (OpSize) { // expected operand size
2095   case 8:
2096     return AMDGPU::isInlinableLiteral64(Val, hasInv2PiInlineImm());
2097   case 4:
2098     return AMDGPU::isInlinableLiteral32(Val, hasInv2PiInlineImm());
2099   case 2: {
2100     const unsigned OperandType = Desc.OpInfo[OpIdx].OperandType;
2101     if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2INT16 ||
2102         OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2FP16) {
2103       return AMDGPU::isInlinableLiteralV216(Val, hasInv2PiInlineImm());
2104     } else {
2105       return AMDGPU::isInlinableLiteral16(Val, hasInv2PiInlineImm());
2106     }
2107   }
2108   default:
2109     llvm_unreachable("invalid operand size");
2110   }
2111 }
2112 
2113 bool AMDGPUAsmParser::usesConstantBus(const MCInst &Inst, unsigned OpIdx) {
2114   const MCOperand &MO = Inst.getOperand(OpIdx);
2115   if (MO.isImm()) {
2116     return !isInlineConstant(Inst, OpIdx);
2117   }
2118   return !MO.isReg() ||
2119          isSGPR(mc2PseudoReg(MO.getReg()), getContext().getRegisterInfo());
2120 }
2121 
2122 bool AMDGPUAsmParser::validateConstantBusLimitations(const MCInst &Inst) {
2123   const unsigned Opcode = Inst.getOpcode();
2124   const MCInstrDesc &Desc = MII.get(Opcode);
2125   unsigned ConstantBusUseCount = 0;
2126 
2127   if (Desc.TSFlags &
2128       (SIInstrFlags::VOPC |
2129        SIInstrFlags::VOP1 | SIInstrFlags::VOP2 |
2130        SIInstrFlags::VOP3 | SIInstrFlags::VOP3P |
2131        SIInstrFlags::SDWA)) {
2132     // Check special imm operands (used by madmk, etc)
2133     if (AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm) != -1) {
2134       ++ConstantBusUseCount;
2135     }
2136 
2137     unsigned SGPRUsed = findImplicitSGPRReadInVOP(Inst);
2138     if (SGPRUsed != AMDGPU::NoRegister) {
2139       ++ConstantBusUseCount;
2140     }
2141 
2142     const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
2143     const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
2144     const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
2145 
2146     const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx };
2147 
2148     for (int OpIdx : OpIndices) {
2149       if (OpIdx == -1) break;
2150 
2151       const MCOperand &MO = Inst.getOperand(OpIdx);
2152       if (usesConstantBus(Inst, OpIdx)) {
2153         if (MO.isReg()) {
2154           const unsigned Reg = mc2PseudoReg(MO.getReg());
2155           // Pairs of registers with a partial intersections like these
2156           //   s0, s[0:1]
2157           //   flat_scratch_lo, flat_scratch
2158           //   flat_scratch_lo, flat_scratch_hi
2159           // are theoretically valid but they are disabled anyway.
2160           // Note that this code mimics SIInstrInfo::verifyInstruction
2161           if (Reg != SGPRUsed) {
2162             ++ConstantBusUseCount;
2163           }
2164           SGPRUsed = Reg;
2165         } else { // Expression or a literal
2166           ++ConstantBusUseCount;
2167         }
2168       }
2169     }
2170   }
2171 
2172   return ConstantBusUseCount <= 1;
2173 }
2174 
2175 bool AMDGPUAsmParser::validateEarlyClobberLimitations(const MCInst &Inst) {
2176   const unsigned Opcode = Inst.getOpcode();
2177   const MCInstrDesc &Desc = MII.get(Opcode);
2178 
2179   const int DstIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vdst);
2180   if (DstIdx == -1 ||
2181       Desc.getOperandConstraint(DstIdx, MCOI::EARLY_CLOBBER) == -1) {
2182     return true;
2183   }
2184 
2185   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
2186 
2187   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
2188   const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
2189   const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
2190 
2191   assert(DstIdx != -1);
2192   const MCOperand &Dst = Inst.getOperand(DstIdx);
2193   assert(Dst.isReg());
2194   const unsigned DstReg = mc2PseudoReg(Dst.getReg());
2195 
2196   const int SrcIndices[] = { Src0Idx, Src1Idx, Src2Idx };
2197 
2198   for (int SrcIdx : SrcIndices) {
2199     if (SrcIdx == -1) break;
2200     const MCOperand &Src = Inst.getOperand(SrcIdx);
2201     if (Src.isReg()) {
2202       const unsigned SrcReg = mc2PseudoReg(Src.getReg());
2203       if (isRegIntersect(DstReg, SrcReg, TRI)) {
2204         return false;
2205       }
2206     }
2207   }
2208 
2209   return true;
2210 }
2211 
2212 bool AMDGPUAsmParser::validateIntClampSupported(const MCInst &Inst) {
2213 
2214   const unsigned Opc = Inst.getOpcode();
2215   const MCInstrDesc &Desc = MII.get(Opc);
2216 
2217   if ((Desc.TSFlags & SIInstrFlags::IntClamp) != 0 && !hasIntClamp()) {
2218     int ClampIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp);
2219     assert(ClampIdx != -1);
2220     return Inst.getOperand(ClampIdx).getImm() == 0;
2221   }
2222 
2223   return true;
2224 }
2225 
2226 bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst,
2227                                           const SMLoc &IDLoc) {
2228   if (!validateConstantBusLimitations(Inst)) {
2229     Error(IDLoc,
2230       "invalid operand (violates constant bus restrictions)");
2231     return false;
2232   }
2233   if (!validateEarlyClobberLimitations(Inst)) {
2234     Error(IDLoc,
2235       "destination must be different than all sources");
2236     return false;
2237   }
2238   if (!validateIntClampSupported(Inst)) {
2239     Error(IDLoc,
2240       "integer clamping is not supported on this GPU");
2241     return false;
2242   }
2243 
2244   return true;
2245 }
2246 
2247 bool AMDGPUAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
2248                                               OperandVector &Operands,
2249                                               MCStreamer &Out,
2250                                               uint64_t &ErrorInfo,
2251                                               bool MatchingInlineAsm) {
2252   MCInst Inst;
2253   unsigned Result = Match_Success;
2254   for (auto Variant : getMatchedVariants()) {
2255     uint64_t EI;
2256     auto R = MatchInstructionImpl(Operands, Inst, EI, MatchingInlineAsm,
2257                                   Variant);
2258     // We order match statuses from least to most specific. We use most specific
2259     // status as resulting
2260     // Match_MnemonicFail < Match_InvalidOperand < Match_MissingFeature < Match_PreferE32
2261     if ((R == Match_Success) ||
2262         (R == Match_PreferE32) ||
2263         (R == Match_MissingFeature && Result != Match_PreferE32) ||
2264         (R == Match_InvalidOperand && Result != Match_MissingFeature
2265                                    && Result != Match_PreferE32) ||
2266         (R == Match_MnemonicFail   && Result != Match_InvalidOperand
2267                                    && Result != Match_MissingFeature
2268                                    && Result != Match_PreferE32)) {
2269       Result = R;
2270       ErrorInfo = EI;
2271     }
2272     if (R == Match_Success)
2273       break;
2274   }
2275 
2276   switch (Result) {
2277   default: break;
2278   case Match_Success:
2279     if (!validateInstruction(Inst, IDLoc)) {
2280       return true;
2281     }
2282     Inst.setLoc(IDLoc);
2283     Out.EmitInstruction(Inst, getSTI());
2284     return false;
2285 
2286   case Match_MissingFeature:
2287     return Error(IDLoc, "instruction not supported on this GPU");
2288 
2289   case Match_MnemonicFail:
2290     return Error(IDLoc, "unrecognized instruction mnemonic");
2291 
2292   case Match_InvalidOperand: {
2293     SMLoc ErrorLoc = IDLoc;
2294     if (ErrorInfo != ~0ULL) {
2295       if (ErrorInfo >= Operands.size()) {
2296         return Error(IDLoc, "too few operands for instruction");
2297       }
2298       ErrorLoc = ((AMDGPUOperand &)*Operands[ErrorInfo]).getStartLoc();
2299       if (ErrorLoc == SMLoc())
2300         ErrorLoc = IDLoc;
2301     }
2302     return Error(ErrorLoc, "invalid operand for instruction");
2303   }
2304 
2305   case Match_PreferE32:
2306     return Error(IDLoc, "internal error: instruction without _e64 suffix "
2307                         "should be encoded as e32");
2308   }
2309   llvm_unreachable("Implement any new match types added!");
2310 }
2311 
2312 bool AMDGPUAsmParser::ParseAsAbsoluteExpression(uint32_t &Ret) {
2313   int64_t Tmp = -1;
2314   if (getLexer().isNot(AsmToken::Integer) && getLexer().isNot(AsmToken::Identifier)) {
2315     return true;
2316   }
2317   if (getParser().parseAbsoluteExpression(Tmp)) {
2318     return true;
2319   }
2320   Ret = static_cast<uint32_t>(Tmp);
2321   return false;
2322 }
2323 
2324 bool AMDGPUAsmParser::ParseDirectiveMajorMinor(uint32_t &Major,
2325                                                uint32_t &Minor) {
2326   if (ParseAsAbsoluteExpression(Major))
2327     return TokError("invalid major version");
2328 
2329   if (getLexer().isNot(AsmToken::Comma))
2330     return TokError("minor version number required, comma expected");
2331   Lex();
2332 
2333   if (ParseAsAbsoluteExpression(Minor))
2334     return TokError("invalid minor version");
2335 
2336   return false;
2337 }
2338 
2339 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectVersion() {
2340   uint32_t Major;
2341   uint32_t Minor;
2342 
2343   if (ParseDirectiveMajorMinor(Major, Minor))
2344     return true;
2345 
2346   getTargetStreamer().EmitDirectiveHSACodeObjectVersion(Major, Minor);
2347   return false;
2348 }
2349 
2350 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectISA() {
2351   uint32_t Major;
2352   uint32_t Minor;
2353   uint32_t Stepping;
2354   StringRef VendorName;
2355   StringRef ArchName;
2356 
2357   // If this directive has no arguments, then use the ISA version for the
2358   // targeted GPU.
2359   if (getLexer().is(AsmToken::EndOfStatement)) {
2360     AMDGPU::IsaInfo::IsaVersion ISA =
2361         AMDGPU::IsaInfo::getIsaVersion(getFeatureBits());
2362     getTargetStreamer().EmitDirectiveHSACodeObjectISA(ISA.Major, ISA.Minor,
2363                                                       ISA.Stepping,
2364                                                       "AMD", "AMDGPU");
2365     return false;
2366   }
2367 
2368   if (ParseDirectiveMajorMinor(Major, Minor))
2369     return true;
2370 
2371   if (getLexer().isNot(AsmToken::Comma))
2372     return TokError("stepping version number required, comma expected");
2373   Lex();
2374 
2375   if (ParseAsAbsoluteExpression(Stepping))
2376     return TokError("invalid stepping version");
2377 
2378   if (getLexer().isNot(AsmToken::Comma))
2379     return TokError("vendor name required, comma expected");
2380   Lex();
2381 
2382   if (getLexer().isNot(AsmToken::String))
2383     return TokError("invalid vendor name");
2384 
2385   VendorName = getLexer().getTok().getStringContents();
2386   Lex();
2387 
2388   if (getLexer().isNot(AsmToken::Comma))
2389     return TokError("arch name required, comma expected");
2390   Lex();
2391 
2392   if (getLexer().isNot(AsmToken::String))
2393     return TokError("invalid arch name");
2394 
2395   ArchName = getLexer().getTok().getStringContents();
2396   Lex();
2397 
2398   getTargetStreamer().EmitDirectiveHSACodeObjectISA(Major, Minor, Stepping,
2399                                                     VendorName, ArchName);
2400   return false;
2401 }
2402 
2403 bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID,
2404                                                amd_kernel_code_t &Header) {
2405   SmallString<40> ErrStr;
2406   raw_svector_ostream Err(ErrStr);
2407   if (!parseAmdKernelCodeField(ID, getParser(), Header, Err)) {
2408     return TokError(Err.str());
2409   }
2410   Lex();
2411   return false;
2412 }
2413 
2414 bool AMDGPUAsmParser::ParseDirectiveAMDKernelCodeT() {
2415   amd_kernel_code_t Header;
2416   AMDGPU::initDefaultAMDKernelCodeT(Header, getFeatureBits());
2417 
2418   while (true) {
2419     // Lex EndOfStatement.  This is in a while loop, because lexing a comment
2420     // will set the current token to EndOfStatement.
2421     while(getLexer().is(AsmToken::EndOfStatement))
2422       Lex();
2423 
2424     if (getLexer().isNot(AsmToken::Identifier))
2425       return TokError("expected value identifier or .end_amd_kernel_code_t");
2426 
2427     StringRef ID = getLexer().getTok().getIdentifier();
2428     Lex();
2429 
2430     if (ID == ".end_amd_kernel_code_t")
2431       break;
2432 
2433     if (ParseAMDKernelCodeTValue(ID, Header))
2434       return true;
2435   }
2436 
2437   getTargetStreamer().EmitAMDKernelCodeT(Header);
2438 
2439   return false;
2440 }
2441 
2442 bool AMDGPUAsmParser::ParseDirectiveAMDGPUHsaKernel() {
2443   if (getLexer().isNot(AsmToken::Identifier))
2444     return TokError("expected symbol name");
2445 
2446   StringRef KernelName = Parser.getTok().getString();
2447 
2448   getTargetStreamer().EmitAMDGPUSymbolType(KernelName,
2449                                            ELF::STT_AMDGPU_HSA_KERNEL);
2450   Lex();
2451   KernelScope.initialize(getContext());
2452   return false;
2453 }
2454 
2455 bool AMDGPUAsmParser::ParseDirectiveISAVersion() {
2456   if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) {
2457     return Error(getParser().getTok().getLoc(),
2458                  ".amd_amdgpu_isa directive is not available on non-amdgcn "
2459                  "architectures");
2460   }
2461 
2462   auto ISAVersionStringFromASM = getLexer().getTok().getStringContents();
2463 
2464   std::string ISAVersionStringFromSTI;
2465   raw_string_ostream ISAVersionStreamFromSTI(ISAVersionStringFromSTI);
2466   IsaInfo::streamIsaVersion(&getSTI(), ISAVersionStreamFromSTI);
2467 
2468   if (ISAVersionStringFromASM != ISAVersionStreamFromSTI.str()) {
2469     return Error(getParser().getTok().getLoc(),
2470                  ".amd_amdgpu_isa directive does not match triple and/or mcpu "
2471                  "arguments specified through the command line");
2472   }
2473 
2474   getTargetStreamer().EmitISAVersion(ISAVersionStreamFromSTI.str());
2475   Lex();
2476 
2477   return false;
2478 }
2479 
2480 bool AMDGPUAsmParser::ParseDirectiveHSAMetadata() {
2481   if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA) {
2482     return Error(getParser().getTok().getLoc(),
2483                  (Twine(HSAMD::AssemblerDirectiveBegin) + Twine(" directive is "
2484                  "not available on non-amdhsa OSes")).str());
2485   }
2486 
2487   std::string HSAMetadataString;
2488   raw_string_ostream YamlStream(HSAMetadataString);
2489 
2490   getLexer().setSkipSpace(false);
2491 
2492   bool FoundEnd = false;
2493   while (!getLexer().is(AsmToken::Eof)) {
2494     while (getLexer().is(AsmToken::Space)) {
2495       YamlStream << getLexer().getTok().getString();
2496       Lex();
2497     }
2498 
2499     if (getLexer().is(AsmToken::Identifier)) {
2500       StringRef ID = getLexer().getTok().getIdentifier();
2501       if (ID == AMDGPU::HSAMD::AssemblerDirectiveEnd) {
2502         Lex();
2503         FoundEnd = true;
2504         break;
2505       }
2506     }
2507 
2508     YamlStream << Parser.parseStringToEndOfStatement()
2509                << getContext().getAsmInfo()->getSeparatorString();
2510 
2511     Parser.eatToEndOfStatement();
2512   }
2513 
2514   getLexer().setSkipSpace(true);
2515 
2516   if (getLexer().is(AsmToken::Eof) && !FoundEnd) {
2517     return TokError(Twine("expected directive ") +
2518                     Twine(HSAMD::AssemblerDirectiveEnd) + Twine(" not found"));
2519   }
2520 
2521   YamlStream.flush();
2522 
2523   if (!getTargetStreamer().EmitHSAMetadata(HSAMetadataString))
2524     return Error(getParser().getTok().getLoc(), "invalid HSA metadata");
2525 
2526   return false;
2527 }
2528 
2529 bool AMDGPUAsmParser::ParseDirectivePALMetadata() {
2530   if (getSTI().getTargetTriple().getOS() != Triple::AMDPAL) {
2531     return Error(getParser().getTok().getLoc(),
2532                  (Twine(PALMD::AssemblerDirective) + Twine(" directive is "
2533                  "not available on non-amdpal OSes")).str());
2534   }
2535 
2536   PALMD::Metadata PALMetadata;
2537   for (;;) {
2538     uint32_t Value;
2539     if (ParseAsAbsoluteExpression(Value)) {
2540       return TokError(Twine("invalid value in ") +
2541                       Twine(PALMD::AssemblerDirective));
2542     }
2543     PALMetadata.push_back(Value);
2544     if (getLexer().isNot(AsmToken::Comma))
2545       break;
2546     Lex();
2547   }
2548   getTargetStreamer().EmitPALMetadata(PALMetadata);
2549   return false;
2550 }
2551 
2552 bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) {
2553   StringRef IDVal = DirectiveID.getString();
2554 
2555   if (IDVal == ".hsa_code_object_version")
2556     return ParseDirectiveHSACodeObjectVersion();
2557 
2558   if (IDVal == ".hsa_code_object_isa")
2559     return ParseDirectiveHSACodeObjectISA();
2560 
2561   if (IDVal == ".amd_kernel_code_t")
2562     return ParseDirectiveAMDKernelCodeT();
2563 
2564   if (IDVal == ".amdgpu_hsa_kernel")
2565     return ParseDirectiveAMDGPUHsaKernel();
2566 
2567   if (IDVal == ".amd_amdgpu_isa")
2568     return ParseDirectiveISAVersion();
2569 
2570   if (IDVal == AMDGPU::HSAMD::AssemblerDirectiveBegin)
2571     return ParseDirectiveHSAMetadata();
2572 
2573   if (IDVal == PALMD::AssemblerDirective)
2574     return ParseDirectivePALMetadata();
2575 
2576   return true;
2577 }
2578 
2579 bool AMDGPUAsmParser::subtargetHasRegister(const MCRegisterInfo &MRI,
2580                                            unsigned RegNo) const {
2581   if (isCI())
2582     return true;
2583 
2584   if (isSI()) {
2585     // No flat_scr
2586     switch (RegNo) {
2587     case AMDGPU::FLAT_SCR:
2588     case AMDGPU::FLAT_SCR_LO:
2589     case AMDGPU::FLAT_SCR_HI:
2590       return false;
2591     default:
2592       return true;
2593     }
2594   }
2595 
2596   // VI only has 102 SGPRs, so make sure we aren't trying to use the 2 more that
2597   // SI/CI have.
2598   for (MCRegAliasIterator R(AMDGPU::SGPR102_SGPR103, &MRI, true);
2599        R.isValid(); ++R) {
2600     if (*R == RegNo)
2601       return false;
2602   }
2603 
2604   return true;
2605 }
2606 
2607 OperandMatchResultTy
2608 AMDGPUAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic) {
2609   // Try to parse with a custom parser
2610   OperandMatchResultTy ResTy = MatchOperandParserImpl(Operands, Mnemonic);
2611 
2612   // If we successfully parsed the operand or if there as an error parsing,
2613   // we are done.
2614   //
2615   // If we are parsing after we reach EndOfStatement then this means we
2616   // are appending default values to the Operands list.  This is only done
2617   // by custom parser, so we shouldn't continue on to the generic parsing.
2618   if (ResTy == MatchOperand_Success || ResTy == MatchOperand_ParseFail ||
2619       getLexer().is(AsmToken::EndOfStatement))
2620     return ResTy;
2621 
2622   ResTy = parseRegOrImm(Operands);
2623 
2624   if (ResTy == MatchOperand_Success)
2625     return ResTy;
2626 
2627   const auto &Tok = Parser.getTok();
2628   SMLoc S = Tok.getLoc();
2629 
2630   const MCExpr *Expr = nullptr;
2631   if (!Parser.parseExpression(Expr)) {
2632     Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S));
2633     return MatchOperand_Success;
2634   }
2635 
2636   // Possibly this is an instruction flag like 'gds'.
2637   if (Tok.getKind() == AsmToken::Identifier) {
2638     Operands.push_back(AMDGPUOperand::CreateToken(this, Tok.getString(), S));
2639     Parser.Lex();
2640     return MatchOperand_Success;
2641   }
2642 
2643   return MatchOperand_NoMatch;
2644 }
2645 
2646 StringRef AMDGPUAsmParser::parseMnemonicSuffix(StringRef Name) {
2647   // Clear any forced encodings from the previous instruction.
2648   setForcedEncodingSize(0);
2649   setForcedDPP(false);
2650   setForcedSDWA(false);
2651 
2652   if (Name.endswith("_e64")) {
2653     setForcedEncodingSize(64);
2654     return Name.substr(0, Name.size() - 4);
2655   } else if (Name.endswith("_e32")) {
2656     setForcedEncodingSize(32);
2657     return Name.substr(0, Name.size() - 4);
2658   } else if (Name.endswith("_dpp")) {
2659     setForcedDPP(true);
2660     return Name.substr(0, Name.size() - 4);
2661   } else if (Name.endswith("_sdwa")) {
2662     setForcedSDWA(true);
2663     return Name.substr(0, Name.size() - 5);
2664   }
2665   return Name;
2666 }
2667 
2668 bool AMDGPUAsmParser::ParseInstruction(ParseInstructionInfo &Info,
2669                                        StringRef Name,
2670                                        SMLoc NameLoc, OperandVector &Operands) {
2671   // Add the instruction mnemonic
2672   Name = parseMnemonicSuffix(Name);
2673   Operands.push_back(AMDGPUOperand::CreateToken(this, Name, NameLoc));
2674 
2675   while (!getLexer().is(AsmToken::EndOfStatement)) {
2676     OperandMatchResultTy Res = parseOperand(Operands, Name);
2677 
2678     // Eat the comma or space if there is one.
2679     if (getLexer().is(AsmToken::Comma))
2680       Parser.Lex();
2681 
2682     switch (Res) {
2683       case MatchOperand_Success: break;
2684       case MatchOperand_ParseFail:
2685         Error(getLexer().getLoc(), "failed parsing operand.");
2686         while (!getLexer().is(AsmToken::EndOfStatement)) {
2687           Parser.Lex();
2688         }
2689         return true;
2690       case MatchOperand_NoMatch:
2691         Error(getLexer().getLoc(), "not a valid operand.");
2692         while (!getLexer().is(AsmToken::EndOfStatement)) {
2693           Parser.Lex();
2694         }
2695         return true;
2696     }
2697   }
2698 
2699   return false;
2700 }
2701 
2702 //===----------------------------------------------------------------------===//
2703 // Utility functions
2704 //===----------------------------------------------------------------------===//
2705 
2706 OperandMatchResultTy
2707 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, int64_t &Int) {
2708   switch(getLexer().getKind()) {
2709     default: return MatchOperand_NoMatch;
2710     case AsmToken::Identifier: {
2711       StringRef Name = Parser.getTok().getString();
2712       if (!Name.equals(Prefix)) {
2713         return MatchOperand_NoMatch;
2714       }
2715 
2716       Parser.Lex();
2717       if (getLexer().isNot(AsmToken::Colon))
2718         return MatchOperand_ParseFail;
2719 
2720       Parser.Lex();
2721 
2722       bool IsMinus = false;
2723       if (getLexer().getKind() == AsmToken::Minus) {
2724         Parser.Lex();
2725         IsMinus = true;
2726       }
2727 
2728       if (getLexer().isNot(AsmToken::Integer))
2729         return MatchOperand_ParseFail;
2730 
2731       if (getParser().parseAbsoluteExpression(Int))
2732         return MatchOperand_ParseFail;
2733 
2734       if (IsMinus)
2735         Int = -Int;
2736       break;
2737     }
2738   }
2739   return MatchOperand_Success;
2740 }
2741 
2742 OperandMatchResultTy
2743 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
2744                                     AMDGPUOperand::ImmTy ImmTy,
2745                                     bool (*ConvertResult)(int64_t&)) {
2746   SMLoc S = Parser.getTok().getLoc();
2747   int64_t Value = 0;
2748 
2749   OperandMatchResultTy Res = parseIntWithPrefix(Prefix, Value);
2750   if (Res != MatchOperand_Success)
2751     return Res;
2752 
2753   if (ConvertResult && !ConvertResult(Value)) {
2754     return MatchOperand_ParseFail;
2755   }
2756 
2757   Operands.push_back(AMDGPUOperand::CreateImm(this, Value, S, ImmTy));
2758   return MatchOperand_Success;
2759 }
2760 
2761 OperandMatchResultTy AMDGPUAsmParser::parseOperandArrayWithPrefix(
2762   const char *Prefix,
2763   OperandVector &Operands,
2764   AMDGPUOperand::ImmTy ImmTy,
2765   bool (*ConvertResult)(int64_t&)) {
2766   StringRef Name = Parser.getTok().getString();
2767   if (!Name.equals(Prefix))
2768     return MatchOperand_NoMatch;
2769 
2770   Parser.Lex();
2771   if (getLexer().isNot(AsmToken::Colon))
2772     return MatchOperand_ParseFail;
2773 
2774   Parser.Lex();
2775   if (getLexer().isNot(AsmToken::LBrac))
2776     return MatchOperand_ParseFail;
2777   Parser.Lex();
2778 
2779   unsigned Val = 0;
2780   SMLoc S = Parser.getTok().getLoc();
2781 
2782   // FIXME: How to verify the number of elements matches the number of src
2783   // operands?
2784   for (int I = 0; I < 4; ++I) {
2785     if (I != 0) {
2786       if (getLexer().is(AsmToken::RBrac))
2787         break;
2788 
2789       if (getLexer().isNot(AsmToken::Comma))
2790         return MatchOperand_ParseFail;
2791       Parser.Lex();
2792     }
2793 
2794     if (getLexer().isNot(AsmToken::Integer))
2795       return MatchOperand_ParseFail;
2796 
2797     int64_t Op;
2798     if (getParser().parseAbsoluteExpression(Op))
2799       return MatchOperand_ParseFail;
2800 
2801     if (Op != 0 && Op != 1)
2802       return MatchOperand_ParseFail;
2803     Val |= (Op << I);
2804   }
2805 
2806   Parser.Lex();
2807   Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S, ImmTy));
2808   return MatchOperand_Success;
2809 }
2810 
2811 OperandMatchResultTy
2812 AMDGPUAsmParser::parseNamedBit(const char *Name, OperandVector &Operands,
2813                                AMDGPUOperand::ImmTy ImmTy) {
2814   int64_t Bit = 0;
2815   SMLoc S = Parser.getTok().getLoc();
2816 
2817   // We are at the end of the statement, and this is a default argument, so
2818   // use a default value.
2819   if (getLexer().isNot(AsmToken::EndOfStatement)) {
2820     switch(getLexer().getKind()) {
2821       case AsmToken::Identifier: {
2822         StringRef Tok = Parser.getTok().getString();
2823         if (Tok == Name) {
2824           Bit = 1;
2825           Parser.Lex();
2826         } else if (Tok.startswith("no") && Tok.endswith(Name)) {
2827           Bit = 0;
2828           Parser.Lex();
2829         } else {
2830           return MatchOperand_NoMatch;
2831         }
2832         break;
2833       }
2834       default:
2835         return MatchOperand_NoMatch;
2836     }
2837   }
2838 
2839   Operands.push_back(AMDGPUOperand::CreateImm(this, Bit, S, ImmTy));
2840   return MatchOperand_Success;
2841 }
2842 
2843 static void addOptionalImmOperand(
2844   MCInst& Inst, const OperandVector& Operands,
2845   AMDGPUAsmParser::OptionalImmIndexMap& OptionalIdx,
2846   AMDGPUOperand::ImmTy ImmT,
2847   int64_t Default = 0) {
2848   auto i = OptionalIdx.find(ImmT);
2849   if (i != OptionalIdx.end()) {
2850     unsigned Idx = i->second;
2851     ((AMDGPUOperand &)*Operands[Idx]).addImmOperands(Inst, 1);
2852   } else {
2853     Inst.addOperand(MCOperand::createImm(Default));
2854   }
2855 }
2856 
2857 OperandMatchResultTy
2858 AMDGPUAsmParser::parseStringWithPrefix(StringRef Prefix, StringRef &Value) {
2859   if (getLexer().isNot(AsmToken::Identifier)) {
2860     return MatchOperand_NoMatch;
2861   }
2862   StringRef Tok = Parser.getTok().getString();
2863   if (Tok != Prefix) {
2864     return MatchOperand_NoMatch;
2865   }
2866 
2867   Parser.Lex();
2868   if (getLexer().isNot(AsmToken::Colon)) {
2869     return MatchOperand_ParseFail;
2870   }
2871 
2872   Parser.Lex();
2873   if (getLexer().isNot(AsmToken::Identifier)) {
2874     return MatchOperand_ParseFail;
2875   }
2876 
2877   Value = Parser.getTok().getString();
2878   return MatchOperand_Success;
2879 }
2880 
2881 //===----------------------------------------------------------------------===//
2882 // ds
2883 //===----------------------------------------------------------------------===//
2884 
2885 void AMDGPUAsmParser::cvtDSOffset01(MCInst &Inst,
2886                                     const OperandVector &Operands) {
2887   OptionalImmIndexMap OptionalIdx;
2888 
2889   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
2890     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
2891 
2892     // Add the register arguments
2893     if (Op.isReg()) {
2894       Op.addRegOperands(Inst, 1);
2895       continue;
2896     }
2897 
2898     // Handle optional arguments
2899     OptionalIdx[Op.getImmTy()] = i;
2900   }
2901 
2902   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset0);
2903   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset1);
2904   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS);
2905 
2906   Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0
2907 }
2908 
2909 void AMDGPUAsmParser::cvtDSImpl(MCInst &Inst, const OperandVector &Operands,
2910                                 bool IsGdsHardcoded) {
2911   OptionalImmIndexMap OptionalIdx;
2912 
2913   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
2914     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
2915 
2916     // Add the register arguments
2917     if (Op.isReg()) {
2918       Op.addRegOperands(Inst, 1);
2919       continue;
2920     }
2921 
2922     if (Op.isToken() && Op.getToken() == "gds") {
2923       IsGdsHardcoded = true;
2924       continue;
2925     }
2926 
2927     // Handle optional arguments
2928     OptionalIdx[Op.getImmTy()] = i;
2929   }
2930 
2931   AMDGPUOperand::ImmTy OffsetType =
2932     (Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_si ||
2933      Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_vi) ? AMDGPUOperand::ImmTySwizzle :
2934                                                       AMDGPUOperand::ImmTyOffset;
2935 
2936   addOptionalImmOperand(Inst, Operands, OptionalIdx, OffsetType);
2937 
2938   if (!IsGdsHardcoded) {
2939     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS);
2940   }
2941   Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0
2942 }
2943 
2944 void AMDGPUAsmParser::cvtExp(MCInst &Inst, const OperandVector &Operands) {
2945   OptionalImmIndexMap OptionalIdx;
2946 
2947   unsigned OperandIdx[4];
2948   unsigned EnMask = 0;
2949   int SrcIdx = 0;
2950 
2951   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
2952     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
2953 
2954     // Add the register arguments
2955     if (Op.isReg()) {
2956       assert(SrcIdx < 4);
2957       OperandIdx[SrcIdx] = Inst.size();
2958       Op.addRegOperands(Inst, 1);
2959       ++SrcIdx;
2960       continue;
2961     }
2962 
2963     if (Op.isOff()) {
2964       assert(SrcIdx < 4);
2965       OperandIdx[SrcIdx] = Inst.size();
2966       Inst.addOperand(MCOperand::createReg(AMDGPU::NoRegister));
2967       ++SrcIdx;
2968       continue;
2969     }
2970 
2971     if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyExpTgt) {
2972       Op.addImmOperands(Inst, 1);
2973       continue;
2974     }
2975 
2976     if (Op.isToken() && Op.getToken() == "done")
2977       continue;
2978 
2979     // Handle optional arguments
2980     OptionalIdx[Op.getImmTy()] = i;
2981   }
2982 
2983   assert(SrcIdx == 4);
2984 
2985   bool Compr = false;
2986   if (OptionalIdx.find(AMDGPUOperand::ImmTyExpCompr) != OptionalIdx.end()) {
2987     Compr = true;
2988     Inst.getOperand(OperandIdx[1]) = Inst.getOperand(OperandIdx[2]);
2989     Inst.getOperand(OperandIdx[2]).setReg(AMDGPU::NoRegister);
2990     Inst.getOperand(OperandIdx[3]).setReg(AMDGPU::NoRegister);
2991   }
2992 
2993   for (auto i = 0; i < SrcIdx; ++i) {
2994     if (Inst.getOperand(OperandIdx[i]).getReg() != AMDGPU::NoRegister) {
2995       EnMask |= Compr? (0x3 << i * 2) : (0x1 << i);
2996     }
2997   }
2998 
2999   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpVM);
3000   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpCompr);
3001 
3002   Inst.addOperand(MCOperand::createImm(EnMask));
3003 }
3004 
3005 //===----------------------------------------------------------------------===//
3006 // s_waitcnt
3007 //===----------------------------------------------------------------------===//
3008 
3009 static bool
3010 encodeCnt(
3011   const AMDGPU::IsaInfo::IsaVersion ISA,
3012   int64_t &IntVal,
3013   int64_t CntVal,
3014   bool Saturate,
3015   unsigned (*encode)(const IsaInfo::IsaVersion &Version, unsigned, unsigned),
3016   unsigned (*decode)(const IsaInfo::IsaVersion &Version, unsigned))
3017 {
3018   bool Failed = false;
3019 
3020   IntVal = encode(ISA, IntVal, CntVal);
3021   if (CntVal != decode(ISA, IntVal)) {
3022     if (Saturate) {
3023       IntVal = encode(ISA, IntVal, -1);
3024     } else {
3025       Failed = true;
3026     }
3027   }
3028   return Failed;
3029 }
3030 
3031 bool AMDGPUAsmParser::parseCnt(int64_t &IntVal) {
3032   StringRef CntName = Parser.getTok().getString();
3033   int64_t CntVal;
3034 
3035   Parser.Lex();
3036   if (getLexer().isNot(AsmToken::LParen))
3037     return true;
3038 
3039   Parser.Lex();
3040   if (getLexer().isNot(AsmToken::Integer))
3041     return true;
3042 
3043   SMLoc ValLoc = Parser.getTok().getLoc();
3044   if (getParser().parseAbsoluteExpression(CntVal))
3045     return true;
3046 
3047   AMDGPU::IsaInfo::IsaVersion ISA =
3048       AMDGPU::IsaInfo::getIsaVersion(getFeatureBits());
3049 
3050   bool Failed = true;
3051   bool Sat = CntName.endswith("_sat");
3052 
3053   if (CntName == "vmcnt" || CntName == "vmcnt_sat") {
3054     Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeVmcnt, decodeVmcnt);
3055   } else if (CntName == "expcnt" || CntName == "expcnt_sat") {
3056     Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeExpcnt, decodeExpcnt);
3057   } else if (CntName == "lgkmcnt" || CntName == "lgkmcnt_sat") {
3058     Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeLgkmcnt, decodeLgkmcnt);
3059   }
3060 
3061   if (Failed) {
3062     Error(ValLoc, "too large value for " + CntName);
3063     return true;
3064   }
3065 
3066   if (getLexer().isNot(AsmToken::RParen)) {
3067     return true;
3068   }
3069 
3070   Parser.Lex();
3071   if (getLexer().is(AsmToken::Amp) || getLexer().is(AsmToken::Comma)) {
3072     const AsmToken NextToken = getLexer().peekTok();
3073     if (NextToken.is(AsmToken::Identifier)) {
3074       Parser.Lex();
3075     }
3076   }
3077 
3078   return false;
3079 }
3080 
3081 OperandMatchResultTy
3082 AMDGPUAsmParser::parseSWaitCntOps(OperandVector &Operands) {
3083   AMDGPU::IsaInfo::IsaVersion ISA =
3084       AMDGPU::IsaInfo::getIsaVersion(getFeatureBits());
3085   int64_t Waitcnt = getWaitcntBitMask(ISA);
3086   SMLoc S = Parser.getTok().getLoc();
3087 
3088   switch(getLexer().getKind()) {
3089     default: return MatchOperand_ParseFail;
3090     case AsmToken::Integer:
3091       // The operand can be an integer value.
3092       if (getParser().parseAbsoluteExpression(Waitcnt))
3093         return MatchOperand_ParseFail;
3094       break;
3095 
3096     case AsmToken::Identifier:
3097       do {
3098         if (parseCnt(Waitcnt))
3099           return MatchOperand_ParseFail;
3100       } while(getLexer().isNot(AsmToken::EndOfStatement));
3101       break;
3102   }
3103   Operands.push_back(AMDGPUOperand::CreateImm(this, Waitcnt, S));
3104   return MatchOperand_Success;
3105 }
3106 
3107 bool AMDGPUAsmParser::parseHwregConstruct(OperandInfoTy &HwReg, int64_t &Offset,
3108                                           int64_t &Width) {
3109   using namespace llvm::AMDGPU::Hwreg;
3110 
3111   if (Parser.getTok().getString() != "hwreg")
3112     return true;
3113   Parser.Lex();
3114 
3115   if (getLexer().isNot(AsmToken::LParen))
3116     return true;
3117   Parser.Lex();
3118 
3119   if (getLexer().is(AsmToken::Identifier)) {
3120     HwReg.IsSymbolic = true;
3121     HwReg.Id = ID_UNKNOWN_;
3122     const StringRef tok = Parser.getTok().getString();
3123     for (int i = ID_SYMBOLIC_FIRST_; i < ID_SYMBOLIC_LAST_; ++i) {
3124       if (tok == IdSymbolic[i]) {
3125         HwReg.Id = i;
3126         break;
3127       }
3128     }
3129     Parser.Lex();
3130   } else {
3131     HwReg.IsSymbolic = false;
3132     if (getLexer().isNot(AsmToken::Integer))
3133       return true;
3134     if (getParser().parseAbsoluteExpression(HwReg.Id))
3135       return true;
3136   }
3137 
3138   if (getLexer().is(AsmToken::RParen)) {
3139     Parser.Lex();
3140     return false;
3141   }
3142 
3143   // optional params
3144   if (getLexer().isNot(AsmToken::Comma))
3145     return true;
3146   Parser.Lex();
3147 
3148   if (getLexer().isNot(AsmToken::Integer))
3149     return true;
3150   if (getParser().parseAbsoluteExpression(Offset))
3151     return true;
3152 
3153   if (getLexer().isNot(AsmToken::Comma))
3154     return true;
3155   Parser.Lex();
3156 
3157   if (getLexer().isNot(AsmToken::Integer))
3158     return true;
3159   if (getParser().parseAbsoluteExpression(Width))
3160     return true;
3161 
3162   if (getLexer().isNot(AsmToken::RParen))
3163     return true;
3164   Parser.Lex();
3165 
3166   return false;
3167 }
3168 
3169 OperandMatchResultTy AMDGPUAsmParser::parseHwreg(OperandVector &Operands) {
3170   using namespace llvm::AMDGPU::Hwreg;
3171 
3172   int64_t Imm16Val = 0;
3173   SMLoc S = Parser.getTok().getLoc();
3174 
3175   switch(getLexer().getKind()) {
3176     default: return MatchOperand_NoMatch;
3177     case AsmToken::Integer:
3178       // The operand can be an integer value.
3179       if (getParser().parseAbsoluteExpression(Imm16Val))
3180         return MatchOperand_NoMatch;
3181       if (Imm16Val < 0 || !isUInt<16>(Imm16Val)) {
3182         Error(S, "invalid immediate: only 16-bit values are legal");
3183         // Do not return error code, but create an imm operand anyway and proceed
3184         // to the next operand, if any. That avoids unneccessary error messages.
3185       }
3186       break;
3187 
3188     case AsmToken::Identifier: {
3189         OperandInfoTy HwReg(ID_UNKNOWN_);
3190         int64_t Offset = OFFSET_DEFAULT_;
3191         int64_t Width = WIDTH_M1_DEFAULT_ + 1;
3192         if (parseHwregConstruct(HwReg, Offset, Width))
3193           return MatchOperand_ParseFail;
3194         if (HwReg.Id < 0 || !isUInt<ID_WIDTH_>(HwReg.Id)) {
3195           if (HwReg.IsSymbolic)
3196             Error(S, "invalid symbolic name of hardware register");
3197           else
3198             Error(S, "invalid code of hardware register: only 6-bit values are legal");
3199         }
3200         if (Offset < 0 || !isUInt<OFFSET_WIDTH_>(Offset))
3201           Error(S, "invalid bit offset: only 5-bit values are legal");
3202         if ((Width-1) < 0 || !isUInt<WIDTH_M1_WIDTH_>(Width-1))
3203           Error(S, "invalid bitfield width: only values from 1 to 32 are legal");
3204         Imm16Val = (HwReg.Id << ID_SHIFT_) | (Offset << OFFSET_SHIFT_) | ((Width-1) << WIDTH_M1_SHIFT_);
3205       }
3206       break;
3207   }
3208   Operands.push_back(AMDGPUOperand::CreateImm(this, Imm16Val, S, AMDGPUOperand::ImmTyHwreg));
3209   return MatchOperand_Success;
3210 }
3211 
3212 bool AMDGPUOperand::isSWaitCnt() const {
3213   return isImm();
3214 }
3215 
3216 bool AMDGPUOperand::isHwreg() const {
3217   return isImmTy(ImmTyHwreg);
3218 }
3219 
3220 bool AMDGPUAsmParser::parseSendMsgConstruct(OperandInfoTy &Msg, OperandInfoTy &Operation, int64_t &StreamId) {
3221   using namespace llvm::AMDGPU::SendMsg;
3222 
3223   if (Parser.getTok().getString() != "sendmsg")
3224     return true;
3225   Parser.Lex();
3226 
3227   if (getLexer().isNot(AsmToken::LParen))
3228     return true;
3229   Parser.Lex();
3230 
3231   if (getLexer().is(AsmToken::Identifier)) {
3232     Msg.IsSymbolic = true;
3233     Msg.Id = ID_UNKNOWN_;
3234     const std::string tok = Parser.getTok().getString();
3235     for (int i = ID_GAPS_FIRST_; i < ID_GAPS_LAST_; ++i) {
3236       switch(i) {
3237         default: continue; // Omit gaps.
3238         case ID_INTERRUPT: case ID_GS: case ID_GS_DONE:  case ID_SYSMSG: break;
3239       }
3240       if (tok == IdSymbolic[i]) {
3241         Msg.Id = i;
3242         break;
3243       }
3244     }
3245     Parser.Lex();
3246   } else {
3247     Msg.IsSymbolic = false;
3248     if (getLexer().isNot(AsmToken::Integer))
3249       return true;
3250     if (getParser().parseAbsoluteExpression(Msg.Id))
3251       return true;
3252     if (getLexer().is(AsmToken::Integer))
3253       if (getParser().parseAbsoluteExpression(Msg.Id))
3254         Msg.Id = ID_UNKNOWN_;
3255   }
3256   if (Msg.Id == ID_UNKNOWN_) // Don't know how to parse the rest.
3257     return false;
3258 
3259   if (!(Msg.Id == ID_GS || Msg.Id == ID_GS_DONE || Msg.Id == ID_SYSMSG)) {
3260     if (getLexer().isNot(AsmToken::RParen))
3261       return true;
3262     Parser.Lex();
3263     return false;
3264   }
3265 
3266   if (getLexer().isNot(AsmToken::Comma))
3267     return true;
3268   Parser.Lex();
3269 
3270   assert(Msg.Id == ID_GS || Msg.Id == ID_GS_DONE || Msg.Id == ID_SYSMSG);
3271   Operation.Id = ID_UNKNOWN_;
3272   if (getLexer().is(AsmToken::Identifier)) {
3273     Operation.IsSymbolic = true;
3274     const char* const *S = (Msg.Id == ID_SYSMSG) ? OpSysSymbolic : OpGsSymbolic;
3275     const int F = (Msg.Id == ID_SYSMSG) ? OP_SYS_FIRST_ : OP_GS_FIRST_;
3276     const int L = (Msg.Id == ID_SYSMSG) ? OP_SYS_LAST_ : OP_GS_LAST_;
3277     const StringRef Tok = Parser.getTok().getString();
3278     for (int i = F; i < L; ++i) {
3279       if (Tok == S[i]) {
3280         Operation.Id = i;
3281         break;
3282       }
3283     }
3284     Parser.Lex();
3285   } else {
3286     Operation.IsSymbolic = false;
3287     if (getLexer().isNot(AsmToken::Integer))
3288       return true;
3289     if (getParser().parseAbsoluteExpression(Operation.Id))
3290       return true;
3291   }
3292 
3293   if ((Msg.Id == ID_GS || Msg.Id == ID_GS_DONE) && Operation.Id != OP_GS_NOP) {
3294     // Stream id is optional.
3295     if (getLexer().is(AsmToken::RParen)) {
3296       Parser.Lex();
3297       return false;
3298     }
3299 
3300     if (getLexer().isNot(AsmToken::Comma))
3301       return true;
3302     Parser.Lex();
3303 
3304     if (getLexer().isNot(AsmToken::Integer))
3305       return true;
3306     if (getParser().parseAbsoluteExpression(StreamId))
3307       return true;
3308   }
3309 
3310   if (getLexer().isNot(AsmToken::RParen))
3311     return true;
3312   Parser.Lex();
3313   return false;
3314 }
3315 
3316 OperandMatchResultTy AMDGPUAsmParser::parseInterpSlot(OperandVector &Operands) {
3317   if (getLexer().getKind() != AsmToken::Identifier)
3318     return MatchOperand_NoMatch;
3319 
3320   StringRef Str = Parser.getTok().getString();
3321   int Slot = StringSwitch<int>(Str)
3322     .Case("p10", 0)
3323     .Case("p20", 1)
3324     .Case("p0", 2)
3325     .Default(-1);
3326 
3327   SMLoc S = Parser.getTok().getLoc();
3328   if (Slot == -1)
3329     return MatchOperand_ParseFail;
3330 
3331   Parser.Lex();
3332   Operands.push_back(AMDGPUOperand::CreateImm(this, Slot, S,
3333                                               AMDGPUOperand::ImmTyInterpSlot));
3334   return MatchOperand_Success;
3335 }
3336 
3337 OperandMatchResultTy AMDGPUAsmParser::parseInterpAttr(OperandVector &Operands) {
3338   if (getLexer().getKind() != AsmToken::Identifier)
3339     return MatchOperand_NoMatch;
3340 
3341   StringRef Str = Parser.getTok().getString();
3342   if (!Str.startswith("attr"))
3343     return MatchOperand_NoMatch;
3344 
3345   StringRef Chan = Str.take_back(2);
3346   int AttrChan = StringSwitch<int>(Chan)
3347     .Case(".x", 0)
3348     .Case(".y", 1)
3349     .Case(".z", 2)
3350     .Case(".w", 3)
3351     .Default(-1);
3352   if (AttrChan == -1)
3353     return MatchOperand_ParseFail;
3354 
3355   Str = Str.drop_back(2).drop_front(4);
3356 
3357   uint8_t Attr;
3358   if (Str.getAsInteger(10, Attr))
3359     return MatchOperand_ParseFail;
3360 
3361   SMLoc S = Parser.getTok().getLoc();
3362   Parser.Lex();
3363   if (Attr > 63) {
3364     Error(S, "out of bounds attr");
3365     return MatchOperand_Success;
3366   }
3367 
3368   SMLoc SChan = SMLoc::getFromPointer(Chan.data());
3369 
3370   Operands.push_back(AMDGPUOperand::CreateImm(this, Attr, S,
3371                                               AMDGPUOperand::ImmTyInterpAttr));
3372   Operands.push_back(AMDGPUOperand::CreateImm(this, AttrChan, SChan,
3373                                               AMDGPUOperand::ImmTyAttrChan));
3374   return MatchOperand_Success;
3375 }
3376 
3377 void AMDGPUAsmParser::errorExpTgt() {
3378   Error(Parser.getTok().getLoc(), "invalid exp target");
3379 }
3380 
3381 OperandMatchResultTy AMDGPUAsmParser::parseExpTgtImpl(StringRef Str,
3382                                                       uint8_t &Val) {
3383   if (Str == "null") {
3384     Val = 9;
3385     return MatchOperand_Success;
3386   }
3387 
3388   if (Str.startswith("mrt")) {
3389     Str = Str.drop_front(3);
3390     if (Str == "z") { // == mrtz
3391       Val = 8;
3392       return MatchOperand_Success;
3393     }
3394 
3395     if (Str.getAsInteger(10, Val))
3396       return MatchOperand_ParseFail;
3397 
3398     if (Val > 7)
3399       errorExpTgt();
3400 
3401     return MatchOperand_Success;
3402   }
3403 
3404   if (Str.startswith("pos")) {
3405     Str = Str.drop_front(3);
3406     if (Str.getAsInteger(10, Val))
3407       return MatchOperand_ParseFail;
3408 
3409     if (Val > 3)
3410       errorExpTgt();
3411 
3412     Val += 12;
3413     return MatchOperand_Success;
3414   }
3415 
3416   if (Str.startswith("param")) {
3417     Str = Str.drop_front(5);
3418     if (Str.getAsInteger(10, Val))
3419       return MatchOperand_ParseFail;
3420 
3421     if (Val >= 32)
3422       errorExpTgt();
3423 
3424     Val += 32;
3425     return MatchOperand_Success;
3426   }
3427 
3428   if (Str.startswith("invalid_target_")) {
3429     Str = Str.drop_front(15);
3430     if (Str.getAsInteger(10, Val))
3431       return MatchOperand_ParseFail;
3432 
3433     errorExpTgt();
3434     return MatchOperand_Success;
3435   }
3436 
3437   return MatchOperand_NoMatch;
3438 }
3439 
3440 OperandMatchResultTy AMDGPUAsmParser::parseExpTgt(OperandVector &Operands) {
3441   uint8_t Val;
3442   StringRef Str = Parser.getTok().getString();
3443 
3444   auto Res = parseExpTgtImpl(Str, Val);
3445   if (Res != MatchOperand_Success)
3446     return Res;
3447 
3448   SMLoc S = Parser.getTok().getLoc();
3449   Parser.Lex();
3450 
3451   Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S,
3452                                               AMDGPUOperand::ImmTyExpTgt));
3453   return MatchOperand_Success;
3454 }
3455 
3456 OperandMatchResultTy
3457 AMDGPUAsmParser::parseSendMsgOp(OperandVector &Operands) {
3458   using namespace llvm::AMDGPU::SendMsg;
3459 
3460   int64_t Imm16Val = 0;
3461   SMLoc S = Parser.getTok().getLoc();
3462 
3463   switch(getLexer().getKind()) {
3464   default:
3465     return MatchOperand_NoMatch;
3466   case AsmToken::Integer:
3467     // The operand can be an integer value.
3468     if (getParser().parseAbsoluteExpression(Imm16Val))
3469       return MatchOperand_NoMatch;
3470     if (Imm16Val < 0 || !isUInt<16>(Imm16Val)) {
3471       Error(S, "invalid immediate: only 16-bit values are legal");
3472       // Do not return error code, but create an imm operand anyway and proceed
3473       // to the next operand, if any. That avoids unneccessary error messages.
3474     }
3475     break;
3476   case AsmToken::Identifier: {
3477       OperandInfoTy Msg(ID_UNKNOWN_);
3478       OperandInfoTy Operation(OP_UNKNOWN_);
3479       int64_t StreamId = STREAM_ID_DEFAULT_;
3480       if (parseSendMsgConstruct(Msg, Operation, StreamId))
3481         return MatchOperand_ParseFail;
3482       do {
3483         // Validate and encode message ID.
3484         if (! ((ID_INTERRUPT <= Msg.Id && Msg.Id <= ID_GS_DONE)
3485                 || Msg.Id == ID_SYSMSG)) {
3486           if (Msg.IsSymbolic)
3487             Error(S, "invalid/unsupported symbolic name of message");
3488           else
3489             Error(S, "invalid/unsupported code of message");
3490           break;
3491         }
3492         Imm16Val = (Msg.Id << ID_SHIFT_);
3493         // Validate and encode operation ID.
3494         if (Msg.Id == ID_GS || Msg.Id == ID_GS_DONE) {
3495           if (! (OP_GS_FIRST_ <= Operation.Id && Operation.Id < OP_GS_LAST_)) {
3496             if (Operation.IsSymbolic)
3497               Error(S, "invalid symbolic name of GS_OP");
3498             else
3499               Error(S, "invalid code of GS_OP: only 2-bit values are legal");
3500             break;
3501           }
3502           if (Operation.Id == OP_GS_NOP
3503               && Msg.Id != ID_GS_DONE) {
3504             Error(S, "invalid GS_OP: NOP is for GS_DONE only");
3505             break;
3506           }
3507           Imm16Val |= (Operation.Id << OP_SHIFT_);
3508         }
3509         if (Msg.Id == ID_SYSMSG) {
3510           if (! (OP_SYS_FIRST_ <= Operation.Id && Operation.Id < OP_SYS_LAST_)) {
3511             if (Operation.IsSymbolic)
3512               Error(S, "invalid/unsupported symbolic name of SYSMSG_OP");
3513             else
3514               Error(S, "invalid/unsupported code of SYSMSG_OP");
3515             break;
3516           }
3517           Imm16Val |= (Operation.Id << OP_SHIFT_);
3518         }
3519         // Validate and encode stream ID.
3520         if ((Msg.Id == ID_GS || Msg.Id == ID_GS_DONE) && Operation.Id != OP_GS_NOP) {
3521           if (! (STREAM_ID_FIRST_ <= StreamId && StreamId < STREAM_ID_LAST_)) {
3522             Error(S, "invalid stream id: only 2-bit values are legal");
3523             break;
3524           }
3525           Imm16Val |= (StreamId << STREAM_ID_SHIFT_);
3526         }
3527       } while (false);
3528     }
3529     break;
3530   }
3531   Operands.push_back(AMDGPUOperand::CreateImm(this, Imm16Val, S, AMDGPUOperand::ImmTySendMsg));
3532   return MatchOperand_Success;
3533 }
3534 
3535 bool AMDGPUOperand::isSendMsg() const {
3536   return isImmTy(ImmTySendMsg);
3537 }
3538 
3539 //===----------------------------------------------------------------------===//
3540 // parser helpers
3541 //===----------------------------------------------------------------------===//
3542 
3543 bool
3544 AMDGPUAsmParser::trySkipId(const StringRef Id) {
3545   if (getLexer().getKind() == AsmToken::Identifier &&
3546       Parser.getTok().getString() == Id) {
3547     Parser.Lex();
3548     return true;
3549   }
3550   return false;
3551 }
3552 
3553 bool
3554 AMDGPUAsmParser::trySkipToken(const AsmToken::TokenKind Kind) {
3555   if (getLexer().getKind() == Kind) {
3556     Parser.Lex();
3557     return true;
3558   }
3559   return false;
3560 }
3561 
3562 bool
3563 AMDGPUAsmParser::skipToken(const AsmToken::TokenKind Kind,
3564                            const StringRef ErrMsg) {
3565   if (!trySkipToken(Kind)) {
3566     Error(Parser.getTok().getLoc(), ErrMsg);
3567     return false;
3568   }
3569   return true;
3570 }
3571 
3572 bool
3573 AMDGPUAsmParser::parseExpr(int64_t &Imm) {
3574   return !getParser().parseAbsoluteExpression(Imm);
3575 }
3576 
3577 bool
3578 AMDGPUAsmParser::parseString(StringRef &Val, const StringRef ErrMsg) {
3579   SMLoc S = Parser.getTok().getLoc();
3580   if (getLexer().getKind() == AsmToken::String) {
3581     Val = Parser.getTok().getStringContents();
3582     Parser.Lex();
3583     return true;
3584   } else {
3585     Error(S, ErrMsg);
3586     return false;
3587   }
3588 }
3589 
3590 //===----------------------------------------------------------------------===//
3591 // swizzle
3592 //===----------------------------------------------------------------------===//
3593 
3594 LLVM_READNONE
3595 static unsigned
3596 encodeBitmaskPerm(const unsigned AndMask,
3597                   const unsigned OrMask,
3598                   const unsigned XorMask) {
3599   using namespace llvm::AMDGPU::Swizzle;
3600 
3601   return BITMASK_PERM_ENC |
3602          (AndMask << BITMASK_AND_SHIFT) |
3603          (OrMask  << BITMASK_OR_SHIFT)  |
3604          (XorMask << BITMASK_XOR_SHIFT);
3605 }
3606 
3607 bool
3608 AMDGPUAsmParser::parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
3609                                       const unsigned MinVal,
3610                                       const unsigned MaxVal,
3611                                       const StringRef ErrMsg) {
3612   for (unsigned i = 0; i < OpNum; ++i) {
3613     if (!skipToken(AsmToken::Comma, "expected a comma")){
3614       return false;
3615     }
3616     SMLoc ExprLoc = Parser.getTok().getLoc();
3617     if (!parseExpr(Op[i])) {
3618       return false;
3619     }
3620     if (Op[i] < MinVal || Op[i] > MaxVal) {
3621       Error(ExprLoc, ErrMsg);
3622       return false;
3623     }
3624   }
3625 
3626   return true;
3627 }
3628 
3629 bool
3630 AMDGPUAsmParser::parseSwizzleQuadPerm(int64_t &Imm) {
3631   using namespace llvm::AMDGPU::Swizzle;
3632 
3633   int64_t Lane[LANE_NUM];
3634   if (parseSwizzleOperands(LANE_NUM, Lane, 0, LANE_MAX,
3635                            "expected a 2-bit lane id")) {
3636     Imm = QUAD_PERM_ENC;
3637     for (auto i = 0; i < LANE_NUM; ++i) {
3638       Imm |= Lane[i] << (LANE_SHIFT * i);
3639     }
3640     return true;
3641   }
3642   return false;
3643 }
3644 
3645 bool
3646 AMDGPUAsmParser::parseSwizzleBroadcast(int64_t &Imm) {
3647   using namespace llvm::AMDGPU::Swizzle;
3648 
3649   SMLoc S = Parser.getTok().getLoc();
3650   int64_t GroupSize;
3651   int64_t LaneIdx;
3652 
3653   if (!parseSwizzleOperands(1, &GroupSize,
3654                             2, 32,
3655                             "group size must be in the interval [2,32]")) {
3656     return false;
3657   }
3658   if (!isPowerOf2_64(GroupSize)) {
3659     Error(S, "group size must be a power of two");
3660     return false;
3661   }
3662   if (parseSwizzleOperands(1, &LaneIdx,
3663                            0, GroupSize - 1,
3664                            "lane id must be in the interval [0,group size - 1]")) {
3665     Imm = encodeBitmaskPerm(BITMASK_MAX - GroupSize + 1, LaneIdx, 0);
3666     return true;
3667   }
3668   return false;
3669 }
3670 
3671 bool
3672 AMDGPUAsmParser::parseSwizzleReverse(int64_t &Imm) {
3673   using namespace llvm::AMDGPU::Swizzle;
3674 
3675   SMLoc S = Parser.getTok().getLoc();
3676   int64_t GroupSize;
3677 
3678   if (!parseSwizzleOperands(1, &GroupSize,
3679       2, 32, "group size must be in the interval [2,32]")) {
3680     return false;
3681   }
3682   if (!isPowerOf2_64(GroupSize)) {
3683     Error(S, "group size must be a power of two");
3684     return false;
3685   }
3686 
3687   Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize - 1);
3688   return true;
3689 }
3690 
3691 bool
3692 AMDGPUAsmParser::parseSwizzleSwap(int64_t &Imm) {
3693   using namespace llvm::AMDGPU::Swizzle;
3694 
3695   SMLoc S = Parser.getTok().getLoc();
3696   int64_t GroupSize;
3697 
3698   if (!parseSwizzleOperands(1, &GroupSize,
3699       1, 16, "group size must be in the interval [1,16]")) {
3700     return false;
3701   }
3702   if (!isPowerOf2_64(GroupSize)) {
3703     Error(S, "group size must be a power of two");
3704     return false;
3705   }
3706 
3707   Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize);
3708   return true;
3709 }
3710 
3711 bool
3712 AMDGPUAsmParser::parseSwizzleBitmaskPerm(int64_t &Imm) {
3713   using namespace llvm::AMDGPU::Swizzle;
3714 
3715   if (!skipToken(AsmToken::Comma, "expected a comma")) {
3716     return false;
3717   }
3718 
3719   StringRef Ctl;
3720   SMLoc StrLoc = Parser.getTok().getLoc();
3721   if (!parseString(Ctl)) {
3722     return false;
3723   }
3724   if (Ctl.size() != BITMASK_WIDTH) {
3725     Error(StrLoc, "expected a 5-character mask");
3726     return false;
3727   }
3728 
3729   unsigned AndMask = 0;
3730   unsigned OrMask = 0;
3731   unsigned XorMask = 0;
3732 
3733   for (size_t i = 0; i < Ctl.size(); ++i) {
3734     unsigned Mask = 1 << (BITMASK_WIDTH - 1 - i);
3735     switch(Ctl[i]) {
3736     default:
3737       Error(StrLoc, "invalid mask");
3738       return false;
3739     case '0':
3740       break;
3741     case '1':
3742       OrMask |= Mask;
3743       break;
3744     case 'p':
3745       AndMask |= Mask;
3746       break;
3747     case 'i':
3748       AndMask |= Mask;
3749       XorMask |= Mask;
3750       break;
3751     }
3752   }
3753 
3754   Imm = encodeBitmaskPerm(AndMask, OrMask, XorMask);
3755   return true;
3756 }
3757 
3758 bool
3759 AMDGPUAsmParser::parseSwizzleOffset(int64_t &Imm) {
3760 
3761   SMLoc OffsetLoc = Parser.getTok().getLoc();
3762 
3763   if (!parseExpr(Imm)) {
3764     return false;
3765   }
3766   if (!isUInt<16>(Imm)) {
3767     Error(OffsetLoc, "expected a 16-bit offset");
3768     return false;
3769   }
3770   return true;
3771 }
3772 
3773 bool
3774 AMDGPUAsmParser::parseSwizzleMacro(int64_t &Imm) {
3775   using namespace llvm::AMDGPU::Swizzle;
3776 
3777   if (skipToken(AsmToken::LParen, "expected a left parentheses")) {
3778 
3779     SMLoc ModeLoc = Parser.getTok().getLoc();
3780     bool Ok = false;
3781 
3782     if (trySkipId(IdSymbolic[ID_QUAD_PERM])) {
3783       Ok = parseSwizzleQuadPerm(Imm);
3784     } else if (trySkipId(IdSymbolic[ID_BITMASK_PERM])) {
3785       Ok = parseSwizzleBitmaskPerm(Imm);
3786     } else if (trySkipId(IdSymbolic[ID_BROADCAST])) {
3787       Ok = parseSwizzleBroadcast(Imm);
3788     } else if (trySkipId(IdSymbolic[ID_SWAP])) {
3789       Ok = parseSwizzleSwap(Imm);
3790     } else if (trySkipId(IdSymbolic[ID_REVERSE])) {
3791       Ok = parseSwizzleReverse(Imm);
3792     } else {
3793       Error(ModeLoc, "expected a swizzle mode");
3794     }
3795 
3796     return Ok && skipToken(AsmToken::RParen, "expected a closing parentheses");
3797   }
3798 
3799   return false;
3800 }
3801 
3802 OperandMatchResultTy
3803 AMDGPUAsmParser::parseSwizzleOp(OperandVector &Operands) {
3804   SMLoc S = Parser.getTok().getLoc();
3805   int64_t Imm = 0;
3806 
3807   if (trySkipId("offset")) {
3808 
3809     bool Ok = false;
3810     if (skipToken(AsmToken::Colon, "expected a colon")) {
3811       if (trySkipId("swizzle")) {
3812         Ok = parseSwizzleMacro(Imm);
3813       } else {
3814         Ok = parseSwizzleOffset(Imm);
3815       }
3816     }
3817 
3818     Operands.push_back(AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTySwizzle));
3819 
3820     return Ok? MatchOperand_Success : MatchOperand_ParseFail;
3821   } else {
3822     return MatchOperand_NoMatch;
3823   }
3824 }
3825 
3826 bool
3827 AMDGPUOperand::isSwizzle() const {
3828   return isImmTy(ImmTySwizzle);
3829 }
3830 
3831 //===----------------------------------------------------------------------===//
3832 // sopp branch targets
3833 //===----------------------------------------------------------------------===//
3834 
3835 OperandMatchResultTy
3836 AMDGPUAsmParser::parseSOppBrTarget(OperandVector &Operands) {
3837   SMLoc S = Parser.getTok().getLoc();
3838 
3839   switch (getLexer().getKind()) {
3840     default: return MatchOperand_ParseFail;
3841     case AsmToken::Integer: {
3842       int64_t Imm;
3843       if (getParser().parseAbsoluteExpression(Imm))
3844         return MatchOperand_ParseFail;
3845       Operands.push_back(AMDGPUOperand::CreateImm(this, Imm, S));
3846       return MatchOperand_Success;
3847     }
3848 
3849     case AsmToken::Identifier:
3850       Operands.push_back(AMDGPUOperand::CreateExpr(this,
3851           MCSymbolRefExpr::create(getContext().getOrCreateSymbol(
3852                                   Parser.getTok().getString()), getContext()), S));
3853       Parser.Lex();
3854       return MatchOperand_Success;
3855   }
3856 }
3857 
3858 //===----------------------------------------------------------------------===//
3859 // mubuf
3860 //===----------------------------------------------------------------------===//
3861 
3862 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultGLC() const {
3863   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyGLC);
3864 }
3865 
3866 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSLC() const {
3867   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTySLC);
3868 }
3869 
3870 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultTFE() const {
3871   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyTFE);
3872 }
3873 
3874 void AMDGPUAsmParser::cvtMubufImpl(MCInst &Inst,
3875                                const OperandVector &Operands,
3876                                bool IsAtomic, bool IsAtomicReturn) {
3877   OptionalImmIndexMap OptionalIdx;
3878   assert(IsAtomicReturn ? IsAtomic : true);
3879 
3880   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
3881     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
3882 
3883     // Add the register arguments
3884     if (Op.isReg()) {
3885       Op.addRegOperands(Inst, 1);
3886       continue;
3887     }
3888 
3889     // Handle the case where soffset is an immediate
3890     if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
3891       Op.addImmOperands(Inst, 1);
3892       continue;
3893     }
3894 
3895     // Handle tokens like 'offen' which are sometimes hard-coded into the
3896     // asm string.  There are no MCInst operands for these.
3897     if (Op.isToken()) {
3898       continue;
3899     }
3900     assert(Op.isImm());
3901 
3902     // Handle optional arguments
3903     OptionalIdx[Op.getImmTy()] = i;
3904   }
3905 
3906   // Copy $vdata_in operand and insert as $vdata for MUBUF_Atomic RTN insns.
3907   if (IsAtomicReturn) {
3908     MCInst::iterator I = Inst.begin(); // $vdata_in is always at the beginning.
3909     Inst.insert(I, *I);
3910   }
3911 
3912   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset);
3913   if (!IsAtomic) { // glc is hard-coded.
3914     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC);
3915   }
3916   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC);
3917   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
3918 }
3919 
3920 void AMDGPUAsmParser::cvtMtbuf(MCInst &Inst, const OperandVector &Operands) {
3921   OptionalImmIndexMap OptionalIdx;
3922 
3923   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
3924     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
3925 
3926     // Add the register arguments
3927     if (Op.isReg()) {
3928       Op.addRegOperands(Inst, 1);
3929       continue;
3930     }
3931 
3932     // Handle the case where soffset is an immediate
3933     if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
3934       Op.addImmOperands(Inst, 1);
3935       continue;
3936     }
3937 
3938     // Handle tokens like 'offen' which are sometimes hard-coded into the
3939     // asm string.  There are no MCInst operands for these.
3940     if (Op.isToken()) {
3941       continue;
3942     }
3943     assert(Op.isImm());
3944 
3945     // Handle optional arguments
3946     OptionalIdx[Op.getImmTy()] = i;
3947   }
3948 
3949   addOptionalImmOperand(Inst, Operands, OptionalIdx,
3950                         AMDGPUOperand::ImmTyOffset);
3951   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDFMT);
3952   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyNFMT);
3953   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC);
3954   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC);
3955   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
3956 }
3957 
3958 //===----------------------------------------------------------------------===//
3959 // mimg
3960 //===----------------------------------------------------------------------===//
3961 
3962 void AMDGPUAsmParser::cvtMIMG(MCInst &Inst, const OperandVector &Operands,
3963                               bool IsAtomic) {
3964   unsigned I = 1;
3965   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
3966   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
3967     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
3968   }
3969 
3970   if (IsAtomic) {
3971     // Add src, same as dst
3972     ((AMDGPUOperand &)*Operands[I]).addRegOperands(Inst, 1);
3973   }
3974 
3975   OptionalImmIndexMap OptionalIdx;
3976 
3977   for (unsigned E = Operands.size(); I != E; ++I) {
3978     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
3979 
3980     // Add the register arguments
3981     if (Op.isRegOrImm()) {
3982       Op.addRegOrImmOperands(Inst, 1);
3983       continue;
3984     } else if (Op.isImmModifier()) {
3985       OptionalIdx[Op.getImmTy()] = I;
3986     } else {
3987       llvm_unreachable("unexpected operand type");
3988     }
3989   }
3990 
3991   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDMask);
3992   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyUNorm);
3993   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC);
3994   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDA);
3995   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyR128);
3996   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
3997   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyLWE);
3998   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC);
3999 }
4000 
4001 void AMDGPUAsmParser::cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands) {
4002   cvtMIMG(Inst, Operands, true);
4003 }
4004 
4005 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultDMask() const {
4006   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDMask);
4007 }
4008 
4009 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultUNorm() const {
4010   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyUNorm);
4011 }
4012 
4013 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultDA() const {
4014   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDA);
4015 }
4016 
4017 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultR128() const {
4018   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyR128);
4019 }
4020 
4021 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultLWE() const {
4022   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyLWE);
4023 }
4024 
4025 //===----------------------------------------------------------------------===//
4026 // smrd
4027 //===----------------------------------------------------------------------===//
4028 
4029 bool AMDGPUOperand::isSMRDOffset8() const {
4030   return isImm() && isUInt<8>(getImm());
4031 }
4032 
4033 bool AMDGPUOperand::isSMRDOffset20() const {
4034   return isImm() && isUInt<20>(getImm());
4035 }
4036 
4037 bool AMDGPUOperand::isSMRDLiteralOffset() const {
4038   // 32-bit literals are only supported on CI and we only want to use them
4039   // when the offset is > 8-bits.
4040   return isImm() && !isUInt<8>(getImm()) && isUInt<32>(getImm());
4041 }
4042 
4043 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDOffset8() const {
4044   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
4045 }
4046 
4047 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDOffset20() const {
4048   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
4049 }
4050 
4051 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDLiteralOffset() const {
4052   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
4053 }
4054 
4055 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultOffsetU12() const {
4056   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
4057 }
4058 
4059 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultOffsetS13() const {
4060   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
4061 }
4062 
4063 //===----------------------------------------------------------------------===//
4064 // vop3
4065 //===----------------------------------------------------------------------===//
4066 
4067 static bool ConvertOmodMul(int64_t &Mul) {
4068   if (Mul != 1 && Mul != 2 && Mul != 4)
4069     return false;
4070 
4071   Mul >>= 1;
4072   return true;
4073 }
4074 
4075 static bool ConvertOmodDiv(int64_t &Div) {
4076   if (Div == 1) {
4077     Div = 0;
4078     return true;
4079   }
4080 
4081   if (Div == 2) {
4082     Div = 3;
4083     return true;
4084   }
4085 
4086   return false;
4087 }
4088 
4089 static bool ConvertBoundCtrl(int64_t &BoundCtrl) {
4090   if (BoundCtrl == 0) {
4091     BoundCtrl = 1;
4092     return true;
4093   }
4094 
4095   if (BoundCtrl == -1) {
4096     BoundCtrl = 0;
4097     return true;
4098   }
4099 
4100   return false;
4101 }
4102 
4103 // Note: the order in this table matches the order of operands in AsmString.
4104 static const OptionalOperand AMDGPUOptionalOperandTable[] = {
4105   {"offen",   AMDGPUOperand::ImmTyOffen, true, nullptr},
4106   {"idxen",   AMDGPUOperand::ImmTyIdxen, true, nullptr},
4107   {"addr64",  AMDGPUOperand::ImmTyAddr64, true, nullptr},
4108   {"offset0", AMDGPUOperand::ImmTyOffset0, false, nullptr},
4109   {"offset1", AMDGPUOperand::ImmTyOffset1, false, nullptr},
4110   {"gds",     AMDGPUOperand::ImmTyGDS, true, nullptr},
4111   {"offset",  AMDGPUOperand::ImmTyOffset, false, nullptr},
4112   {"inst_offset", AMDGPUOperand::ImmTyInstOffset, false, nullptr},
4113   {"dfmt",    AMDGPUOperand::ImmTyDFMT, false, nullptr},
4114   {"nfmt",    AMDGPUOperand::ImmTyNFMT, false, nullptr},
4115   {"glc",     AMDGPUOperand::ImmTyGLC, true, nullptr},
4116   {"slc",     AMDGPUOperand::ImmTySLC, true, nullptr},
4117   {"tfe",     AMDGPUOperand::ImmTyTFE, true, nullptr},
4118   {"high",    AMDGPUOperand::ImmTyHigh, true, nullptr},
4119   {"clamp",   AMDGPUOperand::ImmTyClampSI, true, nullptr},
4120   {"omod",    AMDGPUOperand::ImmTyOModSI, false, ConvertOmodMul},
4121   {"unorm",   AMDGPUOperand::ImmTyUNorm, true, nullptr},
4122   {"da",      AMDGPUOperand::ImmTyDA,    true, nullptr},
4123   {"r128",    AMDGPUOperand::ImmTyR128,  true, nullptr},
4124   {"lwe",     AMDGPUOperand::ImmTyLWE,   true, nullptr},
4125   {"dmask",   AMDGPUOperand::ImmTyDMask, false, nullptr},
4126   {"row_mask",   AMDGPUOperand::ImmTyDppRowMask, false, nullptr},
4127   {"bank_mask",  AMDGPUOperand::ImmTyDppBankMask, false, nullptr},
4128   {"bound_ctrl", AMDGPUOperand::ImmTyDppBoundCtrl, false, ConvertBoundCtrl},
4129   {"dst_sel",    AMDGPUOperand::ImmTySdwaDstSel, false, nullptr},
4130   {"src0_sel",   AMDGPUOperand::ImmTySdwaSrc0Sel, false, nullptr},
4131   {"src1_sel",   AMDGPUOperand::ImmTySdwaSrc1Sel, false, nullptr},
4132   {"dst_unused", AMDGPUOperand::ImmTySdwaDstUnused, false, nullptr},
4133   {"compr", AMDGPUOperand::ImmTyExpCompr, true, nullptr },
4134   {"vm", AMDGPUOperand::ImmTyExpVM, true, nullptr},
4135   {"op_sel", AMDGPUOperand::ImmTyOpSel, false, nullptr},
4136   {"op_sel_hi", AMDGPUOperand::ImmTyOpSelHi, false, nullptr},
4137   {"neg_lo", AMDGPUOperand::ImmTyNegLo, false, nullptr},
4138   {"neg_hi", AMDGPUOperand::ImmTyNegHi, false, nullptr}
4139 };
4140 
4141 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOperand(OperandVector &Operands) {
4142   OperandMatchResultTy res;
4143   for (const OptionalOperand &Op : AMDGPUOptionalOperandTable) {
4144     // try to parse any optional operand here
4145     if (Op.IsBit) {
4146       res = parseNamedBit(Op.Name, Operands, Op.Type);
4147     } else if (Op.Type == AMDGPUOperand::ImmTyOModSI) {
4148       res = parseOModOperand(Operands);
4149     } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstSel ||
4150                Op.Type == AMDGPUOperand::ImmTySdwaSrc0Sel ||
4151                Op.Type == AMDGPUOperand::ImmTySdwaSrc1Sel) {
4152       res = parseSDWASel(Operands, Op.Name, Op.Type);
4153     } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstUnused) {
4154       res = parseSDWADstUnused(Operands);
4155     } else if (Op.Type == AMDGPUOperand::ImmTyOpSel ||
4156                Op.Type == AMDGPUOperand::ImmTyOpSelHi ||
4157                Op.Type == AMDGPUOperand::ImmTyNegLo ||
4158                Op.Type == AMDGPUOperand::ImmTyNegHi) {
4159       res = parseOperandArrayWithPrefix(Op.Name, Operands, Op.Type,
4160                                         Op.ConvertResult);
4161     } else {
4162       res = parseIntWithPrefix(Op.Name, Operands, Op.Type, Op.ConvertResult);
4163     }
4164     if (res != MatchOperand_NoMatch) {
4165       return res;
4166     }
4167   }
4168   return MatchOperand_NoMatch;
4169 }
4170 
4171 OperandMatchResultTy AMDGPUAsmParser::parseOModOperand(OperandVector &Operands) {
4172   StringRef Name = Parser.getTok().getString();
4173   if (Name == "mul") {
4174     return parseIntWithPrefix("mul", Operands,
4175                               AMDGPUOperand::ImmTyOModSI, ConvertOmodMul);
4176   }
4177 
4178   if (Name == "div") {
4179     return parseIntWithPrefix("div", Operands,
4180                               AMDGPUOperand::ImmTyOModSI, ConvertOmodDiv);
4181   }
4182 
4183   return MatchOperand_NoMatch;
4184 }
4185 
4186 void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands) {
4187   cvtVOP3P(Inst, Operands);
4188 
4189   int Opc = Inst.getOpcode();
4190 
4191   int SrcNum;
4192   const int Ops[] = { AMDGPU::OpName::src0,
4193                       AMDGPU::OpName::src1,
4194                       AMDGPU::OpName::src2 };
4195   for (SrcNum = 0;
4196        SrcNum < 3 && AMDGPU::getNamedOperandIdx(Opc, Ops[SrcNum]) != -1;
4197        ++SrcNum);
4198   assert(SrcNum > 0);
4199 
4200   int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
4201   unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
4202 
4203   if ((OpSel & (1 << SrcNum)) != 0) {
4204     int ModIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers);
4205     uint32_t ModVal = Inst.getOperand(ModIdx).getImm();
4206     Inst.getOperand(ModIdx).setImm(ModVal | SISrcMods::DST_OP_SEL);
4207   }
4208 }
4209 
4210 static bool isRegOrImmWithInputMods(const MCInstrDesc &Desc, unsigned OpNum) {
4211       // 1. This operand is input modifiers
4212   return Desc.OpInfo[OpNum].OperandType == AMDGPU::OPERAND_INPUT_MODS
4213       // 2. This is not last operand
4214       && Desc.NumOperands > (OpNum + 1)
4215       // 3. Next operand is register class
4216       && Desc.OpInfo[OpNum + 1].RegClass != -1
4217       // 4. Next register is not tied to any other operand
4218       && Desc.getOperandConstraint(OpNum + 1, MCOI::OperandConstraint::TIED_TO) == -1;
4219 }
4220 
4221 void AMDGPUAsmParser::cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands)
4222 {
4223   OptionalImmIndexMap OptionalIdx;
4224   unsigned Opc = Inst.getOpcode();
4225 
4226   unsigned I = 1;
4227   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
4228   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
4229     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
4230   }
4231 
4232   for (unsigned E = Operands.size(); I != E; ++I) {
4233     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
4234     if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
4235       Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
4236     } else if (Op.isInterpSlot() ||
4237                Op.isInterpAttr() ||
4238                Op.isAttrChan()) {
4239       Inst.addOperand(MCOperand::createImm(Op.Imm.Val));
4240     } else if (Op.isImmModifier()) {
4241       OptionalIdx[Op.getImmTy()] = I;
4242     } else {
4243       llvm_unreachable("unhandled operand type");
4244     }
4245   }
4246 
4247   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::high) != -1) {
4248     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyHigh);
4249   }
4250 
4251   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) {
4252     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI);
4253   }
4254 
4255   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) {
4256     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI);
4257   }
4258 }
4259 
4260 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands,
4261                               OptionalImmIndexMap &OptionalIdx) {
4262   unsigned Opc = Inst.getOpcode();
4263 
4264   unsigned I = 1;
4265   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
4266   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
4267     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
4268   }
4269 
4270   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers) != -1) {
4271     // This instruction has src modifiers
4272     for (unsigned E = Operands.size(); I != E; ++I) {
4273       AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
4274       if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
4275         Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
4276       } else if (Op.isImmModifier()) {
4277         OptionalIdx[Op.getImmTy()] = I;
4278       } else if (Op.isRegOrImm()) {
4279         Op.addRegOrImmOperands(Inst, 1);
4280       } else {
4281         llvm_unreachable("unhandled operand type");
4282       }
4283     }
4284   } else {
4285     // No src modifiers
4286     for (unsigned E = Operands.size(); I != E; ++I) {
4287       AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
4288       if (Op.isMod()) {
4289         OptionalIdx[Op.getImmTy()] = I;
4290       } else {
4291         Op.addRegOrImmOperands(Inst, 1);
4292       }
4293     }
4294   }
4295 
4296   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) {
4297     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI);
4298   }
4299 
4300   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) {
4301     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI);
4302   }
4303 
4304   // special case v_mac_{f16, f32}:
4305   // it has src2 register operand that is tied to dst operand
4306   // we don't allow modifiers for this operand in assembler so src2_modifiers
4307   // should be 0
4308   if (Opc == AMDGPU::V_MAC_F32_e64_si || Opc == AMDGPU::V_MAC_F32_e64_vi ||
4309       Opc == AMDGPU::V_MAC_F16_e64_vi) {
4310     auto it = Inst.begin();
4311     std::advance(it, AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2_modifiers));
4312     it = Inst.insert(it, MCOperand::createImm(0)); // no modifiers for src2
4313     ++it;
4314     Inst.insert(it, Inst.getOperand(0)); // src2 = dst
4315   }
4316 }
4317 
4318 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands) {
4319   OptionalImmIndexMap OptionalIdx;
4320   cvtVOP3(Inst, Operands, OptionalIdx);
4321 }
4322 
4323 void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst,
4324                                const OperandVector &Operands) {
4325   OptionalImmIndexMap OptIdx;
4326   const int Opc = Inst.getOpcode();
4327   const MCInstrDesc &Desc = MII.get(Opc);
4328 
4329   const bool IsPacked = (Desc.TSFlags & SIInstrFlags::IsPacked) != 0;
4330 
4331   cvtVOP3(Inst, Operands, OptIdx);
4332 
4333   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst_in) != -1) {
4334     assert(!IsPacked);
4335     Inst.addOperand(Inst.getOperand(0));
4336   }
4337 
4338   // FIXME: This is messy. Parse the modifiers as if it was a normal VOP3
4339   // instruction, and then figure out where to actually put the modifiers
4340 
4341   addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSel);
4342 
4343   int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi);
4344   if (OpSelHiIdx != -1) {
4345     int DefaultVal = IsPacked ? -1 : 0;
4346     addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSelHi,
4347                           DefaultVal);
4348   }
4349 
4350   int NegLoIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_lo);
4351   if (NegLoIdx != -1) {
4352     assert(IsPacked);
4353     addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegLo);
4354     addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegHi);
4355   }
4356 
4357   const int Ops[] = { AMDGPU::OpName::src0,
4358                       AMDGPU::OpName::src1,
4359                       AMDGPU::OpName::src2 };
4360   const int ModOps[] = { AMDGPU::OpName::src0_modifiers,
4361                          AMDGPU::OpName::src1_modifiers,
4362                          AMDGPU::OpName::src2_modifiers };
4363 
4364   int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
4365 
4366   unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
4367   unsigned OpSelHi = 0;
4368   unsigned NegLo = 0;
4369   unsigned NegHi = 0;
4370 
4371   if (OpSelHiIdx != -1) {
4372     OpSelHi = Inst.getOperand(OpSelHiIdx).getImm();
4373   }
4374 
4375   if (NegLoIdx != -1) {
4376     int NegHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_hi);
4377     NegLo = Inst.getOperand(NegLoIdx).getImm();
4378     NegHi = Inst.getOperand(NegHiIdx).getImm();
4379   }
4380 
4381   for (int J = 0; J < 3; ++J) {
4382     int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]);
4383     if (OpIdx == -1)
4384       break;
4385 
4386     uint32_t ModVal = 0;
4387 
4388     if ((OpSel & (1 << J)) != 0)
4389       ModVal |= SISrcMods::OP_SEL_0;
4390 
4391     if ((OpSelHi & (1 << J)) != 0)
4392       ModVal |= SISrcMods::OP_SEL_1;
4393 
4394     if ((NegLo & (1 << J)) != 0)
4395       ModVal |= SISrcMods::NEG;
4396 
4397     if ((NegHi & (1 << J)) != 0)
4398       ModVal |= SISrcMods::NEG_HI;
4399 
4400     int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]);
4401 
4402     Inst.getOperand(ModIdx).setImm(Inst.getOperand(ModIdx).getImm() | ModVal);
4403   }
4404 }
4405 
4406 //===----------------------------------------------------------------------===//
4407 // dpp
4408 //===----------------------------------------------------------------------===//
4409 
4410 bool AMDGPUOperand::isDPPCtrl() const {
4411   bool result = isImm() && getImmTy() == ImmTyDppCtrl && isUInt<9>(getImm());
4412   if (result) {
4413     int64_t Imm = getImm();
4414     return ((Imm >= 0x000) && (Imm <= 0x0ff)) ||
4415            ((Imm >= 0x101) && (Imm <= 0x10f)) ||
4416            ((Imm >= 0x111) && (Imm <= 0x11f)) ||
4417            ((Imm >= 0x121) && (Imm <= 0x12f)) ||
4418            (Imm == 0x130) ||
4419            (Imm == 0x134) ||
4420            (Imm == 0x138) ||
4421            (Imm == 0x13c) ||
4422            (Imm == 0x140) ||
4423            (Imm == 0x141) ||
4424            (Imm == 0x142) ||
4425            (Imm == 0x143);
4426   }
4427   return false;
4428 }
4429 
4430 bool AMDGPUOperand::isGPRIdxMode() const {
4431   return isImm() && isUInt<4>(getImm());
4432 }
4433 
4434 bool AMDGPUOperand::isS16Imm() const {
4435   return isImm() && (isInt<16>(getImm()) || isUInt<16>(getImm()));
4436 }
4437 
4438 bool AMDGPUOperand::isU16Imm() const {
4439   return isImm() && isUInt<16>(getImm());
4440 }
4441 
4442 OperandMatchResultTy
4443 AMDGPUAsmParser::parseDPPCtrl(OperandVector &Operands) {
4444   SMLoc S = Parser.getTok().getLoc();
4445   StringRef Prefix;
4446   int64_t Int;
4447 
4448   if (getLexer().getKind() == AsmToken::Identifier) {
4449     Prefix = Parser.getTok().getString();
4450   } else {
4451     return MatchOperand_NoMatch;
4452   }
4453 
4454   if (Prefix == "row_mirror") {
4455     Int = 0x140;
4456     Parser.Lex();
4457   } else if (Prefix == "row_half_mirror") {
4458     Int = 0x141;
4459     Parser.Lex();
4460   } else {
4461     // Check to prevent parseDPPCtrlOps from eating invalid tokens
4462     if (Prefix != "quad_perm"
4463         && Prefix != "row_shl"
4464         && Prefix != "row_shr"
4465         && Prefix != "row_ror"
4466         && Prefix != "wave_shl"
4467         && Prefix != "wave_rol"
4468         && Prefix != "wave_shr"
4469         && Prefix != "wave_ror"
4470         && Prefix != "row_bcast") {
4471       return MatchOperand_NoMatch;
4472     }
4473 
4474     Parser.Lex();
4475     if (getLexer().isNot(AsmToken::Colon))
4476       return MatchOperand_ParseFail;
4477 
4478     if (Prefix == "quad_perm") {
4479       // quad_perm:[%d,%d,%d,%d]
4480       Parser.Lex();
4481       if (getLexer().isNot(AsmToken::LBrac))
4482         return MatchOperand_ParseFail;
4483       Parser.Lex();
4484 
4485       if (getParser().parseAbsoluteExpression(Int) || !(0 <= Int && Int <=3))
4486         return MatchOperand_ParseFail;
4487 
4488       for (int i = 0; i < 3; ++i) {
4489         if (getLexer().isNot(AsmToken::Comma))
4490           return MatchOperand_ParseFail;
4491         Parser.Lex();
4492 
4493         int64_t Temp;
4494         if (getParser().parseAbsoluteExpression(Temp) || !(0 <= Temp && Temp <=3))
4495           return MatchOperand_ParseFail;
4496         const int shift = i*2 + 2;
4497         Int += (Temp << shift);
4498       }
4499 
4500       if (getLexer().isNot(AsmToken::RBrac))
4501         return MatchOperand_ParseFail;
4502       Parser.Lex();
4503     } else {
4504       // sel:%d
4505       Parser.Lex();
4506       if (getParser().parseAbsoluteExpression(Int))
4507         return MatchOperand_ParseFail;
4508 
4509       if (Prefix == "row_shl" && 1 <= Int && Int <= 15) {
4510         Int |= 0x100;
4511       } else if (Prefix == "row_shr" && 1 <= Int && Int <= 15) {
4512         Int |= 0x110;
4513       } else if (Prefix == "row_ror" && 1 <= Int && Int <= 15) {
4514         Int |= 0x120;
4515       } else if (Prefix == "wave_shl" && 1 == Int) {
4516         Int = 0x130;
4517       } else if (Prefix == "wave_rol" && 1 == Int) {
4518         Int = 0x134;
4519       } else if (Prefix == "wave_shr" && 1 == Int) {
4520         Int = 0x138;
4521       } else if (Prefix == "wave_ror" && 1 == Int) {
4522         Int = 0x13C;
4523       } else if (Prefix == "row_bcast") {
4524         if (Int == 15) {
4525           Int = 0x142;
4526         } else if (Int == 31) {
4527           Int = 0x143;
4528         } else {
4529           return MatchOperand_ParseFail;
4530         }
4531       } else {
4532         return MatchOperand_ParseFail;
4533       }
4534     }
4535   }
4536 
4537   Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, AMDGPUOperand::ImmTyDppCtrl));
4538   return MatchOperand_Success;
4539 }
4540 
4541 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultRowMask() const {
4542   return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppRowMask);
4543 }
4544 
4545 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBankMask() const {
4546   return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppBankMask);
4547 }
4548 
4549 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBoundCtrl() const {
4550   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppBoundCtrl);
4551 }
4552 
4553 void AMDGPUAsmParser::cvtDPP(MCInst &Inst, const OperandVector &Operands) {
4554   OptionalImmIndexMap OptionalIdx;
4555 
4556   unsigned I = 1;
4557   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
4558   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
4559     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
4560   }
4561 
4562   // All DPP instructions with at least one source operand have a fake "old"
4563   // source at the beginning that's tied to the dst operand. Handle it here.
4564   if (Desc.getNumOperands() >= 2)
4565     Inst.addOperand(Inst.getOperand(0));
4566 
4567   for (unsigned E = Operands.size(); I != E; ++I) {
4568     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
4569     // Add the register arguments
4570     if (Op.isReg() && Op.Reg.RegNo == AMDGPU::VCC) {
4571       // VOP2b (v_add_u32, v_sub_u32 ...) dpp use "vcc" token.
4572       // Skip it.
4573       continue;
4574     } if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
4575       Op.addRegWithFPInputModsOperands(Inst, 2);
4576     } else if (Op.isDPPCtrl()) {
4577       Op.addImmOperands(Inst, 1);
4578     } else if (Op.isImm()) {
4579       // Handle optional arguments
4580       OptionalIdx[Op.getImmTy()] = I;
4581     } else {
4582       llvm_unreachable("Invalid operand type");
4583     }
4584   }
4585 
4586   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppRowMask, 0xf);
4587   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBankMask, 0xf);
4588   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBoundCtrl);
4589 }
4590 
4591 //===----------------------------------------------------------------------===//
4592 // sdwa
4593 //===----------------------------------------------------------------------===//
4594 
4595 OperandMatchResultTy
4596 AMDGPUAsmParser::parseSDWASel(OperandVector &Operands, StringRef Prefix,
4597                               AMDGPUOperand::ImmTy Type) {
4598   using namespace llvm::AMDGPU::SDWA;
4599 
4600   SMLoc S = Parser.getTok().getLoc();
4601   StringRef Value;
4602   OperandMatchResultTy res;
4603 
4604   res = parseStringWithPrefix(Prefix, Value);
4605   if (res != MatchOperand_Success) {
4606     return res;
4607   }
4608 
4609   int64_t Int;
4610   Int = StringSwitch<int64_t>(Value)
4611         .Case("BYTE_0", SdwaSel::BYTE_0)
4612         .Case("BYTE_1", SdwaSel::BYTE_1)
4613         .Case("BYTE_2", SdwaSel::BYTE_2)
4614         .Case("BYTE_3", SdwaSel::BYTE_3)
4615         .Case("WORD_0", SdwaSel::WORD_0)
4616         .Case("WORD_1", SdwaSel::WORD_1)
4617         .Case("DWORD", SdwaSel::DWORD)
4618         .Default(0xffffffff);
4619   Parser.Lex(); // eat last token
4620 
4621   if (Int == 0xffffffff) {
4622     return MatchOperand_ParseFail;
4623   }
4624 
4625   Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, Type));
4626   return MatchOperand_Success;
4627 }
4628 
4629 OperandMatchResultTy
4630 AMDGPUAsmParser::parseSDWADstUnused(OperandVector &Operands) {
4631   using namespace llvm::AMDGPU::SDWA;
4632 
4633   SMLoc S = Parser.getTok().getLoc();
4634   StringRef Value;
4635   OperandMatchResultTy res;
4636 
4637   res = parseStringWithPrefix("dst_unused", Value);
4638   if (res != MatchOperand_Success) {
4639     return res;
4640   }
4641 
4642   int64_t Int;
4643   Int = StringSwitch<int64_t>(Value)
4644         .Case("UNUSED_PAD", DstUnused::UNUSED_PAD)
4645         .Case("UNUSED_SEXT", DstUnused::UNUSED_SEXT)
4646         .Case("UNUSED_PRESERVE", DstUnused::UNUSED_PRESERVE)
4647         .Default(0xffffffff);
4648   Parser.Lex(); // eat last token
4649 
4650   if (Int == 0xffffffff) {
4651     return MatchOperand_ParseFail;
4652   }
4653 
4654   Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, AMDGPUOperand::ImmTySdwaDstUnused));
4655   return MatchOperand_Success;
4656 }
4657 
4658 void AMDGPUAsmParser::cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands) {
4659   cvtSDWA(Inst, Operands, SIInstrFlags::VOP1);
4660 }
4661 
4662 void AMDGPUAsmParser::cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands) {
4663   cvtSDWA(Inst, Operands, SIInstrFlags::VOP2);
4664 }
4665 
4666 void AMDGPUAsmParser::cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands) {
4667   cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, true);
4668 }
4669 
4670 void AMDGPUAsmParser::cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands) {
4671   cvtSDWA(Inst, Operands, SIInstrFlags::VOPC, isVI());
4672 }
4673 
4674 void AMDGPUAsmParser::cvtSDWA(MCInst &Inst, const OperandVector &Operands,
4675                               uint64_t BasicInstType, bool skipVcc) {
4676   using namespace llvm::AMDGPU::SDWA;
4677 
4678   OptionalImmIndexMap OptionalIdx;
4679   bool skippedVcc = false;
4680 
4681   unsigned I = 1;
4682   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
4683   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
4684     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
4685   }
4686 
4687   for (unsigned E = Operands.size(); I != E; ++I) {
4688     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
4689     if (skipVcc && !skippedVcc && Op.isReg() && Op.Reg.RegNo == AMDGPU::VCC) {
4690       // VOP2b (v_add_u32, v_sub_u32 ...) sdwa use "vcc" token as dst.
4691       // Skip it if it's 2nd (e.g. v_add_i32_sdwa v1, vcc, v2, v3)
4692       // or 4th (v_addc_u32_sdwa v1, vcc, v2, v3, vcc) operand.
4693       // Skip VCC only if we didn't skip it on previous iteration.
4694       if (BasicInstType == SIInstrFlags::VOP2 &&
4695           (Inst.getNumOperands() == 1 || Inst.getNumOperands() == 5)) {
4696         skippedVcc = true;
4697         continue;
4698       } else if (BasicInstType == SIInstrFlags::VOPC &&
4699                  Inst.getNumOperands() == 0) {
4700         skippedVcc = true;
4701         continue;
4702       }
4703     }
4704     if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
4705       Op.addRegWithInputModsOperands(Inst, 2);
4706     } else if (Op.isImm()) {
4707       // Handle optional arguments
4708       OptionalIdx[Op.getImmTy()] = I;
4709     } else {
4710       llvm_unreachable("Invalid operand type");
4711     }
4712     skippedVcc = false;
4713   }
4714 
4715   if (Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx9 &&
4716       Inst.getOpcode() != AMDGPU::V_NOP_sdwa_vi) {
4717     // v_nop_sdwa_sdwa_vi/gfx9 has no optional sdwa arguments
4718     switch (BasicInstType) {
4719     case SIInstrFlags::VOP1:
4720       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
4721       if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) {
4722         addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
4723       }
4724       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD);
4725       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE);
4726       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
4727       break;
4728 
4729     case SIInstrFlags::VOP2:
4730       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
4731       if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) {
4732         addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
4733       }
4734       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD);
4735       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE);
4736       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
4737       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD);
4738       break;
4739 
4740     case SIInstrFlags::VOPC:
4741       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
4742       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
4743       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD);
4744       break;
4745 
4746     default:
4747       llvm_unreachable("Invalid instruction type. Only VOP1, VOP2 and VOPC allowed");
4748     }
4749   }
4750 
4751   // special case v_mac_{f16, f32}:
4752   // it has src2 register operand that is tied to dst operand
4753   if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
4754       Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi)  {
4755     auto it = Inst.begin();
4756     std::advance(
4757       it, AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::src2));
4758     Inst.insert(it, Inst.getOperand(0)); // src2 = dst
4759   }
4760 }
4761 
4762 /// Force static initialization.
4763 extern "C" void LLVMInitializeAMDGPUAsmParser() {
4764   RegisterMCAsmParser<AMDGPUAsmParser> A(getTheAMDGPUTarget());
4765   RegisterMCAsmParser<AMDGPUAsmParser> B(getTheGCNTarget());
4766 }
4767 
4768 #define GET_REGISTER_MATCHER
4769 #define GET_MATCHER_IMPLEMENTATION
4770 #include "AMDGPUGenAsmMatcher.inc"
4771 
4772 // This fuction should be defined after auto-generated include so that we have
4773 // MatchClassKind enum defined
4774 unsigned AMDGPUAsmParser::validateTargetOperandClass(MCParsedAsmOperand &Op,
4775                                                      unsigned Kind) {
4776   // Tokens like "glc" would be parsed as immediate operands in ParseOperand().
4777   // But MatchInstructionImpl() expects to meet token and fails to validate
4778   // operand. This method checks if we are given immediate operand but expect to
4779   // get corresponding token.
4780   AMDGPUOperand &Operand = (AMDGPUOperand&)Op;
4781   switch (Kind) {
4782   case MCK_addr64:
4783     return Operand.isAddr64() ? Match_Success : Match_InvalidOperand;
4784   case MCK_gds:
4785     return Operand.isGDS() ? Match_Success : Match_InvalidOperand;
4786   case MCK_glc:
4787     return Operand.isGLC() ? Match_Success : Match_InvalidOperand;
4788   case MCK_idxen:
4789     return Operand.isIdxen() ? Match_Success : Match_InvalidOperand;
4790   case MCK_offen:
4791     return Operand.isOffen() ? Match_Success : Match_InvalidOperand;
4792   case MCK_SSrcB32:
4793     // When operands have expression values, they will return true for isToken,
4794     // because it is not possible to distinguish between a token and an
4795     // expression at parse time. MatchInstructionImpl() will always try to
4796     // match an operand as a token, when isToken returns true, and when the
4797     // name of the expression is not a valid token, the match will fail,
4798     // so we need to handle it here.
4799     return Operand.isSSrcB32() ? Match_Success : Match_InvalidOperand;
4800   case MCK_SSrcF32:
4801     return Operand.isSSrcF32() ? Match_Success : Match_InvalidOperand;
4802   case MCK_SoppBrTarget:
4803     return Operand.isSoppBrTarget() ? Match_Success : Match_InvalidOperand;
4804   case MCK_VReg32OrOff:
4805     return Operand.isVReg32OrOff() ? Match_Success : Match_InvalidOperand;
4806   case MCK_InterpSlot:
4807     return Operand.isInterpSlot() ? Match_Success : Match_InvalidOperand;
4808   case MCK_Attr:
4809     return Operand.isInterpAttr() ? Match_Success : Match_InvalidOperand;
4810   case MCK_AttrChan:
4811     return Operand.isAttrChan() ? Match_Success : Match_InvalidOperand;
4812   default:
4813     return Match_InvalidOperand;
4814   }
4815 }
4816