1 //===- AMDGPUAsmParser.cpp - Parse SI asm to MCInst instructions ----------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "AMDGPU.h"
10 #include "AMDKernelCodeT.h"
11 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
12 #include "MCTargetDesc/AMDGPUTargetStreamer.h"
13 #include "SIDefines.h"
14 #include "SIInstrInfo.h"
15 #include "Utils/AMDGPUAsmUtils.h"
16 #include "Utils/AMDGPUBaseInfo.h"
17 #include "Utils/AMDKernelCodeTUtils.h"
18 #include "llvm/ADT/APFloat.h"
19 #include "llvm/ADT/APInt.h"
20 #include "llvm/ADT/ArrayRef.h"
21 #include "llvm/ADT/STLExtras.h"
22 #include "llvm/ADT/SmallBitVector.h"
23 #include "llvm/ADT/SmallString.h"
24 #include "llvm/ADT/StringRef.h"
25 #include "llvm/ADT/StringSwitch.h"
26 #include "llvm/ADT/Twine.h"
27 #include "llvm/BinaryFormat/ELF.h"
28 #include "llvm/MC/MCAsmInfo.h"
29 #include "llvm/MC/MCContext.h"
30 #include "llvm/MC/MCExpr.h"
31 #include "llvm/MC/MCInst.h"
32 #include "llvm/MC/MCInstrDesc.h"
33 #include "llvm/MC/MCInstrInfo.h"
34 #include "llvm/MC/MCParser/MCAsmLexer.h"
35 #include "llvm/MC/MCParser/MCAsmParser.h"
36 #include "llvm/MC/MCParser/MCAsmParserExtension.h"
37 #include "llvm/MC/MCParser/MCParsedAsmOperand.h"
38 #include "llvm/MC/MCParser/MCTargetAsmParser.h"
39 #include "llvm/MC/MCRegisterInfo.h"
40 #include "llvm/MC/MCStreamer.h"
41 #include "llvm/MC/MCSubtargetInfo.h"
42 #include "llvm/MC/MCSymbol.h"
43 #include "llvm/Support/AMDGPUMetadata.h"
44 #include "llvm/Support/AMDHSAKernelDescriptor.h"
45 #include "llvm/Support/Casting.h"
46 #include "llvm/Support/Compiler.h"
47 #include "llvm/Support/ErrorHandling.h"
48 #include "llvm/Support/MachineValueType.h"
49 #include "llvm/Support/MathExtras.h"
50 #include "llvm/Support/SMLoc.h"
51 #include "llvm/Support/TargetParser.h"
52 #include "llvm/Support/TargetRegistry.h"
53 #include "llvm/Support/raw_ostream.h"
54 #include <algorithm>
55 #include <cassert>
56 #include <cstdint>
57 #include <cstring>
58 #include <iterator>
59 #include <map>
60 #include <memory>
61 #include <string>
62 
63 using namespace llvm;
64 using namespace llvm::AMDGPU;
65 using namespace llvm::amdhsa;
66 
67 namespace {
68 
69 class AMDGPUAsmParser;
70 
71 enum RegisterKind { IS_UNKNOWN, IS_VGPR, IS_SGPR, IS_TTMP, IS_SPECIAL };
72 
73 //===----------------------------------------------------------------------===//
74 // Operand
75 //===----------------------------------------------------------------------===//
76 
77 class AMDGPUOperand : public MCParsedAsmOperand {
78   enum KindTy {
79     Token,
80     Immediate,
81     Register,
82     Expression
83   } Kind;
84 
85   SMLoc StartLoc, EndLoc;
86   const AMDGPUAsmParser *AsmParser;
87 
88 public:
89   AMDGPUOperand(KindTy Kind_, const AMDGPUAsmParser *AsmParser_)
90     : MCParsedAsmOperand(), Kind(Kind_), AsmParser(AsmParser_) {}
91 
92   using Ptr = std::unique_ptr<AMDGPUOperand>;
93 
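  // Operand modifiers: Abs and Neg are floating-point modifiers, Sext is the
  // integer sign-extension modifier. getModifiersOperand() packs the active
  // modifiers into SISrcMods bits for the MC operand.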
94   struct Modifiers {
95     bool Abs = false;
96     bool Neg = false;
97     bool Sext = false;
98 
99     bool hasFPModifiers() const { return Abs || Neg; }
100     bool hasIntModifiers() const { return Sext; }
101     bool hasModifiers() const { return hasFPModifiers() || hasIntModifiers(); }
102 
103     int64_t getFPModifiersOperand() const {
104       int64_t Operand = 0;
105       Operand |= Abs ? SISrcMods::ABS : 0;
106       Operand |= Neg ? SISrcMods::NEG : 0;
107       return Operand;
108     }
109 
110     int64_t getIntModifiersOperand() const {
111       int64_t Operand = 0;
112       Operand |= Sext ? SISrcMods::SEXT : 0;
113       return Operand;
114     }
115 
116     int64_t getModifiersOperand() const {
117       assert(!(hasFPModifiers() && hasIntModifiers())
118            && "fp and int modifiers should not be used simultaneously");
119       if (hasFPModifiers()) {
120         return getFPModifiersOperand();
121       } else if (hasIntModifiers()) {
122         return getIntModifiersOperand();
123       } else {
124         return 0;
125       }
126     }
127 
128     friend raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods);
129   };
130 
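  // Kinds of immediate operands. ImmTyNone marks a plain immediate value; the
  // remaining kinds identify named instruction modifiers (offsets, masks,
  // selects, etc.) that are parsed and stored as immediates.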
131   enum ImmTy {
132     ImmTyNone,
133     ImmTyGDS,
134     ImmTyLDS,
135     ImmTyOffen,
136     ImmTyIdxen,
137     ImmTyAddr64,
138     ImmTyOffset,
139     ImmTyInstOffset,
140     ImmTyOffset0,
141     ImmTyOffset1,
142     ImmTyGLC,
143     ImmTySLC,
144     ImmTyTFE,
145     ImmTyD16,
146     ImmTyClampSI,
147     ImmTyOModSI,
148     ImmTyDppCtrl,
149     ImmTyDppRowMask,
150     ImmTyDppBankMask,
151     ImmTyDppBoundCtrl,
152     ImmTySdwaDstSel,
153     ImmTySdwaSrc0Sel,
154     ImmTySdwaSrc1Sel,
155     ImmTySdwaDstUnused,
156     ImmTyDMask,
157     ImmTyUNorm,
158     ImmTyDA,
159     ImmTyR128A16,
160     ImmTyLWE,
161     ImmTyExpTgt,
162     ImmTyExpCompr,
163     ImmTyExpVM,
164     ImmTyFORMAT,
165     ImmTyHwreg,
166     ImmTyOff,
167     ImmTySendMsg,
168     ImmTyInterpSlot,
169     ImmTyInterpAttr,
170     ImmTyAttrChan,
171     ImmTyOpSel,
172     ImmTyOpSelHi,
173     ImmTyNegLo,
174     ImmTyNegHi,
175     ImmTySwizzle,
176     ImmTyGprIdxMode,
177     ImmTyHigh
178   };
179 
180   struct TokOp {
181     const char *Data;
182     unsigned Length;
183   };
184 
185   struct ImmOp {
186     int64_t Val;
187     ImmTy Type;
188     bool IsFPImm;
189     Modifiers Mods;
190   };
191 
192   struct RegOp {
193     unsigned RegNo;
194     bool IsForcedVOP3;
195     Modifiers Mods;
196   };
197 
198   union {
199     TokOp Tok;
200     ImmOp Imm;
201     RegOp Reg;
202     const MCExpr *Expr;
203   };
204 
205   bool isToken() const override {
206     if (Kind == Token)
207       return true;
208 
209     if (Kind != Expression || !Expr)
210       return false;
211 
212     // When parsing operands, we can't always tell if something was meant to be
213     // a token, like 'gds', or an expression that references a global variable.
214     // In this case, we assume the string is an expression, and if we need to
215 // interpret it as a token, then we treat the symbol name as the token.
216     return isa<MCSymbolRefExpr>(Expr);
217   }
218 
219   bool isImm() const override {
220     return Kind == Immediate;
221   }
222 
223   bool isInlinableImm(MVT type) const;
224   bool isLiteralImm(MVT type) const;
225 
226   bool isRegKind() const {
227     return Kind == Register;
228   }
229 
230   bool isReg() const override {
231     return isRegKind() && !hasModifiers();
232   }
233 
234   bool isRegOrImmWithInputMods(unsigned RCID, MVT type) const {
235     return isRegClass(RCID) || isInlinableImm(type);
236   }
237 
238   bool isRegOrImmWithInt16InputMods() const {
239     return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i16);
240   }
241 
242   bool isRegOrImmWithInt32InputMods() const {
243     return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i32);
244   }
245 
246   bool isRegOrImmWithInt64InputMods() const {
247     return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::i64);
248   }
249 
250   bool isRegOrImmWithFP16InputMods() const {
251     return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f16);
252   }
253 
254   bool isRegOrImmWithFP32InputMods() const {
255     return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f32);
256   }
257 
258   bool isRegOrImmWithFP64InputMods() const {
259     return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::f64);
260   }
261 
262   bool isVReg() const {
263     return isRegClass(AMDGPU::VGPR_32RegClassID) ||
264            isRegClass(AMDGPU::VReg_64RegClassID) ||
265            isRegClass(AMDGPU::VReg_96RegClassID) ||
266            isRegClass(AMDGPU::VReg_128RegClassID) ||
267            isRegClass(AMDGPU::VReg_256RegClassID) ||
268            isRegClass(AMDGPU::VReg_512RegClassID);
269   }
270 
271   bool isVReg32() const {
272     return isRegClass(AMDGPU::VGPR_32RegClassID);
273   }
274 
275   bool isVReg32OrOff() const {
276     return isOff() || isVReg32();
277   }
278 
279   bool isSDWAOperand(MVT type) const;
280   bool isSDWAFP16Operand() const;
281   bool isSDWAFP32Operand() const;
282   bool isSDWAInt16Operand() const;
283   bool isSDWAInt32Operand() const;
284 
285   bool isImmTy(ImmTy ImmT) const {
286     return isImm() && Imm.Type == ImmT;
287   }
288 
289   bool isImmModifier() const {
290     return isImm() && Imm.Type != ImmTyNone;
291   }
292 
293   bool isClampSI() const { return isImmTy(ImmTyClampSI); }
294   bool isOModSI() const { return isImmTy(ImmTyOModSI); }
295   bool isDMask() const { return isImmTy(ImmTyDMask); }
296   bool isUNorm() const { return isImmTy(ImmTyUNorm); }
297   bool isDA() const { return isImmTy(ImmTyDA); }
298   bool isR128A16() const { return isImmTy(ImmTyR128A16); }
299   bool isLWE() const { return isImmTy(ImmTyLWE); }
300   bool isOff() const { return isImmTy(ImmTyOff); }
301   bool isExpTgt() const { return isImmTy(ImmTyExpTgt); }
302   bool isExpVM() const { return isImmTy(ImmTyExpVM); }
303   bool isExpCompr() const { return isImmTy(ImmTyExpCompr); }
304   bool isOffen() const { return isImmTy(ImmTyOffen); }
305   bool isIdxen() const { return isImmTy(ImmTyIdxen); }
306   bool isAddr64() const { return isImmTy(ImmTyAddr64); }
307   bool isOffset() const { return isImmTy(ImmTyOffset) && isUInt<16>(getImm()); }
308   bool isOffset0() const { return isImmTy(ImmTyOffset0) && isUInt<16>(getImm()); }
309   bool isOffset1() const { return isImmTy(ImmTyOffset1) && isUInt<8>(getImm()); }
310 
311   bool isOffsetU12() const { return (isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset)) && isUInt<12>(getImm()); }
312   bool isOffsetS13() const { return (isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset)) && isInt<13>(getImm()); }
313   bool isGDS() const { return isImmTy(ImmTyGDS); }
314   bool isLDS() const { return isImmTy(ImmTyLDS); }
315   bool isGLC() const { return isImmTy(ImmTyGLC); }
316   bool isSLC() const { return isImmTy(ImmTySLC); }
317   bool isTFE() const { return isImmTy(ImmTyTFE); }
318   bool isD16() const { return isImmTy(ImmTyD16); }
319   bool isFORMAT() const { return isImmTy(ImmTyFORMAT) && isUInt<8>(getImm()); }
320   bool isBankMask() const { return isImmTy(ImmTyDppBankMask); }
321   bool isRowMask() const { return isImmTy(ImmTyDppRowMask); }
322   bool isBoundCtrl() const { return isImmTy(ImmTyDppBoundCtrl); }
323   bool isSDWADstSel() const { return isImmTy(ImmTySdwaDstSel); }
324   bool isSDWASrc0Sel() const { return isImmTy(ImmTySdwaSrc0Sel); }
325   bool isSDWASrc1Sel() const { return isImmTy(ImmTySdwaSrc1Sel); }
326   bool isSDWADstUnused() const { return isImmTy(ImmTySdwaDstUnused); }
327   bool isInterpSlot() const { return isImmTy(ImmTyInterpSlot); }
328   bool isInterpAttr() const { return isImmTy(ImmTyInterpAttr); }
329   bool isAttrChan() const { return isImmTy(ImmTyAttrChan); }
330   bool isOpSel() const { return isImmTy(ImmTyOpSel); }
331   bool isOpSelHi() const { return isImmTy(ImmTyOpSelHi); }
332   bool isNegLo() const { return isImmTy(ImmTyNegLo); }
333   bool isNegHi() const { return isImmTy(ImmTyNegHi); }
334   bool isHigh() const { return isImmTy(ImmTyHigh); }
335 
336   bool isMod() const {
337     return isClampSI() || isOModSI();
338   }
339 
340   bool isRegOrImm() const {
341     return isReg() || isImm();
342   }
343 
344   bool isRegClass(unsigned RCID) const;
345 
346   bool isRegOrInlineNoMods(unsigned RCID, MVT type) const {
347     return (isRegClass(RCID) || isInlinableImm(type)) && !hasModifiers();
348   }
349 
350   bool isSCSrcB16() const {
351     return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i16);
352   }
353 
354   bool isSCSrcV2B16() const {
355     return isSCSrcB16();
356   }
357 
358   bool isSCSrcB32() const {
359     return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i32);
360   }
361 
362   bool isSCSrcB64() const {
363     return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::i64);
364   }
365 
366   bool isSCSrcF16() const {
367     return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f16);
368   }
369 
370   bool isSCSrcV2F16() const {
371     return isSCSrcF16();
372   }
373 
374   bool isSCSrcF32() const {
375     return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f32);
376   }
377 
378   bool isSCSrcF64() const {
379     return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::f64);
380   }
381 
382   bool isSSrcB32() const {
383     return isSCSrcB32() || isLiteralImm(MVT::i32) || isExpr();
384   }
385 
386   bool isSSrcB16() const {
387     return isSCSrcB16() || isLiteralImm(MVT::i16);
388   }
389 
390   bool isSSrcV2B16() const {
391     llvm_unreachable("cannot happen");
392     return isSSrcB16();
393   }
394 
395   bool isSSrcB64() const {
396     // TODO: Find out how SALU supports extension of 32-bit literals to 64 bits.
397     // See isVSrc64().
398     return isSCSrcB64() || isLiteralImm(MVT::i64);
399   }
400 
401   bool isSSrcF32() const {
402     return isSCSrcB32() || isLiteralImm(MVT::f32) || isExpr();
403   }
404 
405   bool isSSrcF64() const {
406     return isSCSrcB64() || isLiteralImm(MVT::f64);
407   }
408 
409   bool isSSrcF16() const {
410     return isSCSrcB16() || isLiteralImm(MVT::f16);
411   }
412 
413   bool isSSrcV2F16() const {
414     llvm_unreachable("cannot happen");
415     return isSSrcF16();
416   }
417 
418   bool isSSrcOrLdsB32() const {
419     return isRegOrInlineNoMods(AMDGPU::SRegOrLds_32RegClassID, MVT::i32) ||
420            isLiteralImm(MVT::i32) || isExpr();
421   }
422 
423   bool isVCSrcB32() const {
424     return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i32);
425   }
426 
427   bool isVCSrcB64() const {
428     return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::i64);
429   }
430 
431   bool isVCSrcB16() const {
432     return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i16);
433   }
434 
435   bool isVCSrcV2B16() const {
436     return isVCSrcB16();
437   }
438 
439   bool isVCSrcF32() const {
440     return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f32);
441   }
442 
443   bool isVCSrcF64() const {
444     return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::f64);
445   }
446 
447   bool isVCSrcF16() const {
448     return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f16);
449   }
450 
451   bool isVCSrcV2F16() const {
452     return isVCSrcF16();
453   }
454 
455   bool isVSrcB32() const {
456     return isVCSrcF32() || isLiteralImm(MVT::i32) || isExpr();
457   }
458 
459   bool isVSrcB64() const {
460     return isVCSrcF64() || isLiteralImm(MVT::i64);
461   }
462 
463   bool isVSrcB16() const {
464     return isVCSrcF16() || isLiteralImm(MVT::i16);
465   }
466 
467   bool isVSrcV2B16() const {
468     llvm_unreachable("cannot happen");
469     return isVSrcB16();
470   }
471 
472   bool isVSrcF32() const {
473     return isVCSrcF32() || isLiteralImm(MVT::f32) || isExpr();
474   }
475 
476   bool isVSrcF64() const {
477     return isVCSrcF64() || isLiteralImm(MVT::f64);
478   }
479 
480   bool isVSrcF16() const {
481     return isVCSrcF16() || isLiteralImm(MVT::f16);
482   }
483 
484   bool isVSrcV2F16() const {
485     llvm_unreachable("cannot happen");
486     return isVSrcF16();
487   }
488 
489   bool isKImmFP32() const {
490     return isLiteralImm(MVT::f32);
491   }
492 
493   bool isKImmFP16() const {
494     return isLiteralImm(MVT::f16);
495   }
496 
497   bool isMem() const override {
498     return false;
499   }
500 
501   bool isExpr() const {
502     return Kind == Expression;
503   }
504 
505   bool isSoppBrTarget() const {
506     return isExpr() || isImm();
507   }
508 
509   bool isSWaitCnt() const;
510   bool isHwreg() const;
511   bool isSendMsg() const;
512   bool isSwizzle() const;
513   bool isSMRDOffset8() const;
514   bool isSMRDOffset20() const;
515   bool isSMRDLiteralOffset() const;
516   bool isDPPCtrl() const;
517   bool isGPRIdxMode() const;
518   bool isS16Imm() const;
519   bool isU16Imm() const;
520 
521   StringRef getExpressionAsToken() const {
522     assert(isExpr());
523     const MCSymbolRefExpr *S = cast<MCSymbolRefExpr>(Expr);
524     return S->getSymbol().getName();
525   }
526 
527   StringRef getToken() const {
528     assert(isToken());
529 
530     if (Kind == Expression)
531       return getExpressionAsToken();
532 
533     return StringRef(Tok.Data, Tok.Length);
534   }
535 
536   int64_t getImm() const {
537     assert(isImm());
538     return Imm.Val;
539   }
540 
541   ImmTy getImmTy() const {
542     assert(isImm());
543     return Imm.Type;
544   }
545 
546   unsigned getReg() const override {
547     return Reg.RegNo;
548   }
549 
550   SMLoc getStartLoc() const override {
551     return StartLoc;
552   }
553 
554   SMLoc getEndLoc() const override {
555     return EndLoc;
556   }
557 
558   SMRange getLocRange() const {
559     return SMRange(StartLoc, EndLoc);
560   }
561 
562   Modifiers getModifiers() const {
563     assert(isRegKind() || isImmTy(ImmTyNone));
564     return isRegKind() ? Reg.Mods : Imm.Mods;
565   }
566 
567   void setModifiers(Modifiers Mods) {
568     assert(isRegKind() || isImmTy(ImmTyNone));
569     if (isRegKind())
570       Reg.Mods = Mods;
571     else
572       Imm.Mods = Mods;
573   }
574 
575   bool hasModifiers() const {
576     return getModifiers().hasModifiers();
577   }
578 
579   bool hasFPModifiers() const {
580     return getModifiers().hasFPModifiers();
581   }
582 
583   bool hasIntModifiers() const {
584     return getModifiers().hasIntModifiers();
585   }
586 
587   uint64_t applyInputFPModifiers(uint64_t Val, unsigned Size) const;
588 
589   void addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers = true) const;
590 
591   void addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const;
592 
593   template <unsigned Bitwidth>
594   void addKImmFPOperands(MCInst &Inst, unsigned N) const;
595 
596   void addKImmFP16Operands(MCInst &Inst, unsigned N) const {
597     addKImmFPOperands<16>(Inst, N);
598   }
599 
600   void addKImmFP32Operands(MCInst &Inst, unsigned N) const {
601     addKImmFPOperands<32>(Inst, N);
602   }
603 
604   void addRegOperands(MCInst &Inst, unsigned N) const;
605 
606   void addRegOrImmOperands(MCInst &Inst, unsigned N) const {
607     if (isRegKind())
608       addRegOperands(Inst, N);
609     else if (isExpr())
610       Inst.addOperand(MCOperand::createExpr(Expr));
611     else
612       addImmOperands(Inst, N);
613   }
614 
615   void addRegOrImmWithInputModsOperands(MCInst &Inst, unsigned N) const {
616     Modifiers Mods = getModifiers();
617     Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
618     if (isRegKind()) {
619       addRegOperands(Inst, N);
620     } else {
621       addImmOperands(Inst, N, false);
622     }
623   }
624 
625   void addRegOrImmWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
626     assert(!hasIntModifiers());
627     addRegOrImmWithInputModsOperands(Inst, N);
628   }
629 
630   void addRegOrImmWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
631     assert(!hasFPModifiers());
632     addRegOrImmWithInputModsOperands(Inst, N);
633   }
634 
635   void addRegWithInputModsOperands(MCInst &Inst, unsigned N) const {
636     Modifiers Mods = getModifiers();
637     Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
638     assert(isRegKind());
639     addRegOperands(Inst, N);
640   }
641 
642   void addRegWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
643     assert(!hasIntModifiers());
644     addRegWithInputModsOperands(Inst, N);
645   }
646 
647   void addRegWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
648     assert(!hasFPModifiers());
649     addRegWithInputModsOperands(Inst, N);
650   }
651 
652   void addSoppBrTargetOperands(MCInst &Inst, unsigned N) const {
653     if (isImm())
654       addImmOperands(Inst, N);
655     else {
656       assert(isExpr());
657       Inst.addOperand(MCOperand::createExpr(Expr));
658     }
659   }
660 
661   static void printImmTy(raw_ostream& OS, ImmTy Type) {
662     switch (Type) {
663     case ImmTyNone: OS << "None"; break;
664     case ImmTyGDS: OS << "GDS"; break;
665     case ImmTyLDS: OS << "LDS"; break;
666     case ImmTyOffen: OS << "Offen"; break;
667     case ImmTyIdxen: OS << "Idxen"; break;
668     case ImmTyAddr64: OS << "Addr64"; break;
669     case ImmTyOffset: OS << "Offset"; break;
670     case ImmTyInstOffset: OS << "InstOffset"; break;
671     case ImmTyOffset0: OS << "Offset0"; break;
672     case ImmTyOffset1: OS << "Offset1"; break;
673     case ImmTyGLC: OS << "GLC"; break;
674     case ImmTySLC: OS << "SLC"; break;
675     case ImmTyTFE: OS << "TFE"; break;
676     case ImmTyD16: OS << "D16"; break;
677     case ImmTyFORMAT: OS << "FORMAT"; break;
678     case ImmTyClampSI: OS << "ClampSI"; break;
679     case ImmTyOModSI: OS << "OModSI"; break;
680     case ImmTyDppCtrl: OS << "DppCtrl"; break;
681     case ImmTyDppRowMask: OS << "DppRowMask"; break;
682     case ImmTyDppBankMask: OS << "DppBankMask"; break;
683     case ImmTyDppBoundCtrl: OS << "DppBoundCtrl"; break;
684     case ImmTySdwaDstSel: OS << "SdwaDstSel"; break;
685     case ImmTySdwaSrc0Sel: OS << "SdwaSrc0Sel"; break;
686     case ImmTySdwaSrc1Sel: OS << "SdwaSrc1Sel"; break;
687     case ImmTySdwaDstUnused: OS << "SdwaDstUnused"; break;
688     case ImmTyDMask: OS << "DMask"; break;
689     case ImmTyUNorm: OS << "UNorm"; break;
690     case ImmTyDA: OS << "DA"; break;
691     case ImmTyR128A16: OS << "R128A16"; break;
692     case ImmTyLWE: OS << "LWE"; break;
693     case ImmTyOff: OS << "Off"; break;
694     case ImmTyExpTgt: OS << "ExpTgt"; break;
695     case ImmTyExpCompr: OS << "ExpCompr"; break;
696     case ImmTyExpVM: OS << "ExpVM"; break;
697     case ImmTyHwreg: OS << "Hwreg"; break;
698     case ImmTySendMsg: OS << "SendMsg"; break;
699     case ImmTyInterpSlot: OS << "InterpSlot"; break;
700     case ImmTyInterpAttr: OS << "InterpAttr"; break;
701     case ImmTyAttrChan: OS << "AttrChan"; break;
702     case ImmTyOpSel: OS << "OpSel"; break;
703     case ImmTyOpSelHi: OS << "OpSelHi"; break;
704     case ImmTyNegLo: OS << "NegLo"; break;
705     case ImmTyNegHi: OS << "NegHi"; break;
706     case ImmTySwizzle: OS << "Swizzle"; break;
707     case ImmTyGprIdxMode: OS << "GprIdxMode"; break;
708     case ImmTyHigh: OS << "High"; break;
709     }
710   }
711 
712   void print(raw_ostream &OS) const override {
713     switch (Kind) {
714     case Register:
715       OS << "<register " << getReg() << " mods: " << Reg.Mods << '>';
716       break;
717     case Immediate:
718       OS << '<' << getImm();
719       if (getImmTy() != ImmTyNone) {
720         OS << " type: "; printImmTy(OS, getImmTy());
721       }
722       OS << " mods: " << Imm.Mods << '>';
723       break;
724     case Token:
725       OS << '\'' << getToken() << '\'';
726       break;
727     case Expression:
728       OS << "<expr " << *Expr << '>';
729       break;
730     }
731   }
732 
733   static AMDGPUOperand::Ptr CreateImm(const AMDGPUAsmParser *AsmParser,
734                                       int64_t Val, SMLoc Loc,
735                                       ImmTy Type = ImmTyNone,
736                                       bool IsFPImm = false) {
737     auto Op = llvm::make_unique<AMDGPUOperand>(Immediate, AsmParser);
738     Op->Imm.Val = Val;
739     Op->Imm.IsFPImm = IsFPImm;
740     Op->Imm.Type = Type;
741     Op->Imm.Mods = Modifiers();
742     Op->StartLoc = Loc;
743     Op->EndLoc = Loc;
744     return Op;
745   }
746 
747   static AMDGPUOperand::Ptr CreateToken(const AMDGPUAsmParser *AsmParser,
748                                         StringRef Str, SMLoc Loc,
749                                         bool HasExplicitEncodingSize = true) {
750     auto Res = llvm::make_unique<AMDGPUOperand>(Token, AsmParser);
751     Res->Tok.Data = Str.data();
752     Res->Tok.Length = Str.size();
753     Res->StartLoc = Loc;
754     Res->EndLoc = Loc;
755     return Res;
756   }
757 
758   static AMDGPUOperand::Ptr CreateReg(const AMDGPUAsmParser *AsmParser,
759                                       unsigned RegNo, SMLoc S,
760                                       SMLoc E,
761                                       bool ForceVOP3) {
762     auto Op = llvm::make_unique<AMDGPUOperand>(Register, AsmParser);
763     Op->Reg.RegNo = RegNo;
764     Op->Reg.Mods = Modifiers();
765     Op->Reg.IsForcedVOP3 = ForceVOP3;
766     Op->StartLoc = S;
767     Op->EndLoc = E;
768     return Op;
769   }
770 
771   static AMDGPUOperand::Ptr CreateExpr(const AMDGPUAsmParser *AsmParser,
772                                        const class MCExpr *Expr, SMLoc S) {
773     auto Op = llvm::make_unique<AMDGPUOperand>(Expression, AsmParser);
774     Op->Expr = Expr;
775     Op->StartLoc = S;
776     Op->EndLoc = S;
777     return Op;
778   }
779 };
780 
781 raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods) {
782   OS << "abs:" << Mods.Abs << " neg: " << Mods.Neg << " sext:" << Mods.Sext;
783   return OS;
784 }
785 
786 //===----------------------------------------------------------------------===//
787 // AsmParser
788 //===----------------------------------------------------------------------===//
789 
790 // Holds info related to the current kernel, e.g. count of SGPRs used.
791 // A kernel scope begins at an .amdgpu_hsa_kernel directive and ends at the
792 // next .amdgpu_hsa_kernel directive or at EOF.
793 class KernelScopeInfo {
794   int SgprIndexUnusedMin = -1;
795   int VgprIndexUnusedMin = -1;
796   MCContext *Ctx = nullptr;
797 
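  // Record that the SGPR with index 'i' is used and keep the
  // .kernel.sgpr_count symbol equal to the running count (highest used
  // index + 1).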
798   void usesSgprAt(int i) {
799     if (i >= SgprIndexUnusedMin) {
800       SgprIndexUnusedMin = ++i;
801       if (Ctx) {
802         MCSymbol * const Sym = Ctx->getOrCreateSymbol(Twine(".kernel.sgpr_count"));
803         Sym->setVariableValue(MCConstantExpr::create(SgprIndexUnusedMin, *Ctx));
804       }
805     }
806   }
807 
808   void usesVgprAt(int i) {
809     if (i >= VgprIndexUnusedMin) {
810       VgprIndexUnusedMin = ++i;
811       if (Ctx) {
812         MCSymbol * const Sym = Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count"));
813         Sym->setVariableValue(MCConstantExpr::create(VgprIndexUnusedMin, *Ctx));
814       }
815     }
816   }
817 
818 public:
819   KernelScopeInfo() = default;
820 
821   void initialize(MCContext &Context) {
822     Ctx = &Context;
823     usesSgprAt(SgprIndexUnusedMin = -1);
824     usesVgprAt(VgprIndexUnusedMin = -1);
825   }
826 
827   void usesRegister(RegisterKind RegKind, unsigned DwordRegIndex, unsigned RegWidth) {
828     switch (RegKind) {
829       case IS_SGPR: usesSgprAt(DwordRegIndex + RegWidth - 1); break;
830       case IS_VGPR: usesVgprAt(DwordRegIndex + RegWidth - 1); break;
831       default: break;
832     }
833   }
834 };
835 
836 class AMDGPUAsmParser : public MCTargetAsmParser {
837   MCAsmParser &Parser;
838 
839   // Number of extra operands parsed after the first optional operand.
840   // This may be necessary to skip hardcoded mandatory operands.
841   static const unsigned MAX_OPR_LOOKAHEAD = 8;
842 
843   unsigned ForcedEncodingSize = 0;
844   bool ForcedDPP = false;
845   bool ForcedSDWA = false;
846   KernelScopeInfo KernelScope;
847 
848   /// @name Auto-generated Match Functions
849   /// {
850 
851 #define GET_ASSEMBLER_HEADER
852 #include "AMDGPUGenAsmMatcher.inc"
853 
854   /// }
855 
856 private:
857   bool ParseAsAbsoluteExpression(uint32_t &Ret);
858   bool OutOfRangeError(SMRange Range);
859   /// Calculate VGPR/SGPR blocks required for given target, reserved
860   /// registers, and user-specified NextFreeXGPR values.
861   ///
862   /// \param Features [in] Target features, used for bug corrections.
863   /// \param VCCUsed [in] Whether VCC special SGPR is reserved.
864   /// \param FlatScrUsed [in] Whether FLAT_SCRATCH special SGPR is reserved.
865   /// \param XNACKUsed [in] Whether XNACK_MASK special SGPR is reserved.
866   /// \param NextFreeVGPR [in] Max VGPR number referenced, plus one.
867   /// \param VGPRRange [in] Token range, used for VGPR diagnostics.
868   /// \param NextFreeSGPR [in] Max SGPR number referenced, plus one.
869   /// \param SGPRRange [in] Token range, used for SGPR diagnostics.
870   /// \param VGPRBlocks [out] Result VGPR block count.
871   /// \param SGPRBlocks [out] Result SGPR block count.
872   bool calculateGPRBlocks(const FeatureBitset &Features, bool VCCUsed,
873                           bool FlatScrUsed, bool XNACKUsed,
874                           unsigned NextFreeVGPR, SMRange VGPRRange,
875                           unsigned NextFreeSGPR, SMRange SGPRRange,
876                           unsigned &VGPRBlocks, unsigned &SGPRBlocks);
877   bool ParseDirectiveAMDGCNTarget();
878   bool ParseDirectiveAMDHSAKernel();
879   bool ParseDirectiveMajorMinor(uint32_t &Major, uint32_t &Minor);
880   bool ParseDirectiveHSACodeObjectVersion();
881   bool ParseDirectiveHSACodeObjectISA();
882   bool ParseAMDKernelCodeTValue(StringRef ID, amd_kernel_code_t &Header);
883   bool ParseDirectiveAMDKernelCodeT();
884   bool subtargetHasRegister(const MCRegisterInfo &MRI, unsigned RegNo) const;
885   bool ParseDirectiveAMDGPUHsaKernel();
886 
887   bool ParseDirectiveISAVersion();
888   bool ParseDirectiveHSAMetadata();
889   bool ParseDirectivePALMetadata();
890 
891   bool AddNextRegisterToList(unsigned& Reg, unsigned& RegWidth,
892                              RegisterKind RegKind, unsigned Reg1,
893                              unsigned RegNum);
894   bool ParseAMDGPURegister(RegisterKind& RegKind, unsigned& Reg,
895                            unsigned& RegNum, unsigned& RegWidth,
896                            unsigned *DwordRegIndex);
897   Optional<StringRef> getGprCountSymbolName(RegisterKind RegKind);
898   void initializeGprCountSymbol(RegisterKind RegKind);
899   bool updateGprCountSymbols(RegisterKind RegKind, unsigned DwordRegIndex,
900                              unsigned RegWidth);
901   void cvtMubufImpl(MCInst &Inst, const OperandVector &Operands,
902                     bool IsAtomic, bool IsAtomicReturn, bool IsLds = false);
903   void cvtDSImpl(MCInst &Inst, const OperandVector &Operands,
904                  bool IsGdsHardcoded);
905 
906 public:
907   enum AMDGPUMatchResultTy {
908     Match_PreferE32 = FIRST_TARGET_MATCH_RESULT_TY
909   };
910 
911   using OptionalImmIndexMap = std::map<AMDGPUOperand::ImmTy, unsigned>;
912 
913   AMDGPUAsmParser(const MCSubtargetInfo &STI, MCAsmParser &_Parser,
914                const MCInstrInfo &MII,
915                const MCTargetOptions &Options)
916       : MCTargetAsmParser(Options, STI, MII), Parser(_Parser) {
917     MCAsmParserExtension::Initialize(Parser);
918 
919     if (getFeatureBits().none()) {
920       // Set default features.
921       copySTI().ToggleFeature("SOUTHERN_ISLANDS");
922     }
923 
924     setAvailableFeatures(ComputeAvailableFeatures(getFeatureBits()));
925 
926     {
927       // TODO: make those pre-defined variables read-only.
928       // Currently there is no suitable machinery in the core llvm-mc for this.
929       // MCSymbol::isRedefinable is intended for another purpose, and
930       // AsmParser::parseDirectiveSet() cannot be specialized for a specific target.
931       AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
932       MCContext &Ctx = getContext();
933       if (ISA.Major >= 6 && AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) {
934         MCSymbol *Sym =
935             Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_number"));
936         Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
937         Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_minor"));
938         Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
939         Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_stepping"));
940         Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
941       } else {
942         MCSymbol *Sym =
943             Ctx.getOrCreateSymbol(Twine(".option.machine_version_major"));
944         Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
945         Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_minor"));
946         Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
947         Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_stepping"));
948         Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
949       }
950       if (ISA.Major >= 6 && AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) {
951         initializeGprCountSymbol(IS_VGPR);
952         initializeGprCountSymbol(IS_SGPR);
953       } else
954         KernelScope.initialize(getContext());
955     }
956   }
957 
958   bool hasXNACK() const {
959     return AMDGPU::hasXNACK(getSTI());
960   }
961 
962   bool hasMIMG_R128() const {
963     return AMDGPU::hasMIMG_R128(getSTI());
964   }
965 
966   bool hasPackedD16() const {
967     return AMDGPU::hasPackedD16(getSTI());
968   }
969 
970   bool isSI() const {
971     return AMDGPU::isSI(getSTI());
972   }
973 
974   bool isCI() const {
975     return AMDGPU::isCI(getSTI());
976   }
977 
978   bool isVI() const {
979     return AMDGPU::isVI(getSTI());
980   }
981 
982   bool isGFX9() const {
983     return AMDGPU::isGFX9(getSTI());
984   }
985 
986   bool hasInv2PiInlineImm() const {
987     return getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm];
988   }
989 
990   bool hasFlatOffsets() const {
991     return getFeatureBits()[AMDGPU::FeatureFlatInstOffsets];
992   }
993 
994   bool hasSGPR102_SGPR103() const {
995     return !isVI();
996   }
997 
998   bool hasIntClamp() const {
999     return getFeatureBits()[AMDGPU::FeatureIntClamp];
1000   }
1001 
1002   AMDGPUTargetStreamer &getTargetStreamer() {
1003     MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer();
1004     return static_cast<AMDGPUTargetStreamer &>(TS);
1005   }
1006 
1007   const MCRegisterInfo *getMRI() const {
1008     // We need this const_cast because for some reason getContext() is not const
1009     // in MCAsmParser.
1010     return const_cast<AMDGPUAsmParser*>(this)->getContext().getRegisterInfo();
1011   }
1012 
1013   const MCInstrInfo *getMII() const {
1014     return &MII;
1015   }
1016 
1017   const FeatureBitset &getFeatureBits() const {
1018     return getSTI().getFeatureBits();
1019   }
1020 
1021   void setForcedEncodingSize(unsigned Size) { ForcedEncodingSize = Size; }
1022   void setForcedDPP(bool ForceDPP_) { ForcedDPP = ForceDPP_; }
1023   void setForcedSDWA(bool ForceSDWA_) { ForcedSDWA = ForceSDWA_; }
1024 
1025   unsigned getForcedEncodingSize() const { return ForcedEncodingSize; }
1026   bool isForcedVOP3() const { return ForcedEncodingSize == 64; }
1027   bool isForcedDPP() const { return ForcedDPP; }
1028   bool isForcedSDWA() const { return ForcedSDWA; }
1029   ArrayRef<unsigned> getMatchedVariants() const;
1030 
1031   std::unique_ptr<AMDGPUOperand> parseRegister();
1032   bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override;
1033   unsigned checkTargetMatchPredicate(MCInst &Inst) override;
1034   unsigned validateTargetOperandClass(MCParsedAsmOperand &Op,
1035                                       unsigned Kind) override;
1036   bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
1037                                OperandVector &Operands, MCStreamer &Out,
1038                                uint64_t &ErrorInfo,
1039                                bool MatchingInlineAsm) override;
1040   bool ParseDirective(AsmToken DirectiveID) override;
1041   OperandMatchResultTy parseOperand(OperandVector &Operands, StringRef Mnemonic);
1042   StringRef parseMnemonicSuffix(StringRef Name);
1043   bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
1044                         SMLoc NameLoc, OperandVector &Operands) override;
1045   //bool ProcessInstruction(MCInst &Inst);
1046 
1047   OperandMatchResultTy parseIntWithPrefix(const char *Prefix, int64_t &Int);
1048 
1049   OperandMatchResultTy
1050   parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
1051                      AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
1052                      bool (*ConvertResult)(int64_t &) = nullptr);
1053 
1054   OperandMatchResultTy parseOperandArrayWithPrefix(
1055     const char *Prefix,
1056     OperandVector &Operands,
1057     AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
1058     bool (*ConvertResult)(int64_t&) = nullptr);
1059 
1060   OperandMatchResultTy
1061   parseNamedBit(const char *Name, OperandVector &Operands,
1062                 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone);
1063   OperandMatchResultTy parseStringWithPrefix(StringRef Prefix,
1064                                              StringRef &Value);
1065 
1066   bool parseAbsoluteExpr(int64_t &Val, bool AbsMod = false);
1067   OperandMatchResultTy parseImm(OperandVector &Operands, bool AbsMod = false);
1068   OperandMatchResultTy parseReg(OperandVector &Operands);
1069   OperandMatchResultTy parseRegOrImm(OperandVector &Operands, bool AbsMod = false);
1070   OperandMatchResultTy parseRegOrImmWithFPInputMods(OperandVector &Operands, bool AllowImm = true);
1071   OperandMatchResultTy parseRegOrImmWithIntInputMods(OperandVector &Operands, bool AllowImm = true);
1072   OperandMatchResultTy parseRegWithFPInputMods(OperandVector &Operands);
1073   OperandMatchResultTy parseRegWithIntInputMods(OperandVector &Operands);
1074   OperandMatchResultTy parseVReg32OrOff(OperandVector &Operands);
1075   OperandMatchResultTy parseDfmtNfmt(OperandVector &Operands);
1076 
1077   void cvtDSOffset01(MCInst &Inst, const OperandVector &Operands);
1078   void cvtDS(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, false); }
1079   void cvtDSGds(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, true); }
1080   void cvtExp(MCInst &Inst, const OperandVector &Operands);
1081 
1082   bool parseCnt(int64_t &IntVal);
1083   OperandMatchResultTy parseSWaitCntOps(OperandVector &Operands);
1084   OperandMatchResultTy parseHwreg(OperandVector &Operands);
1085 
1086 private:
1087   struct OperandInfoTy {
1088     int64_t Id;
1089     bool IsSymbolic = false;
1090 
1091     OperandInfoTy(int64_t Id_) : Id(Id_) {}
1092   };
1093 
1094   bool parseSendMsgConstruct(OperandInfoTy &Msg, OperandInfoTy &Operation, int64_t &StreamId);
1095   bool parseHwregConstruct(OperandInfoTy &HwReg, int64_t &Offset, int64_t &Width);
1096 
1097   void errorExpTgt();
1098   OperandMatchResultTy parseExpTgtImpl(StringRef Str, uint8_t &Val);
1099 
1100   bool validateInstruction(const MCInst &Inst, const SMLoc &IDLoc);
1101   bool validateSOPLiteral(const MCInst &Inst) const;
1102   bool validateConstantBusLimitations(const MCInst &Inst);
1103   bool validateEarlyClobberLimitations(const MCInst &Inst);
1104   bool validateIntClampSupported(const MCInst &Inst);
1105   bool validateMIMGAtomicDMask(const MCInst &Inst);
1106   bool validateMIMGGatherDMask(const MCInst &Inst);
1107   bool validateMIMGDataSize(const MCInst &Inst);
1108   bool validateMIMGD16(const MCInst &Inst);
1109   bool validateLdsDirect(const MCInst &Inst);
1110   bool usesConstantBus(const MCInst &Inst, unsigned OpIdx);
1111   bool isInlineConstant(const MCInst &Inst, unsigned OpIdx) const;
1112   unsigned findImplicitSGPRReadInVOP(const MCInst &Inst) const;
1113 
1114   bool trySkipId(const StringRef Id);
1115   bool trySkipToken(const AsmToken::TokenKind Kind);
1116   bool skipToken(const AsmToken::TokenKind Kind, const StringRef ErrMsg);
1117   bool parseString(StringRef &Val, const StringRef ErrMsg = "expected a string");
1118   bool parseExpr(int64_t &Imm);
1119 
1120 public:
1121   OperandMatchResultTy parseOptionalOperand(OperandVector &Operands);
1122   OperandMatchResultTy parseOptionalOpr(OperandVector &Operands);
1123 
1124   OperandMatchResultTy parseExpTgt(OperandVector &Operands);
1125   OperandMatchResultTy parseSendMsgOp(OperandVector &Operands);
1126   OperandMatchResultTy parseInterpSlot(OperandVector &Operands);
1127   OperandMatchResultTy parseInterpAttr(OperandVector &Operands);
1128   OperandMatchResultTy parseSOppBrTarget(OperandVector &Operands);
1129 
1130   bool parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
1131                             const unsigned MinVal,
1132                             const unsigned MaxVal,
1133                             const StringRef ErrMsg);
1134   OperandMatchResultTy parseSwizzleOp(OperandVector &Operands);
1135   bool parseSwizzleOffset(int64_t &Imm);
1136   bool parseSwizzleMacro(int64_t &Imm);
1137   bool parseSwizzleQuadPerm(int64_t &Imm);
1138   bool parseSwizzleBitmaskPerm(int64_t &Imm);
1139   bool parseSwizzleBroadcast(int64_t &Imm);
1140   bool parseSwizzleSwap(int64_t &Imm);
1141   bool parseSwizzleReverse(int64_t &Imm);
1142 
1143   OperandMatchResultTy parseGPRIdxMode(OperandVector &Operands);
1144   int64_t parseGPRIdxMacro();
1145 
1146   void cvtMubuf(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, false); }
1147   void cvtMubufAtomic(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true, false); }
1148   void cvtMubufAtomicReturn(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true, true); }
1149   void cvtMubufLds(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, false, true); }
1150   void cvtMtbuf(MCInst &Inst, const OperandVector &Operands);
1151 
1152   AMDGPUOperand::Ptr defaultGLC() const;
1153   AMDGPUOperand::Ptr defaultSLC() const;
1154 
1155   AMDGPUOperand::Ptr defaultSMRDOffset8() const;
1156   AMDGPUOperand::Ptr defaultSMRDOffset20() const;
1157   AMDGPUOperand::Ptr defaultSMRDLiteralOffset() const;
1158   AMDGPUOperand::Ptr defaultOffsetU12() const;
1159   AMDGPUOperand::Ptr defaultOffsetS13() const;
1160 
1161   OperandMatchResultTy parseOModOperand(OperandVector &Operands);
1162 
1163   void cvtVOP3(MCInst &Inst, const OperandVector &Operands,
1164                OptionalImmIndexMap &OptionalIdx);
1165   void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands);
1166   void cvtVOP3(MCInst &Inst, const OperandVector &Operands);
1167   void cvtVOP3P(MCInst &Inst, const OperandVector &Operands);
1168 
1169   void cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands);
1170 
1171   void cvtMIMG(MCInst &Inst, const OperandVector &Operands,
1172                bool IsAtomic = false);
1173   void cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands);
1174 
1175   OperandMatchResultTy parseDPPCtrl(OperandVector &Operands);
1176   AMDGPUOperand::Ptr defaultRowMask() const;
1177   AMDGPUOperand::Ptr defaultBankMask() const;
1178   AMDGPUOperand::Ptr defaultBoundCtrl() const;
1179   void cvtDPP(MCInst &Inst, const OperandVector &Operands);
1180 
1181   OperandMatchResultTy parseSDWASel(OperandVector &Operands, StringRef Prefix,
1182                                     AMDGPUOperand::ImmTy Type);
1183   OperandMatchResultTy parseSDWADstUnused(OperandVector &Operands);
1184   void cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands);
1185   void cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands);
1186   void cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands);
1187   void cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands);
1188   void cvtSDWA(MCInst &Inst, const OperandVector &Operands,
1189                 uint64_t BasicInstType, bool skipVcc = false);
1190 };
1191 
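// Describes an optional instruction operand: its assembly name, the immediate
// kind it maps to, whether it is a simple bit flag, and an optional callback
// used to convert the parsed value.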
1192 struct OptionalOperand {
1193   const char *Name;
1194   AMDGPUOperand::ImmTy Type;
1195   bool IsBit;
1196   bool (*ConvertResult)(int64_t&);
1197 };
1198 
1199 } // end anonymous namespace
1200 
1201 // May be called with an integer type of equivalent bitwidth.
1202 static const fltSemantics *getFltSemantics(unsigned Size) {
1203   switch (Size) {
1204   case 4:
1205     return &APFloat::IEEEsingle();
1206   case 8:
1207     return &APFloat::IEEEdouble();
1208   case 2:
1209     return &APFloat::IEEEhalf();
1210   default:
1211     llvm_unreachable("unsupported fp type");
1212   }
1213 }
1214 
1215 static const fltSemantics *getFltSemantics(MVT VT) {
1216   return getFltSemantics(VT.getSizeInBits() / 8);
1217 }
1218 
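// Returns the floating-point semantics matching the width of the given
// AMDGPU operand type.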
1219 static const fltSemantics *getOpFltSemantics(uint8_t OperandType) {
1220   switch (OperandType) {
1221   case AMDGPU::OPERAND_REG_IMM_INT32:
1222   case AMDGPU::OPERAND_REG_IMM_FP32:
1223   case AMDGPU::OPERAND_REG_INLINE_C_INT32:
1224   case AMDGPU::OPERAND_REG_INLINE_C_FP32:
1225     return &APFloat::IEEEsingle();
1226   case AMDGPU::OPERAND_REG_IMM_INT64:
1227   case AMDGPU::OPERAND_REG_IMM_FP64:
1228   case AMDGPU::OPERAND_REG_INLINE_C_INT64:
1229   case AMDGPU::OPERAND_REG_INLINE_C_FP64:
1230     return &APFloat::IEEEdouble();
1231   case AMDGPU::OPERAND_REG_IMM_INT16:
1232   case AMDGPU::OPERAND_REG_IMM_FP16:
1233   case AMDGPU::OPERAND_REG_INLINE_C_INT16:
1234   case AMDGPU::OPERAND_REG_INLINE_C_FP16:
1235   case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
1236   case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
1237     return &APFloat::IEEEhalf();
1238   default:
1239     llvm_unreachable("unsupported fp type");
1240   }
1241 }
1242 
1243 //===----------------------------------------------------------------------===//
1244 // Operand
1245 //===----------------------------------------------------------------------===//
1246 
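// Returns true if FPLiteral can be converted to the given type, allowing
// precision loss but not overflow or underflow.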
1247 static bool canLosslesslyConvertToFPType(APFloat &FPLiteral, MVT VT) {
1248   bool Lost;
1249 
1250   // Convert the literal to the floating-point semantics of the given type.
1251   APFloat::opStatus Status = FPLiteral.convert(*getFltSemantics(VT),
1252                                                APFloat::rmNearestTiesToEven,
1253                                                &Lost);
1254   // We allow precision loss but not overflow or underflow
1255   if (Status != APFloat::opOK &&
1256       Lost &&
1257       ((Status & APFloat::opOverflow)  != 0 ||
1258        (Status & APFloat::opUnderflow) != 0)) {
1259     return false;
1260   }
1261 
1262   return true;
1263 }
1264 
1265 bool AMDGPUOperand::isInlinableImm(MVT type) const {
1266   if (!isImmTy(ImmTyNone)) {
1267     // Only plain immediates are inlinable (e.g. "clamp" attribute is not)
1268     return false;
1269   }
1270   // TODO: We should avoid using host float here. It would be better to
1271   // check the float bit values which is what a few other places do.
1272   // We've had bot failures before due to weird NaN support on mips hosts.
1273 
1274   APInt Literal(64, Imm.Val);
1275 
1276   if (Imm.IsFPImm) { // We got fp literal token
1277     if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
1278       return AMDGPU::isInlinableLiteral64(Imm.Val,
1279                                           AsmParser->hasInv2PiInlineImm());
1280     }
1281 
1282     APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
1283     if (!canLosslesslyConvertToFPType(FPLiteral, type))
1284       return false;
1285 
1286     if (type.getScalarSizeInBits() == 16) {
1287       return AMDGPU::isInlinableLiteral16(
1288         static_cast<int16_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
1289         AsmParser->hasInv2PiInlineImm());
1290     }
1291 
1292     // Check if single precision literal is inlinable
1293     return AMDGPU::isInlinableLiteral32(
1294       static_cast<int32_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
1295       AsmParser->hasInv2PiInlineImm());
1296   }
1297 
1298   // We got int literal token.
1299   if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
1300     return AMDGPU::isInlinableLiteral64(Imm.Val,
1301                                         AsmParser->hasInv2PiInlineImm());
1302   }
1303 
1304   if (type.getScalarSizeInBits() == 16) {
1305     return AMDGPU::isInlinableLiteral16(
1306       static_cast<int16_t>(Literal.getLoBits(16).getSExtValue()),
1307       AsmParser->hasInv2PiInlineImm());
1308   }
1309 
1310   return AMDGPU::isInlinableLiteral32(
1311     static_cast<int32_t>(Literal.getLoBits(32).getZExtValue()),
1312     AsmParser->hasInv2PiInlineImm());
1313 }
1314 
1315 bool AMDGPUOperand::isLiteralImm(MVT type) const {
1316   // Check that this immediate can be added as a literal.
1317   if (!isImmTy(ImmTyNone)) {
1318     return false;
1319   }
1320 
1321   if (!Imm.IsFPImm) {
1322     // We got int literal token.
1323 
1324     if (type == MVT::f64 && hasFPModifiers()) {
1325       // Cannot apply fp modifiers to int literals preserving the same semantics
1326       // for VOP1/2/C and VOP3 because of integer truncation. To avoid ambiguity,
1327       // disable these cases.
1328       return false;
1329     }
1330 
1331     unsigned Size = type.getSizeInBits();
1332     if (Size == 64)
1333       Size = 32;
1334 
1335     // FIXME: 64-bit operands can zero extend, sign extend, or pad zeroes for FP
1336     // types.
1337     return isUIntN(Size, Imm.Val) || isIntN(Size, Imm.Val);
1338   }
1339 
1340   // We got fp literal token
1341   if (type == MVT::f64) { // Expected 64-bit fp operand
1342     // The low 32 bits of the literal would be set to zero, but we accept such literals.
1343     return true;
1344   }
1345 
1346   if (type == MVT::i64) { // Expected 64-bit int operand
1347     // We don't allow fp literals in 64-bit integer instructions. It is
1348     // unclear how we should encode them.
1349     return false;
1350   }
1351 
1352   APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
1353   return canLosslesslyConvertToFPType(FPLiteral, type);
1354 }
1355 
1356 bool AMDGPUOperand::isRegClass(unsigned RCID) const {
1357   return isRegKind() && AsmParser->getMRI()->getRegClass(RCID).contains(getReg());
1358 }
1359 
1360 bool AMDGPUOperand::isSDWAOperand(MVT type) const {
1361   if (AsmParser->isVI())
1362     return isVReg32();
1363   else if (AsmParser->isGFX9())
1364     return isRegClass(AMDGPU::VS_32RegClassID) || isInlinableImm(type);
1365   else
1366     return false;
1367 }
1368 
1369 bool AMDGPUOperand::isSDWAFP16Operand() const {
1370   return isSDWAOperand(MVT::f16);
1371 }
1372 
1373 bool AMDGPUOperand::isSDWAFP32Operand() const {
1374   return isSDWAOperand(MVT::f32);
1375 }
1376 
1377 bool AMDGPUOperand::isSDWAInt16Operand() const {
1378   return isSDWAOperand(MVT::i16);
1379 }
1380 
1381 bool AMDGPUOperand::isSDWAInt32Operand() const {
1382   return isSDWAOperand(MVT::i32);
1383 }
1384 
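// Applies the abs/neg source modifiers directly to the raw bit pattern of a
// floating-point immediate of the given size in bytes: abs clears the sign
// bit, neg flips it.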
1385 uint64_t AMDGPUOperand::applyInputFPModifiers(uint64_t Val, unsigned Size) const
1386 {
1387   assert(isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
1388   assert(Size == 2 || Size == 4 || Size == 8);
1389 
1390   const uint64_t FpSignMask = (1ULL << (Size * 8 - 1));
1391 
1392   if (Imm.Mods.Abs) {
1393     Val &= ~FpSignMask;
1394   }
1395   if (Imm.Mods.Neg) {
1396     Val ^= FpSignMask;
1397   }
1398 
1399   return Val;
1400 }
1401 
1402 void AMDGPUOperand::addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers) const {
1403   if (AMDGPU::isSISrcOperand(AsmParser->getMII()->get(Inst.getOpcode()),
1404                              Inst.getNumOperands())) {
1405     addLiteralImmOperand(Inst, Imm.Val,
1406                          ApplyModifiers &&
1407                          isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
1408   } else {
1409     assert(!isImmTy(ImmTyNone) || !hasModifiers());
1410     Inst.addOperand(MCOperand::createImm(Imm.Val));
1411   }
1412 }
1413 
1414 void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const {
1415   const auto& InstDesc = AsmParser->getMII()->get(Inst.getOpcode());
1416   auto OpNum = Inst.getNumOperands();
1417   // Check that this operand accepts literals
1418   assert(AMDGPU::isSISrcOperand(InstDesc, OpNum));
1419 
1420   if (ApplyModifiers) {
1421     assert(AMDGPU::isSISrcFPOperand(InstDesc, OpNum));
1422     const unsigned Size = Imm.IsFPImm ? sizeof(double) : getOperandSize(InstDesc, OpNum);
1423     Val = applyInputFPModifiers(Val, Size);
1424   }
1425 
1426   APInt Literal(64, Val);
1427   uint8_t OpTy = InstDesc.OpInfo[OpNum].OperandType;
1428 
1429   if (Imm.IsFPImm) { // We got fp literal token
1430     switch (OpTy) {
1431     case AMDGPU::OPERAND_REG_IMM_INT64:
1432     case AMDGPU::OPERAND_REG_IMM_FP64:
1433     case AMDGPU::OPERAND_REG_INLINE_C_INT64:
1434     case AMDGPU::OPERAND_REG_INLINE_C_FP64:
1435       if (AMDGPU::isInlinableLiteral64(Literal.getZExtValue(),
1436                                        AsmParser->hasInv2PiInlineImm())) {
1437         Inst.addOperand(MCOperand::createImm(Literal.getZExtValue()));
1438         return;
1439       }
1440 
1441       // Non-inlineable
1442       if (AMDGPU::isSISrcFPOperand(InstDesc, OpNum)) { // Expected 64-bit fp operand
1443         // For fp operands we check if low 32 bits are zeros
1444         if (Literal.getLoBits(32) != 0) {
1445           const_cast<AMDGPUAsmParser *>(AsmParser)->Warning(Inst.getLoc(),
1446           "Can't encode literal as exact 64-bit floating-point operand. "
1447           "Low 32-bits will be set to zero");
1448         }
1449 
1450         Inst.addOperand(MCOperand::createImm(Literal.lshr(32).getZExtValue()));
1451         return;
1452       }
1453 
1454       // We don't allow fp literals in 64-bit integer instructions. It is
1455       // unclear how we should encode them. This case should be checked earlier
1456       // in predicate methods (isLiteralImm())
1457       llvm_unreachable("fp literal in 64-bit integer instruction.");
1458 
1459     case AMDGPU::OPERAND_REG_IMM_INT32:
1460     case AMDGPU::OPERAND_REG_IMM_FP32:
1461     case AMDGPU::OPERAND_REG_INLINE_C_INT32:
1462     case AMDGPU::OPERAND_REG_INLINE_C_FP32:
1463     case AMDGPU::OPERAND_REG_IMM_INT16:
1464     case AMDGPU::OPERAND_REG_IMM_FP16:
1465     case AMDGPU::OPERAND_REG_INLINE_C_INT16:
1466     case AMDGPU::OPERAND_REG_INLINE_C_FP16:
1467     case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
1468     case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: {
1469       bool lost;
1470       APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
1471       // Convert the literal to the operand's floating-point semantics.
1472       FPLiteral.convert(*getOpFltSemantics(OpTy),
1473                         APFloat::rmNearestTiesToEven, &lost);
1474       // We allow precision loss but not overflow or underflow. This should have been
1475       // checked earlier by isLiteralImm().
1476 
1477       uint64_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue();
1478       if (OpTy == AMDGPU::OPERAND_REG_INLINE_C_V2INT16 ||
1479           OpTy == AMDGPU::OPERAND_REG_INLINE_C_V2FP16) {
1480         ImmVal |= (ImmVal << 16);
1481       }
1482 
1483       Inst.addOperand(MCOperand::createImm(ImmVal));
1484       return;
1485     }
1486     default:
1487       llvm_unreachable("invalid operand size");
1488     }
1489 
1490     return;
1491   }
1492 
1493   // We got int literal token.
1494   // Only sign extend inline immediates.
1495   // FIXME: No errors on truncation
1496   switch (OpTy) {
1497   case AMDGPU::OPERAND_REG_IMM_INT32:
1498   case AMDGPU::OPERAND_REG_IMM_FP32:
1499   case AMDGPU::OPERAND_REG_INLINE_C_INT32:
1500   case AMDGPU::OPERAND_REG_INLINE_C_FP32:
1501     if (isInt<32>(Val) &&
1502         AMDGPU::isInlinableLiteral32(static_cast<int32_t>(Val),
1503                                      AsmParser->hasInv2PiInlineImm())) {
1504       Inst.addOperand(MCOperand::createImm(Val));
1505       return;
1506     }
1507 
1508     Inst.addOperand(MCOperand::createImm(Val & 0xffffffff));
1509     return;
1510 
1511   case AMDGPU::OPERAND_REG_IMM_INT64:
1512   case AMDGPU::OPERAND_REG_IMM_FP64:
1513   case AMDGPU::OPERAND_REG_INLINE_C_INT64:
1514   case AMDGPU::OPERAND_REG_INLINE_C_FP64:
1515     if (AMDGPU::isInlinableLiteral64(Val, AsmParser->hasInv2PiInlineImm())) {
1516       Inst.addOperand(MCOperand::createImm(Val));
1517       return;
1518     }
1519 
1520     Inst.addOperand(MCOperand::createImm(Lo_32(Val)));
1521     return;
1522 
1523   case AMDGPU::OPERAND_REG_IMM_INT16:
1524   case AMDGPU::OPERAND_REG_IMM_FP16:
1525   case AMDGPU::OPERAND_REG_INLINE_C_INT16:
1526   case AMDGPU::OPERAND_REG_INLINE_C_FP16:
1527     if (isInt<16>(Val) &&
1528         AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val),
1529                                      AsmParser->hasInv2PiInlineImm())) {
1530       Inst.addOperand(MCOperand::createImm(Val));
1531       return;
1532     }
1533 
1534     Inst.addOperand(MCOperand::createImm(Val & 0xffff));
1535     return;
1536 
1537   case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
1538   case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: {
1539     auto LiteralVal = static_cast<uint16_t>(Literal.getLoBits(16).getZExtValue());
1540     assert(AMDGPU::isInlinableLiteral16(LiteralVal,
1541                                         AsmParser->hasInv2PiInlineImm()));
1542 
1543     uint32_t ImmVal = static_cast<uint32_t>(LiteralVal) << 16 |
1544                       static_cast<uint32_t>(LiteralVal);
1545     Inst.addOperand(MCOperand::createImm(ImmVal));
1546     return;
1547   }
1548   default:
1549     llvm_unreachable("invalid operand size");
1550   }
1551 }
1552 
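// Add an FP K-immediate operand of the given bit width. Integer tokens are
// truncated to the low Bitwidth bits; FP tokens are first converted to the
// target floating-point format.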
1553 template <unsigned Bitwidth>
1554 void AMDGPUOperand::addKImmFPOperands(MCInst &Inst, unsigned N) const {
1555   APInt Literal(64, Imm.Val);
1556 
1557   if (!Imm.IsFPImm) {
1558     // We got int literal token.
1559     Inst.addOperand(MCOperand::createImm(Literal.getLoBits(Bitwidth).getZExtValue()));
1560     return;
1561   }
1562 
1563   bool Lost;
1564   APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
1565   FPLiteral.convert(*getFltSemantics(Bitwidth / 8),
1566                     APFloat::rmNearestTiesToEven, &Lost);
1567   Inst.addOperand(MCOperand::createImm(FPLiteral.bitcastToAPInt().getZExtValue()));
1568 }
1569 
1570 void AMDGPUOperand::addRegOperands(MCInst &Inst, unsigned N) const {
1571   Inst.addOperand(MCOperand::createReg(AMDGPU::getMCReg(getReg(), AsmParser->getSTI())));
1572 }
1573 
1574 //===----------------------------------------------------------------------===//
1575 // AsmParser
1576 //===----------------------------------------------------------------------===//
1577 
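// Map a register kind and a width (in 32-bit registers) to the corresponding
// register class ID, or -1 if no class of that width exists for the kind.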
1578 static int getRegClass(RegisterKind Is, unsigned RegWidth) {
1579   if (Is == IS_VGPR) {
1580     switch (RegWidth) {
1581       default: return -1;
1582       case 1: return AMDGPU::VGPR_32RegClassID;
1583       case 2: return AMDGPU::VReg_64RegClassID;
1584       case 3: return AMDGPU::VReg_96RegClassID;
1585       case 4: return AMDGPU::VReg_128RegClassID;
1586       case 8: return AMDGPU::VReg_256RegClassID;
1587       case 16: return AMDGPU::VReg_512RegClassID;
1588     }
1589   } else if (Is == IS_TTMP) {
1590     switch (RegWidth) {
1591       default: return -1;
1592       case 1: return AMDGPU::TTMP_32RegClassID;
1593       case 2: return AMDGPU::TTMP_64RegClassID;
1594       case 4: return AMDGPU::TTMP_128RegClassID;
1595       case 8: return AMDGPU::TTMP_256RegClassID;
1596       case 16: return AMDGPU::TTMP_512RegClassID;
1597     }
1598   } else if (Is == IS_SGPR) {
1599     switch (RegWidth) {
1600       default: return -1;
1601       case 1: return AMDGPU::SGPR_32RegClassID;
1602       case 2: return AMDGPU::SGPR_64RegClassID;
1603       case 4: return AMDGPU::SGPR_128RegClassID;
1604       case 8: return AMDGPU::SGPR_256RegClassID;
1605       case 16: return AMDGPU::SGPR_512RegClassID;
1606     }
1607   }
1608   return -1;
1609 }
1610 
1611 static unsigned getSpecialRegForName(StringRef RegName) {
1612   return StringSwitch<unsigned>(RegName)
1613     .Case("exec", AMDGPU::EXEC)
1614     .Case("vcc", AMDGPU::VCC)
1615     .Case("flat_scratch", AMDGPU::FLAT_SCR)
1616     .Case("xnack_mask", AMDGPU::XNACK_MASK)
1617     .Case("lds_direct", AMDGPU::LDS_DIRECT)
1618     .Case("src_lds_direct", AMDGPU::LDS_DIRECT)
1619     .Case("m0", AMDGPU::M0)
1620     .Case("scc", AMDGPU::SCC)
1621     .Case("tba", AMDGPU::TBA)
1622     .Case("tma", AMDGPU::TMA)
1623     .Case("flat_scratch_lo", AMDGPU::FLAT_SCR_LO)
1624     .Case("flat_scratch_hi", AMDGPU::FLAT_SCR_HI)
1625     .Case("xnack_mask_lo", AMDGPU::XNACK_MASK_LO)
1626     .Case("xnack_mask_hi", AMDGPU::XNACK_MASK_HI)
1627     .Case("vcc_lo", AMDGPU::VCC_LO)
1628     .Case("vcc_hi", AMDGPU::VCC_HI)
1629     .Case("exec_lo", AMDGPU::EXEC_LO)
1630     .Case("exec_hi", AMDGPU::EXEC_HI)
1631     .Case("tma_lo", AMDGPU::TMA_LO)
1632     .Case("tma_hi", AMDGPU::TMA_HI)
1633     .Case("tba_lo", AMDGPU::TBA_LO)
1634     .Case("tba_hi", AMDGPU::TBA_HI)
1635     .Default(0);
1636 }
1637 
1638 bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
1639                                     SMLoc &EndLoc) {
1640   auto R = parseRegister();
1641   if (!R) return true;
1642   assert(R->isReg());
1643   RegNo = R->getReg();
1644   StartLoc = R->getStartLoc();
1645   EndLoc = R->getEndLoc();
1646   return false;
1647 }
1648 
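// Fold the next register of a register list into (Reg, RegWidth): either pair
// the _lo/_hi halves of a special register (e.g. vcc_lo followed by vcc_hi
// becomes vcc) or extend a run of consecutive VGPRs/SGPRs/TTMPs by one.
// Returns false if Reg1 does not continue the sequence.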
1649 bool AMDGPUAsmParser::AddNextRegisterToList(unsigned &Reg, unsigned &RegWidth,
1650                                             RegisterKind RegKind, unsigned Reg1,
1651                                             unsigned RegNum) {
1652   switch (RegKind) {
1653   case IS_SPECIAL:
1654     if (Reg == AMDGPU::EXEC_LO && Reg1 == AMDGPU::EXEC_HI) {
1655       Reg = AMDGPU::EXEC;
1656       RegWidth = 2;
1657       return true;
1658     }
1659     if (Reg == AMDGPU::FLAT_SCR_LO && Reg1 == AMDGPU::FLAT_SCR_HI) {
1660       Reg = AMDGPU::FLAT_SCR;
1661       RegWidth = 2;
1662       return true;
1663     }
1664     if (Reg == AMDGPU::XNACK_MASK_LO && Reg1 == AMDGPU::XNACK_MASK_HI) {
1665       Reg = AMDGPU::XNACK_MASK;
1666       RegWidth = 2;
1667       return true;
1668     }
1669     if (Reg == AMDGPU::VCC_LO && Reg1 == AMDGPU::VCC_HI) {
1670       Reg = AMDGPU::VCC;
1671       RegWidth = 2;
1672       return true;
1673     }
1674     if (Reg == AMDGPU::TBA_LO && Reg1 == AMDGPU::TBA_HI) {
1675       Reg = AMDGPU::TBA;
1676       RegWidth = 2;
1677       return true;
1678     }
1679     if (Reg == AMDGPU::TMA_LO && Reg1 == AMDGPU::TMA_HI) {
1680       Reg = AMDGPU::TMA;
1681       RegWidth = 2;
1682       return true;
1683     }
1684     return false;
1685   case IS_VGPR:
1686   case IS_SGPR:
1687   case IS_TTMP:
1688     if (Reg1 != Reg + RegWidth) {
1689       return false;
1690     }
1691     RegWidth++;
1692     return true;
1693   default:
1694     llvm_unreachable("unexpected register kind");
1695   }
1696 }
1697 
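// Parse a single register reference. Accepted forms (illustrative examples):
//   special names:              vcc, exec, flat_scratch
//   single registers:           v0, s15, ttmp3
//   register ranges:            v[8:11], s[0:1], ttmp[4:7]
//   lists of consecutive regs:  [s0,s1,s2,s3]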
1698 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
1699                                           unsigned &RegNum, unsigned &RegWidth,
1700                                           unsigned *DwordRegIndex) {
1701   if (DwordRegIndex) { *DwordRegIndex = 0; }
1702   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
1703   if (getLexer().is(AsmToken::Identifier)) {
1704     StringRef RegName = Parser.getTok().getString();
1705     if ((Reg = getSpecialRegForName(RegName))) {
1706       Parser.Lex();
1707       RegKind = IS_SPECIAL;
1708     } else {
1709       unsigned RegNumIndex = 0;
1710       if (RegName[0] == 'v') {
1711         RegNumIndex = 1;
1712         RegKind = IS_VGPR;
1713       } else if (RegName[0] == 's') {
1714         RegNumIndex = 1;
1715         RegKind = IS_SGPR;
1716       } else if (RegName.startswith("ttmp")) {
1717         RegNumIndex = strlen("ttmp");
1718         RegKind = IS_TTMP;
1719       } else {
1720         return false;
1721       }
1722       if (RegName.size() > RegNumIndex) {
1723         // Single 32-bit register: vXX.
1724         if (RegName.substr(RegNumIndex).getAsInteger(10, RegNum))
1725           return false;
1726         Parser.Lex();
1727         RegWidth = 1;
1728       } else {
1729         // Range of registers: v[XX:YY]. ":YY" is optional.
1730         Parser.Lex();
1731         int64_t RegLo, RegHi;
1732         if (getLexer().isNot(AsmToken::LBrac))
1733           return false;
1734         Parser.Lex();
1735 
1736         if (getParser().parseAbsoluteExpression(RegLo))
1737           return false;
1738 
1739         const bool isRBrace = getLexer().is(AsmToken::RBrac);
1740         if (!isRBrace && getLexer().isNot(AsmToken::Colon))
1741           return false;
1742         Parser.Lex();
1743 
1744         if (isRBrace) {
1745           RegHi = RegLo;
1746         } else {
1747           if (getParser().parseAbsoluteExpression(RegHi))
1748             return false;
1749 
1750           if (getLexer().isNot(AsmToken::RBrac))
1751             return false;
1752           Parser.Lex();
1753         }
1754         RegNum = (unsigned) RegLo;
1755         RegWidth = (RegHi - RegLo) + 1;
1756       }
1757     }
1758   } else if (getLexer().is(AsmToken::LBrac)) {
1759     // List of consecutive registers: [s0,s1,s2,s3]
1760     Parser.Lex();
1761     if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, nullptr))
1762       return false;
1763     if (RegWidth != 1)
1764       return false;
1765     RegisterKind RegKind1;
1766     unsigned Reg1, RegNum1, RegWidth1;
1767     do {
1768       if (getLexer().is(AsmToken::Comma)) {
1769         Parser.Lex();
1770       } else if (getLexer().is(AsmToken::RBrac)) {
1771         Parser.Lex();
1772         break;
1773       } else if (ParseAMDGPURegister(RegKind1, Reg1, RegNum1, RegWidth1, nullptr)) {
1774         if (RegWidth1 != 1) {
1775           return false;
1776         }
1777         if (RegKind1 != RegKind) {
1778           return false;
1779         }
1780         if (!AddNextRegisterToList(Reg, RegWidth, RegKind1, Reg1, RegNum1)) {
1781           return false;
1782         }
1783       } else {
1784         return false;
1785       }
1786     } while (true);
1787   } else {
1788     return false;
1789   }
1790   switch (RegKind) {
1791   case IS_SPECIAL:
1792     RegNum = 0;
1793     RegWidth = 1;
1794     break;
1795   case IS_VGPR:
1796   case IS_SGPR:
1797   case IS_TTMP:
1798   {
1799     unsigned Size = 1;
1800     if (RegKind == IS_SGPR || RegKind == IS_TTMP) {
1801       // SGPR and TTMP registers must be aligned. Max required alignment is 4 dwords.
1802       Size = std::min(RegWidth, 4u);
1803     }
1804     if (RegNum % Size != 0)
1805       return false;
1806     if (DwordRegIndex) { *DwordRegIndex = RegNum; }
1807     RegNum = RegNum / Size;
1808     int RCID = getRegClass(RegKind, RegWidth);
1809     if (RCID == -1)
1810       return false;
1811     const MCRegisterClass RC = TRI->getRegClass(RCID);
1812     if (RegNum >= RC.getNumRegs())
1813       return false;
1814     Reg = RC.getRegister(RegNum);
1815     break;
1816   }
1817 
1818   default:
1819     llvm_unreachable("unexpected register kind");
1820   }
1821 
1822   if (!subtargetHasRegister(*TRI, Reg))
1823     return false;
1824   return true;
1825 }
1826 
1827 Optional<StringRef>
1828 AMDGPUAsmParser::getGprCountSymbolName(RegisterKind RegKind) {
1829   switch (RegKind) {
1830   case IS_VGPR:
1831     return StringRef(".amdgcn.next_free_vgpr");
1832   case IS_SGPR:
1833     return StringRef(".amdgcn.next_free_sgpr");
1834   default:
1835     return None;
1836   }
1837 }
1838 
1839 void AMDGPUAsmParser::initializeGprCountSymbol(RegisterKind RegKind) {
1840   auto SymbolName = getGprCountSymbolName(RegKind);
1841   assert(SymbolName && "initializing invalid register kind");
1842   MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);
1843   Sym->setVariableValue(MCConstantExpr::create(0, getContext()));
1844 }
1845 
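// Raise the .amdgcn.next_free_{v,s}gpr symbol so that it covers the register
// range that was just parsed. Returns false only if an error has been emitted.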
1846 bool AMDGPUAsmParser::updateGprCountSymbols(RegisterKind RegKind,
1847                                             unsigned DwordRegIndex,
1848                                             unsigned RegWidth) {
1849   // Symbols are only defined for GCN targets
1850   if (AMDGPU::getIsaVersion(getSTI().getCPU()).Major < 6)
1851     return true;
1852 
1853   auto SymbolName = getGprCountSymbolName(RegKind);
1854   if (!SymbolName)
1855     return true;
1856   MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);
1857 
1858   int64_t NewMax = DwordRegIndex + RegWidth - 1;
1859   int64_t OldCount;
1860 
1861   if (!Sym->isVariable())
1862     return !Error(getParser().getTok().getLoc(),
1863                   ".amdgcn.next_free_{v,s}gpr symbols must be variable");
1864   if (!Sym->getVariableValue(false)->evaluateAsAbsolute(OldCount))
1865     return !Error(
1866         getParser().getTok().getLoc(),
1867         ".amdgcn.next_free_{v,s}gpr symbols must be absolute expressions");
1868 
1869   if (OldCount <= NewMax)
1870     Sym->setVariableValue(MCConstantExpr::create(NewMax + 1, getContext()));
1871 
1872   return true;
1873 }
1874 
1875 std::unique_ptr<AMDGPUOperand> AMDGPUAsmParser::parseRegister() {
1876   const auto &Tok = Parser.getTok();
1877   SMLoc StartLoc = Tok.getLoc();
1878   SMLoc EndLoc = Tok.getEndLoc();
1879   RegisterKind RegKind;
1880   unsigned Reg, RegNum, RegWidth, DwordRegIndex;
1881 
1882   if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, &DwordRegIndex)) {
1883     return nullptr;
1884   }
1885   if (AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) {
1886     if (!updateGprCountSymbols(RegKind, DwordRegIndex, RegWidth))
1887       return nullptr;
1888   } else
1889     KernelScope.usesRegister(RegKind, DwordRegIndex, RegWidth);
1890   return AMDGPUOperand::CreateReg(this, Reg, StartLoc, EndLoc, false);
1891 }
1892 
1893 bool
1894 AMDGPUAsmParser::parseAbsoluteExpr(int64_t &Val, bool AbsMod) {
1895   if (AbsMod && getLexer().peekTok().is(AsmToken::Pipe) &&
1896       (getLexer().getKind() == AsmToken::Integer ||
1897        getLexer().getKind() == AsmToken::Real)) {
1898     // This is a workaround for handling operands like these:
1899     //     |1.0|
1900     //     |-1|
1901     // This syntax is not compatible with the syntax of standard
1902     // MC expressions (due to the trailing '|').
1903 
1904     SMLoc EndLoc;
1905     const MCExpr *Expr;
1906 
1907     if (getParser().parsePrimaryExpr(Expr, EndLoc)) {
1908       return true;
1909     }
1910 
1911     return !Expr->evaluateAsAbsolute(Val);
1912   }
1913 
1914   return getParser().parseAbsoluteExpression(Val);
1915 }
1916 
1917 OperandMatchResultTy
1918 AMDGPUAsmParser::parseImm(OperandVector &Operands, bool AbsMod) {
1919   // TODO: add syntactic sugar for 1/(2*PI)
1920   bool Minus = false;
1921   if (getLexer().getKind() == AsmToken::Minus) {
1922     const AsmToken NextToken = getLexer().peekTok();
1923     if (!NextToken.is(AsmToken::Integer) &&
1924         !NextToken.is(AsmToken::Real)) {
1925         return MatchOperand_NoMatch;
1926     }
1927     Minus = true;
1928     Parser.Lex();
1929   }
1930 
1931   SMLoc S = Parser.getTok().getLoc();
1932   switch(getLexer().getKind()) {
1933   case AsmToken::Integer: {
1934     int64_t IntVal;
1935     if (parseAbsoluteExpr(IntVal, AbsMod))
1936       return MatchOperand_ParseFail;
1937     if (Minus)
1938       IntVal *= -1;
1939     Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S));
1940     return MatchOperand_Success;
1941   }
1942   case AsmToken::Real: {
1943     int64_t IntVal;
1944     if (parseAbsoluteExpr(IntVal, AbsMod))
1945       return MatchOperand_ParseFail;
1946 
1947     APFloat F(BitsToDouble(IntVal));
1948     if (Minus)
1949       F.changeSign();
1950     Operands.push_back(
1951         AMDGPUOperand::CreateImm(this, F.bitcastToAPInt().getZExtValue(), S,
1952                                  AMDGPUOperand::ImmTyNone, true));
1953     return MatchOperand_Success;
1954   }
1955   default:
1956     return MatchOperand_NoMatch;
1957   }
1958 }
1959 
1960 OperandMatchResultTy
1961 AMDGPUAsmParser::parseReg(OperandVector &Operands) {
1962   if (auto R = parseRegister()) {
1963     assert(R->isReg());
1964     R->Reg.IsForcedVOP3 = isForcedVOP3();
1965     Operands.push_back(std::move(R));
1966     return MatchOperand_Success;
1967   }
1968   return MatchOperand_NoMatch;
1969 }
1970 
1971 OperandMatchResultTy
1972 AMDGPUAsmParser::parseRegOrImm(OperandVector &Operands, bool AbsMod) {
1973   auto res = parseImm(Operands, AbsMod);
1974   if (res != MatchOperand_NoMatch) {
1975     return res;
1976   }
1977 
1978   return parseReg(Operands);
1979 }
1980 
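// Parse a source operand together with its optional floating-point input
// modifiers, e.g. (illustrative): -v0, |v1|, abs(v2), neg(v3).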
1981 OperandMatchResultTy
1982 AMDGPUAsmParser::parseRegOrImmWithFPInputMods(OperandVector &Operands,
1983                                               bool AllowImm) {
1984   bool Negate = false, Negate2 = false, Abs = false, Abs2 = false;
1985 
1986   if (getLexer().getKind() == AsmToken::Minus) {
1987     const AsmToken NextToken = getLexer().peekTok();
1988 
1989     // Disable ambiguous constructs like '--1' etc. Should use neg(-1) instead.
1990     if (NextToken.is(AsmToken::Minus)) {
1991       Error(Parser.getTok().getLoc(), "invalid syntax, expected 'neg' modifier");
1992       return MatchOperand_ParseFail;
1993     }
1994 
1995     // '-' followed by an integer literal N should be interpreted as integer
1996     // negation rather than a floating-point NEG modifier applied to N.
1997     // Besides being counter-intuitive, such use of the floating-point NEG
1998     // modifier results in different meanings of integer literals used with
1999     // VOP1/2/C and VOP3, for example:
2000     //    v_exp_f32_e32 v5, -1 // VOP1: src0 = 0xFFFFFFFF
2001     //    v_exp_f32_e64 v5, -1 // VOP3: src0 = 0x80000001
2002     // Negative fp literals should be handled likewise for uniformity.
2003     if (!NextToken.is(AsmToken::Integer) && !NextToken.is(AsmToken::Real)) {
2004       Parser.Lex();
2005       Negate = true;
2006     }
2007   }
2008 
2009   if (getLexer().getKind() == AsmToken::Identifier &&
2010       Parser.getTok().getString() == "neg") {
2011     if (Negate) {
2012       Error(Parser.getTok().getLoc(), "expected register or immediate");
2013       return MatchOperand_ParseFail;
2014     }
2015     Parser.Lex();
2016     Negate2 = true;
2017     if (getLexer().isNot(AsmToken::LParen)) {
2018       Error(Parser.getTok().getLoc(), "expected left paren after neg");
2019       return MatchOperand_ParseFail;
2020     }
2021     Parser.Lex();
2022   }
2023 
2024   if (getLexer().getKind() == AsmToken::Identifier &&
2025       Parser.getTok().getString() == "abs") {
2026     Parser.Lex();
2027     Abs2 = true;
2028     if (getLexer().isNot(AsmToken::LParen)) {
2029       Error(Parser.getTok().getLoc(), "expected left paren after abs");
2030       return MatchOperand_ParseFail;
2031     }
2032     Parser.Lex();
2033   }
2034 
2035   if (getLexer().getKind() == AsmToken::Pipe) {
2036     if (Abs2) {
2037       Error(Parser.getTok().getLoc(), "expected register or immediate");
2038       return MatchOperand_ParseFail;
2039     }
2040     Parser.Lex();
2041     Abs = true;
2042   }
2043 
2044   OperandMatchResultTy Res;
2045   if (AllowImm) {
2046     Res = parseRegOrImm(Operands, Abs);
2047   } else {
2048     Res = parseReg(Operands);
2049   }
2050   if (Res != MatchOperand_Success) {
2051     return Res;
2052   }
2053 
2054   AMDGPUOperand::Modifiers Mods;
2055   if (Abs) {
2056     if (getLexer().getKind() != AsmToken::Pipe) {
2057       Error(Parser.getTok().getLoc(), "expected vertical bar");
2058       return MatchOperand_ParseFail;
2059     }
2060     Parser.Lex();
2061     Mods.Abs = true;
2062   }
2063   if (Abs2) {
2064     if (getLexer().isNot(AsmToken::RParen)) {
2065       Error(Parser.getTok().getLoc(), "expected closing parentheses");
2066       return MatchOperand_ParseFail;
2067     }
2068     Parser.Lex();
2069     Mods.Abs = true;
2070   }
2071 
2072   if (Negate) {
2073     Mods.Neg = true;
2074   } else if (Negate2) {
2075     if (getLexer().isNot(AsmToken::RParen)) {
2076       Error(Parser.getTok().getLoc(), "expected closing parentheses");
2077       return MatchOperand_ParseFail;
2078     }
2079     Parser.Lex();
2080     Mods.Neg = true;
2081   }
2082 
2083   if (Mods.hasFPModifiers()) {
2084     AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
2085     Op.setModifiers(Mods);
2086   }
2087   return MatchOperand_Success;
2088 }
2089 
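// Parse a source operand together with its optional integer input modifier,
// e.g. (illustrative): sext(v0).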
2090 OperandMatchResultTy
2091 AMDGPUAsmParser::parseRegOrImmWithIntInputMods(OperandVector &Operands,
2092                                                bool AllowImm) {
2093   bool Sext = false;
2094 
2095   if (getLexer().getKind() == AsmToken::Identifier &&
2096       Parser.getTok().getString() == "sext") {
2097     Parser.Lex();
2098     Sext = true;
2099     if (getLexer().isNot(AsmToken::LParen)) {
2100       Error(Parser.getTok().getLoc(), "expected left paren after sext");
2101       return MatchOperand_ParseFail;
2102     }
2103     Parser.Lex();
2104   }
2105 
2106   OperandMatchResultTy Res;
2107   if (AllowImm) {
2108     Res = parseRegOrImm(Operands);
2109   } else {
2110     Res = parseReg(Operands);
2111   }
2112   if (Res != MatchOperand_Success) {
2113     return Res;
2114   }
2115 
2116   AMDGPUOperand::Modifiers Mods;
2117   if (Sext) {
2118     if (getLexer().isNot(AsmToken::RParen)) {
2119       Error(Parser.getTok().getLoc(), "expected closing parentheses");
2120       return MatchOperand_ParseFail;
2121     }
2122     Parser.Lex();
2123     Mods.Sext = true;
2124   }
2125 
2126   if (Mods.hasIntModifiers()) {
2127     AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
2128     Op.setModifiers(Mods);
2129   }
2130 
2131   return MatchOperand_Success;
2132 }
2133 
2134 OperandMatchResultTy
2135 AMDGPUAsmParser::parseRegWithFPInputMods(OperandVector &Operands) {
2136   return parseRegOrImmWithFPInputMods(Operands, false);
2137 }
2138 
2139 OperandMatchResultTy
2140 AMDGPUAsmParser::parseRegWithIntInputMods(OperandVector &Operands) {
2141   return parseRegOrImmWithIntInputMods(Operands, false);
2142 }
2143 
2144 OperandMatchResultTy AMDGPUAsmParser::parseVReg32OrOff(OperandVector &Operands) {
2145   std::unique_ptr<AMDGPUOperand> Reg = parseRegister();
2146   if (Reg) {
2147     Operands.push_back(std::move(Reg));
2148     return MatchOperand_Success;
2149   }
2150 
2151   const AsmToken &Tok = Parser.getTok();
2152   if (Tok.getString() == "off") {
2153     Operands.push_back(AMDGPUOperand::CreateImm(this, 0, Tok.getLoc(),
2154                                                 AMDGPUOperand::ImmTyOff, false));
2155     Parser.Lex();
2156     return MatchOperand_Success;
2157   }
2158 
2159   return MatchOperand_NoMatch;
2160 }
2161 
2162 unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) {
2163   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
2164 
2165   if ((getForcedEncodingSize() == 32 && (TSFlags & SIInstrFlags::VOP3)) ||
2166       (getForcedEncodingSize() == 64 && !(TSFlags & SIInstrFlags::VOP3)) ||
2167       (isForcedDPP() && !(TSFlags & SIInstrFlags::DPP)) ||
2168       (isForcedSDWA() && !(TSFlags & SIInstrFlags::SDWA)) )
2169     return Match_InvalidOperand;
2170 
2171   if ((TSFlags & SIInstrFlags::VOP3) &&
2172       (TSFlags & SIInstrFlags::VOPAsmPrefer32Bit) &&
2173       getForcedEncodingSize() != 64)
2174     return Match_PreferE32;
2175 
2176   if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
2177       Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) {
2178     // v_mac_f32/16 allow only dst_sel == DWORD.
2179     auto OpNum =
2180         AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::dst_sel);
2181     const auto &Op = Inst.getOperand(OpNum);
2182     if (!Op.isImm() || Op.getImm() != AMDGPU::SDWA::SdwaSel::DWORD) {
2183       return Match_InvalidOperand;
2184     }
2185   }
2186 
2187   if ((TSFlags & SIInstrFlags::FLAT) && !hasFlatOffsets()) {
2188     // FIXME: Produces error without correct column reported.
2189     auto OpNum =
2190         AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::offset);
2191     const auto &Op = Inst.getOperand(OpNum);
2192     if (Op.getImm() != 0)
2193       return Match_InvalidOperand;
2194   }
2195 
2196   return Match_Success;
2197 }
2198 
2199 // What asm variants we should check
2200 ArrayRef<unsigned> AMDGPUAsmParser::getMatchedVariants() const {
2201   if (getForcedEncodingSize() == 32) {
2202     static const unsigned Variants[] = {AMDGPUAsmVariants::DEFAULT};
2203     return makeArrayRef(Variants);
2204   }
2205 
2206   if (isForcedVOP3()) {
2207     static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3};
2208     return makeArrayRef(Variants);
2209   }
2210 
2211   if (isForcedSDWA()) {
2212     static const unsigned Variants[] = {AMDGPUAsmVariants::SDWA,
2213                                         AMDGPUAsmVariants::SDWA9};
2214     return makeArrayRef(Variants);
2215   }
2216 
2217   if (isForcedDPP()) {
2218     static const unsigned Variants[] = {AMDGPUAsmVariants::DPP};
2219     return makeArrayRef(Variants);
2220   }
2221 
2222   static const unsigned Variants[] = {
2223     AMDGPUAsmVariants::DEFAULT, AMDGPUAsmVariants::VOP3,
2224     AMDGPUAsmVariants::SDWA, AMDGPUAsmVariants::SDWA9, AMDGPUAsmVariants::DPP
2225   };
2226 
2227   return makeArrayRef(Variants);
2228 }
2229 
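// Return the implicit SGPR (FLAT_SCR, VCC or M0) read by this VOP instruction,
// or NoRegister if there is none. Such implicit reads count against the
// constant bus in validateConstantBusLimitations() below.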
2230 unsigned AMDGPUAsmParser::findImplicitSGPRReadInVOP(const MCInst &Inst) const {
2231   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
2232   const unsigned Num = Desc.getNumImplicitUses();
2233   for (unsigned i = 0; i < Num; ++i) {
2234     unsigned Reg = Desc.ImplicitUses[i];
2235     switch (Reg) {
2236     case AMDGPU::FLAT_SCR:
2237     case AMDGPU::VCC:
2238     case AMDGPU::M0:
2239       return Reg;
2240     default:
2241       break;
2242     }
2243   }
2244   return AMDGPU::NoRegister;
2245 }
2246 
2247 // NB: This code is correct only when used to check constant
2248 // bus limitations because GFX7 does not support f16 inline constants.
2249 // Note that there are no cases when a GFX7 opcode violates
2250 // constant bus limitations due to the use of an f16 constant.
2251 bool AMDGPUAsmParser::isInlineConstant(const MCInst &Inst,
2252                                        unsigned OpIdx) const {
2253   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
2254 
2255   if (!AMDGPU::isSISrcOperand(Desc, OpIdx)) {
2256     return false;
2257   }
2258 
2259   const MCOperand &MO = Inst.getOperand(OpIdx);
2260 
2261   int64_t Val = MO.getImm();
2262   auto OpSize = AMDGPU::getOperandSize(Desc, OpIdx);
2263 
2264   switch (OpSize) { // expected operand size
2265   case 8:
2266     return AMDGPU::isInlinableLiteral64(Val, hasInv2PiInlineImm());
2267   case 4:
2268     return AMDGPU::isInlinableLiteral32(Val, hasInv2PiInlineImm());
2269   case 2: {
2270     const unsigned OperandType = Desc.OpInfo[OpIdx].OperandType;
2271     if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2INT16 ||
2272         OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2FP16) {
2273       return AMDGPU::isInlinableLiteralV216(Val, hasInv2PiInlineImm());
2274     } else {
2275       return AMDGPU::isInlinableLiteral16(Val, hasInv2PiInlineImm());
2276     }
2277   }
2278   default:
2279     llvm_unreachable("invalid operand size");
2280   }
2281 }
2282 
2283 bool AMDGPUAsmParser::usesConstantBus(const MCInst &Inst, unsigned OpIdx) {
2284   const MCOperand &MO = Inst.getOperand(OpIdx);
2285   if (MO.isImm()) {
2286     return !isInlineConstant(Inst, OpIdx);
2287   }
2288   return !MO.isReg() ||
2289          isSGPR(mc2PseudoReg(MO.getReg()), getContext().getRegisterInfo());
2290 }
2291 
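// A VALU instruction may read at most one value over the constant bus: a
// single unique SGPR (including implicit reads) or a single literal or
// expression. For example (illustrative), "v_add_f32_e64 v0, s0, s1" reads
// two different SGPRs and is rejected.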
2292 bool AMDGPUAsmParser::validateConstantBusLimitations(const MCInst &Inst) {
2293   const unsigned Opcode = Inst.getOpcode();
2294   const MCInstrDesc &Desc = MII.get(Opcode);
2295   unsigned ConstantBusUseCount = 0;
2296 
2297   if (Desc.TSFlags &
2298       (SIInstrFlags::VOPC |
2299        SIInstrFlags::VOP1 | SIInstrFlags::VOP2 |
2300        SIInstrFlags::VOP3 | SIInstrFlags::VOP3P |
2301        SIInstrFlags::SDWA)) {
2302     // Check special imm operands (used by madmk, etc)
2303     if (AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm) != -1) {
2304       ++ConstantBusUseCount;
2305     }
2306 
2307     unsigned SGPRUsed = findImplicitSGPRReadInVOP(Inst);
2308     if (SGPRUsed != AMDGPU::NoRegister) {
2309       ++ConstantBusUseCount;
2310     }
2311 
2312     const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
2313     const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
2314     const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
2315 
2316     const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx };
2317 
2318     for (int OpIdx : OpIndices) {
2319       if (OpIdx == -1) break;
2320 
2321       const MCOperand &MO = Inst.getOperand(OpIdx);
2322       if (usesConstantBus(Inst, OpIdx)) {
2323         if (MO.isReg()) {
2324           const unsigned Reg = mc2PseudoReg(MO.getReg());
2325           // Pairs of registers with a partial intersection like these
2326           //   s0, s[0:1]
2327           //   flat_scratch_lo, flat_scratch
2328           //   flat_scratch_lo, flat_scratch_hi
2329           // are theoretically valid but they are disabled anyway.
2330           // Note that this code mimics SIInstrInfo::verifyInstruction.
2331           if (Reg != SGPRUsed) {
2332             ++ConstantBusUseCount;
2333           }
2334           SGPRUsed = Reg;
2335         } else { // Expression or a literal
2336           ++ConstantBusUseCount;
2337         }
2338       }
2339     }
2340   }
2341 
2342   return ConstantBusUseCount <= 1;
2343 }
2344 
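// For instructions whose vdst operand is marked earlyclobber, the destination
// register must not overlap any of the source registers.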
2345 bool AMDGPUAsmParser::validateEarlyClobberLimitations(const MCInst &Inst) {
2346   const unsigned Opcode = Inst.getOpcode();
2347   const MCInstrDesc &Desc = MII.get(Opcode);
2348 
2349   const int DstIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vdst);
2350   if (DstIdx == -1 ||
2351       Desc.getOperandConstraint(DstIdx, MCOI::EARLY_CLOBBER) == -1) {
2352     return true;
2353   }
2354 
2355   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
2356 
2357   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
2358   const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
2359   const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
2360 
2361   assert(DstIdx != -1);
2362   const MCOperand &Dst = Inst.getOperand(DstIdx);
2363   assert(Dst.isReg());
2364   const unsigned DstReg = mc2PseudoReg(Dst.getReg());
2365 
2366   const int SrcIndices[] = { Src0Idx, Src1Idx, Src2Idx };
2367 
2368   for (int SrcIdx : SrcIndices) {
2369     if (SrcIdx == -1) break;
2370     const MCOperand &Src = Inst.getOperand(SrcIdx);
2371     if (Src.isReg()) {
2372       const unsigned SrcReg = mc2PseudoReg(Src.getReg());
2373       if (isRegIntersect(DstReg, SrcReg, TRI)) {
2374         return false;
2375       }
2376     }
2377   }
2378 
2379   return true;
2380 }
2381 
2382 bool AMDGPUAsmParser::validateIntClampSupported(const MCInst &Inst) {
2383 
2384   const unsigned Opc = Inst.getOpcode();
2385   const MCInstrDesc &Desc = MII.get(Opc);
2386 
2387   if ((Desc.TSFlags & SIInstrFlags::IntClamp) != 0 && !hasIntClamp()) {
2388     int ClampIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp);
2389     assert(ClampIdx != -1);
2390     return Inst.getOperand(ClampIdx).getImm() == 0;
2391   }
2392 
2393   return true;
2394 }
2395 
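// Check that the size of the vdata operand matches the number of enabled
// dmask bits (always 4 for gather4), plus one dword when tfe is set; with
// packed d16 the required size is halved (rounded up).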
2396 bool AMDGPUAsmParser::validateMIMGDataSize(const MCInst &Inst) {
2397 
2398   const unsigned Opc = Inst.getOpcode();
2399   const MCInstrDesc &Desc = MII.get(Opc);
2400 
2401   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
2402     return true;
2403 
2404   int VDataIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdata);
2405   int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
2406   int TFEIdx   = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::tfe);
2407 
2408   assert(VDataIdx != -1);
2409   assert(DMaskIdx != -1);
2410   assert(TFEIdx != -1);
2411 
2412   unsigned VDataSize = AMDGPU::getRegOperandSize(getMRI(), Desc, VDataIdx);
2413   unsigned TFESize = Inst.getOperand(TFEIdx).getImm()? 1 : 0;
2414   unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
2415   if (DMask == 0)
2416     DMask = 1;
2417 
2418   unsigned DataSize =
2419     (Desc.TSFlags & SIInstrFlags::Gather4) ? 4 : countPopulation(DMask);
2420   if (hasPackedD16()) {
2421     int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
2422     if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm())
2423       DataSize = (DataSize + 1) / 2;
2424   }
2425 
2426   return (VDataSize / 4) == DataSize + TFESize;
2427 }
2428 
2429 bool AMDGPUAsmParser::validateMIMGAtomicDMask(const MCInst &Inst) {
2430 
2431   const unsigned Opc = Inst.getOpcode();
2432   const MCInstrDesc &Desc = MII.get(Opc);
2433 
2434   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
2435     return true;
2436   if (!Desc.mayLoad() || !Desc.mayStore())
2437     return true; // Not atomic
2438 
2439   int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
2440   unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
2441 
2442   // This is an incomplete check because image_atomic_cmpswap
2443   // may only use 0x3 and 0xf while other atomic operations
2444   // may use 0x1 and 0x3. However, these limitations are
2445   // verified when we check that the dmask matches the dst size.
2446   return DMask == 0x1 || DMask == 0x3 || DMask == 0xf;
2447 }
2448 
2449 bool AMDGPUAsmParser::validateMIMGGatherDMask(const MCInst &Inst) {
2450 
2451   const unsigned Opc = Inst.getOpcode();
2452   const MCInstrDesc &Desc = MII.get(Opc);
2453 
2454   if ((Desc.TSFlags & SIInstrFlags::Gather4) == 0)
2455     return true;
2456 
2457   int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
2458   unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
2459 
2460   // GATHER4 instructions use dmask in a different fashion compared to
2461   // other MIMG instructions. The only useful DMASK values are
2462   // 1=red, 2=green, 4=blue, 8=alpha. (e.g. 1 returns
2463   // (red,red,red,red) etc.) The ISA document doesn't mention
2464   // this.
2465   return DMask == 0x1 || DMask == 0x2 || DMask == 0x4 || DMask == 0x8;
2466 }
2467 
2468 bool AMDGPUAsmParser::validateMIMGD16(const MCInst &Inst) {
2469 
2470   const unsigned Opc = Inst.getOpcode();
2471   const MCInstrDesc &Desc = MII.get(Opc);
2472 
2473   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
2474     return true;
2475 
2476   int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
2477   if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm()) {
2478     if (isCI() || isSI())
2479       return false;
2480   }
2481 
2482   return true;
2483 }
2484 
2485 static bool IsRevOpcode(const unsigned Opcode)
2486 {
2487   switch (Opcode) {
2488   case AMDGPU::V_SUBREV_F32_e32:
2489   case AMDGPU::V_SUBREV_F32_e64:
2490   case AMDGPU::V_SUBREV_F32_e32_si:
2491   case AMDGPU::V_SUBREV_F32_e32_vi:
2492   case AMDGPU::V_SUBREV_F32_e64_si:
2493   case AMDGPU::V_SUBREV_F32_e64_vi:
2494   case AMDGPU::V_SUBREV_I32_e32:
2495   case AMDGPU::V_SUBREV_I32_e64:
2496   case AMDGPU::V_SUBREV_I32_e32_si:
2497   case AMDGPU::V_SUBREV_I32_e64_si:
2498   case AMDGPU::V_SUBBREV_U32_e32:
2499   case AMDGPU::V_SUBBREV_U32_e64:
2500   case AMDGPU::V_SUBBREV_U32_e32_si:
2501   case AMDGPU::V_SUBBREV_U32_e32_vi:
2502   case AMDGPU::V_SUBBREV_U32_e64_si:
2503   case AMDGPU::V_SUBBREV_U32_e64_vi:
2504   case AMDGPU::V_SUBREV_U32_e32:
2505   case AMDGPU::V_SUBREV_U32_e64:
2506   case AMDGPU::V_SUBREV_U32_e32_gfx9:
2507   case AMDGPU::V_SUBREV_U32_e32_vi:
2508   case AMDGPU::V_SUBREV_U32_e64_gfx9:
2509   case AMDGPU::V_SUBREV_U32_e64_vi:
2510   case AMDGPU::V_SUBREV_F16_e32:
2511   case AMDGPU::V_SUBREV_F16_e64:
2512   case AMDGPU::V_SUBREV_F16_e32_vi:
2513   case AMDGPU::V_SUBREV_F16_e64_vi:
2514   case AMDGPU::V_SUBREV_U16_e32:
2515   case AMDGPU::V_SUBREV_U16_e64:
2516   case AMDGPU::V_SUBREV_U16_e32_vi:
2517   case AMDGPU::V_SUBREV_U16_e64_vi:
2518   case AMDGPU::V_SUBREV_CO_U32_e32_gfx9:
2519   case AMDGPU::V_SUBREV_CO_U32_e64_gfx9:
2520   case AMDGPU::V_SUBBREV_CO_U32_e32_gfx9:
2521   case AMDGPU::V_SUBBREV_CO_U32_e64_gfx9:
2522   case AMDGPU::V_LSHLREV_B32_e32_si:
2523   case AMDGPU::V_LSHLREV_B32_e64_si:
2524   case AMDGPU::V_LSHLREV_B16_e32_vi:
2525   case AMDGPU::V_LSHLREV_B16_e64_vi:
2526   case AMDGPU::V_LSHLREV_B32_e32_vi:
2527   case AMDGPU::V_LSHLREV_B32_e64_vi:
2528   case AMDGPU::V_LSHLREV_B64_vi:
2529   case AMDGPU::V_LSHRREV_B32_e32_si:
2530   case AMDGPU::V_LSHRREV_B32_e64_si:
2531   case AMDGPU::V_LSHRREV_B16_e32_vi:
2532   case AMDGPU::V_LSHRREV_B16_e64_vi:
2533   case AMDGPU::V_LSHRREV_B32_e32_vi:
2534   case AMDGPU::V_LSHRREV_B32_e64_vi:
2535   case AMDGPU::V_LSHRREV_B64_vi:
2536   case AMDGPU::V_ASHRREV_I32_e64_si:
2537   case AMDGPU::V_ASHRREV_I32_e32_si:
2538   case AMDGPU::V_ASHRREV_I16_e32_vi:
2539   case AMDGPU::V_ASHRREV_I16_e64_vi:
2540   case AMDGPU::V_ASHRREV_I32_e32_vi:
2541   case AMDGPU::V_ASHRREV_I32_e64_vi:
2542   case AMDGPU::V_ASHRREV_I64_vi:
2543   case AMDGPU::V_PK_LSHLREV_B16_vi:
2544   case AMDGPU::V_PK_LSHRREV_B16_vi:
2545   case AMDGPU::V_PK_ASHRREV_I16_vi:
2546     return true;
2547   default:
2548     return false;
2549   }
2550 }
2551 
2552 bool AMDGPUAsmParser::validateLdsDirect(const MCInst &Inst) {
2553 
2554   using namespace SIInstrFlags;
2555   const unsigned Opcode = Inst.getOpcode();
2556   const MCInstrDesc &Desc = MII.get(Opcode);
2557 
2558   // The lds_direct register is defined so that it can be used
2559   // with 9-bit operands only. Ignore encodings which do not accept these.
2560   if ((Desc.TSFlags & (VOP1 | VOP2 | VOP3 | VOPC | VOP3P | SIInstrFlags::SDWA)) == 0)
2561     return true;
2562 
2563   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
2564   const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
2565   const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
2566 
2567   const int SrcIndices[] = { Src1Idx, Src2Idx };
2568 
2569   // lds_direct cannot be specified as either src1 or src2.
2570   for (int SrcIdx : SrcIndices) {
2571     if (SrcIdx == -1) break;
2572     const MCOperand &Src = Inst.getOperand(SrcIdx);
2573     if (Src.isReg() && Src.getReg() == LDS_DIRECT) {
2574       return false;
2575     }
2576   }
2577 
2578   if (Src0Idx == -1)
2579     return true;
2580 
2581   const MCOperand &Src = Inst.getOperand(Src0Idx);
2582   if (!Src.isReg() || Src.getReg() != LDS_DIRECT)
2583     return true;
2584 
2585   // lds_direct is specified as src0. Check additional limitations.
2586   return (Desc.TSFlags & SIInstrFlags::SDWA) == 0 && !IsRevOpcode(Opcode);
2587 }
2588 
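// SOP2/SOPC instructions can encode at most one 32-bit literal constant; check
// that src0 and src1 do not require two distinct literals.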
2589 bool AMDGPUAsmParser::validateSOPLiteral(const MCInst &Inst) const {
2590   unsigned Opcode = Inst.getOpcode();
2591   const MCInstrDesc &Desc = MII.get(Opcode);
2592   if (!(Desc.TSFlags & (SIInstrFlags::SOP2 | SIInstrFlags::SOPC)))
2593     return true;
2594 
2595   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
2596   const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
2597 
2598   const int OpIndices[] = { Src0Idx, Src1Idx };
2599 
2600   unsigned NumLiterals = 0;
2601   uint32_t LiteralValue;
2602 
2603   for (int OpIdx : OpIndices) {
2604     if (OpIdx == -1) break;
2605 
2606     const MCOperand &MO = Inst.getOperand(OpIdx);
2607     if (MO.isImm() &&
2608         // Exclude special imm operands (like the one used by s_set_gpr_idx_on)
2609         AMDGPU::isSISrcOperand(Desc, OpIdx) &&
2610         !isInlineConstant(Inst, OpIdx)) {
2611       uint32_t Value = static_cast<uint32_t>(MO.getImm());
2612       if (NumLiterals == 0 || LiteralValue != Value) {
2613         LiteralValue = Value;
2614         ++NumLiterals;
2615       }
2616     }
2617   }
2618 
2619   return NumLiterals <= 1;
2620 }
2621 
2622 bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst,
2623                                           const SMLoc &IDLoc) {
2624   if (!validateLdsDirect(Inst)) {
2625     Error(IDLoc,
2626       "invalid use of lds_direct");
2627     return false;
2628   }
2629   if (!validateSOPLiteral(Inst)) {
2630     Error(IDLoc,
2631       "only one literal operand is allowed");
2632     return false;
2633   }
2634   if (!validateConstantBusLimitations(Inst)) {
2635     Error(IDLoc,
2636       "invalid operand (violates constant bus restrictions)");
2637     return false;
2638   }
2639   if (!validateEarlyClobberLimitations(Inst)) {
2640     Error(IDLoc,
2641       "destination must be different than all sources");
2642     return false;
2643   }
2644   if (!validateIntClampSupported(Inst)) {
2645     Error(IDLoc,
2646       "integer clamping is not supported on this GPU");
2647     return false;
2648   }
2649   // For MUBUF/MTBUF, d16 is part of the opcode; there is nothing to validate.
2650   if (!validateMIMGD16(Inst)) {
2651     Error(IDLoc,
2652       "d16 modifier is not supported on this GPU");
2653     return false;
2654   }
2655   if (!validateMIMGDataSize(Inst)) {
2656     Error(IDLoc,
2657       "image data size does not match dmask and tfe");
2658     return false;
2659   }
2660   if (!validateMIMGAtomicDMask(Inst)) {
2661     Error(IDLoc,
2662       "invalid atomic image dmask");
2663     return false;
2664   }
2665   if (!validateMIMGGatherDMask(Inst)) {
2666     Error(IDLoc,
2667       "invalid image_gather dmask: only one bit must be set");
2668     return false;
2669   }
2670 
2671   return true;
2672 }
2673 
2674 static std::string AMDGPUMnemonicSpellCheck(StringRef S, uint64_t FBS,
2675                                             unsigned VariantID = 0);
2676 
2677 bool AMDGPUAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
2678                                               OperandVector &Operands,
2679                                               MCStreamer &Out,
2680                                               uint64_t &ErrorInfo,
2681                                               bool MatchingInlineAsm) {
2682   MCInst Inst;
2683   unsigned Result = Match_Success;
2684   for (auto Variant : getMatchedVariants()) {
2685     uint64_t EI;
2686     auto R = MatchInstructionImpl(Operands, Inst, EI, MatchingInlineAsm,
2687                                   Variant);
2688     // We order match statuses from least to most specific. We use the most
2689     // specific status as the result:
2690     // Match_MnemonicFail < Match_InvalidOperand < Match_MissingFeature < Match_PreferE32
2691     if ((R == Match_Success) ||
2692         (R == Match_PreferE32) ||
2693         (R == Match_MissingFeature && Result != Match_PreferE32) ||
2694         (R == Match_InvalidOperand && Result != Match_MissingFeature
2695                                    && Result != Match_PreferE32) ||
2696         (R == Match_MnemonicFail   && Result != Match_InvalidOperand
2697                                    && Result != Match_MissingFeature
2698                                    && Result != Match_PreferE32)) {
2699       Result = R;
2700       ErrorInfo = EI;
2701     }
2702     if (R == Match_Success)
2703       break;
2704   }
2705 
2706   switch (Result) {
2707   default: break;
2708   case Match_Success:
2709     if (!validateInstruction(Inst, IDLoc)) {
2710       return true;
2711     }
2712     Inst.setLoc(IDLoc);
2713     Out.EmitInstruction(Inst, getSTI());
2714     return false;
2715 
2716   case Match_MissingFeature:
2717     return Error(IDLoc, "instruction not supported on this GPU");
2718 
2719   case Match_MnemonicFail: {
2720     uint64_t FBS = ComputeAvailableFeatures(getSTI().getFeatureBits());
2721     std::string Suggestion = AMDGPUMnemonicSpellCheck(
2722         ((AMDGPUOperand &)*Operands[0]).getToken(), FBS);
2723     return Error(IDLoc, "invalid instruction" + Suggestion,
2724                  ((AMDGPUOperand &)*Operands[0]).getLocRange());
2725   }
2726 
2727   case Match_InvalidOperand: {
2728     SMLoc ErrorLoc = IDLoc;
2729     if (ErrorInfo != ~0ULL) {
2730       if (ErrorInfo >= Operands.size()) {
2731         return Error(IDLoc, "too few operands for instruction");
2732       }
2733       ErrorLoc = ((AMDGPUOperand &)*Operands[ErrorInfo]).getStartLoc();
2734       if (ErrorLoc == SMLoc())
2735         ErrorLoc = IDLoc;
2736     }
2737     return Error(ErrorLoc, "invalid operand for instruction");
2738   }
2739 
2740   case Match_PreferE32:
2741     return Error(IDLoc, "internal error: instruction without _e64 suffix "
2742                         "should be encoded as e32");
2743   }
2744   llvm_unreachable("Implement any new match types added!");
2745 }
2746 
2747 bool AMDGPUAsmParser::ParseAsAbsoluteExpression(uint32_t &Ret) {
2748   int64_t Tmp = -1;
2749   if (getLexer().isNot(AsmToken::Integer) && getLexer().isNot(AsmToken::Identifier)) {
2750     return true;
2751   }
2752   if (getParser().parseAbsoluteExpression(Tmp)) {
2753     return true;
2754   }
2755   Ret = static_cast<uint32_t>(Tmp);
2756   return false;
2757 }
2758 
2759 bool AMDGPUAsmParser::ParseDirectiveMajorMinor(uint32_t &Major,
2760                                                uint32_t &Minor) {
2761   if (ParseAsAbsoluteExpression(Major))
2762     return TokError("invalid major version");
2763 
2764   if (getLexer().isNot(AsmToken::Comma))
2765     return TokError("minor version number required, comma expected");
2766   Lex();
2767 
2768   if (ParseAsAbsoluteExpression(Minor))
2769     return TokError("invalid minor version");
2770 
2771   return false;
2772 }
2773 
2774 bool AMDGPUAsmParser::ParseDirectiveAMDGCNTarget() {
2775   if (getSTI().getTargetTriple().getArch() != Triple::amdgcn)
2776     return TokError("directive only supported for amdgcn architecture");
2777 
2778   std::string Target;
2779 
2780   SMLoc TargetStart = getTok().getLoc();
2781   if (getParser().parseEscapedString(Target))
2782     return true;
2783   SMRange TargetRange = SMRange(TargetStart, getTok().getLoc());
2784 
2785   std::string ExpectedTarget;
2786   raw_string_ostream ExpectedTargetOS(ExpectedTarget);
2787   IsaInfo::streamIsaVersion(&getSTI(), ExpectedTargetOS);
2788 
2789   if (Target != ExpectedTargetOS.str())
2790     return getParser().Error(TargetRange.Start, "target must match options",
2791                              TargetRange);
2792 
2793   getTargetStreamer().EmitDirectiveAMDGCNTarget(Target);
2794   return false;
2795 }
2796 
2797 bool AMDGPUAsmParser::OutOfRangeError(SMRange Range) {
2798   return getParser().Error(Range.Start, "value out of range", Range);
2799 }
2800 
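// Translate the raw VGPR/SGPR counts into the granulated block counts encoded
// in compute_pgm_rsrc1, accounting for the extra SGPRs reserved for VCC,
// flat_scratch and XNACK, and for the SGPR init bug workaround.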
2801 bool AMDGPUAsmParser::calculateGPRBlocks(
2802     const FeatureBitset &Features, bool VCCUsed, bool FlatScrUsed,
2803     bool XNACKUsed, unsigned NextFreeVGPR, SMRange VGPRRange,
2804     unsigned NextFreeSGPR, SMRange SGPRRange, unsigned &VGPRBlocks,
2805     unsigned &SGPRBlocks) {
2806   // TODO(scott.linder): These calculations are duplicated from
2807   // AMDGPUAsmPrinter::getSIProgramInfo and could be unified.
2808   IsaVersion Version = getIsaVersion(getSTI().getCPU());
2809 
2810   unsigned NumVGPRs = NextFreeVGPR;
2811   unsigned NumSGPRs = NextFreeSGPR;
2812   unsigned MaxAddressableNumSGPRs = IsaInfo::getAddressableNumSGPRs(&getSTI());
2813 
2814   if (Version.Major >= 8 && !Features.test(FeatureSGPRInitBug) &&
2815       NumSGPRs > MaxAddressableNumSGPRs)
2816     return OutOfRangeError(SGPRRange);
2817 
2818   NumSGPRs +=
2819       IsaInfo::getNumExtraSGPRs(&getSTI(), VCCUsed, FlatScrUsed, XNACKUsed);
2820 
2821   if ((Version.Major <= 7 || Features.test(FeatureSGPRInitBug)) &&
2822       NumSGPRs > MaxAddressableNumSGPRs)
2823     return OutOfRangeError(SGPRRange);
2824 
2825   if (Features.test(FeatureSGPRInitBug))
2826     NumSGPRs = IsaInfo::FIXED_NUM_SGPRS_FOR_INIT_BUG;
2827 
2828   VGPRBlocks = IsaInfo::getNumVGPRBlocks(&getSTI(), NumVGPRs);
2829   SGPRBlocks = IsaInfo::getNumSGPRBlocks(&getSTI(), NumSGPRs);
2830 
2831   return false;
2832 }
2833 
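// Parse a .amdhsa_kernel ... .end_amdhsa_kernel block, for example
// (illustrative only):
//   .amdhsa_kernel my_kernel
//     .amdhsa_next_free_vgpr 8
//     .amdhsa_next_free_sgpr 16
//   .end_amdhsa_kernel
// Only the two .amdhsa_next_free_* directives are required; every other field
// keeps its value from the default kernel descriptor.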
2834 bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
2835   if (getSTI().getTargetTriple().getArch() != Triple::amdgcn)
2836     return TokError("directive only supported for amdgcn architecture");
2837 
2838   if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA)
2839     return TokError("directive only supported for amdhsa OS");
2840 
2841   StringRef KernelName;
2842   if (getParser().parseIdentifier(KernelName))
2843     return true;
2844 
2845   kernel_descriptor_t KD = getDefaultAmdhsaKernelDescriptor();
2846 
2847   StringSet<> Seen;
2848 
2849   IsaVersion IVersion = getIsaVersion(getSTI().getCPU());
2850 
2851   SMRange VGPRRange;
2852   uint64_t NextFreeVGPR = 0;
2853   SMRange SGPRRange;
2854   uint64_t NextFreeSGPR = 0;
2855   unsigned UserSGPRCount = 0;
2856   bool ReserveVCC = true;
2857   bool ReserveFlatScr = true;
2858   bool ReserveXNACK = hasXNACK();
2859 
2860   while (true) {
2861     while (getLexer().is(AsmToken::EndOfStatement))
2862       Lex();
2863 
2864     if (getLexer().isNot(AsmToken::Identifier))
2865       return TokError("expected .amdhsa_ directive or .end_amdhsa_kernel");
2866 
2867     StringRef ID = getTok().getIdentifier();
2868     SMRange IDRange = getTok().getLocRange();
2869     Lex();
2870 
2871     if (ID == ".end_amdhsa_kernel")
2872       break;
2873 
2874     if (Seen.find(ID) != Seen.end())
2875       return TokError(".amdhsa_ directives cannot be repeated");
2876     Seen.insert(ID);
2877 
2878     SMLoc ValStart = getTok().getLoc();
2879     int64_t IVal;
2880     if (getParser().parseAbsoluteExpression(IVal))
2881       return true;
2882     SMLoc ValEnd = getTok().getLoc();
2883     SMRange ValRange = SMRange(ValStart, ValEnd);
2884 
2885     if (IVal < 0)
2886       return OutOfRangeError(ValRange);
2887 
2888     uint64_t Val = IVal;
2889 
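// Range-check VALUE against the bit width of ENTRY and pack it into FIELD.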
2890 #define PARSE_BITS_ENTRY(FIELD, ENTRY, VALUE, RANGE)                           \
2891   if (!isUInt<ENTRY##_WIDTH>(VALUE))                                           \
2892     return OutOfRangeError(RANGE);                                             \
2893   AMDHSA_BITS_SET(FIELD, ENTRY, VALUE);
2894 
2895     if (ID == ".amdhsa_group_segment_fixed_size") {
2896       if (!isUInt<sizeof(KD.group_segment_fixed_size) * CHAR_BIT>(Val))
2897         return OutOfRangeError(ValRange);
2898       KD.group_segment_fixed_size = Val;
2899     } else if (ID == ".amdhsa_private_segment_fixed_size") {
2900       if (!isUInt<sizeof(KD.private_segment_fixed_size) * CHAR_BIT>(Val))
2901         return OutOfRangeError(ValRange);
2902       KD.private_segment_fixed_size = Val;
2903     } else if (ID == ".amdhsa_user_sgpr_private_segment_buffer") {
2904       PARSE_BITS_ENTRY(KD.kernel_code_properties,
2905                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER,
2906                        Val, ValRange);
2907       UserSGPRCount++;
2908     } else if (ID == ".amdhsa_user_sgpr_dispatch_ptr") {
2909       PARSE_BITS_ENTRY(KD.kernel_code_properties,
2910                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR, Val,
2911                        ValRange);
2912       UserSGPRCount++;
2913     } else if (ID == ".amdhsa_user_sgpr_queue_ptr") {
2914       PARSE_BITS_ENTRY(KD.kernel_code_properties,
2915                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR, Val,
2916                        ValRange);
2917       UserSGPRCount++;
2918     } else if (ID == ".amdhsa_user_sgpr_kernarg_segment_ptr") {
2919       PARSE_BITS_ENTRY(KD.kernel_code_properties,
2920                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR,
2921                        Val, ValRange);
2922       UserSGPRCount++;
2923     } else if (ID == ".amdhsa_user_sgpr_dispatch_id") {
2924       PARSE_BITS_ENTRY(KD.kernel_code_properties,
2925                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID, Val,
2926                        ValRange);
2927       UserSGPRCount++;
2928     } else if (ID == ".amdhsa_user_sgpr_flat_scratch_init") {
2929       PARSE_BITS_ENTRY(KD.kernel_code_properties,
2930                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT, Val,
2931                        ValRange);
2932       UserSGPRCount++;
2933     } else if (ID == ".amdhsa_user_sgpr_private_segment_size") {
2934       PARSE_BITS_ENTRY(KD.kernel_code_properties,
2935                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE,
2936                        Val, ValRange);
2937       UserSGPRCount++;
2938     } else if (ID == ".amdhsa_system_sgpr_private_segment_wavefront_offset") {
2939       PARSE_BITS_ENTRY(
2940           KD.compute_pgm_rsrc2,
2941           COMPUTE_PGM_RSRC2_ENABLE_SGPR_PRIVATE_SEGMENT_WAVEFRONT_OFFSET, Val,
2942           ValRange);
2943     } else if (ID == ".amdhsa_system_sgpr_workgroup_id_x") {
2944       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
2945                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, Val,
2946                        ValRange);
2947     } else if (ID == ".amdhsa_system_sgpr_workgroup_id_y") {
2948       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
2949                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y, Val,
2950                        ValRange);
2951     } else if (ID == ".amdhsa_system_sgpr_workgroup_id_z") {
2952       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
2953                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z, Val,
2954                        ValRange);
2955     } else if (ID == ".amdhsa_system_sgpr_workgroup_info") {
2956       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
2957                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO, Val,
2958                        ValRange);
2959     } else if (ID == ".amdhsa_system_vgpr_workitem_id") {
2960       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
2961                        COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID, Val,
2962                        ValRange);
2963     } else if (ID == ".amdhsa_next_free_vgpr") {
2964       VGPRRange = ValRange;
2965       NextFreeVGPR = Val;
2966     } else if (ID == ".amdhsa_next_free_sgpr") {
2967       SGPRRange = ValRange;
2968       NextFreeSGPR = Val;
2969     } else if (ID == ".amdhsa_reserve_vcc") {
2970       if (!isUInt<1>(Val))
2971         return OutOfRangeError(ValRange);
2972       ReserveVCC = Val;
2973     } else if (ID == ".amdhsa_reserve_flat_scratch") {
2974       if (IVersion.Major < 7)
2975         return getParser().Error(IDRange.Start, "directive requires gfx7+",
2976                                  IDRange);
2977       if (!isUInt<1>(Val))
2978         return OutOfRangeError(ValRange);
2979       ReserveFlatScr = Val;
2980     } else if (ID == ".amdhsa_reserve_xnack_mask") {
2981       if (IVersion.Major < 8)
2982         return getParser().Error(IDRange.Start, "directive requires gfx8+",
2983                                  IDRange);
2984       if (!isUInt<1>(Val))
2985         return OutOfRangeError(ValRange);
2986       ReserveXNACK = Val;
2987     } else if (ID == ".amdhsa_float_round_mode_32") {
2988       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
2989                        COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32, Val, ValRange);
2990     } else if (ID == ".amdhsa_float_round_mode_16_64") {
2991       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
2992                        COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64, Val, ValRange);
2993     } else if (ID == ".amdhsa_float_denorm_mode_32") {
2994       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
2995                        COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32, Val, ValRange);
2996     } else if (ID == ".amdhsa_float_denorm_mode_16_64") {
2997       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
2998                        COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64, Val,
2999                        ValRange);
3000     } else if (ID == ".amdhsa_dx10_clamp") {
3001       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
3002                        COMPUTE_PGM_RSRC1_ENABLE_DX10_CLAMP, Val, ValRange);
3003     } else if (ID == ".amdhsa_ieee_mode") {
3004       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_ENABLE_IEEE_MODE,
3005                        Val, ValRange);
3006     } else if (ID == ".amdhsa_fp16_overflow") {
3007       if (IVersion.Major < 9)
3008         return getParser().Error(IDRange.Start, "directive requires gfx9+",
3009                                  IDRange);
3010       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FP16_OVFL, Val,
3011                        ValRange);
3012     } else if (ID == ".amdhsa_exception_fp_ieee_invalid_op") {
3013       PARSE_BITS_ENTRY(
3014           KD.compute_pgm_rsrc2,
3015           COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION, Val,
3016           ValRange);
3017     } else if (ID == ".amdhsa_exception_fp_denorm_src") {
3018       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
3019                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE,
3020                        Val, ValRange);
3021     } else if (ID == ".amdhsa_exception_fp_ieee_div_zero") {
3022       PARSE_BITS_ENTRY(
3023           KD.compute_pgm_rsrc2,
3024           COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO, Val,
3025           ValRange);
3026     } else if (ID == ".amdhsa_exception_fp_ieee_overflow") {
3027       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
3028                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW,
3029                        Val, ValRange);
3030     } else if (ID == ".amdhsa_exception_fp_ieee_underflow") {
3031       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
3032                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW,
3033                        Val, ValRange);
3034     } else if (ID == ".amdhsa_exception_fp_ieee_inexact") {
3035       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
3036                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT,
3037                        Val, ValRange);
3038     } else if (ID == ".amdhsa_exception_int_div_zero") {
3039       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
3040                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO,
3041                        Val, ValRange);
3042     } else {
3043       return getParser().Error(IDRange.Start,
3044                                "unknown .amdhsa_kernel directive", IDRange);
3045     }
3046 
3047 #undef PARSE_BITS_ENTRY
3048   }
3049 
3050   if (Seen.find(".amdhsa_next_free_vgpr") == Seen.end())
3051     return TokError(".amdhsa_next_free_vgpr directive is required");
3052 
3053   if (Seen.find(".amdhsa_next_free_sgpr") == Seen.end())
3054     return TokError(".amdhsa_next_free_sgpr directive is required");
3055 
3056   unsigned VGPRBlocks;
3057   unsigned SGPRBlocks;
3058   if (calculateGPRBlocks(getFeatureBits(), ReserveVCC, ReserveFlatScr,
3059                          ReserveXNACK, NextFreeVGPR, VGPRRange, NextFreeSGPR,
3060                          SGPRRange, VGPRBlocks, SGPRBlocks))
3061     return true;
3062 
3063   if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_WIDTH>(
3064           VGPRBlocks))
3065     return OutOfRangeError(VGPRRange);
3066   AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
3067                   COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT, VGPRBlocks);
3068 
3069   if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_WIDTH>(
3070           SGPRBlocks))
3071     return OutOfRangeError(SGPRRange);
3072   AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
3073                   COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT,
3074                   SGPRBlocks);
3075 
3076   if (!isUInt<COMPUTE_PGM_RSRC2_USER_SGPR_COUNT_WIDTH>(UserSGPRCount))
3077     return TokError("too many user SGPRs enabled");
3078   AMDHSA_BITS_SET(KD.compute_pgm_rsrc2, COMPUTE_PGM_RSRC2_USER_SGPR_COUNT,
3079                   UserSGPRCount);
3080 
3081   getTargetStreamer().EmitAmdhsaKernelDescriptor(
3082       getSTI(), KernelName, KD, NextFreeVGPR, NextFreeSGPR, ReserveVCC,
3083       ReserveFlatScr, ReserveXNACK);
3084   return false;
3085 }
3086 
3087 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectVersion() {
3088   uint32_t Major;
3089   uint32_t Minor;
3090 
3091   if (ParseDirectiveMajorMinor(Major, Minor))
3092     return true;
3093 
3094   getTargetStreamer().EmitDirectiveHSACodeObjectVersion(Major, Minor);
3095   return false;
3096 }
3097 
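// Handles ".hsa_code_object_isa". With no arguments the ISA version of the
// targeted GPU is used; otherwise the directive takes major, minor, stepping,
// a quoted vendor name and a quoted arch name,
// e.g. .hsa_code_object_isa 8,0,3,"AMD","AMDGPU".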
3098 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectISA() {
3099   uint32_t Major;
3100   uint32_t Minor;
3101   uint32_t Stepping;
3102   StringRef VendorName;
3103   StringRef ArchName;
3104 
3105   // If this directive has no arguments, then use the ISA version for the
3106   // targeted GPU.
3107   if (getLexer().is(AsmToken::EndOfStatement)) {
3108     AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
3109     getTargetStreamer().EmitDirectiveHSACodeObjectISA(ISA.Major, ISA.Minor,
3110                                                       ISA.Stepping,
3111                                                       "AMD", "AMDGPU");
3112     return false;
3113   }
3114 
3115   if (ParseDirectiveMajorMinor(Major, Minor))
3116     return true;
3117 
3118   if (getLexer().isNot(AsmToken::Comma))
3119     return TokError("stepping version number required, comma expected");
3120   Lex();
3121 
3122   if (ParseAsAbsoluteExpression(Stepping))
3123     return TokError("invalid stepping version");
3124 
3125   if (getLexer().isNot(AsmToken::Comma))
3126     return TokError("vendor name required, comma expected");
3127   Lex();
3128 
3129   if (getLexer().isNot(AsmToken::String))
3130     return TokError("invalid vendor name");
3131 
3132   VendorName = getLexer().getTok().getStringContents();
3133   Lex();
3134 
3135   if (getLexer().isNot(AsmToken::Comma))
3136     return TokError("arch name required, comma expected");
3137   Lex();
3138 
3139   if (getLexer().isNot(AsmToken::String))
3140     return TokError("invalid arch name");
3141 
3142   ArchName = getLexer().getTok().getStringContents();
3143   Lex();
3144 
3145   getTargetStreamer().EmitDirectiveHSACodeObjectISA(Major, Minor, Stepping,
3146                                                     VendorName, ArchName);
3147   return false;
3148 }
3149 
3150 bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID,
3151                                                amd_kernel_code_t &Header) {
3152   // max_scratch_backing_memory_byte_size is deprecated. Ignore it while parsing
3153   // assembly for backwards compatibility.
3154   if (ID == "max_scratch_backing_memory_byte_size") {
3155     Parser.eatToEndOfStatement();
3156     return false;
3157   }
3158 
3159   SmallString<40> ErrStr;
3160   raw_svector_ostream Err(ErrStr);
3161   if (!parseAmdKernelCodeField(ID, getParser(), Header, Err)) {
3162     return TokError(Err.str());
3163   }
3164   Lex();
3165   return false;
3166 }
3167 
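// Parses the body of an ".amd_kernel_code_t" directive: a default header is
// initialized for the current subtarget and then updated by a sequence of
// "<field> = <value>" statements until ".end_amd_kernel_code_t" is reached.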
3168 bool AMDGPUAsmParser::ParseDirectiveAMDKernelCodeT() {
3169   amd_kernel_code_t Header;
3170   AMDGPU::initDefaultAMDKernelCodeT(Header, &getSTI());
3171 
3172   while (true) {
3173     // Lex EndOfStatement.  This is in a while loop, because lexing a comment
3174     // will set the current token to EndOfStatement.
3175     while(getLexer().is(AsmToken::EndOfStatement))
3176       Lex();
3177 
3178     if (getLexer().isNot(AsmToken::Identifier))
3179       return TokError("expected value identifier or .end_amd_kernel_code_t");
3180 
3181     StringRef ID = getLexer().getTok().getIdentifier();
3182     Lex();
3183 
3184     if (ID == ".end_amd_kernel_code_t")
3185       break;
3186 
3187     if (ParseAMDKernelCodeTValue(ID, Header))
3188       return true;
3189   }
3190 
3191   getTargetStreamer().EmitAMDKernelCodeT(Header);
3192 
3193   return false;
3194 }
3195 
3196 bool AMDGPUAsmParser::ParseDirectiveAMDGPUHsaKernel() {
3197   if (getLexer().isNot(AsmToken::Identifier))
3198     return TokError("expected symbol name");
3199 
3200   StringRef KernelName = Parser.getTok().getString();
3201 
3202   getTargetStreamer().EmitAMDGPUSymbolType(KernelName,
3203                                            ELF::STT_AMDGPU_HSA_KERNEL);
3204   Lex();
3205   if (!AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI()))
3206     KernelScope.initialize(getContext());
3207   return false;
3208 }
3209 
3210 bool AMDGPUAsmParser::ParseDirectiveISAVersion() {
3211   if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) {
3212     return Error(getParser().getTok().getLoc(),
3213                  ".amd_amdgpu_isa directive is not available on non-amdgcn "
3214                  "architectures");
3215   }
3216 
3217   auto ISAVersionStringFromASM = getLexer().getTok().getStringContents();
3218 
3219   std::string ISAVersionStringFromSTI;
3220   raw_string_ostream ISAVersionStreamFromSTI(ISAVersionStringFromSTI);
3221   IsaInfo::streamIsaVersion(&getSTI(), ISAVersionStreamFromSTI);
3222 
3223   if (ISAVersionStringFromASM != ISAVersionStreamFromSTI.str()) {
3224     return Error(getParser().getTok().getLoc(),
3225                  ".amd_amdgpu_isa directive does not match triple and/or mcpu "
3226                  "arguments specified through the command line");
3227   }
3228 
3229   getTargetStreamer().EmitISAVersion(ISAVersionStreamFromSTI.str());
3230   Lex();
3231 
3232   return false;
3233 }
3234 
3235 bool AMDGPUAsmParser::ParseDirectiveHSAMetadata() {
3236   const char *AssemblerDirectiveBegin;
3237   const char *AssemblerDirectiveEnd;
3238   std::tie(AssemblerDirectiveBegin, AssemblerDirectiveEnd) =
3239       AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())
3240           ? std::make_tuple(HSAMD::V3::AssemblerDirectiveBegin,
3241                             HSAMD::V3::AssemblerDirectiveEnd)
3242           : std::make_tuple(HSAMD::AssemblerDirectiveBegin,
3243                             HSAMD::AssemblerDirectiveEnd);
3244 
3245   if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA) {
3246     return Error(getParser().getTok().getLoc(),
3247                  (Twine(AssemblerDirectiveBegin) + Twine(" directive is "
3248                  "not available on non-amdhsa OSes")).str());
3249   }
3250 
3251   std::string HSAMetadataString;
3252   raw_string_ostream YamlStream(HSAMetadataString);
3253 
3254   getLexer().setSkipSpace(false);
3255 
3256   bool FoundEnd = false;
3257   while (!getLexer().is(AsmToken::Eof)) {
3258     while (getLexer().is(AsmToken::Space)) {
3259       YamlStream << getLexer().getTok().getString();
3260       Lex();
3261     }
3262 
3263     if (getLexer().is(AsmToken::Identifier)) {
3264       StringRef ID = getLexer().getTok().getIdentifier();
3265       if (ID == AssemblerDirectiveEnd) {
3266         Lex();
3267         FoundEnd = true;
3268         break;
3269       }
3270     }
3271 
3272     YamlStream << Parser.parseStringToEndOfStatement()
3273                << getContext().getAsmInfo()->getSeparatorString();
3274 
3275     Parser.eatToEndOfStatement();
3276   }
3277 
3278   getLexer().setSkipSpace(true);
3279 
3280   if (getLexer().is(AsmToken::Eof) && !FoundEnd) {
3281     return TokError(Twine("expected directive ") +
3282                     Twine(AssemblerDirectiveEnd) + Twine(" not found"));
3283   }
3284 
3285   YamlStream.flush();
3286 
3287   if (IsaInfo::hasCodeObjectV3(&getSTI())) {
3288     if (!getTargetStreamer().EmitHSAMetadataV3(HSAMetadataString))
3289       return Error(getParser().getTok().getLoc(), "invalid HSA metadata");
3290   } else {
3291     if (!getTargetStreamer().EmitHSAMetadataV2(HSAMetadataString))
3292       return Error(getParser().getTok().getLoc(), "invalid HSA metadata");
3293   }
3294 
3295   return false;
3296 }
3297 
3298 bool AMDGPUAsmParser::ParseDirectivePALMetadata() {
3299   if (getSTI().getTargetTriple().getOS() != Triple::AMDPAL) {
3300     return Error(getParser().getTok().getLoc(),
3301                  (Twine(PALMD::AssemblerDirective) + Twine(" directive is "
3302                  "not available on non-amdpal OSes")).str());
3303   }
3304 
3305   PALMD::Metadata PALMetadata;
3306   for (;;) {
3307     uint32_t Value;
3308     if (ParseAsAbsoluteExpression(Value)) {
3309       return TokError(Twine("invalid value in ") +
3310                       Twine(PALMD::AssemblerDirective));
3311     }
3312     PALMetadata.push_back(Value);
3313     if (getLexer().isNot(AsmToken::Comma))
3314       break;
3315     Lex();
3316   }
3317   getTargetStreamer().EmitPALMetadata(PALMetadata);
3318   return false;
3319 }
3320 
3321 bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) {
3322   StringRef IDVal = DirectiveID.getString();
3323 
3324   if (AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) {
3325     if (IDVal == ".amdgcn_target")
3326       return ParseDirectiveAMDGCNTarget();
3327 
3328     if (IDVal == ".amdhsa_kernel")
3329       return ParseDirectiveAMDHSAKernel();
3330 
3331     // TODO: Restructure/combine with PAL metadata directive.
3332     if (IDVal == AMDGPU::HSAMD::V3::AssemblerDirectiveBegin)
3333       return ParseDirectiveHSAMetadata();
3334   } else {
3335     if (IDVal == ".hsa_code_object_version")
3336       return ParseDirectiveHSACodeObjectVersion();
3337 
3338     if (IDVal == ".hsa_code_object_isa")
3339       return ParseDirectiveHSACodeObjectISA();
3340 
3341     if (IDVal == ".amd_kernel_code_t")
3342       return ParseDirectiveAMDKernelCodeT();
3343 
3344     if (IDVal == ".amdgpu_hsa_kernel")
3345       return ParseDirectiveAMDGPUHsaKernel();
3346 
3347     if (IDVal == ".amd_amdgpu_isa")
3348       return ParseDirectiveISAVersion();
3349 
3350     if (IDVal == AMDGPU::HSAMD::AssemblerDirectiveBegin)
3351       return ParseDirectiveHSAMetadata();
3352   }
3353 
3354   if (IDVal == PALMD::AssemblerDirective)
3355     return ParseDirectivePALMetadata();
3356 
3357   return true;
3358 }
3359 
3360 bool AMDGPUAsmParser::subtargetHasRegister(const MCRegisterInfo &MRI,
3361                                            unsigned RegNo) const {
3362 
3363   for (MCRegAliasIterator R(AMDGPU::TTMP12_TTMP13_TTMP14_TTMP15, &MRI, true);
3364        R.isValid(); ++R) {
3365     if (*R == RegNo)
3366       return isGFX9();
3367   }
3368 
3369   switch (RegNo) {
3370   case AMDGPU::TBA:
3371   case AMDGPU::TBA_LO:
3372   case AMDGPU::TBA_HI:
3373   case AMDGPU::TMA:
3374   case AMDGPU::TMA_LO:
3375   case AMDGPU::TMA_HI:
3376     return !isGFX9();
3377   case AMDGPU::XNACK_MASK:
3378   case AMDGPU::XNACK_MASK_LO:
3379   case AMDGPU::XNACK_MASK_HI:
3380     return !isCI() && !isSI() && hasXNACK();
3381   default:
3382     break;
3383   }
3384 
3385   if (isCI())
3386     return true;
3387 
3388   if (isSI()) {
3389     // No flat_scr
3390     switch (RegNo) {
3391     case AMDGPU::FLAT_SCR:
3392     case AMDGPU::FLAT_SCR_LO:
3393     case AMDGPU::FLAT_SCR_HI:
3394       return false;
3395     default:
3396       return true;
3397     }
3398   }
3399 
3400   // VI only has 102 SGPRs, so make sure we aren't trying to use the two
3401   // additional SGPRs that SI and CI have.
3402   for (MCRegAliasIterator R(AMDGPU::SGPR102_SGPR103, &MRI, true);
3403        R.isValid(); ++R) {
3404     if (*R == RegNo)
3405       return false;
3406   }
3407 
3408   return true;
3409 }
3410 
3411 OperandMatchResultTy
3412 AMDGPUAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic) {
3413   // Try to parse with a custom parser
3414   OperandMatchResultTy ResTy = MatchOperandParserImpl(Operands, Mnemonic);
3415 
3416   // If we successfully parsed the operand or if there was an error parsing,
3417   // we are done.
3418   //
3419   // If we are parsing after we reach EndOfStatement then this means we
3420   // are appending default values to the Operands list.  This is only done
3421   // by the custom parser, so we shouldn't continue on to the generic parsing.
3422   if (ResTy == MatchOperand_Success || ResTy == MatchOperand_ParseFail ||
3423       getLexer().is(AsmToken::EndOfStatement))
3424     return ResTy;
3425 
3426   ResTy = parseRegOrImm(Operands);
3427 
3428   if (ResTy == MatchOperand_Success)
3429     return ResTy;
3430 
3431   const auto &Tok = Parser.getTok();
3432   SMLoc S = Tok.getLoc();
3433 
3434   const MCExpr *Expr = nullptr;
3435   if (!Parser.parseExpression(Expr)) {
3436     Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S));
3437     return MatchOperand_Success;
3438   }
3439 
3440   // Possibly this is an instruction flag like 'gds'.
3441   if (Tok.getKind() == AsmToken::Identifier) {
3442     Operands.push_back(AMDGPUOperand::CreateToken(this, Tok.getString(), S));
3443     Parser.Lex();
3444     return MatchOperand_Success;
3445   }
3446 
3447   return MatchOperand_NoMatch;
3448 }
3449 
3450 StringRef AMDGPUAsmParser::parseMnemonicSuffix(StringRef Name) {
3451   // Clear any forced encodings from the previous instruction.
3452   setForcedEncodingSize(0);
3453   setForcedDPP(false);
3454   setForcedSDWA(false);
3455 
3456   if (Name.endswith("_e64")) {
3457     setForcedEncodingSize(64);
3458     return Name.substr(0, Name.size() - 4);
3459   } else if (Name.endswith("_e32")) {
3460     setForcedEncodingSize(32);
3461     return Name.substr(0, Name.size() - 4);
3462   } else if (Name.endswith("_dpp")) {
3463     setForcedDPP(true);
3464     return Name.substr(0, Name.size() - 4);
3465   } else if (Name.endswith("_sdwa")) {
3466     setForcedSDWA(true);
3467     return Name.substr(0, Name.size() - 5);
3468   }
3469   return Name;
3470 }
3471 
3472 bool AMDGPUAsmParser::ParseInstruction(ParseInstructionInfo &Info,
3473                                        StringRef Name,
3474                                        SMLoc NameLoc, OperandVector &Operands) {
3475   // Add the instruction mnemonic
3476   Name = parseMnemonicSuffix(Name);
3477   Operands.push_back(AMDGPUOperand::CreateToken(this, Name, NameLoc));
3478 
3479   while (!getLexer().is(AsmToken::EndOfStatement)) {
3480     OperandMatchResultTy Res = parseOperand(Operands, Name);
3481 
3482     // Eat the comma or space if there is one.
3483     if (getLexer().is(AsmToken::Comma))
3484       Parser.Lex();
3485 
3486     switch (Res) {
3487       case MatchOperand_Success: break;
3488       case MatchOperand_ParseFail:
3489         Error(getLexer().getLoc(), "failed parsing operand.");
3490         while (!getLexer().is(AsmToken::EndOfStatement)) {
3491           Parser.Lex();
3492         }
3493         return true;
3494       case MatchOperand_NoMatch:
3495         Error(getLexer().getLoc(), "not a valid operand.");
3496         while (!getLexer().is(AsmToken::EndOfStatement)) {
3497           Parser.Lex();
3498         }
3499         return true;
3500     }
3501   }
3502 
3503   return false;
3504 }
3505 
3506 //===----------------------------------------------------------------------===//
3507 // Utility functions
3508 //===----------------------------------------------------------------------===//
3509 
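// Parses an integer operand written as "<prefix>:<value>", e.g. "offset:16";
// a '-' following the colon negates the parsed value.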
3510 OperandMatchResultTy
3511 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, int64_t &Int) {
3512   switch(getLexer().getKind()) {
3513     default: return MatchOperand_NoMatch;
3514     case AsmToken::Identifier: {
3515       StringRef Name = Parser.getTok().getString();
3516       if (!Name.equals(Prefix)) {
3517         return MatchOperand_NoMatch;
3518       }
3519 
3520       Parser.Lex();
3521       if (getLexer().isNot(AsmToken::Colon))
3522         return MatchOperand_ParseFail;
3523 
3524       Parser.Lex();
3525 
3526       bool IsMinus = false;
3527       if (getLexer().getKind() == AsmToken::Minus) {
3528         Parser.Lex();
3529         IsMinus = true;
3530       }
3531 
3532       if (getLexer().isNot(AsmToken::Integer))
3533         return MatchOperand_ParseFail;
3534 
3535       if (getParser().parseAbsoluteExpression(Int))
3536         return MatchOperand_ParseFail;
3537 
3538       if (IsMinus)
3539         Int = -Int;
3540       break;
3541     }
3542   }
3543   return MatchOperand_Success;
3544 }
3545 
3546 OperandMatchResultTy
3547 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
3548                                     AMDGPUOperand::ImmTy ImmTy,
3549                                     bool (*ConvertResult)(int64_t&)) {
3550   SMLoc S = Parser.getTok().getLoc();
3551   int64_t Value = 0;
3552 
3553   OperandMatchResultTy Res = parseIntWithPrefix(Prefix, Value);
3554   if (Res != MatchOperand_Success)
3555     return Res;
3556 
3557   if (ConvertResult && !ConvertResult(Value)) {
3558     return MatchOperand_ParseFail;
3559   }
3560 
3561   Operands.push_back(AMDGPUOperand::CreateImm(this, Value, S, ImmTy));
3562   return MatchOperand_Success;
3563 }
3564 
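// Parses an array-valued operand written as "<prefix>:[v0,v1,...]" with up to
// four 0/1 elements, e.g. "op_sel:[0,1,1,0]"; the values are packed into a
// single immediate with element i stored at bit i.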
3565 OperandMatchResultTy AMDGPUAsmParser::parseOperandArrayWithPrefix(
3566   const char *Prefix,
3567   OperandVector &Operands,
3568   AMDGPUOperand::ImmTy ImmTy,
3569   bool (*ConvertResult)(int64_t&)) {
3570   StringRef Name = Parser.getTok().getString();
3571   if (!Name.equals(Prefix))
3572     return MatchOperand_NoMatch;
3573 
3574   Parser.Lex();
3575   if (getLexer().isNot(AsmToken::Colon))
3576     return MatchOperand_ParseFail;
3577 
3578   Parser.Lex();
3579   if (getLexer().isNot(AsmToken::LBrac))
3580     return MatchOperand_ParseFail;
3581   Parser.Lex();
3582 
3583   unsigned Val = 0;
3584   SMLoc S = Parser.getTok().getLoc();
3585 
3586   // FIXME: How to verify the number of elements matches the number of src
3587   // operands?
3588   for (int I = 0; I < 4; ++I) {
3589     if (I != 0) {
3590       if (getLexer().is(AsmToken::RBrac))
3591         break;
3592 
3593       if (getLexer().isNot(AsmToken::Comma))
3594         return MatchOperand_ParseFail;
3595       Parser.Lex();
3596     }
3597 
3598     if (getLexer().isNot(AsmToken::Integer))
3599       return MatchOperand_ParseFail;
3600 
3601     int64_t Op;
3602     if (getParser().parseAbsoluteExpression(Op))
3603       return MatchOperand_ParseFail;
3604 
3605     if (Op != 0 && Op != 1)
3606       return MatchOperand_ParseFail;
3607     Val |= (Op << I);
3608   }
3609 
3610   Parser.Lex();
3611   Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S, ImmTy));
3612   return MatchOperand_Success;
3613 }
3614 
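// Parses a named single-bit modifier: the bare name (e.g. "glc") sets the bit
// and the "no"-prefixed form (e.g. "noglc") clears it. If the statement ends
// before the modifier, the default value 0 is used.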
3615 OperandMatchResultTy
3616 AMDGPUAsmParser::parseNamedBit(const char *Name, OperandVector &Operands,
3617                                AMDGPUOperand::ImmTy ImmTy) {
3618   int64_t Bit = 0;
3619   SMLoc S = Parser.getTok().getLoc();
3620 
3621   // If we are already at the end of the statement, this is a default
3622   // argument, so use the default value.
3623   if (getLexer().isNot(AsmToken::EndOfStatement)) {
3624     switch(getLexer().getKind()) {
3625       case AsmToken::Identifier: {
3626         StringRef Tok = Parser.getTok().getString();
3627         if (Tok == Name) {
3628           if (Tok == "r128" && isGFX9())
3629             Error(S, "r128 modifier is not supported on this GPU");
3630           if (Tok == "a16" && !isGFX9())
3631             Error(S, "a16 modifier is not supported on this GPU");
3632           Bit = 1;
3633           Parser.Lex();
3634         } else if (Tok.startswith("no") && Tok.endswith(Name)) {
3635           Bit = 0;
3636           Parser.Lex();
3637         } else {
3638           return MatchOperand_NoMatch;
3639         }
3640         break;
3641       }
3642       default:
3643         return MatchOperand_NoMatch;
3644     }
3645   }
3646 
3647   Operands.push_back(AMDGPUOperand::CreateImm(this, Bit, S, ImmTy));
3648   return MatchOperand_Success;
3649 }
3650 
3651 static void addOptionalImmOperand(
3652   MCInst& Inst, const OperandVector& Operands,
3653   AMDGPUAsmParser::OptionalImmIndexMap& OptionalIdx,
3654   AMDGPUOperand::ImmTy ImmT,
3655   int64_t Default = 0) {
3656   auto i = OptionalIdx.find(ImmT);
3657   if (i != OptionalIdx.end()) {
3658     unsigned Idx = i->second;
3659     ((AMDGPUOperand &)*Operands[Idx]).addImmOperands(Inst, 1);
3660   } else {
3661     Inst.addOperand(MCOperand::createImm(Default));
3662   }
3663 }
3664 
3665 OperandMatchResultTy
3666 AMDGPUAsmParser::parseStringWithPrefix(StringRef Prefix, StringRef &Value) {
3667   if (getLexer().isNot(AsmToken::Identifier)) {
3668     return MatchOperand_NoMatch;
3669   }
3670   StringRef Tok = Parser.getTok().getString();
3671   if (Tok != Prefix) {
3672     return MatchOperand_NoMatch;
3673   }
3674 
3675   Parser.Lex();
3676   if (getLexer().isNot(AsmToken::Colon)) {
3677     return MatchOperand_ParseFail;
3678   }
3679 
3680   Parser.Lex();
3681   if (getLexer().isNot(AsmToken::Identifier)) {
3682     return MatchOperand_ParseFail;
3683   }
3684 
3685   Value = Parser.getTok().getString();
3686   return MatchOperand_Success;
3687 }
3688 
3689 // dfmt and nfmt (in a tbuffer instruction) are parsed as one to allow their
3690 // values to live in a joint format operand in the MCInst encoding.
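// For example, "dfmt:1 nfmt:2" (in either order) is folded into the single
// immediate 1 | (2 << 4) = 0x21 by the loop below.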
3691 OperandMatchResultTy
3692 AMDGPUAsmParser::parseDfmtNfmt(OperandVector &Operands) {
3693   SMLoc S = Parser.getTok().getLoc();
3694   int64_t Dfmt = 0, Nfmt = 0;
3695   // dfmt and nfmt can appear in either order, and each is optional.
3696   bool GotDfmt = false, GotNfmt = false;
3697   while (!GotDfmt || !GotNfmt) {
3698     if (!GotDfmt) {
3699       auto Res = parseIntWithPrefix("dfmt", Dfmt);
3700       if (Res != MatchOperand_NoMatch) {
3701         if (Res != MatchOperand_Success)
3702           return Res;
3703         if (Dfmt >= 16) {
3704           Error(Parser.getTok().getLoc(), "out of range dfmt");
3705           return MatchOperand_ParseFail;
3706         }
3707         GotDfmt = true;
3708         Parser.Lex();
3709         continue;
3710       }
3711     }
3712     if (!GotNfmt) {
3713       auto Res = parseIntWithPrefix("nfmt", Nfmt);
3714       if (Res != MatchOperand_NoMatch) {
3715         if (Res != MatchOperand_Success)
3716           return Res;
3717         if (Nfmt >= 8) {
3718           Error(Parser.getTok().getLoc(), "out of range nfmt");
3719           return MatchOperand_ParseFail;
3720         }
3721         GotNfmt = true;
3722         Parser.Lex();
3723         continue;
3724       }
3725     }
3726     break;
3727   }
3728   if (!GotDfmt && !GotNfmt)
3729     return MatchOperand_NoMatch;
3730   auto Format = Dfmt | (Nfmt << 4);
3731   Operands.push_back(
3732       AMDGPUOperand::CreateImm(this, Format, S, AMDGPUOperand::ImmTyFORMAT));
3733   return MatchOperand_Success;
3734 }
3735 
3736 //===----------------------------------------------------------------------===//
3737 // ds
3738 //===----------------------------------------------------------------------===//
3739 
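// cvtDSOffset01 handles DS instructions with split offsets, e.g.
// "ds_write2_b32 v1, v2, v3 offset0:4 offset1:8": register operands are added
// first, then the offset0/offset1/gds immediates, and finally the implicit m0.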
3740 void AMDGPUAsmParser::cvtDSOffset01(MCInst &Inst,
3741                                     const OperandVector &Operands) {
3742   OptionalImmIndexMap OptionalIdx;
3743 
3744   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
3745     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
3746 
3747     // Add the register arguments
3748     if (Op.isReg()) {
3749       Op.addRegOperands(Inst, 1);
3750       continue;
3751     }
3752 
3753     // Handle optional arguments
3754     OptionalIdx[Op.getImmTy()] = i;
3755   }
3756 
3757   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset0);
3758   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset1);
3759   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS);
3760 
3761   Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0
3762 }
3763 
3764 void AMDGPUAsmParser::cvtDSImpl(MCInst &Inst, const OperandVector &Operands,
3765                                 bool IsGdsHardcoded) {
3766   OptionalImmIndexMap OptionalIdx;
3767 
3768   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
3769     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
3770 
3771     // Add the register arguments
3772     if (Op.isReg()) {
3773       Op.addRegOperands(Inst, 1);
3774       continue;
3775     }
3776 
3777     if (Op.isToken() && Op.getToken() == "gds") {
3778       IsGdsHardcoded = true;
3779       continue;
3780     }
3781 
3782     // Handle optional arguments
3783     OptionalIdx[Op.getImmTy()] = i;
3784   }
3785 
3786   AMDGPUOperand::ImmTy OffsetType =
3787     (Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_si ||
3788      Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_vi) ? AMDGPUOperand::ImmTySwizzle :
3789                                                       AMDGPUOperand::ImmTyOffset;
3790 
3791   addOptionalImmOperand(Inst, Operands, OptionalIdx, OffsetType);
3792 
3793   if (!IsGdsHardcoded) {
3794     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS);
3795   }
3796   Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0
3797 }
3798 
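// Converts a parsed "exp" instruction: up to four source registers (or "off")
// are collected, the done/compr/vm modifiers are handled, and the en mask is
// computed from which sources are actually enabled (two bits per source in
// the compr case).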
3799 void AMDGPUAsmParser::cvtExp(MCInst &Inst, const OperandVector &Operands) {
3800   OptionalImmIndexMap OptionalIdx;
3801 
3802   unsigned OperandIdx[4];
3803   unsigned EnMask = 0;
3804   int SrcIdx = 0;
3805 
3806   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
3807     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
3808 
3809     // Add the register arguments
3810     if (Op.isReg()) {
3811       assert(SrcIdx < 4);
3812       OperandIdx[SrcIdx] = Inst.size();
3813       Op.addRegOperands(Inst, 1);
3814       ++SrcIdx;
3815       continue;
3816     }
3817 
3818     if (Op.isOff()) {
3819       assert(SrcIdx < 4);
3820       OperandIdx[SrcIdx] = Inst.size();
3821       Inst.addOperand(MCOperand::createReg(AMDGPU::NoRegister));
3822       ++SrcIdx;
3823       continue;
3824     }
3825 
3826     if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyExpTgt) {
3827       Op.addImmOperands(Inst, 1);
3828       continue;
3829     }
3830 
3831     if (Op.isToken() && Op.getToken() == "done")
3832       continue;
3833 
3834     // Handle optional arguments
3835     OptionalIdx[Op.getImmTy()] = i;
3836   }
3837 
3838   assert(SrcIdx == 4);
3839 
3840   bool Compr = false;
3841   if (OptionalIdx.find(AMDGPUOperand::ImmTyExpCompr) != OptionalIdx.end()) {
3842     Compr = true;
3843     Inst.getOperand(OperandIdx[1]) = Inst.getOperand(OperandIdx[2]);
3844     Inst.getOperand(OperandIdx[2]).setReg(AMDGPU::NoRegister);
3845     Inst.getOperand(OperandIdx[3]).setReg(AMDGPU::NoRegister);
3846   }
3847 
3848   for (auto i = 0; i < SrcIdx; ++i) {
3849     if (Inst.getOperand(OperandIdx[i]).getReg() != AMDGPU::NoRegister) {
3850       EnMask |= Compr? (0x3 << i * 2) : (0x1 << i);
3851     }
3852   }
3853 
3854   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpVM);
3855   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpCompr);
3856 
3857   Inst.addOperand(MCOperand::createImm(EnMask));
3858 }
3859 
3860 //===----------------------------------------------------------------------===//
3861 // s_waitcnt
3862 //===----------------------------------------------------------------------===//
3863 
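// An s_waitcnt operand is either a raw integer or a list of counters such as
// "vmcnt(0) lgkmcnt(0)". parseCnt folds each "vmcnt(N)"/"expcnt(N)"/
// "lgkmcnt(N)" value into the combined immediate; the "_sat" variants clamp
// an out-of-range count to the field maximum instead of reporting an error.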
3864 static bool
3865 encodeCnt(
3866   const AMDGPU::IsaVersion ISA,
3867   int64_t &IntVal,
3868   int64_t CntVal,
3869   bool Saturate,
3870   unsigned (*encode)(const IsaVersion &Version, unsigned, unsigned),
3871   unsigned (*decode)(const IsaVersion &Version, unsigned))
3872 {
3873   bool Failed = false;
3874 
3875   IntVal = encode(ISA, IntVal, CntVal);
3876   if (CntVal != decode(ISA, IntVal)) {
3877     if (Saturate) {
3878       IntVal = encode(ISA, IntVal, -1);
3879     } else {
3880       Failed = true;
3881     }
3882   }
3883   return Failed;
3884 }
3885 
3886 bool AMDGPUAsmParser::parseCnt(int64_t &IntVal) {
3887   StringRef CntName = Parser.getTok().getString();
3888   int64_t CntVal;
3889 
3890   Parser.Lex();
3891   if (getLexer().isNot(AsmToken::LParen))
3892     return true;
3893 
3894   Parser.Lex();
3895   if (getLexer().isNot(AsmToken::Integer))
3896     return true;
3897 
3898   SMLoc ValLoc = Parser.getTok().getLoc();
3899   if (getParser().parseAbsoluteExpression(CntVal))
3900     return true;
3901 
3902   AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
3903 
3904   bool Failed = true;
3905   bool Sat = CntName.endswith("_sat");
3906 
3907   if (CntName == "vmcnt" || CntName == "vmcnt_sat") {
3908     Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeVmcnt, decodeVmcnt);
3909   } else if (CntName == "expcnt" || CntName == "expcnt_sat") {
3910     Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeExpcnt, decodeExpcnt);
3911   } else if (CntName == "lgkmcnt" || CntName == "lgkmcnt_sat") {
3912     Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeLgkmcnt, decodeLgkmcnt);
3913   }
3914 
3915   if (Failed) {
3916     Error(ValLoc, "too large value for " + CntName);
3917     return true;
3918   }
3919 
3920   if (getLexer().isNot(AsmToken::RParen)) {
3921     return true;
3922   }
3923 
3924   Parser.Lex();
3925   if (getLexer().is(AsmToken::Amp) || getLexer().is(AsmToken::Comma)) {
3926     const AsmToken NextToken = getLexer().peekTok();
3927     if (NextToken.is(AsmToken::Identifier)) {
3928       Parser.Lex();
3929     }
3930   }
3931 
3932   return false;
3933 }
3934 
3935 OperandMatchResultTy
3936 AMDGPUAsmParser::parseSWaitCntOps(OperandVector &Operands) {
3937   AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
3938   int64_t Waitcnt = getWaitcntBitMask(ISA);
3939   SMLoc S = Parser.getTok().getLoc();
3940 
3941   switch(getLexer().getKind()) {
3942     default: return MatchOperand_ParseFail;
3943     case AsmToken::Integer:
3944       // The operand can be an integer value.
3945       if (getParser().parseAbsoluteExpression(Waitcnt))
3946         return MatchOperand_ParseFail;
3947       break;
3948 
3949     case AsmToken::Identifier:
3950       do {
3951         if (parseCnt(Waitcnt))
3952           return MatchOperand_ParseFail;
3953       } while(getLexer().isNot(AsmToken::EndOfStatement));
3954       break;
3955   }
3956   Operands.push_back(AMDGPUOperand::CreateImm(this, Waitcnt, S));
3957   return MatchOperand_Success;
3958 }
3959 
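// Parses the "hwreg(<id>[, <offset>, <width>])" construct (e.g. for
// s_setreg_b32); <id> may be a symbolic name from Hwreg::IdSymbolic or an
// integer, and the bit offset/width pair is optional.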
3960 bool AMDGPUAsmParser::parseHwregConstruct(OperandInfoTy &HwReg, int64_t &Offset,
3961                                           int64_t &Width) {
3962   using namespace llvm::AMDGPU::Hwreg;
3963 
3964   if (Parser.getTok().getString() != "hwreg")
3965     return true;
3966   Parser.Lex();
3967 
3968   if (getLexer().isNot(AsmToken::LParen))
3969     return true;
3970   Parser.Lex();
3971 
3972   if (getLexer().is(AsmToken::Identifier)) {
3973     HwReg.IsSymbolic = true;
3974     HwReg.Id = ID_UNKNOWN_;
3975     const StringRef tok = Parser.getTok().getString();
3976     int Last = ID_SYMBOLIC_LAST_;
3977     if (isSI() || isCI() || isVI())
3978       Last = ID_SYMBOLIC_FIRST_GFX9_;
3979     for (int i = ID_SYMBOLIC_FIRST_; i < Last; ++i) {
3980       if (tok == IdSymbolic[i]) {
3981         HwReg.Id = i;
3982         break;
3983       }
3984     }
3985     Parser.Lex();
3986   } else {
3987     HwReg.IsSymbolic = false;
3988     if (getLexer().isNot(AsmToken::Integer))
3989       return true;
3990     if (getParser().parseAbsoluteExpression(HwReg.Id))
3991       return true;
3992   }
3993 
3994   if (getLexer().is(AsmToken::RParen)) {
3995     Parser.Lex();
3996     return false;
3997   }
3998 
3999   // optional params
4000   if (getLexer().isNot(AsmToken::Comma))
4001     return true;
4002   Parser.Lex();
4003 
4004   if (getLexer().isNot(AsmToken::Integer))
4005     return true;
4006   if (getParser().parseAbsoluteExpression(Offset))
4007     return true;
4008 
4009   if (getLexer().isNot(AsmToken::Comma))
4010     return true;
4011   Parser.Lex();
4012 
4013   if (getLexer().isNot(AsmToken::Integer))
4014     return true;
4015   if (getParser().parseAbsoluteExpression(Width))
4016     return true;
4017 
4018   if (getLexer().isNot(AsmToken::RParen))
4019     return true;
4020   Parser.Lex();
4021 
4022   return false;
4023 }
4024 
4025 OperandMatchResultTy AMDGPUAsmParser::parseHwreg(OperandVector &Operands) {
4026   using namespace llvm::AMDGPU::Hwreg;
4027 
4028   int64_t Imm16Val = 0;
4029   SMLoc S = Parser.getTok().getLoc();
4030 
4031   switch(getLexer().getKind()) {
4032     default: return MatchOperand_NoMatch;
4033     case AsmToken::Integer:
4034       // The operand can be an integer value.
4035       if (getParser().parseAbsoluteExpression(Imm16Val))
4036         return MatchOperand_NoMatch;
4037       if (Imm16Val < 0 || !isUInt<16>(Imm16Val)) {
4038         Error(S, "invalid immediate: only 16-bit values are legal");
4039         // Do not return an error code, but create an imm operand anyway and proceed
4040         // to the next operand, if any. That avoids unnecessary error messages.
4041       }
4042       break;
4043 
4044     case AsmToken::Identifier: {
4045         OperandInfoTy HwReg(ID_UNKNOWN_);
4046         int64_t Offset = OFFSET_DEFAULT_;
4047         int64_t Width = WIDTH_M1_DEFAULT_ + 1;
4048         if (parseHwregConstruct(HwReg, Offset, Width))
4049           return MatchOperand_ParseFail;
4050         if (HwReg.Id < 0 || !isUInt<ID_WIDTH_>(HwReg.Id)) {
4051           if (HwReg.IsSymbolic)
4052             Error(S, "invalid symbolic name of hardware register");
4053           else
4054             Error(S, "invalid code of hardware register: only 6-bit values are legal");
4055         }
4056         if (Offset < 0 || !isUInt<OFFSET_WIDTH_>(Offset))
4057           Error(S, "invalid bit offset: only 5-bit values are legal");
4058         if ((Width-1) < 0 || !isUInt<WIDTH_M1_WIDTH_>(Width-1))
4059           Error(S, "invalid bitfield width: only values from 1 to 32 are legal");
4060         Imm16Val = (HwReg.Id << ID_SHIFT_) | (Offset << OFFSET_SHIFT_) | ((Width-1) << WIDTH_M1_SHIFT_);
4061       }
4062       break;
4063   }
4064   Operands.push_back(AMDGPUOperand::CreateImm(this, Imm16Val, S, AMDGPUOperand::ImmTyHwreg));
4065   return MatchOperand_Success;
4066 }
4067 
4068 bool AMDGPUOperand::isSWaitCnt() const {
4069   return isImm();
4070 }
4071 
4072 bool AMDGPUOperand::isHwreg() const {
4073   return isImmTy(ImmTyHwreg);
4074 }
4075 
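// Parses the "sendmsg(<msg>[, <op>[, <stream id>]])" construct used by
// s_sendmsg; <msg> and <op> may be given symbolically or as integers, and the
// stream id is only parsed for GS/GS_DONE messages with an operation other
// than NOP.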
4076 bool AMDGPUAsmParser::parseSendMsgConstruct(OperandInfoTy &Msg, OperandInfoTy &Operation, int64_t &StreamId) {
4077   using namespace llvm::AMDGPU::SendMsg;
4078 
4079   if (Parser.getTok().getString() != "sendmsg")
4080     return true;
4081   Parser.Lex();
4082 
4083   if (getLexer().isNot(AsmToken::LParen))
4084     return true;
4085   Parser.Lex();
4086 
4087   if (getLexer().is(AsmToken::Identifier)) {
4088     Msg.IsSymbolic = true;
4089     Msg.Id = ID_UNKNOWN_;
4090     const StringRef tok = Parser.getTok().getString();
4091     for (int i = ID_GAPS_FIRST_; i < ID_GAPS_LAST_; ++i) {
4092       switch(i) {
4093         default: continue; // Omit gaps.
4094         case ID_INTERRUPT: case ID_GS: case ID_GS_DONE:  case ID_SYSMSG: break;
4095       }
4096       if (tok == IdSymbolic[i]) {
4097         Msg.Id = i;
4098         break;
4099       }
4100     }
4101     Parser.Lex();
4102   } else {
4103     Msg.IsSymbolic = false;
4104     if (getLexer().isNot(AsmToken::Integer))
4105       return true;
4106     if (getParser().parseAbsoluteExpression(Msg.Id))
4107       return true;
4108     if (getLexer().is(AsmToken::Integer))
4109       if (getParser().parseAbsoluteExpression(Msg.Id))
4110         Msg.Id = ID_UNKNOWN_;
4111   }
4112   if (Msg.Id == ID_UNKNOWN_) // Don't know how to parse the rest.
4113     return false;
4114 
4115   if (!(Msg.Id == ID_GS || Msg.Id == ID_GS_DONE || Msg.Id == ID_SYSMSG)) {
4116     if (getLexer().isNot(AsmToken::RParen))
4117       return true;
4118     Parser.Lex();
4119     return false;
4120   }
4121 
4122   if (getLexer().isNot(AsmToken::Comma))
4123     return true;
4124   Parser.Lex();
4125 
4126   assert(Msg.Id == ID_GS || Msg.Id == ID_GS_DONE || Msg.Id == ID_SYSMSG);
4127   Operation.Id = ID_UNKNOWN_;
4128   if (getLexer().is(AsmToken::Identifier)) {
4129     Operation.IsSymbolic = true;
4130     const char* const *S = (Msg.Id == ID_SYSMSG) ? OpSysSymbolic : OpGsSymbolic;
4131     const int F = (Msg.Id == ID_SYSMSG) ? OP_SYS_FIRST_ : OP_GS_FIRST_;
4132     const int L = (Msg.Id == ID_SYSMSG) ? OP_SYS_LAST_ : OP_GS_LAST_;
4133     const StringRef Tok = Parser.getTok().getString();
4134     for (int i = F; i < L; ++i) {
4135       if (Tok == S[i]) {
4136         Operation.Id = i;
4137         break;
4138       }
4139     }
4140     Parser.Lex();
4141   } else {
4142     Operation.IsSymbolic = false;
4143     if (getLexer().isNot(AsmToken::Integer))
4144       return true;
4145     if (getParser().parseAbsoluteExpression(Operation.Id))
4146       return true;
4147   }
4148 
4149   if ((Msg.Id == ID_GS || Msg.Id == ID_GS_DONE) && Operation.Id != OP_GS_NOP) {
4150     // Stream id is optional.
4151     if (getLexer().is(AsmToken::RParen)) {
4152       Parser.Lex();
4153       return false;
4154     }
4155 
4156     if (getLexer().isNot(AsmToken::Comma))
4157       return true;
4158     Parser.Lex();
4159 
4160     if (getLexer().isNot(AsmToken::Integer))
4161       return true;
4162     if (getParser().parseAbsoluteExpression(StreamId))
4163       return true;
4164   }
4165 
4166   if (getLexer().isNot(AsmToken::RParen))
4167     return true;
4168   Parser.Lex();
4169   return false;
4170 }
4171 
4172 OperandMatchResultTy AMDGPUAsmParser::parseInterpSlot(OperandVector &Operands) {
4173   if (getLexer().getKind() != AsmToken::Identifier)
4174     return MatchOperand_NoMatch;
4175 
4176   StringRef Str = Parser.getTok().getString();
4177   int Slot = StringSwitch<int>(Str)
4178     .Case("p10", 0)
4179     .Case("p20", 1)
4180     .Case("p0", 2)
4181     .Default(-1);
4182 
4183   SMLoc S = Parser.getTok().getLoc();
4184   if (Slot == -1)
4185     return MatchOperand_ParseFail;
4186 
4187   Parser.Lex();
4188   Operands.push_back(AMDGPUOperand::CreateImm(this, Slot, S,
4189                                               AMDGPUOperand::ImmTyInterpSlot));
4190   return MatchOperand_Success;
4191 }
4192 
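// Parses an interpolation attribute of the form "attr<N>.<chan>", e.g.
// "attr0.x"; N must be at most 63 and the channel suffix maps x/y/z/w to 0-3.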
4193 OperandMatchResultTy AMDGPUAsmParser::parseInterpAttr(OperandVector &Operands) {
4194   if (getLexer().getKind() != AsmToken::Identifier)
4195     return MatchOperand_NoMatch;
4196 
4197   StringRef Str = Parser.getTok().getString();
4198   if (!Str.startswith("attr"))
4199     return MatchOperand_NoMatch;
4200 
4201   StringRef Chan = Str.take_back(2);
4202   int AttrChan = StringSwitch<int>(Chan)
4203     .Case(".x", 0)
4204     .Case(".y", 1)
4205     .Case(".z", 2)
4206     .Case(".w", 3)
4207     .Default(-1);
4208   if (AttrChan == -1)
4209     return MatchOperand_ParseFail;
4210 
4211   Str = Str.drop_back(2).drop_front(4);
4212 
4213   uint8_t Attr;
4214   if (Str.getAsInteger(10, Attr))
4215     return MatchOperand_ParseFail;
4216 
4217   SMLoc S = Parser.getTok().getLoc();
4218   Parser.Lex();
4219   if (Attr > 63) {
4220     Error(S, "out of bounds attr");
4221     return MatchOperand_Success;
4222   }
4223 
4224   SMLoc SChan = SMLoc::getFromPointer(Chan.data());
4225 
4226   Operands.push_back(AMDGPUOperand::CreateImm(this, Attr, S,
4227                                               AMDGPUOperand::ImmTyInterpAttr));
4228   Operands.push_back(AMDGPUOperand::CreateImm(this, AttrChan, SChan,
4229                                               AMDGPUOperand::ImmTyAttrChan));
4230   return MatchOperand_Success;
4231 }
4232 
4233 void AMDGPUAsmParser::errorExpTgt() {
4234   Error(Parser.getTok().getLoc(), "invalid exp target");
4235 }
4236 
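// Maps an export target name to its numeric value: "mrt0".."mrt7" -> 0-7,
// "mrtz" -> 8, "null" -> 9, "pos0".."pos3" -> 12-15 and "param0".."param31"
// -> 32-63; anything else is rejected or reported via errorExpTgt.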
4237 OperandMatchResultTy AMDGPUAsmParser::parseExpTgtImpl(StringRef Str,
4238                                                       uint8_t &Val) {
4239   if (Str == "null") {
4240     Val = 9;
4241     return MatchOperand_Success;
4242   }
4243 
4244   if (Str.startswith("mrt")) {
4245     Str = Str.drop_front(3);
4246     if (Str == "z") { // == mrtz
4247       Val = 8;
4248       return MatchOperand_Success;
4249     }
4250 
4251     if (Str.getAsInteger(10, Val))
4252       return MatchOperand_ParseFail;
4253 
4254     if (Val > 7)
4255       errorExpTgt();
4256 
4257     return MatchOperand_Success;
4258   }
4259 
4260   if (Str.startswith("pos")) {
4261     Str = Str.drop_front(3);
4262     if (Str.getAsInteger(10, Val))
4263       return MatchOperand_ParseFail;
4264 
4265     if (Val > 3)
4266       errorExpTgt();
4267 
4268     Val += 12;
4269     return MatchOperand_Success;
4270   }
4271 
4272   if (Str.startswith("param")) {
4273     Str = Str.drop_front(5);
4274     if (Str.getAsInteger(10, Val))
4275       return MatchOperand_ParseFail;
4276 
4277     if (Val >= 32)
4278       errorExpTgt();
4279 
4280     Val += 32;
4281     return MatchOperand_Success;
4282   }
4283 
4284   if (Str.startswith("invalid_target_")) {
4285     Str = Str.drop_front(15);
4286     if (Str.getAsInteger(10, Val))
4287       return MatchOperand_ParseFail;
4288 
4289     errorExpTgt();
4290     return MatchOperand_Success;
4291   }
4292 
4293   return MatchOperand_NoMatch;
4294 }
4295 
4296 OperandMatchResultTy AMDGPUAsmParser::parseExpTgt(OperandVector &Operands) {
4297   uint8_t Val;
4298   StringRef Str = Parser.getTok().getString();
4299 
4300   auto Res = parseExpTgtImpl(Str, Val);
4301   if (Res != MatchOperand_Success)
4302     return Res;
4303 
4304   SMLoc S = Parser.getTok().getLoc();
4305   Parser.Lex();
4306 
4307   Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S,
4308                                               AMDGPUOperand::ImmTyExpTgt));
4309   return MatchOperand_Success;
4310 }
4311 
4312 OperandMatchResultTy
4313 AMDGPUAsmParser::parseSendMsgOp(OperandVector &Operands) {
4314   using namespace llvm::AMDGPU::SendMsg;
4315 
4316   int64_t Imm16Val = 0;
4317   SMLoc S = Parser.getTok().getLoc();
4318 
4319   switch(getLexer().getKind()) {
4320   default:
4321     return MatchOperand_NoMatch;
4322   case AsmToken::Integer:
4323     // The operand can be an integer value.
4324     if (getParser().parseAbsoluteExpression(Imm16Val))
4325       return MatchOperand_NoMatch;
4326     if (Imm16Val < 0 || !isUInt<16>(Imm16Val)) {
4327       Error(S, "invalid immediate: only 16-bit values are legal");
4328       // Do not return an error code, but create an imm operand anyway and proceed
4329       // to the next operand, if any. That avoids unnecessary error messages.
4330     }
4331     break;
4332   case AsmToken::Identifier: {
4333       OperandInfoTy Msg(ID_UNKNOWN_);
4334       OperandInfoTy Operation(OP_UNKNOWN_);
4335       int64_t StreamId = STREAM_ID_DEFAULT_;
4336       if (parseSendMsgConstruct(Msg, Operation, StreamId))
4337         return MatchOperand_ParseFail;
4338       do {
4339         // Validate and encode message ID.
4340         if (! ((ID_INTERRUPT <= Msg.Id && Msg.Id <= ID_GS_DONE)
4341                 || Msg.Id == ID_SYSMSG)) {
4342           if (Msg.IsSymbolic)
4343             Error(S, "invalid/unsupported symbolic name of message");
4344           else
4345             Error(S, "invalid/unsupported code of message");
4346           break;
4347         }
4348         Imm16Val = (Msg.Id << ID_SHIFT_);
4349         // Validate and encode operation ID.
4350         if (Msg.Id == ID_GS || Msg.Id == ID_GS_DONE) {
4351           if (! (OP_GS_FIRST_ <= Operation.Id && Operation.Id < OP_GS_LAST_)) {
4352             if (Operation.IsSymbolic)
4353               Error(S, "invalid symbolic name of GS_OP");
4354             else
4355               Error(S, "invalid code of GS_OP: only 2-bit values are legal");
4356             break;
4357           }
4358           if (Operation.Id == OP_GS_NOP
4359               && Msg.Id != ID_GS_DONE) {
4360             Error(S, "invalid GS_OP: NOP is for GS_DONE only");
4361             break;
4362           }
4363           Imm16Val |= (Operation.Id << OP_SHIFT_);
4364         }
4365         if (Msg.Id == ID_SYSMSG) {
4366           if (! (OP_SYS_FIRST_ <= Operation.Id && Operation.Id < OP_SYS_LAST_)) {
4367             if (Operation.IsSymbolic)
4368               Error(S, "invalid/unsupported symbolic name of SYSMSG_OP");
4369             else
4370               Error(S, "invalid/unsupported code of SYSMSG_OP");
4371             break;
4372           }
4373           Imm16Val |= (Operation.Id << OP_SHIFT_);
4374         }
4375         // Validate and encode stream ID.
4376         if ((Msg.Id == ID_GS || Msg.Id == ID_GS_DONE) && Operation.Id != OP_GS_NOP) {
4377           if (! (STREAM_ID_FIRST_ <= StreamId && StreamId < STREAM_ID_LAST_)) {
4378             Error(S, "invalid stream id: only 2-bit values are legal");
4379             break;
4380           }
4381           Imm16Val |= (StreamId << STREAM_ID_SHIFT_);
4382         }
4383       } while (false);
4384     }
4385     break;
4386   }
4387   Operands.push_back(AMDGPUOperand::CreateImm(this, Imm16Val, S, AMDGPUOperand::ImmTySendMsg));
4388   return MatchOperand_Success;
4389 }
4390 
4391 bool AMDGPUOperand::isSendMsg() const {
4392   return isImmTy(ImmTySendMsg);
4393 }
4394 
4395 //===----------------------------------------------------------------------===//
4396 // parser helpers
4397 //===----------------------------------------------------------------------===//
4398 
4399 bool
4400 AMDGPUAsmParser::trySkipId(const StringRef Id) {
4401   if (getLexer().getKind() == AsmToken::Identifier &&
4402       Parser.getTok().getString() == Id) {
4403     Parser.Lex();
4404     return true;
4405   }
4406   return false;
4407 }
4408 
4409 bool
4410 AMDGPUAsmParser::trySkipToken(const AsmToken::TokenKind Kind) {
4411   if (getLexer().getKind() == Kind) {
4412     Parser.Lex();
4413     return true;
4414   }
4415   return false;
4416 }
4417 
4418 bool
4419 AMDGPUAsmParser::skipToken(const AsmToken::TokenKind Kind,
4420                            const StringRef ErrMsg) {
4421   if (!trySkipToken(Kind)) {
4422     Error(Parser.getTok().getLoc(), ErrMsg);
4423     return false;
4424   }
4425   return true;
4426 }
4427 
4428 bool
4429 AMDGPUAsmParser::parseExpr(int64_t &Imm) {
4430   return !getParser().parseAbsoluteExpression(Imm);
4431 }
4432 
4433 bool
4434 AMDGPUAsmParser::parseString(StringRef &Val, const StringRef ErrMsg) {
4435   SMLoc S = Parser.getTok().getLoc();
4436   if (getLexer().getKind() == AsmToken::String) {
4437     Val = Parser.getTok().getStringContents();
4438     Parser.Lex();
4439     return true;
4440   } else {
4441     Error(S, ErrMsg);
4442     return false;
4443   }
4444 }
4445 
4446 //===----------------------------------------------------------------------===//
4447 // swizzle
4448 //===----------------------------------------------------------------------===//
4449 
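// A swizzle operand is written either as a plain 16-bit value, "offset:<imm>",
// or as a macro such as "offset:swizzle(QUAD_PERM, 0, 1, 2, 3)"; every macro
// mode below is encoded into the same 16-bit immediate.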
4450 LLVM_READNONE
4451 static unsigned
4452 encodeBitmaskPerm(const unsigned AndMask,
4453                   const unsigned OrMask,
4454                   const unsigned XorMask) {
4455   using namespace llvm::AMDGPU::Swizzle;
4456 
4457   return BITMASK_PERM_ENC |
4458          (AndMask << BITMASK_AND_SHIFT) |
4459          (OrMask  << BITMASK_OR_SHIFT)  |
4460          (XorMask << BITMASK_XOR_SHIFT);
4461 }
4462 
4463 bool
4464 AMDGPUAsmParser::parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
4465                                       const unsigned MinVal,
4466                                       const unsigned MaxVal,
4467                                       const StringRef ErrMsg) {
4468   for (unsigned i = 0; i < OpNum; ++i) {
4469     if (!skipToken(AsmToken::Comma, "expected a comma")){
4470       return false;
4471     }
4472     SMLoc ExprLoc = Parser.getTok().getLoc();
4473     if (!parseExpr(Op[i])) {
4474       return false;
4475     }
4476     if (Op[i] < MinVal || Op[i] > MaxVal) {
4477       Error(ExprLoc, ErrMsg);
4478       return false;
4479     }
4480   }
4481 
4482   return true;
4483 }
4484 
4485 bool
4486 AMDGPUAsmParser::parseSwizzleQuadPerm(int64_t &Imm) {
4487   using namespace llvm::AMDGPU::Swizzle;
4488 
4489   int64_t Lane[LANE_NUM];
4490   if (parseSwizzleOperands(LANE_NUM, Lane, 0, LANE_MAX,
4491                            "expected a 2-bit lane id")) {
4492     Imm = QUAD_PERM_ENC;
4493     for (auto i = 0; i < LANE_NUM; ++i) {
4494       Imm |= Lane[i] << (LANE_SHIFT * i);
4495     }
4496     return true;
4497   }
4498   return false;
4499 }
4500 
4501 bool
4502 AMDGPUAsmParser::parseSwizzleBroadcast(int64_t &Imm) {
4503   using namespace llvm::AMDGPU::Swizzle;
4504 
4505   SMLoc S = Parser.getTok().getLoc();
4506   int64_t GroupSize;
4507   int64_t LaneIdx;
4508 
4509   if (!parseSwizzleOperands(1, &GroupSize,
4510                             2, 32,
4511                             "group size must be in the interval [2,32]")) {
4512     return false;
4513   }
4514   if (!isPowerOf2_64(GroupSize)) {
4515     Error(S, "group size must be a power of two");
4516     return false;
4517   }
4518   if (parseSwizzleOperands(1, &LaneIdx,
4519                            0, GroupSize - 1,
4520                            "lane id must be in the interval [0,group size - 1]")) {
4521     Imm = encodeBitmaskPerm(BITMASK_MAX - GroupSize + 1, LaneIdx, 0);
4522     return true;
4523   }
4524   return false;
4525 }
4526 
4527 bool
4528 AMDGPUAsmParser::parseSwizzleReverse(int64_t &Imm) {
4529   using namespace llvm::AMDGPU::Swizzle;
4530 
4531   SMLoc S = Parser.getTok().getLoc();
4532   int64_t GroupSize;
4533 
4534   if (!parseSwizzleOperands(1, &GroupSize,
4535       2, 32, "group size must be in the interval [2,32]")) {
4536     return false;
4537   }
4538   if (!isPowerOf2_64(GroupSize)) {
4539     Error(S, "group size must be a power of two");
4540     return false;
4541   }
4542 
4543   Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize - 1);
4544   return true;
4545 }
4546 
4547 bool
4548 AMDGPUAsmParser::parseSwizzleSwap(int64_t &Imm) {
4549   using namespace llvm::AMDGPU::Swizzle;
4550 
4551   SMLoc S = Parser.getTok().getLoc();
4552   int64_t GroupSize;
4553 
4554   if (!parseSwizzleOperands(1, &GroupSize,
4555       1, 16, "group size must be in the interval [1,16]")) {
4556     return false;
4557   }
4558   if (!isPowerOf2_64(GroupSize)) {
4559     Error(S, "group size must be a power of two");
4560     return false;
4561   }
4562 
4563   Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize);
4564   return true;
4565 }
4566 
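// The BITMASK_PERM mode takes a 5-character control string, one character per
// lane-id bit with the most significant bit first: '0' forces the bit to 0,
// '1' forces it to 1, 'p' preserves it and 'i' inverts it.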
4567 bool
4568 AMDGPUAsmParser::parseSwizzleBitmaskPerm(int64_t &Imm) {
4569   using namespace llvm::AMDGPU::Swizzle;
4570 
4571   if (!skipToken(AsmToken::Comma, "expected a comma")) {
4572     return false;
4573   }
4574 
4575   StringRef Ctl;
4576   SMLoc StrLoc = Parser.getTok().getLoc();
4577   if (!parseString(Ctl)) {
4578     return false;
4579   }
4580   if (Ctl.size() != BITMASK_WIDTH) {
4581     Error(StrLoc, "expected a 5-character mask");
4582     return false;
4583   }
4584 
4585   unsigned AndMask = 0;
4586   unsigned OrMask = 0;
4587   unsigned XorMask = 0;
4588 
4589   for (size_t i = 0; i < Ctl.size(); ++i) {
4590     unsigned Mask = 1 << (BITMASK_WIDTH - 1 - i);
4591     switch(Ctl[i]) {
4592     default:
4593       Error(StrLoc, "invalid mask");
4594       return false;
4595     case '0':
4596       break;
4597     case '1':
4598       OrMask |= Mask;
4599       break;
4600     case 'p':
4601       AndMask |= Mask;
4602       break;
4603     case 'i':
4604       AndMask |= Mask;
4605       XorMask |= Mask;
4606       break;
4607     }
4608   }
4609 
4610   Imm = encodeBitmaskPerm(AndMask, OrMask, XorMask);
4611   return true;
4612 }
4613 
4614 bool
4615 AMDGPUAsmParser::parseSwizzleOffset(int64_t &Imm) {
4616 
4617   SMLoc OffsetLoc = Parser.getTok().getLoc();
4618 
4619   if (!parseExpr(Imm)) {
4620     return false;
4621   }
4622   if (!isUInt<16>(Imm)) {
4623     Error(OffsetLoc, "expected a 16-bit offset");
4624     return false;
4625   }
4626   return true;
4627 }
4628 
4629 bool
4630 AMDGPUAsmParser::parseSwizzleMacro(int64_t &Imm) {
4631   using namespace llvm::AMDGPU::Swizzle;
4632 
4633   if (skipToken(AsmToken::LParen, "expected a left parenthesis")) {
4634 
4635     SMLoc ModeLoc = Parser.getTok().getLoc();
4636     bool Ok = false;
4637 
4638     if (trySkipId(IdSymbolic[ID_QUAD_PERM])) {
4639       Ok = parseSwizzleQuadPerm(Imm);
4640     } else if (trySkipId(IdSymbolic[ID_BITMASK_PERM])) {
4641       Ok = parseSwizzleBitmaskPerm(Imm);
4642     } else if (trySkipId(IdSymbolic[ID_BROADCAST])) {
4643       Ok = parseSwizzleBroadcast(Imm);
4644     } else if (trySkipId(IdSymbolic[ID_SWAP])) {
4645       Ok = parseSwizzleSwap(Imm);
4646     } else if (trySkipId(IdSymbolic[ID_REVERSE])) {
4647       Ok = parseSwizzleReverse(Imm);
4648     } else {
4649       Error(ModeLoc, "expected a swizzle mode");
4650     }
4651 
4652     return Ok && skipToken(AsmToken::RParen, "expected a closing parenthesis");
4653   }
4654 
4655   return false;
4656 }
4657 
4658 OperandMatchResultTy
4659 AMDGPUAsmParser::parseSwizzleOp(OperandVector &Operands) {
4660   SMLoc S = Parser.getTok().getLoc();
4661   int64_t Imm = 0;
4662 
4663   if (trySkipId("offset")) {
4664 
4665     bool Ok = false;
4666     if (skipToken(AsmToken::Colon, "expected a colon")) {
4667       if (trySkipId("swizzle")) {
4668         Ok = parseSwizzleMacro(Imm);
4669       } else {
4670         Ok = parseSwizzleOffset(Imm);
4671       }
4672     }
4673 
4674     Operands.push_back(AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTySwizzle));
4675 
4676     return Ok? MatchOperand_Success : MatchOperand_ParseFail;
4677   } else {
4678     // Swizzle "offset" operand is optional.
4679     // If it is omitted, try parsing other optional operands.
4680     return parseOptionalOpr(Operands);
4681   }
4682 }
4683 
4684 bool
4685 AMDGPUOperand::isSwizzle() const {
4686   return isImmTy(ImmTySwizzle);
4687 }
4688 
4689 //===----------------------------------------------------------------------===//
4690 // VGPR Index Mode
4691 //===----------------------------------------------------------------------===//
4692 
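// Parses the "gpr_idx(<mode>[, <mode>...])" operand of s_set_gpr_idx_on; the
// mode names come from VGPRIndexMode::IdSymbolic (e.g. SRC0, DST) and are
// OR'd together. A plain 4-bit integer is also accepted.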
4693 int64_t AMDGPUAsmParser::parseGPRIdxMacro() {
4694 
4695   using namespace llvm::AMDGPU::VGPRIndexMode;
4696 
4697   if (trySkipToken(AsmToken::RParen)) {
4698     return OFF;
4699   }
4700 
4701   int64_t Imm = 0;
4702 
4703   while (true) {
4704     unsigned Mode = 0;
4705     SMLoc S = Parser.getTok().getLoc();
4706 
4707     for (unsigned ModeId = ID_MIN; ModeId <= ID_MAX; ++ModeId) {
4708       if (trySkipId(IdSymbolic[ModeId])) {
4709         Mode = 1 << ModeId;
4710         break;
4711       }
4712     }
4713 
4714     if (Mode == 0) {
4715       Error(S, (Imm == 0)?
4716                "expected a VGPR index mode or a closing parenthesis" :
4717                "expected a VGPR index mode");
4718       break;
4719     }
4720 
4721     if (Imm & Mode) {
4722       Error(S, "duplicate VGPR index mode");
4723       break;
4724     }
4725     Imm |= Mode;
4726 
4727     if (trySkipToken(AsmToken::RParen))
4728       break;
4729     if (!skipToken(AsmToken::Comma,
4730                    "expected a comma or a closing parenthesis"))
4731       break;
4732   }
4733 
4734   return Imm;
4735 }
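// A sketch of the operand this macro parser accepts (the mode names come from
// VGPRIndexMode::IdSymbolic; the particular combination is illustrative):
//   s_set_gpr_idx_on s0, gpr_idx(SRC0,DST)
// Each recognized mode sets one bit of the returned immediate, and an empty
// list, gpr_idx(), yields OFF.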
4736 
4737 OperandMatchResultTy
4738 AMDGPUAsmParser::parseGPRIdxMode(OperandVector &Operands) {
4739 
4740   int64_t Imm = 0;
4741   SMLoc S = Parser.getTok().getLoc();
4742 
4743   if (getLexer().getKind() == AsmToken::Identifier &&
4744       Parser.getTok().getString() == "gpr_idx" &&
4745       getLexer().peekTok().is(AsmToken::LParen)) {
4746 
4747     Parser.Lex();
4748     Parser.Lex();
4749 
4750     // If parsing failed, emit an error but do not return an error code
4751     // to avoid excessive error messages.
4752     Imm = parseGPRIdxMacro();
4753 
4754   } else {
4755     if (getParser().parseAbsoluteExpression(Imm))
4756       return MatchOperand_NoMatch;
4757     if (Imm < 0 || !isUInt<4>(Imm)) {
4758       Error(S, "invalid immediate: only 4-bit values are legal");
4759     }
4760   }
4761 
4762   Operands.push_back(
4763       AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyGprIdxMode));
4764   return MatchOperand_Success;
4765 }
4766 
4767 bool AMDGPUOperand::isGPRIdxMode() const {
4768   return isImmTy(ImmTyGprIdxMode);
4769 }
4770 
4771 //===----------------------------------------------------------------------===//
4772 // sopp branch targets
4773 //===----------------------------------------------------------------------===//
4774 
4775 OperandMatchResultTy
4776 AMDGPUAsmParser::parseSOppBrTarget(OperandVector &Operands) {
4777   SMLoc S = Parser.getTok().getLoc();
4778 
4779   switch (getLexer().getKind()) {
4780     default: return MatchOperand_ParseFail;
4781     case AsmToken::Integer: {
4782       int64_t Imm;
4783       if (getParser().parseAbsoluteExpression(Imm))
4784         return MatchOperand_ParseFail;
4785       Operands.push_back(AMDGPUOperand::CreateImm(this, Imm, S));
4786       return MatchOperand_Success;
4787     }
4788 
4789     case AsmToken::Identifier:
4790       Operands.push_back(AMDGPUOperand::CreateExpr(this,
4791           MCSymbolRefExpr::create(getContext().getOrCreateSymbol(
4792                                   Parser.getTok().getString()), getContext()), S));
4793       Parser.Lex();
4794       return MatchOperand_Success;
4795   }
4796 }
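// For example (the branch target itself is illustrative), "s_branch 3" takes
// the integer path and becomes a plain immediate, while "s_branch loop_end"
// takes the identifier path and becomes an MCSymbolRefExpr to be resolved
// later.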
4797 
4798 //===----------------------------------------------------------------------===//
4799 // mubuf
4800 //===----------------------------------------------------------------------===//
4801 
4802 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultGLC() const {
4803   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyGLC);
4804 }
4805 
4806 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSLC() const {
4807   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTySLC);
4808 }
4809 
4810 void AMDGPUAsmParser::cvtMubufImpl(MCInst &Inst,
4811                                const OperandVector &Operands,
4812                                bool IsAtomic,
4813                                bool IsAtomicReturn,
4814                                bool IsLds) {
4815   bool IsLdsOpcode = IsLds;
4816   bool HasLdsModifier = false;
4817   OptionalImmIndexMap OptionalIdx;
4818   assert(IsAtomicReturn ? IsAtomic : true);
4819 
4820   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
4821     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
4822 
4823     // Add the register arguments
4824     if (Op.isReg()) {
4825       Op.addRegOperands(Inst, 1);
4826       continue;
4827     }
4828 
4829     // Handle the case where soffset is an immediate
4830     if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
4831       Op.addImmOperands(Inst, 1);
4832       continue;
4833     }
4834 
4835     HasLdsModifier |= Op.isLDS();
4836 
4837     // Handle tokens like 'offen' which are sometimes hard-coded into the
4838     // asm string.  There are no MCInst operands for these.
4839     if (Op.isToken()) {
4840       continue;
4841     }
4842     assert(Op.isImm());
4843 
4844     // Handle optional arguments
4845     OptionalIdx[Op.getImmTy()] = i;
4846   }
4847 
4848   // This is a workaround for an llvm quirk which may result in an
4849   // incorrect instruction selection. Lds and non-lds versions of
4850   // MUBUF instructions are identical except that lds versions
4851   // have a mandatory 'lds' modifier. However, this modifier follows
4852   // optional modifiers, and the llvm asm matcher regards this 'lds'
4853   // modifier as an optional one. As a result, an lds version
4854   // of an opcode may be selected even if it has no 'lds' modifier.
4855   if (IsLdsOpcode && !HasLdsModifier) {
4856     int NoLdsOpcode = AMDGPU::getMUBUFNoLdsInst(Inst.getOpcode());
4857     if (NoLdsOpcode != -1) { // Got lds version - correct it.
4858       Inst.setOpcode(NoLdsOpcode);
4859       IsLdsOpcode = false;
4860     }
4861   }
4862 
4863   // Copy $vdata_in operand and insert as $vdata for MUBUF_Atomic RTN insns.
4864   if (IsAtomicReturn) {
4865     MCInst::iterator I = Inst.begin(); // $vdata_in is always at the beginning.
4866     Inst.insert(I, *I);
4867   }
4868 
4869   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset);
4870   if (!IsAtomic) { // glc is hard-coded.
4871     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC);
4872   }
4873   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC);
4874 
4875   if (!IsLdsOpcode) { // tfe is not legal with lds opcodes
4876     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
4877   }
4878 }
4879 
4880 void AMDGPUAsmParser::cvtMtbuf(MCInst &Inst, const OperandVector &Operands) {
4881   OptionalImmIndexMap OptionalIdx;
4882 
4883   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
4884     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
4885 
4886     // Add the register arguments
4887     if (Op.isReg()) {
4888       Op.addRegOperands(Inst, 1);
4889       continue;
4890     }
4891 
4892     // Handle the case where soffset is an immediate
4893     if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
4894       Op.addImmOperands(Inst, 1);
4895       continue;
4896     }
4897 
4898     // Handle tokens like 'offen' which are sometimes hard-coded into the
4899     // asm string.  There are no MCInst operands for these.
4900     if (Op.isToken()) {
4901       continue;
4902     }
4903     assert(Op.isImm());
4904 
4905     // Handle optional arguments
4906     OptionalIdx[Op.getImmTy()] = i;
4907   }
4908 
4909   addOptionalImmOperand(Inst, Operands, OptionalIdx,
4910                         AMDGPUOperand::ImmTyOffset);
4911   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyFORMAT);
4912   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC);
4913   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC);
4914   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
4915 }
4916 
4917 //===----------------------------------------------------------------------===//
4918 // mimg
4919 //===----------------------------------------------------------------------===//
4920 
4921 void AMDGPUAsmParser::cvtMIMG(MCInst &Inst, const OperandVector &Operands,
4922                               bool IsAtomic) {
4923   unsigned I = 1;
4924   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
4925   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
4926     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
4927   }
4928 
4929   if (IsAtomic) {
4930     // Add src, same as dst
4931     assert(Desc.getNumDefs() == 1);
4932     ((AMDGPUOperand &)*Operands[I - 1]).addRegOperands(Inst, 1);
4933   }
4934 
4935   OptionalImmIndexMap OptionalIdx;
4936 
4937   for (unsigned E = Operands.size(); I != E; ++I) {
4938     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
4939 
4940     // Add the register arguments
4941     if (Op.isReg()) {
4942       Op.addRegOperands(Inst, 1);
4943     } else if (Op.isImmModifier()) {
4944       OptionalIdx[Op.getImmTy()] = I;
4945     } else {
4946       llvm_unreachable("unexpected operand type");
4947     }
4948   }
4949 
4950   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDMask);
4951   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyUNorm);
4952   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC);
4953   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC);
4954   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyR128A16);
4955   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
4956   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyLWE);
4957   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDA);
4958   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyD16);
4959 }
4960 
4961 void AMDGPUAsmParser::cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands) {
4962   cvtMIMG(Inst, Operands, true);
4963 }
4964 
4965 //===----------------------------------------------------------------------===//
4966 // smrd
4967 //===----------------------------------------------------------------------===//
4968 
4969 bool AMDGPUOperand::isSMRDOffset8() const {
4970   return isImm() && isUInt<8>(getImm());
4971 }
4972 
4973 bool AMDGPUOperand::isSMRDOffset20() const {
4974   return isImm() && isUInt<20>(getImm());
4975 }
4976 
4977 bool AMDGPUOperand::isSMRDLiteralOffset() const {
4978   // 32-bit literals are only supported on CI, and we only want to use them
4979   // when the offset is wider than 8 bits.
4980   return isImm() && !isUInt<8>(getImm()) && isUInt<32>(getImm());
4981 }
4982 
4983 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDOffset8() const {
4984   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
4985 }
4986 
4987 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDOffset20() const {
4988   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
4989 }
4990 
4991 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDLiteralOffset() const {
4992   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
4993 }
4994 
4995 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultOffsetU12() const {
4996   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
4997 }
4998 
4999 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultOffsetS13() const {
5000   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
5001 }
5002 
5003 //===----------------------------------------------------------------------===//
5004 // vop3
5005 //===----------------------------------------------------------------------===//
5006 
5007 static bool ConvertOmodMul(int64_t &Mul) {
5008   if (Mul != 1 && Mul != 2 && Mul != 4)
5009     return false;
5010 
5011   Mul >>= 1;
5012   return true;
5013 }
5014 
5015 static bool ConvertOmodDiv(int64_t &Div) {
5016   if (Div == 1) {
5017     Div = 0;
5018     return true;
5019   }
5020 
5021   if (Div == 2) {
5022     Div = 3;
5023     return true;
5024   }
5025 
5026   return false;
5027 }
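// Taken together, these converters map the omod assembly syntax onto the
// output-modifier field encoding used by the operand:
//   mul:1 -> 0 (no modifier)   mul:2 -> 1   mul:4 -> 2
//   div:1 -> 0 (no modifier)   div:2 -> 3
// Any other value is rejected by the converter.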
5028 
5029 static bool ConvertBoundCtrl(int64_t &BoundCtrl) {
5030   if (BoundCtrl == 0) {
5031     BoundCtrl = 1;
5032     return true;
5033   }
5034 
5035   if (BoundCtrl == -1) {
5036     BoundCtrl = 0;
5037     return true;
5038   }
5039 
5040   return false;
5041 }
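// Note the dpp quirk encoded here: writing "bound_ctrl:0" in assembly sets
// the bound_ctrl operand field to 1, while an absent modifier leaves the
// field at its default of 0 (see defaultBoundCtrl() below).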
5042 
5043 // Note: the order in this table matches the order of operands in AsmString.
5044 static const OptionalOperand AMDGPUOptionalOperandTable[] = {
5045   {"offen",   AMDGPUOperand::ImmTyOffen, true, nullptr},
5046   {"idxen",   AMDGPUOperand::ImmTyIdxen, true, nullptr},
5047   {"addr64",  AMDGPUOperand::ImmTyAddr64, true, nullptr},
5048   {"offset0", AMDGPUOperand::ImmTyOffset0, false, nullptr},
5049   {"offset1", AMDGPUOperand::ImmTyOffset1, false, nullptr},
5050   {"gds",     AMDGPUOperand::ImmTyGDS, true, nullptr},
5051   {"lds",     AMDGPUOperand::ImmTyLDS, true, nullptr},
5052   {"offset",  AMDGPUOperand::ImmTyOffset, false, nullptr},
5053   {"inst_offset", AMDGPUOperand::ImmTyInstOffset, false, nullptr},
5054   {"dfmt",    AMDGPUOperand::ImmTyFORMAT, false, nullptr},
5055   {"glc",     AMDGPUOperand::ImmTyGLC, true, nullptr},
5056   {"slc",     AMDGPUOperand::ImmTySLC, true, nullptr},
5057   {"tfe",     AMDGPUOperand::ImmTyTFE, true, nullptr},
5058   {"d16",     AMDGPUOperand::ImmTyD16, true, nullptr},
5059   {"high",    AMDGPUOperand::ImmTyHigh, true, nullptr},
5060   {"clamp",   AMDGPUOperand::ImmTyClampSI, true, nullptr},
5061   {"omod",    AMDGPUOperand::ImmTyOModSI, false, ConvertOmodMul},
5062   {"unorm",   AMDGPUOperand::ImmTyUNorm, true, nullptr},
5063   {"da",      AMDGPUOperand::ImmTyDA,    true, nullptr},
5064   {"r128",    AMDGPUOperand::ImmTyR128A16,  true, nullptr},
5065   {"a16",     AMDGPUOperand::ImmTyR128A16,  true, nullptr},
5066   {"lwe",     AMDGPUOperand::ImmTyLWE,   true, nullptr},
5067   {"d16",     AMDGPUOperand::ImmTyD16,   true, nullptr},
5068   {"dmask",   AMDGPUOperand::ImmTyDMask, false, nullptr},
5069   {"row_mask",   AMDGPUOperand::ImmTyDppRowMask, false, nullptr},
5070   {"bank_mask",  AMDGPUOperand::ImmTyDppBankMask, false, nullptr},
5071   {"bound_ctrl", AMDGPUOperand::ImmTyDppBoundCtrl, false, ConvertBoundCtrl},
5072   {"dst_sel",    AMDGPUOperand::ImmTySdwaDstSel, false, nullptr},
5073   {"src0_sel",   AMDGPUOperand::ImmTySdwaSrc0Sel, false, nullptr},
5074   {"src1_sel",   AMDGPUOperand::ImmTySdwaSrc1Sel, false, nullptr},
5075   {"dst_unused", AMDGPUOperand::ImmTySdwaDstUnused, false, nullptr},
5076   {"compr", AMDGPUOperand::ImmTyExpCompr, true, nullptr },
5077   {"vm", AMDGPUOperand::ImmTyExpVM, true, nullptr},
5078   {"op_sel", AMDGPUOperand::ImmTyOpSel, false, nullptr},
5079   {"op_sel_hi", AMDGPUOperand::ImmTyOpSelHi, false, nullptr},
5080   {"neg_lo", AMDGPUOperand::ImmTyNegLo, false, nullptr},
5081   {"neg_hi", AMDGPUOperand::ImmTyNegHi, false, nullptr}
5082 };
5083 
5084 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOperand(OperandVector &Operands) {
5085   unsigned size = Operands.size();
5086   assert(size > 0);
5087 
5088   OperandMatchResultTy res = parseOptionalOpr(Operands);
5089 
5090   // This is a hack to enable hardcoded mandatory operands which follow
5091   // optional operands.
5092   //
5093   // The current design assumes that all operands after the first optional
5094   // operand are also optional. However, the implementation of some instructions
5095   // violates this rule (e.g. flat/global atomics have a hardcoded 'glc' operand).
5096   //
5097   // To alleviate this problem, we have to (implicitly) parse extra operands
5098   // to make sure the autogenerated parser of custom operands never hits
5099   // hardcoded mandatory operands.
5100 
5101   if (size == 1 || ((AMDGPUOperand &)*Operands[size - 1]).isRegKind()) {
5102 
5103     // We have parsed the first optional operand.
5104     // Parse as many operands as necessary to skip all mandatory operands.
5105 
5106     for (unsigned i = 0; i < MAX_OPR_LOOKAHEAD; ++i) {
5107       if (res != MatchOperand_Success ||
5108           getLexer().is(AsmToken::EndOfStatement)) break;
5109       if (getLexer().is(AsmToken::Comma)) Parser.Lex();
5110       res = parseOptionalOpr(Operands);
5111     }
5112   }
5113 
5114   return res;
5115 }
5116 
5117 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOpr(OperandVector &Operands) {
5118   OperandMatchResultTy res;
5119   for (const OptionalOperand &Op : AMDGPUOptionalOperandTable) {
5120     // try to parse any optional operand here
5121     if (Op.IsBit) {
5122       res = parseNamedBit(Op.Name, Operands, Op.Type);
5123     } else if (Op.Type == AMDGPUOperand::ImmTyOModSI) {
5124       res = parseOModOperand(Operands);
5125     } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstSel ||
5126                Op.Type == AMDGPUOperand::ImmTySdwaSrc0Sel ||
5127                Op.Type == AMDGPUOperand::ImmTySdwaSrc1Sel) {
5128       res = parseSDWASel(Operands, Op.Name, Op.Type);
5129     } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstUnused) {
5130       res = parseSDWADstUnused(Operands);
5131     } else if (Op.Type == AMDGPUOperand::ImmTyOpSel ||
5132                Op.Type == AMDGPUOperand::ImmTyOpSelHi ||
5133                Op.Type == AMDGPUOperand::ImmTyNegLo ||
5134                Op.Type == AMDGPUOperand::ImmTyNegHi) {
5135       res = parseOperandArrayWithPrefix(Op.Name, Operands, Op.Type,
5136                                         Op.ConvertResult);
5137     } else if (Op.Type == AMDGPUOperand::ImmTyFORMAT) {
5138       res = parseDfmtNfmt(Operands);
5139     } else {
5140       res = parseIntWithPrefix(Op.Name, Operands, Op.Type, Op.ConvertResult);
5141     }
5142     if (res != MatchOperand_NoMatch) {
5143       return res;
5144     }
5145   }
5146   return MatchOperand_NoMatch;
5147 }
5148 
5149 OperandMatchResultTy AMDGPUAsmParser::parseOModOperand(OperandVector &Operands) {
5150   StringRef Name = Parser.getTok().getString();
5151   if (Name == "mul") {
5152     return parseIntWithPrefix("mul", Operands,
5153                               AMDGPUOperand::ImmTyOModSI, ConvertOmodMul);
5154   }
5155 
5156   if (Name == "div") {
5157     return parseIntWithPrefix("div", Operands,
5158                               AMDGPUOperand::ImmTyOModSI, ConvertOmodDiv);
5159   }
5160 
5161   return MatchOperand_NoMatch;
5162 }
5163 
5164 void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands) {
5165   cvtVOP3P(Inst, Operands);
5166 
5167   int Opc = Inst.getOpcode();
5168 
5169   int SrcNum;
5170   const int Ops[] = { AMDGPU::OpName::src0,
5171                       AMDGPU::OpName::src1,
5172                       AMDGPU::OpName::src2 };
5173   for (SrcNum = 0;
5174        SrcNum < 3 && AMDGPU::getNamedOperandIdx(Opc, Ops[SrcNum]) != -1;
5175        ++SrcNum);
5176   assert(SrcNum > 0);
5177 
5178   int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
5179   unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
5180 
5181   if ((OpSel & (1 << SrcNum)) != 0) {
5182     int ModIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers);
5183     uint32_t ModVal = Inst.getOperand(ModIdx).getImm();
5184     Inst.getOperand(ModIdx).setImm(ModVal | SISrcMods::DST_OP_SEL);
5185   }
5186 }
5187 
5188 static bool isRegOrImmWithInputMods(const MCInstrDesc &Desc, unsigned OpNum) {
5189       // 1. This operand is an input modifiers operand
5190   return Desc.OpInfo[OpNum].OperandType == AMDGPU::OPERAND_INPUT_MODS
5191       // 2. This is not the last operand
5192       && Desc.NumOperands > (OpNum + 1)
5193       // 3. The next operand has a register class
5194       && Desc.OpInfo[OpNum + 1].RegClass != -1
5195       // 4. The next register is not tied to any other operand
5196       && Desc.getOperandConstraint(OpNum + 1, MCOI::OperandConstraint::TIED_TO) == -1;
5197 }
5198 
5199 void AMDGPUAsmParser::cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands)
5200 {
5201   OptionalImmIndexMap OptionalIdx;
5202   unsigned Opc = Inst.getOpcode();
5203 
5204   unsigned I = 1;
5205   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
5206   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
5207     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
5208   }
5209 
5210   for (unsigned E = Operands.size(); I != E; ++I) {
5211     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
5212     if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
5213       Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
5214     } else if (Op.isInterpSlot() ||
5215                Op.isInterpAttr() ||
5216                Op.isAttrChan()) {
5217       Inst.addOperand(MCOperand::createImm(Op.Imm.Val));
5218     } else if (Op.isImmModifier()) {
5219       OptionalIdx[Op.getImmTy()] = I;
5220     } else {
5221       llvm_unreachable("unhandled operand type");
5222     }
5223   }
5224 
5225   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::high) != -1) {
5226     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyHigh);
5227   }
5228 
5229   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) {
5230     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI);
5231   }
5232 
5233   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) {
5234     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI);
5235   }
5236 }
5237 
5238 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands,
5239                               OptionalImmIndexMap &OptionalIdx) {
5240   unsigned Opc = Inst.getOpcode();
5241 
5242   unsigned I = 1;
5243   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
5244   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
5245     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
5246   }
5247 
5248   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers) != -1) {
5249     // This instruction has src modifiers
5250     for (unsigned E = Operands.size(); I != E; ++I) {
5251       AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
5252       if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
5253         Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
5254       } else if (Op.isImmModifier()) {
5255         OptionalIdx[Op.getImmTy()] = I;
5256       } else if (Op.isRegOrImm()) {
5257         Op.addRegOrImmOperands(Inst, 1);
5258       } else {
5259         llvm_unreachable("unhandled operand type");
5260       }
5261     }
5262   } else {
5263     // No src modifiers
5264     for (unsigned E = Operands.size(); I != E; ++I) {
5265       AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
5266       if (Op.isMod()) {
5267         OptionalIdx[Op.getImmTy()] = I;
5268       } else {
5269         Op.addRegOrImmOperands(Inst, 1);
5270       }
5271     }
5272   }
5273 
5274   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) {
5275     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI);
5276   }
5277 
5278   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) {
5279     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI);
5280   }
5281 
5282   // Special case v_mac_{f16, f32} and v_fmac_f32 (gfx906):
5283   // these have a src2 register operand that is tied to the dst operand.
5284   // We don't allow modifiers for this operand in the assembler, so src2_modifiers
5285   // should be 0.
5286   if (Opc == AMDGPU::V_MAC_F32_e64_si ||
5287       Opc == AMDGPU::V_MAC_F32_e64_vi ||
5288       Opc == AMDGPU::V_MAC_F16_e64_vi ||
5289       Opc == AMDGPU::V_FMAC_F32_e64_vi) {
5290     auto it = Inst.begin();
5291     std::advance(it, AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2_modifiers));
5292     it = Inst.insert(it, MCOperand::createImm(0)); // no modifiers for src2
5293     ++it;
5294     Inst.insert(it, Inst.getOperand(0)); // src2 = dst
5295   }
5296 }
5297 
5298 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands) {
5299   OptionalImmIndexMap OptionalIdx;
5300   cvtVOP3(Inst, Operands, OptionalIdx);
5301 }
5302 
5303 void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst,
5304                                const OperandVector &Operands) {
5305   OptionalImmIndexMap OptIdx;
5306   const int Opc = Inst.getOpcode();
5307   const MCInstrDesc &Desc = MII.get(Opc);
5308 
5309   const bool IsPacked = (Desc.TSFlags & SIInstrFlags::IsPacked) != 0;
5310 
5311   cvtVOP3(Inst, Operands, OptIdx);
5312 
5313   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst_in) != -1) {
5314     assert(!IsPacked);
5315     Inst.addOperand(Inst.getOperand(0));
5316   }
5317 
5318   // FIXME: This is messy. Parse the modifiers as if it was a normal VOP3
5319   // instruction, and then figure out where to actually put the modifiers
5320 
5321   addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSel);
5322 
5323   int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi);
5324   if (OpSelHiIdx != -1) {
5325     int DefaultVal = IsPacked ? -1 : 0;
5326     addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSelHi,
5327                           DefaultVal);
5328   }
5329 
5330   int NegLoIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_lo);
5331   if (NegLoIdx != -1) {
5332     assert(IsPacked);
5333     addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegLo);
5334     addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegHi);
5335   }
5336 
5337   const int Ops[] = { AMDGPU::OpName::src0,
5338                       AMDGPU::OpName::src1,
5339                       AMDGPU::OpName::src2 };
5340   const int ModOps[] = { AMDGPU::OpName::src0_modifiers,
5341                          AMDGPU::OpName::src1_modifiers,
5342                          AMDGPU::OpName::src2_modifiers };
5343 
5344   int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
5345 
5346   unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
5347   unsigned OpSelHi = 0;
5348   unsigned NegLo = 0;
5349   unsigned NegHi = 0;
5350 
5351   if (OpSelHiIdx != -1) {
5352     OpSelHi = Inst.getOperand(OpSelHiIdx).getImm();
5353   }
5354 
5355   if (NegLoIdx != -1) {
5356     int NegHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_hi);
5357     NegLo = Inst.getOperand(NegLoIdx).getImm();
5358     NegHi = Inst.getOperand(NegHiIdx).getImm();
5359   }
5360 
5361   for (int J = 0; J < 3; ++J) {
5362     int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]);
5363     if (OpIdx == -1)
5364       break;
5365 
5366     uint32_t ModVal = 0;
5367 
5368     if ((OpSel & (1 << J)) != 0)
5369       ModVal |= SISrcMods::OP_SEL_0;
5370 
5371     if ((OpSelHi & (1 << J)) != 0)
5372       ModVal |= SISrcMods::OP_SEL_1;
5373 
5374     if ((NegLo & (1 << J)) != 0)
5375       ModVal |= SISrcMods::NEG;
5376 
5377     if ((NegHi & (1 << J)) != 0)
5378       ModVal |= SISrcMods::NEG_HI;
5379 
5380     int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]);
5381 
5382     Inst.getOperand(ModIdx).setImm(Inst.getOperand(ModIdx).getImm() | ModVal);
5383   }
5384 }
5385 
5386 //===----------------------------------------------------------------------===//
5387 // dpp
5388 //===----------------------------------------------------------------------===//
5389 
5390 bool AMDGPUOperand::isDPPCtrl() const {
5391   using namespace AMDGPU::DPP;
5392 
5393   bool result = isImm() && getImmTy() == ImmTyDppCtrl && isUInt<9>(getImm());
5394   if (result) {
5395     int64_t Imm = getImm();
5396     return (Imm >= DppCtrl::QUAD_PERM_FIRST && Imm <= DppCtrl::QUAD_PERM_LAST) ||
5397            (Imm >= DppCtrl::ROW_SHL_FIRST && Imm <= DppCtrl::ROW_SHL_LAST) ||
5398            (Imm >= DppCtrl::ROW_SHR_FIRST && Imm <= DppCtrl::ROW_SHR_LAST) ||
5399            (Imm >= DppCtrl::ROW_ROR_FIRST && Imm <= DppCtrl::ROW_ROR_LAST) ||
5400            (Imm == DppCtrl::WAVE_SHL1) ||
5401            (Imm == DppCtrl::WAVE_ROL1) ||
5402            (Imm == DppCtrl::WAVE_SHR1) ||
5403            (Imm == DppCtrl::WAVE_ROR1) ||
5404            (Imm == DppCtrl::ROW_MIRROR) ||
5405            (Imm == DppCtrl::ROW_HALF_MIRROR) ||
5406            (Imm == DppCtrl::BCAST15) ||
5407            (Imm == DppCtrl::BCAST31);
5408   }
5409   return false;
5410 }
5411 
5412 bool AMDGPUOperand::isS16Imm() const {
5413   return isImm() && (isInt<16>(getImm()) || isUInt<16>(getImm()));
5414 }
5415 
5416 bool AMDGPUOperand::isU16Imm() const {
5417   return isImm() && isUInt<16>(getImm());
5418 }
5419 
5420 OperandMatchResultTy
5421 AMDGPUAsmParser::parseDPPCtrl(OperandVector &Operands) {
5422   using namespace AMDGPU::DPP;
5423 
5424   SMLoc S = Parser.getTok().getLoc();
5425   StringRef Prefix;
5426   int64_t Int;
5427 
5428   if (getLexer().getKind() == AsmToken::Identifier) {
5429     Prefix = Parser.getTok().getString();
5430   } else {
5431     return MatchOperand_NoMatch;
5432   }
5433 
5434   if (Prefix == "row_mirror") {
5435     Int = DppCtrl::ROW_MIRROR;
5436     Parser.Lex();
5437   } else if (Prefix == "row_half_mirror") {
5438     Int = DppCtrl::ROW_HALF_MIRROR;
5439     Parser.Lex();
5440   } else {
5441     // Check to prevent parseDPPCtrlOps from eating invalid tokens
5442     if (Prefix != "quad_perm"
5443         && Prefix != "row_shl"
5444         && Prefix != "row_shr"
5445         && Prefix != "row_ror"
5446         && Prefix != "wave_shl"
5447         && Prefix != "wave_rol"
5448         && Prefix != "wave_shr"
5449         && Prefix != "wave_ror"
5450         && Prefix != "row_bcast") {
5451       return MatchOperand_NoMatch;
5452     }
5453 
5454     Parser.Lex();
5455     if (getLexer().isNot(AsmToken::Colon))
5456       return MatchOperand_ParseFail;
5457 
5458     if (Prefix == "quad_perm") {
5459       // quad_perm:[%d,%d,%d,%d]
5460       Parser.Lex();
5461       if (getLexer().isNot(AsmToken::LBrac))
5462         return MatchOperand_ParseFail;
5463       Parser.Lex();
5464 
5465       if (getParser().parseAbsoluteExpression(Int) || !(0 <= Int && Int <= 3))
5466         return MatchOperand_ParseFail;
5467 
5468       for (int i = 0; i < 3; ++i) {
5469         if (getLexer().isNot(AsmToken::Comma))
5470           return MatchOperand_ParseFail;
5471         Parser.Lex();
5472 
5473         int64_t Temp;
5474         if (getParser().parseAbsoluteExpression(Temp) || !(0 <= Temp && Temp <= 3))
5475           return MatchOperand_ParseFail;
5476         const int shift = i*2 + 2;
5477         Int += (Temp << shift);
5478       }
5479 
5480       if (getLexer().isNot(AsmToken::RBrac))
5481         return MatchOperand_ParseFail;
5482       Parser.Lex();
5483     } else {
5484       // sel:%d
5485       Parser.Lex();
5486       if (getParser().parseAbsoluteExpression(Int))
5487         return MatchOperand_ParseFail;
5488 
5489       if (Prefix == "row_shl" && 1 <= Int && Int <= 15) {
5490         Int |= DppCtrl::ROW_SHL0;
5491       } else if (Prefix == "row_shr" && 1 <= Int && Int <= 15) {
5492         Int |= DppCtrl::ROW_SHR0;
5493       } else if (Prefix == "row_ror" && 1 <= Int && Int <= 15) {
5494         Int |= DppCtrl::ROW_ROR0;
5495       } else if (Prefix == "wave_shl" && 1 == Int) {
5496         Int = DppCtrl::WAVE_SHL1;
5497       } else if (Prefix == "wave_rol" && 1 == Int) {
5498         Int = DppCtrl::WAVE_ROL1;
5499       } else if (Prefix == "wave_shr" && 1 == Int) {
5500         Int = DppCtrl::WAVE_SHR1;
5501       } else if (Prefix == "wave_ror" && 1 == Int) {
5502         Int = DppCtrl::WAVE_ROR1;
5503       } else if (Prefix == "row_bcast") {
5504         if (Int == 15) {
5505           Int = DppCtrl::BCAST15;
5506         } else if (Int == 31) {
5507           Int = DppCtrl::BCAST31;
5508         } else {
5509           return MatchOperand_ParseFail;
5510         }
5511       } else {
5512         return MatchOperand_ParseFail;
5513       }
5514     }
5515   }
5516 
5517   Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, AMDGPUOperand::ImmTyDppCtrl));
5518   return MatchOperand_Success;
5519 }
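// A worked example of the quad_perm encoding produced above (the values are
// illustrative): "quad_perm:[3,2,1,0]" packs the four lane selectors two bits
// each, with the first selector in the low bits:
//   3 | (2 << 2) | (1 << 4) | (0 << 6) = 0x1B
// which falls inside the DppCtrl::QUAD_PERM_FIRST..QUAD_PERM_LAST range
// accepted by isDPPCtrl().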
5520 
5521 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultRowMask() const {
5522   return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppRowMask);
5523 }
5524 
5525 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBankMask() const {
5526   return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppBankMask);
5527 }
5528 
5529 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBoundCtrl() const {
5530   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppBoundCtrl);
5531 }
5532 
5533 void AMDGPUAsmParser::cvtDPP(MCInst &Inst, const OperandVector &Operands) {
5534   OptionalImmIndexMap OptionalIdx;
5535 
5536   unsigned I = 1;
5537   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
5538   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
5539     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
5540   }
5541 
5542   for (unsigned E = Operands.size(); I != E; ++I) {
5543     auto TiedTo = Desc.getOperandConstraint(Inst.getNumOperands(),
5544                                             MCOI::TIED_TO);
5545     if (TiedTo != -1) {
5546       assert((unsigned)TiedTo < Inst.getNumOperands());
5547       // handle tied old or src2 for MAC instructions
5548       Inst.addOperand(Inst.getOperand(TiedTo));
5549     }
5550     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
5551     // Add the register arguments
5552     if (Op.isReg() && Op.Reg.RegNo == AMDGPU::VCC) {
5553       // VOP2b (v_add_u32, v_sub_u32 ...) dpp uses the "vcc" token.
5554       // Skip it.
5555       continue;
5556     } else if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
5557       Op.addRegWithFPInputModsOperands(Inst, 2);
5558     } else if (Op.isDPPCtrl()) {
5559       Op.addImmOperands(Inst, 1);
5560     } else if (Op.isImm()) {
5561       // Handle optional arguments
5562       OptionalIdx[Op.getImmTy()] = I;
5563     } else {
5564       llvm_unreachable("Invalid operand type");
5565     }
5566   }
5567 
5568   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppRowMask, 0xf);
5569   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBankMask, 0xf);
5570   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBoundCtrl);
5571 }
5572 
5573 //===----------------------------------------------------------------------===//
5574 // sdwa
5575 //===----------------------------------------------------------------------===//
5576 
5577 OperandMatchResultTy
5578 AMDGPUAsmParser::parseSDWASel(OperandVector &Operands, StringRef Prefix,
5579                               AMDGPUOperand::ImmTy Type) {
5580   using namespace llvm::AMDGPU::SDWA;
5581 
5582   SMLoc S = Parser.getTok().getLoc();
5583   StringRef Value;
5584   OperandMatchResultTy res;
5585 
5586   res = parseStringWithPrefix(Prefix, Value);
5587   if (res != MatchOperand_Success) {
5588     return res;
5589   }
5590 
5591   int64_t Int;
5592   Int = StringSwitch<int64_t>(Value)
5593         .Case("BYTE_0", SdwaSel::BYTE_0)
5594         .Case("BYTE_1", SdwaSel::BYTE_1)
5595         .Case("BYTE_2", SdwaSel::BYTE_2)
5596         .Case("BYTE_3", SdwaSel::BYTE_3)
5597         .Case("WORD_0", SdwaSel::WORD_0)
5598         .Case("WORD_1", SdwaSel::WORD_1)
5599         .Case("DWORD", SdwaSel::DWORD)
5600         .Default(0xffffffff);
5601   Parser.Lex(); // eat last token
5602 
5603   if (Int == 0xffffffff) {
5604     return MatchOperand_ParseFail;
5605   }
5606 
5607   Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, Type));
5608   return MatchOperand_Success;
5609 }
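// For example (selector names come from the SdwaSel enum above), an operand
// written as "dst_sel:WORD_1" or "src0_sel:BYTE_3" becomes the corresponding
// SdwaSel immediate, while an unrecognized selector name fails to match.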
5610 
5611 OperandMatchResultTy
5612 AMDGPUAsmParser::parseSDWADstUnused(OperandVector &Operands) {
5613   using namespace llvm::AMDGPU::SDWA;
5614 
5615   SMLoc S = Parser.getTok().getLoc();
5616   StringRef Value;
5617   OperandMatchResultTy res;
5618 
5619   res = parseStringWithPrefix("dst_unused", Value);
5620   if (res != MatchOperand_Success) {
5621     return res;
5622   }
5623 
5624   int64_t Int;
5625   Int = StringSwitch<int64_t>(Value)
5626         .Case("UNUSED_PAD", DstUnused::UNUSED_PAD)
5627         .Case("UNUSED_SEXT", DstUnused::UNUSED_SEXT)
5628         .Case("UNUSED_PRESERVE", DstUnused::UNUSED_PRESERVE)
5629         .Default(0xffffffff);
5630   Parser.Lex(); // eat last token
5631 
5632   if (Int == 0xffffffff) {
5633     return MatchOperand_ParseFail;
5634   }
5635 
5636   Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, AMDGPUOperand::ImmTySdwaDstUnused));
5637   return MatchOperand_Success;
5638 }
5639 
5640 void AMDGPUAsmParser::cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands) {
5641   cvtSDWA(Inst, Operands, SIInstrFlags::VOP1);
5642 }
5643 
5644 void AMDGPUAsmParser::cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands) {
5645   cvtSDWA(Inst, Operands, SIInstrFlags::VOP2);
5646 }
5647 
5648 void AMDGPUAsmParser::cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands) {
5649   cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, true);
5650 }
5651 
5652 void AMDGPUAsmParser::cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands) {
5653   cvtSDWA(Inst, Operands, SIInstrFlags::VOPC, isVI());
5654 }
5655 
5656 void AMDGPUAsmParser::cvtSDWA(MCInst &Inst, const OperandVector &Operands,
5657                               uint64_t BasicInstType, bool skipVcc) {
5658   using namespace llvm::AMDGPU::SDWA;
5659 
5660   OptionalImmIndexMap OptionalIdx;
5661   bool skippedVcc = false;
5662 
5663   unsigned I = 1;
5664   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
5665   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
5666     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
5667   }
5668 
5669   for (unsigned E = Operands.size(); I != E; ++I) {
5670     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
5671     if (skipVcc && !skippedVcc && Op.isReg() && Op.Reg.RegNo == AMDGPU::VCC) {
5672       // VOP2b (v_add_u32, v_sub_u32 ...) sdwa uses the "vcc" token as dst.
5673       // Skip it if it's 2nd (e.g. v_add_i32_sdwa v1, vcc, v2, v3)
5674       // or 4th (v_addc_u32_sdwa v1, vcc, v2, v3, vcc) operand.
5675       // Skip VCC only if we didn't skip it on previous iteration.
5676       if (BasicInstType == SIInstrFlags::VOP2 &&
5677           (Inst.getNumOperands() == 1 || Inst.getNumOperands() == 5)) {
5678         skippedVcc = true;
5679         continue;
5680       } else if (BasicInstType == SIInstrFlags::VOPC &&
5681                  Inst.getNumOperands() == 0) {
5682         skippedVcc = true;
5683         continue;
5684       }
5685     }
5686     if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
5687       Op.addRegOrImmWithInputModsOperands(Inst, 2);
5688     } else if (Op.isImm()) {
5689       // Handle optional arguments
5690       OptionalIdx[Op.getImmTy()] = I;
5691     } else {
5692       llvm_unreachable("Invalid operand type");
5693     }
5694     skippedVcc = false;
5695   }
5696 
5697   if (Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx9 &&
5698       Inst.getOpcode() != AMDGPU::V_NOP_sdwa_vi) {
5699     // v_nop_sdwa_vi/gfx9 has no optional sdwa arguments
5700     switch (BasicInstType) {
5701     case SIInstrFlags::VOP1:
5702       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
5703       if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) {
5704         addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
5705       }
5706       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD);
5707       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE);
5708       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
5709       break;
5710 
5711     case SIInstrFlags::VOP2:
5712       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
5713       if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) {
5714         addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
5715       }
5716       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD);
5717       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE);
5718       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
5719       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD);
5720       break;
5721 
5722     case SIInstrFlags::VOPC:
5723       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
5724       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
5725       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD);
5726       break;
5727 
5728     default:
5729       llvm_unreachable("Invalid instruction type. Only VOP1, VOP2 and VOPC allowed");
5730     }
5731   }
5732 
5733   // Special case v_mac_{f16, f32}:
5734   // it has a src2 register operand that is tied to the dst operand.
5735   if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
5736       Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi)  {
5737     auto it = Inst.begin();
5738     std::advance(
5739       it, AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::src2));
5740     Inst.insert(it, Inst.getOperand(0)); // src2 = dst
5741   }
5742 }
5743 
5744 /// Force static initialization.
5745 extern "C" void LLVMInitializeAMDGPUAsmParser() {
5746   RegisterMCAsmParser<AMDGPUAsmParser> A(getTheAMDGPUTarget());
5747   RegisterMCAsmParser<AMDGPUAsmParser> B(getTheGCNTarget());
5748 }
5749 
5750 #define GET_REGISTER_MATCHER
5751 #define GET_MATCHER_IMPLEMENTATION
5752 #define GET_MNEMONIC_SPELL_CHECKER
5753 #include "AMDGPUGenAsmMatcher.inc"
5754 
5755 // This function should be defined after the auto-generated include so that we
5756 // have the MatchClassKind enum defined.
5757 unsigned AMDGPUAsmParser::validateTargetOperandClass(MCParsedAsmOperand &Op,
5758                                                      unsigned Kind) {
5759   // Tokens like "glc" would be parsed as immediate operands in ParseOperand().
5760   // But MatchInstructionImpl() expects to meet a token and fails to validate
5761   // the operand. This method checks if we were given an immediate operand but
5762   // expected to get the corresponding token.
5763   AMDGPUOperand &Operand = (AMDGPUOperand&)Op;
5764   switch (Kind) {
5765   case MCK_addr64:
5766     return Operand.isAddr64() ? Match_Success : Match_InvalidOperand;
5767   case MCK_gds:
5768     return Operand.isGDS() ? Match_Success : Match_InvalidOperand;
5769   case MCK_lds:
5770     return Operand.isLDS() ? Match_Success : Match_InvalidOperand;
5771   case MCK_glc:
5772     return Operand.isGLC() ? Match_Success : Match_InvalidOperand;
5773   case MCK_idxen:
5774     return Operand.isIdxen() ? Match_Success : Match_InvalidOperand;
5775   case MCK_offen:
5776     return Operand.isOffen() ? Match_Success : Match_InvalidOperand;
5777   case MCK_SSrcB32:
5778     // When operands have expression values, they will return true for isToken,
5779     // because it is not possible to distinguish between a token and an
5780     // expression at parse time. MatchInstructionImpl() will always try to
5781     // match an operand as a token, when isToken returns true, and when the
5782     // name of the expression is not a valid token, the match will fail,
5783     // so we need to handle it here.
5784     return Operand.isSSrcB32() ? Match_Success : Match_InvalidOperand;
5785   case MCK_SSrcF32:
5786     return Operand.isSSrcF32() ? Match_Success : Match_InvalidOperand;
5787   case MCK_SoppBrTarget:
5788     return Operand.isSoppBrTarget() ? Match_Success : Match_InvalidOperand;
5789   case MCK_VReg32OrOff:
5790     return Operand.isVReg32OrOff() ? Match_Success : Match_InvalidOperand;
5791   case MCK_InterpSlot:
5792     return Operand.isInterpSlot() ? Match_Success : Match_InvalidOperand;
5793   case MCK_Attr:
5794     return Operand.isInterpAttr() ? Match_Success : Match_InvalidOperand;
5795   case MCK_AttrChan:
5796     return Operand.isAttrChan() ? Match_Success : Match_InvalidOperand;
5797   default:
5798     return Match_InvalidOperand;
5799   }
5800 }
5801