1 //===- AMDGPUAsmParser.cpp - Parse SI asm to MCInst instructions ----------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "AMDKernelCodeT.h"
10 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
11 #include "MCTargetDesc/AMDGPUTargetStreamer.h"
12 #include "SIDefines.h"
13 #include "SIInstrInfo.h"
14 #include "SIRegisterInfo.h"
15 #include "TargetInfo/AMDGPUTargetInfo.h"
16 #include "Utils/AMDGPUAsmUtils.h"
17 #include "Utils/AMDGPUBaseInfo.h"
18 #include "Utils/AMDKernelCodeTUtils.h"
19 #include "llvm/ADT/APFloat.h"
20 #include "llvm/ADT/SmallBitVector.h"
21 #include "llvm/ADT/StringSet.h"
22 #include "llvm/ADT/Twine.h"
23 #include "llvm/MC/MCAsmInfo.h"
24 #include "llvm/MC/MCContext.h"
25 #include "llvm/MC/MCExpr.h"
26 #include "llvm/MC/MCInst.h"
27 #include "llvm/MC/MCParser/MCAsmParser.h"
28 #include "llvm/MC/MCParser/MCParsedAsmOperand.h"
29 #include "llvm/MC/MCParser/MCTargetAsmParser.h"
30 #include "llvm/MC/MCSymbol.h"
31 #include "llvm/MC/TargetRegistry.h"
32 #include "llvm/Support/AMDGPUMetadata.h"
33 #include "llvm/Support/AMDHSAKernelDescriptor.h"
34 #include "llvm/Support/Casting.h"
35 #include "llvm/Support/MachineValueType.h"
36 #include "llvm/Support/TargetParser.h"
37 
38 using namespace llvm;
39 using namespace llvm::AMDGPU;
40 using namespace llvm::amdhsa;
41 
42 namespace {
43 
44 class AMDGPUAsmParser;
45 
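// Broad categories of registers recognized by the register parser.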
46 enum RegisterKind { IS_UNKNOWN, IS_VGPR, IS_SGPR, IS_AGPR, IS_TTMP, IS_SPECIAL };
47 
48 //===----------------------------------------------------------------------===//
49 // Operand
50 //===----------------------------------------------------------------------===//
51 
52 class AMDGPUOperand : public MCParsedAsmOperand {
53   enum KindTy {
54     Token,
55     Immediate,
56     Register,
57     Expression
58   } Kind;
59 
60   SMLoc StartLoc, EndLoc;
61   const AMDGPUAsmParser *AsmParser;
62 
63 public:
64   AMDGPUOperand(KindTy Kind_, const AMDGPUAsmParser *AsmParser_)
65       : Kind(Kind_), AsmParser(AsmParser_) {}
66 
67   using Ptr = std::unique_ptr<AMDGPUOperand>;
68 
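  // Source operand modifiers: Abs and Neg apply to floating-point operands,
  // Sext to integer operands (e.g. "v_add_f32 v0, -|v1|, v2" sets Neg and Abs
  // on src0). getModifiersOperand() packs them into the SISrcMods immediate
  // that is emitted alongside the operand itself.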
69   struct Modifiers {
70     bool Abs = false;
71     bool Neg = false;
72     bool Sext = false;
73 
74     bool hasFPModifiers() const { return Abs || Neg; }
75     bool hasIntModifiers() const { return Sext; }
76     bool hasModifiers() const { return hasFPModifiers() || hasIntModifiers(); }
77 
78     int64_t getFPModifiersOperand() const {
79       int64_t Operand = 0;
80       Operand |= Abs ? SISrcMods::ABS : 0u;
81       Operand |= Neg ? SISrcMods::NEG : 0u;
82       return Operand;
83     }
84 
85     int64_t getIntModifiersOperand() const {
86       int64_t Operand = 0;
87       Operand |= Sext ? SISrcMods::SEXT : 0u;
88       return Operand;
89     }
90 
91     int64_t getModifiersOperand() const {
92       assert(!(hasFPModifiers() && hasIntModifiers())
93            && "fp and int modifiers should not be used simultaneously");
94       if (hasFPModifiers()) {
95         return getFPModifiersOperand();
96       } else if (hasIntModifiers()) {
97         return getIntModifiersOperand();
98       } else {
99         return 0;
100       }
101     }
102 
103     friend raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods);
104   };
105 
106   enum ImmTy {
107     ImmTyNone,
108     ImmTyGDS,
109     ImmTyLDS,
110     ImmTyOffen,
111     ImmTyIdxen,
112     ImmTyAddr64,
113     ImmTyOffset,
114     ImmTyInstOffset,
115     ImmTyOffset0,
116     ImmTyOffset1,
117     ImmTyCPol,
118     ImmTySWZ,
119     ImmTyTFE,
120     ImmTyD16,
121     ImmTyClampSI,
122     ImmTyOModSI,
123     ImmTyDPP8,
124     ImmTyDppCtrl,
125     ImmTyDppRowMask,
126     ImmTyDppBankMask,
127     ImmTyDppBoundCtrl,
128     ImmTyDppFi,
129     ImmTySdwaDstSel,
130     ImmTySdwaSrc0Sel,
131     ImmTySdwaSrc1Sel,
132     ImmTySdwaDstUnused,
133     ImmTyDMask,
134     ImmTyDim,
135     ImmTyUNorm,
136     ImmTyDA,
137     ImmTyR128A16,
138     ImmTyA16,
139     ImmTyLWE,
140     ImmTyExpTgt,
141     ImmTyExpCompr,
142     ImmTyExpVM,
143     ImmTyFORMAT,
144     ImmTyHwreg,
145     ImmTyOff,
146     ImmTySendMsg,
147     ImmTyInterpSlot,
148     ImmTyInterpAttr,
149     ImmTyAttrChan,
150     ImmTyOpSel,
151     ImmTyOpSelHi,
152     ImmTyNegLo,
153     ImmTyNegHi,
154     ImmTySwizzle,
155     ImmTyGprIdxMode,
156     ImmTyHigh,
157     ImmTyBLGP,
158     ImmTyCBSZ,
159     ImmTyABID,
160     ImmTyEndpgm,
161   };
162 
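  // How an immediate operand ends up encoded: as a literal constant or as an
  // inline constant (see setImmKindLiteral/setImmKindConst below).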
163   enum ImmKindTy {
164     ImmKindTyNone,
165     ImmKindTyLiteral,
166     ImmKindTyConst,
167   };
168 
169 private:
170   struct TokOp {
171     const char *Data;
172     unsigned Length;
173   };
174 
175   struct ImmOp {
176     int64_t Val;
177     ImmTy Type;
178     bool IsFPImm;
179     mutable ImmKindTy Kind;
180     Modifiers Mods;
181   };
182 
183   struct RegOp {
184     unsigned RegNo;
185     Modifiers Mods;
186   };
187 
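  // Operand payload; only the member corresponding to Kind is valid.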
188   union {
189     TokOp Tok;
190     ImmOp Imm;
191     RegOp Reg;
192     const MCExpr *Expr;
193   };
194 
195 public:
196   bool isToken() const override {
197     if (Kind == Token)
198       return true;
199 
200     // When parsing operands, we can't always tell if something was meant to be
201     // a token, like 'gds', or an expression that references a global variable.
    // In this case, we assume the string is an expression, and if we need to
    // interpret it as a token, we treat the symbol name as the token.
204     return isSymbolRefExpr();
205   }
206 
207   bool isSymbolRefExpr() const {
208     return isExpr() && Expr && isa<MCSymbolRefExpr>(Expr);
209   }
210 
211   bool isImm() const override {
212     return Kind == Immediate;
213   }
214 
215   void setImmKindNone() const {
216     assert(isImm());
217     Imm.Kind = ImmKindTyNone;
218   }
219 
220   void setImmKindLiteral() const {
221     assert(isImm());
222     Imm.Kind = ImmKindTyLiteral;
223   }
224 
225   void setImmKindConst() const {
226     assert(isImm());
227     Imm.Kind = ImmKindTyConst;
228   }
229 
230   bool IsImmKindLiteral() const {
231     return isImm() && Imm.Kind == ImmKindTyLiteral;
232   }
233 
234   bool isImmKindConst() const {
235     return isImm() && Imm.Kind == ImmKindTyConst;
236   }
237 
238   bool isInlinableImm(MVT type) const;
239   bool isLiteralImm(MVT type) const;
240 
241   bool isRegKind() const {
242     return Kind == Register;
243   }
244 
245   bool isReg() const override {
246     return isRegKind() && !hasModifiers();
247   }
248 
249   bool isRegOrInline(unsigned RCID, MVT type) const {
250     return isRegClass(RCID) || isInlinableImm(type);
251   }
252 
253   bool isRegOrImmWithInputMods(unsigned RCID, MVT type) const {
254     return isRegOrInline(RCID, type) || isLiteralImm(type);
255   }
256 
257   bool isRegOrImmWithInt16InputMods() const {
258     return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i16);
259   }
260 
261   bool isRegOrImmWithInt32InputMods() const {
262     return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i32);
263   }
264 
265   bool isRegOrImmWithInt64InputMods() const {
266     return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::i64);
267   }
268 
269   bool isRegOrImmWithFP16InputMods() const {
270     return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f16);
271   }
272 
273   bool isRegOrImmWithFP32InputMods() const {
274     return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f32);
275   }
276 
277   bool isRegOrImmWithFP64InputMods() const {
278     return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::f64);
279   }
280 
281   bool isVReg() const {
282     return isRegClass(AMDGPU::VGPR_32RegClassID) ||
283            isRegClass(AMDGPU::VReg_64RegClassID) ||
284            isRegClass(AMDGPU::VReg_96RegClassID) ||
285            isRegClass(AMDGPU::VReg_128RegClassID) ||
286            isRegClass(AMDGPU::VReg_160RegClassID) ||
287            isRegClass(AMDGPU::VReg_192RegClassID) ||
288            isRegClass(AMDGPU::VReg_256RegClassID) ||
289            isRegClass(AMDGPU::VReg_512RegClassID) ||
290            isRegClass(AMDGPU::VReg_1024RegClassID);
291   }
292 
293   bool isVReg32() const {
294     return isRegClass(AMDGPU::VGPR_32RegClassID);
295   }
296 
297   bool isVReg32OrOff() const {
298     return isOff() || isVReg32();
299   }
300 
301   bool isNull() const {
302     return isRegKind() && getReg() == AMDGPU::SGPR_NULL;
303   }
304 
305   bool isVRegWithInputMods() const;
306 
307   bool isSDWAOperand(MVT type) const;
308   bool isSDWAFP16Operand() const;
309   bool isSDWAFP32Operand() const;
310   bool isSDWAInt16Operand() const;
311   bool isSDWAInt32Operand() const;
312 
313   bool isImmTy(ImmTy ImmT) const {
314     return isImm() && Imm.Type == ImmT;
315   }
316 
317   bool isImmModifier() const {
318     return isImm() && Imm.Type != ImmTyNone;
319   }
320 
321   bool isClampSI() const { return isImmTy(ImmTyClampSI); }
322   bool isOModSI() const { return isImmTy(ImmTyOModSI); }
323   bool isDMask() const { return isImmTy(ImmTyDMask); }
324   bool isDim() const { return isImmTy(ImmTyDim); }
325   bool isUNorm() const { return isImmTy(ImmTyUNorm); }
326   bool isDA() const { return isImmTy(ImmTyDA); }
327   bool isR128A16() const { return isImmTy(ImmTyR128A16); }
328   bool isGFX10A16() const { return isImmTy(ImmTyA16); }
329   bool isLWE() const { return isImmTy(ImmTyLWE); }
330   bool isOff() const { return isImmTy(ImmTyOff); }
331   bool isExpTgt() const { return isImmTy(ImmTyExpTgt); }
332   bool isExpVM() const { return isImmTy(ImmTyExpVM); }
333   bool isExpCompr() const { return isImmTy(ImmTyExpCompr); }
334   bool isOffen() const { return isImmTy(ImmTyOffen); }
335   bool isIdxen() const { return isImmTy(ImmTyIdxen); }
336   bool isAddr64() const { return isImmTy(ImmTyAddr64); }
337   bool isOffset() const { return isImmTy(ImmTyOffset) && isUInt<16>(getImm()); }
338   bool isOffset0() const { return isImmTy(ImmTyOffset0) && isUInt<8>(getImm()); }
339   bool isOffset1() const { return isImmTy(ImmTyOffset1) && isUInt<8>(getImm()); }
340 
341   bool isFlatOffset() const { return isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset); }
342   bool isGDS() const { return isImmTy(ImmTyGDS); }
343   bool isLDS() const { return isImmTy(ImmTyLDS); }
344   bool isCPol() const { return isImmTy(ImmTyCPol); }
345   bool isSWZ() const { return isImmTy(ImmTySWZ); }
346   bool isTFE() const { return isImmTy(ImmTyTFE); }
347   bool isD16() const { return isImmTy(ImmTyD16); }
348   bool isFORMAT() const { return isImmTy(ImmTyFORMAT) && isUInt<7>(getImm()); }
349   bool isBankMask() const { return isImmTy(ImmTyDppBankMask); }
350   bool isRowMask() const { return isImmTy(ImmTyDppRowMask); }
351   bool isBoundCtrl() const { return isImmTy(ImmTyDppBoundCtrl); }
352   bool isFI() const { return isImmTy(ImmTyDppFi); }
353   bool isSDWADstSel() const { return isImmTy(ImmTySdwaDstSel); }
354   bool isSDWASrc0Sel() const { return isImmTy(ImmTySdwaSrc0Sel); }
355   bool isSDWASrc1Sel() const { return isImmTy(ImmTySdwaSrc1Sel); }
356   bool isSDWADstUnused() const { return isImmTy(ImmTySdwaDstUnused); }
357   bool isInterpSlot() const { return isImmTy(ImmTyInterpSlot); }
358   bool isInterpAttr() const { return isImmTy(ImmTyInterpAttr); }
359   bool isAttrChan() const { return isImmTy(ImmTyAttrChan); }
360   bool isOpSel() const { return isImmTy(ImmTyOpSel); }
361   bool isOpSelHi() const { return isImmTy(ImmTyOpSelHi); }
362   bool isNegLo() const { return isImmTy(ImmTyNegLo); }
363   bool isNegHi() const { return isImmTy(ImmTyNegHi); }
364   bool isHigh() const { return isImmTy(ImmTyHigh); }
365 
366   bool isMod() const {
367     return isClampSI() || isOModSI();
368   }
369 
370   bool isRegOrImm() const {
371     return isReg() || isImm();
372   }
373 
374   bool isRegClass(unsigned RCID) const;
375 
376   bool isInlineValue() const;
377 
378   bool isRegOrInlineNoMods(unsigned RCID, MVT type) const {
379     return isRegOrInline(RCID, type) && !hasModifiers();
380   }
381 
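  // Naming convention for the source-operand predicates below:
  //   SCSrc* - SGPR or inline constant (no literals);
  //   SSrc*  - SCSrc plus literal constants;
  //   VCSrc* - VGPR/SGPR or inline constant (no literals);
  //   VSrc*  - VCSrc plus literal constants;
  //   VISrc* - VGPR or inline constant;
  //   AISrc* - AGPR or inline constant.
  // The suffix gives the expected operand width and type (B32, F16, V2B16, ...).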
382   bool isSCSrcB16() const {
383     return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i16);
384   }
385 
386   bool isSCSrcV2B16() const {
387     return isSCSrcB16();
388   }
389 
390   bool isSCSrcB32() const {
391     return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i32);
392   }
393 
394   bool isSCSrcB64() const {
395     return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::i64);
396   }
397 
398   bool isBoolReg() const;
399 
400   bool isSCSrcF16() const {
401     return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f16);
402   }
403 
404   bool isSCSrcV2F16() const {
405     return isSCSrcF16();
406   }
407 
408   bool isSCSrcF32() const {
409     return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f32);
410   }
411 
412   bool isSCSrcF64() const {
413     return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::f64);
414   }
415 
416   bool isSSrcB32() const {
417     return isSCSrcB32() || isLiteralImm(MVT::i32) || isExpr();
418   }
419 
420   bool isSSrcB16() const {
421     return isSCSrcB16() || isLiteralImm(MVT::i16);
422   }
423 
424   bool isSSrcV2B16() const {
425     llvm_unreachable("cannot happen");
426     return isSSrcB16();
427   }
428 
429   bool isSSrcB64() const {
430     // TODO: Find out how SALU supports extension of 32-bit literals to 64 bits.
    // See isVSrcB64().
432     return isSCSrcB64() || isLiteralImm(MVT::i64);
433   }
434 
435   bool isSSrcF32() const {
436     return isSCSrcB32() || isLiteralImm(MVT::f32) || isExpr();
437   }
438 
439   bool isSSrcF64() const {
440     return isSCSrcB64() || isLiteralImm(MVT::f64);
441   }
442 
443   bool isSSrcF16() const {
444     return isSCSrcB16() || isLiteralImm(MVT::f16);
445   }
446 
447   bool isSSrcV2F16() const {
448     llvm_unreachable("cannot happen");
449     return isSSrcF16();
450   }
451 
452   bool isSSrcV2FP32() const {
453     llvm_unreachable("cannot happen");
454     return isSSrcF32();
455   }
456 
457   bool isSCSrcV2FP32() const {
458     llvm_unreachable("cannot happen");
459     return isSCSrcF32();
460   }
461 
462   bool isSSrcV2INT32() const {
463     llvm_unreachable("cannot happen");
464     return isSSrcB32();
465   }
466 
467   bool isSCSrcV2INT32() const {
468     llvm_unreachable("cannot happen");
469     return isSCSrcB32();
470   }
471 
472   bool isSSrcOrLdsB32() const {
473     return isRegOrInlineNoMods(AMDGPU::SRegOrLds_32RegClassID, MVT::i32) ||
474            isLiteralImm(MVT::i32) || isExpr();
475   }
476 
477   bool isVCSrcB32() const {
478     return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i32);
479   }
480 
481   bool isVCSrcB64() const {
482     return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::i64);
483   }
484 
485   bool isVCSrcB16() const {
486     return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i16);
487   }
488 
489   bool isVCSrcV2B16() const {
490     return isVCSrcB16();
491   }
492 
493   bool isVCSrcF32() const {
494     return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f32);
495   }
496 
497   bool isVCSrcF64() const {
498     return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::f64);
499   }
500 
501   bool isVCSrcF16() const {
502     return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f16);
503   }
504 
505   bool isVCSrcV2F16() const {
506     return isVCSrcF16();
507   }
508 
509   bool isVSrcB32() const {
510     return isVCSrcF32() || isLiteralImm(MVT::i32) || isExpr();
511   }
512 
513   bool isVSrcB64() const {
514     return isVCSrcF64() || isLiteralImm(MVT::i64);
515   }
516 
517   bool isVSrcB16() const {
518     return isVCSrcB16() || isLiteralImm(MVT::i16);
519   }
520 
521   bool isVSrcV2B16() const {
522     return isVSrcB16() || isLiteralImm(MVT::v2i16);
523   }
524 
525   bool isVCSrcV2FP32() const {
526     return isVCSrcF64();
527   }
528 
529   bool isVSrcV2FP32() const {
530     return isVSrcF64() || isLiteralImm(MVT::v2f32);
531   }
532 
533   bool isVCSrcV2INT32() const {
534     return isVCSrcB64();
535   }
536 
537   bool isVSrcV2INT32() const {
538     return isVSrcB64() || isLiteralImm(MVT::v2i32);
539   }
540 
541   bool isVSrcF32() const {
542     return isVCSrcF32() || isLiteralImm(MVT::f32) || isExpr();
543   }
544 
545   bool isVSrcF64() const {
546     return isVCSrcF64() || isLiteralImm(MVT::f64);
547   }
548 
549   bool isVSrcF16() const {
550     return isVCSrcF16() || isLiteralImm(MVT::f16);
551   }
552 
553   bool isVSrcV2F16() const {
554     return isVSrcF16() || isLiteralImm(MVT::v2f16);
555   }
556 
557   bool isVISrcB32() const {
558     return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i32);
559   }
560 
561   bool isVISrcB16() const {
562     return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i16);
563   }
564 
565   bool isVISrcV2B16() const {
566     return isVISrcB16();
567   }
568 
569   bool isVISrcF32() const {
570     return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f32);
571   }
572 
573   bool isVISrcF16() const {
574     return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f16);
575   }
576 
577   bool isVISrcV2F16() const {
578     return isVISrcF16() || isVISrcB32();
579   }
580 
581   bool isVISrc_64B64() const {
582     return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i64);
583   }
584 
585   bool isVISrc_64F64() const {
586     return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f64);
587   }
588 
589   bool isVISrc_64V2FP32() const {
590     return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f32);
591   }
592 
593   bool isVISrc_64V2INT32() const {
594     return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i32);
595   }
596 
597   bool isVISrc_256B64() const {
598     return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i64);
599   }
600 
601   bool isVISrc_256F64() const {
602     return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f64);
603   }
604 
605   bool isVISrc_128B16() const {
606     return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i16);
607   }
608 
609   bool isVISrc_128V2B16() const {
610     return isVISrc_128B16();
611   }
612 
613   bool isVISrc_128B32() const {
614     return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i32);
615   }
616 
617   bool isVISrc_128F32() const {
618     return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f32);
619   }
620 
621   bool isVISrc_256V2FP32() const {
622     return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f32);
623   }
624 
625   bool isVISrc_256V2INT32() const {
626     return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i32);
627   }
628 
629   bool isVISrc_512B32() const {
630     return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i32);
631   }
632 
633   bool isVISrc_512B16() const {
634     return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i16);
635   }
636 
637   bool isVISrc_512V2B16() const {
638     return isVISrc_512B16();
639   }
640 
641   bool isVISrc_512F32() const {
642     return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f32);
643   }
644 
645   bool isVISrc_512F16() const {
646     return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f16);
647   }
648 
649   bool isVISrc_512V2F16() const {
650     return isVISrc_512F16() || isVISrc_512B32();
651   }
652 
653   bool isVISrc_1024B32() const {
654     return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i32);
655   }
656 
657   bool isVISrc_1024B16() const {
658     return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i16);
659   }
660 
661   bool isVISrc_1024V2B16() const {
662     return isVISrc_1024B16();
663   }
664 
665   bool isVISrc_1024F32() const {
666     return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f32);
667   }
668 
669   bool isVISrc_1024F16() const {
670     return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f16);
671   }
672 
673   bool isVISrc_1024V2F16() const {
674     return isVISrc_1024F16() || isVISrc_1024B32();
675   }
676 
677   bool isAISrcB32() const {
678     return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i32);
679   }
680 
681   bool isAISrcB16() const {
682     return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i16);
683   }
684 
685   bool isAISrcV2B16() const {
686     return isAISrcB16();
687   }
688 
689   bool isAISrcF32() const {
690     return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f32);
691   }
692 
693   bool isAISrcF16() const {
694     return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f16);
695   }
696 
697   bool isAISrcV2F16() const {
698     return isAISrcF16() || isAISrcB32();
699   }
700 
701   bool isAISrc_64B64() const {
702     return isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::i64);
703   }
704 
705   bool isAISrc_64F64() const {
706     return isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::f64);
707   }
708 
709   bool isAISrc_128B32() const {
710     return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i32);
711   }
712 
713   bool isAISrc_128B16() const {
714     return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i16);
715   }
716 
717   bool isAISrc_128V2B16() const {
718     return isAISrc_128B16();
719   }
720 
721   bool isAISrc_128F32() const {
722     return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f32);
723   }
724 
725   bool isAISrc_128F16() const {
726     return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f16);
727   }
728 
729   bool isAISrc_128V2F16() const {
730     return isAISrc_128F16() || isAISrc_128B32();
731   }
732 
733   bool isVISrc_128F16() const {
734     return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f16);
735   }
736 
737   bool isVISrc_128V2F16() const {
738     return isVISrc_128F16() || isVISrc_128B32();
739   }
740 
741   bool isAISrc_256B64() const {
742     return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::i64);
743   }
744 
745   bool isAISrc_256F64() const {
746     return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::f64);
747   }
748 
749   bool isAISrc_512B32() const {
750     return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i32);
751   }
752 
753   bool isAISrc_512B16() const {
754     return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i16);
755   }
756 
757   bool isAISrc_512V2B16() const {
758     return isAISrc_512B16();
759   }
760 
761   bool isAISrc_512F32() const {
762     return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f32);
763   }
764 
765   bool isAISrc_512F16() const {
766     return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f16);
767   }
768 
769   bool isAISrc_512V2F16() const {
770     return isAISrc_512F16() || isAISrc_512B32();
771   }
772 
773   bool isAISrc_1024B32() const {
774     return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i32);
775   }
776 
777   bool isAISrc_1024B16() const {
778     return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i16);
779   }
780 
781   bool isAISrc_1024V2B16() const {
782     return isAISrc_1024B16();
783   }
784 
785   bool isAISrc_1024F32() const {
786     return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f32);
787   }
788 
789   bool isAISrc_1024F16() const {
790     return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f16);
791   }
792 
793   bool isAISrc_1024V2F16() const {
794     return isAISrc_1024F16() || isAISrc_1024B32();
795   }
796 
797   bool isKImmFP32() const {
798     return isLiteralImm(MVT::f32);
799   }
800 
801   bool isKImmFP16() const {
802     return isLiteralImm(MVT::f16);
803   }
804 
805   bool isMem() const override {
806     return false;
807   }
808 
809   bool isExpr() const {
810     return Kind == Expression;
811   }
812 
813   bool isSoppBrTarget() const {
814     return isExpr() || isImm();
815   }
816 
817   bool isSWaitCnt() const;
818   bool isHwreg() const;
819   bool isSendMsg() const;
820   bool isSwizzle() const;
821   bool isSMRDOffset8() const;
822   bool isSMEMOffset() const;
823   bool isSMRDLiteralOffset() const;
824   bool isDPP8() const;
825   bool isDPPCtrl() const;
826   bool isBLGP() const;
827   bool isCBSZ() const;
828   bool isABID() const;
829   bool isGPRIdxMode() const;
830   bool isS16Imm() const;
831   bool isU16Imm() const;
832   bool isEndpgm() const;
833 
834   StringRef getExpressionAsToken() const {
835     assert(isExpr());
836     const MCSymbolRefExpr *S = cast<MCSymbolRefExpr>(Expr);
837     return S->getSymbol().getName();
838   }
839 
840   StringRef getToken() const {
841     assert(isToken());
842 
843     if (Kind == Expression)
844       return getExpressionAsToken();
845 
846     return StringRef(Tok.Data, Tok.Length);
847   }
848 
849   int64_t getImm() const {
850     assert(isImm());
851     return Imm.Val;
852   }
853 
854   void setImm(int64_t Val) {
855     assert(isImm());
856     Imm.Val = Val;
857   }
858 
859   ImmTy getImmTy() const {
860     assert(isImm());
861     return Imm.Type;
862   }
863 
864   unsigned getReg() const override {
865     assert(isRegKind());
866     return Reg.RegNo;
867   }
868 
869   SMLoc getStartLoc() const override {
870     return StartLoc;
871   }
872 
873   SMLoc getEndLoc() const override {
874     return EndLoc;
875   }
876 
877   SMRange getLocRange() const {
878     return SMRange(StartLoc, EndLoc);
879   }
880 
881   Modifiers getModifiers() const {
882     assert(isRegKind() || isImmTy(ImmTyNone));
883     return isRegKind() ? Reg.Mods : Imm.Mods;
884   }
885 
886   void setModifiers(Modifiers Mods) {
887     assert(isRegKind() || isImmTy(ImmTyNone));
888     if (isRegKind())
889       Reg.Mods = Mods;
890     else
891       Imm.Mods = Mods;
892   }
893 
894   bool hasModifiers() const {
895     return getModifiers().hasModifiers();
896   }
897 
898   bool hasFPModifiers() const {
899     return getModifiers().hasFPModifiers();
900   }
901 
902   bool hasIntModifiers() const {
903     return getModifiers().hasIntModifiers();
904   }
905 
906   uint64_t applyInputFPModifiers(uint64_t Val, unsigned Size) const;
907 
908   void addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers = true) const;
909 
910   void addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const;
911 
912   template <unsigned Bitwidth>
913   void addKImmFPOperands(MCInst &Inst, unsigned N) const;
914 
915   void addKImmFP16Operands(MCInst &Inst, unsigned N) const {
916     addKImmFPOperands<16>(Inst, N);
917   }
918 
919   void addKImmFP32Operands(MCInst &Inst, unsigned N) const {
920     addKImmFPOperands<32>(Inst, N);
921   }
922 
923   void addRegOperands(MCInst &Inst, unsigned N) const;
924 
925   void addBoolRegOperands(MCInst &Inst, unsigned N) const {
926     addRegOperands(Inst, N);
927   }
928 
929   void addRegOrImmOperands(MCInst &Inst, unsigned N) const {
930     if (isRegKind())
931       addRegOperands(Inst, N);
932     else if (isExpr())
933       Inst.addOperand(MCOperand::createExpr(Expr));
934     else
935       addImmOperands(Inst, N);
936   }
937 
938   void addRegOrImmWithInputModsOperands(MCInst &Inst, unsigned N) const {
939     Modifiers Mods = getModifiers();
940     Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
941     if (isRegKind()) {
942       addRegOperands(Inst, N);
943     } else {
944       addImmOperands(Inst, N, false);
945     }
946   }
947 
948   void addRegOrImmWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
949     assert(!hasIntModifiers());
950     addRegOrImmWithInputModsOperands(Inst, N);
951   }
952 
953   void addRegOrImmWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
954     assert(!hasFPModifiers());
955     addRegOrImmWithInputModsOperands(Inst, N);
956   }
957 
958   void addRegWithInputModsOperands(MCInst &Inst, unsigned N) const {
959     Modifiers Mods = getModifiers();
960     Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
961     assert(isRegKind());
962     addRegOperands(Inst, N);
963   }
964 
965   void addRegWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
966     assert(!hasIntModifiers());
967     addRegWithInputModsOperands(Inst, N);
968   }
969 
970   void addRegWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
971     assert(!hasFPModifiers());
972     addRegWithInputModsOperands(Inst, N);
973   }
974 
975   void addSoppBrTargetOperands(MCInst &Inst, unsigned N) const {
976     if (isImm())
977       addImmOperands(Inst, N);
978     else {
979       assert(isExpr());
980       Inst.addOperand(MCOperand::createExpr(Expr));
981     }
982   }
983 
984   static void printImmTy(raw_ostream& OS, ImmTy Type) {
985     switch (Type) {
986     case ImmTyNone: OS << "None"; break;
987     case ImmTyGDS: OS << "GDS"; break;
988     case ImmTyLDS: OS << "LDS"; break;
989     case ImmTyOffen: OS << "Offen"; break;
990     case ImmTyIdxen: OS << "Idxen"; break;
991     case ImmTyAddr64: OS << "Addr64"; break;
992     case ImmTyOffset: OS << "Offset"; break;
993     case ImmTyInstOffset: OS << "InstOffset"; break;
994     case ImmTyOffset0: OS << "Offset0"; break;
995     case ImmTyOffset1: OS << "Offset1"; break;
996     case ImmTyCPol: OS << "CPol"; break;
997     case ImmTySWZ: OS << "SWZ"; break;
998     case ImmTyTFE: OS << "TFE"; break;
999     case ImmTyD16: OS << "D16"; break;
1000     case ImmTyFORMAT: OS << "FORMAT"; break;
1001     case ImmTyClampSI: OS << "ClampSI"; break;
1002     case ImmTyOModSI: OS << "OModSI"; break;
1003     case ImmTyDPP8: OS << "DPP8"; break;
1004     case ImmTyDppCtrl: OS << "DppCtrl"; break;
1005     case ImmTyDppRowMask: OS << "DppRowMask"; break;
1006     case ImmTyDppBankMask: OS << "DppBankMask"; break;
1007     case ImmTyDppBoundCtrl: OS << "DppBoundCtrl"; break;
1008     case ImmTyDppFi: OS << "FI"; break;
1009     case ImmTySdwaDstSel: OS << "SdwaDstSel"; break;
1010     case ImmTySdwaSrc0Sel: OS << "SdwaSrc0Sel"; break;
1011     case ImmTySdwaSrc1Sel: OS << "SdwaSrc1Sel"; break;
1012     case ImmTySdwaDstUnused: OS << "SdwaDstUnused"; break;
1013     case ImmTyDMask: OS << "DMask"; break;
1014     case ImmTyDim: OS << "Dim"; break;
1015     case ImmTyUNorm: OS << "UNorm"; break;
1016     case ImmTyDA: OS << "DA"; break;
1017     case ImmTyR128A16: OS << "R128A16"; break;
1018     case ImmTyA16: OS << "A16"; break;
1019     case ImmTyLWE: OS << "LWE"; break;
1020     case ImmTyOff: OS << "Off"; break;
1021     case ImmTyExpTgt: OS << "ExpTgt"; break;
1022     case ImmTyExpCompr: OS << "ExpCompr"; break;
1023     case ImmTyExpVM: OS << "ExpVM"; break;
1024     case ImmTyHwreg: OS << "Hwreg"; break;
1025     case ImmTySendMsg: OS << "SendMsg"; break;
1026     case ImmTyInterpSlot: OS << "InterpSlot"; break;
1027     case ImmTyInterpAttr: OS << "InterpAttr"; break;
1028     case ImmTyAttrChan: OS << "AttrChan"; break;
1029     case ImmTyOpSel: OS << "OpSel"; break;
1030     case ImmTyOpSelHi: OS << "OpSelHi"; break;
1031     case ImmTyNegLo: OS << "NegLo"; break;
1032     case ImmTyNegHi: OS << "NegHi"; break;
1033     case ImmTySwizzle: OS << "Swizzle"; break;
1034     case ImmTyGprIdxMode: OS << "GprIdxMode"; break;
1035     case ImmTyHigh: OS << "High"; break;
1036     case ImmTyBLGP: OS << "BLGP"; break;
1037     case ImmTyCBSZ: OS << "CBSZ"; break;
1038     case ImmTyABID: OS << "ABID"; break;
1039     case ImmTyEndpgm: OS << "Endpgm"; break;
1040     }
1041   }
1042 
1043   void print(raw_ostream &OS) const override {
1044     switch (Kind) {
1045     case Register:
1046       OS << "<register " << getReg() << " mods: " << Reg.Mods << '>';
1047       break;
1048     case Immediate:
1049       OS << '<' << getImm();
1050       if (getImmTy() != ImmTyNone) {
1051         OS << " type: "; printImmTy(OS, getImmTy());
1052       }
1053       OS << " mods: " << Imm.Mods << '>';
1054       break;
1055     case Token:
1056       OS << '\'' << getToken() << '\'';
1057       break;
1058     case Expression:
1059       OS << "<expr " << *Expr << '>';
1060       break;
1061     }
1062   }
1063 
1064   static AMDGPUOperand::Ptr CreateImm(const AMDGPUAsmParser *AsmParser,
1065                                       int64_t Val, SMLoc Loc,
1066                                       ImmTy Type = ImmTyNone,
1067                                       bool IsFPImm = false) {
1068     auto Op = std::make_unique<AMDGPUOperand>(Immediate, AsmParser);
1069     Op->Imm.Val = Val;
1070     Op->Imm.IsFPImm = IsFPImm;
1071     Op->Imm.Kind = ImmKindTyNone;
1072     Op->Imm.Type = Type;
1073     Op->Imm.Mods = Modifiers();
1074     Op->StartLoc = Loc;
1075     Op->EndLoc = Loc;
1076     return Op;
1077   }
1078 
1079   static AMDGPUOperand::Ptr CreateToken(const AMDGPUAsmParser *AsmParser,
1080                                         StringRef Str, SMLoc Loc,
1081                                         bool HasExplicitEncodingSize = true) {
1082     auto Res = std::make_unique<AMDGPUOperand>(Token, AsmParser);
1083     Res->Tok.Data = Str.data();
1084     Res->Tok.Length = Str.size();
1085     Res->StartLoc = Loc;
1086     Res->EndLoc = Loc;
1087     return Res;
1088   }
1089 
1090   static AMDGPUOperand::Ptr CreateReg(const AMDGPUAsmParser *AsmParser,
1091                                       unsigned RegNo, SMLoc S,
1092                                       SMLoc E) {
1093     auto Op = std::make_unique<AMDGPUOperand>(Register, AsmParser);
1094     Op->Reg.RegNo = RegNo;
1095     Op->Reg.Mods = Modifiers();
1096     Op->StartLoc = S;
1097     Op->EndLoc = E;
1098     return Op;
1099   }
1100 
1101   static AMDGPUOperand::Ptr CreateExpr(const AMDGPUAsmParser *AsmParser,
1102                                        const class MCExpr *Expr, SMLoc S) {
1103     auto Op = std::make_unique<AMDGPUOperand>(Expression, AsmParser);
1104     Op->Expr = Expr;
1105     Op->StartLoc = S;
1106     Op->EndLoc = S;
1107     return Op;
1108   }
1109 };
1110 
1111 raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods) {
1112   OS << "abs:" << Mods.Abs << " neg: " << Mods.Neg << " sext:" << Mods.Sext;
1113   return OS;
1114 }
1115 
1116 //===----------------------------------------------------------------------===//
1117 // AsmParser
1118 //===----------------------------------------------------------------------===//
1119 
// Holds info related to the current kernel, e.g. count of SGPRs used.
// Kernel scope begins at the .amdgpu_hsa_kernel directive and ends at the
// next .amdgpu_hsa_kernel directive or at EOF.
1123 class KernelScopeInfo {
1124   int SgprIndexUnusedMin = -1;
1125   int VgprIndexUnusedMin = -1;
1126   MCContext *Ctx = nullptr;
1127 
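  // Record a use of SGPR #i and advance the ".kernel.sgpr_count" symbol.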
1128   void usesSgprAt(int i) {
1129     if (i >= SgprIndexUnusedMin) {
1130       SgprIndexUnusedMin = ++i;
1131       if (Ctx) {
1132         MCSymbol * const Sym = Ctx->getOrCreateSymbol(Twine(".kernel.sgpr_count"));
1133         Sym->setVariableValue(MCConstantExpr::create(SgprIndexUnusedMin, *Ctx));
1134       }
1135     }
1136   }
1137 
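  // Record a use of VGPR #i and advance the ".kernel.vgpr_count" symbol.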
1138   void usesVgprAt(int i) {
1139     if (i >= VgprIndexUnusedMin) {
1140       VgprIndexUnusedMin = ++i;
1141       if (Ctx) {
1142         MCSymbol * const Sym = Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count"));
1143         Sym->setVariableValue(MCConstantExpr::create(VgprIndexUnusedMin, *Ctx));
1144       }
1145     }
1146   }
1147 
1148 public:
1149   KernelScopeInfo() = default;
1150 
1151   void initialize(MCContext &Context) {
1152     Ctx = &Context;
1153     usesSgprAt(SgprIndexUnusedMin = -1);
1154     usesVgprAt(VgprIndexUnusedMin = -1);
1155   }
1156 
1157   void usesRegister(RegisterKind RegKind, unsigned DwordRegIndex, unsigned RegWidth) {
1158     switch (RegKind) {
1159       case IS_SGPR: usesSgprAt(DwordRegIndex + RegWidth - 1); break;
1160       case IS_AGPR: // fall through
1161       case IS_VGPR: usesVgprAt(DwordRegIndex + RegWidth - 1); break;
1162       default: break;
1163     }
1164   }
1165 };
1166 
1167 class AMDGPUAsmParser : public MCTargetAsmParser {
1168   MCAsmParser &Parser;
1169 
1170   // Number of extra operands parsed after the first optional operand.
1171   // This may be necessary to skip hardcoded mandatory operands.
1172   static const unsigned MAX_OPR_LOOKAHEAD = 8;
1173 
1174   unsigned ForcedEncodingSize = 0;
1175   bool ForcedDPP = false;
1176   bool ForcedSDWA = false;
1177   KernelScopeInfo KernelScope;
1178   unsigned CPolSeen;
1179 
1180   /// @name Auto-generated Match Functions
1181   /// {
1182 
1183 #define GET_ASSEMBLER_HEADER
1184 #include "AMDGPUGenAsmMatcher.inc"
1185 
1186   /// }
1187 
1188 private:
1189   bool ParseAsAbsoluteExpression(uint32_t &Ret);
1190   bool OutOfRangeError(SMRange Range);
  /// Calculate VGPR/SGPR blocks required for the given target, reserved
1192   /// registers, and user-specified NextFreeXGPR values.
1193   ///
1194   /// \param Features [in] Target features, used for bug corrections.
1195   /// \param VCCUsed [in] Whether VCC special SGPR is reserved.
1196   /// \param FlatScrUsed [in] Whether FLAT_SCRATCH special SGPR is reserved.
1197   /// \param XNACKUsed [in] Whether XNACK_MASK special SGPR is reserved.
1198   /// \param EnableWavefrontSize32 [in] Value of ENABLE_WAVEFRONT_SIZE32 kernel
1199   /// descriptor field, if valid.
1200   /// \param NextFreeVGPR [in] Max VGPR number referenced, plus one.
1201   /// \param VGPRRange [in] Token range, used for VGPR diagnostics.
1202   /// \param NextFreeSGPR [in] Max SGPR number referenced, plus one.
1203   /// \param SGPRRange [in] Token range, used for SGPR diagnostics.
1204   /// \param VGPRBlocks [out] Result VGPR block count.
1205   /// \param SGPRBlocks [out] Result SGPR block count.
1206   bool calculateGPRBlocks(const FeatureBitset &Features, bool VCCUsed,
1207                           bool FlatScrUsed, bool XNACKUsed,
1208                           Optional<bool> EnableWavefrontSize32, unsigned NextFreeVGPR,
1209                           SMRange VGPRRange, unsigned NextFreeSGPR,
1210                           SMRange SGPRRange, unsigned &VGPRBlocks,
1211                           unsigned &SGPRBlocks);
1212   bool ParseDirectiveAMDGCNTarget();
1213   bool ParseDirectiveAMDHSAKernel();
1214   bool ParseDirectiveMajorMinor(uint32_t &Major, uint32_t &Minor);
1215   bool ParseDirectiveHSACodeObjectVersion();
1216   bool ParseDirectiveHSACodeObjectISA();
1217   bool ParseAMDKernelCodeTValue(StringRef ID, amd_kernel_code_t &Header);
1218   bool ParseDirectiveAMDKernelCodeT();
1219   // TODO: Possibly make subtargetHasRegister const.
1220   bool subtargetHasRegister(const MCRegisterInfo &MRI, unsigned RegNo);
1221   bool ParseDirectiveAMDGPUHsaKernel();
1222 
1223   bool ParseDirectiveISAVersion();
1224   bool ParseDirectiveHSAMetadata();
1225   bool ParseDirectivePALMetadataBegin();
1226   bool ParseDirectivePALMetadata();
1227   bool ParseDirectiveAMDGPULDS();
1228 
1229   /// Common code to parse out a block of text (typically YAML) between start and
1230   /// end directives.
1231   bool ParseToEndDirective(const char *AssemblerDirectiveBegin,
1232                            const char *AssemblerDirectiveEnd,
1233                            std::string &CollectString);
1234 
1235   bool AddNextRegisterToList(unsigned& Reg, unsigned& RegWidth,
1236                              RegisterKind RegKind, unsigned Reg1, SMLoc Loc);
1237   bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
1238                            unsigned &RegNum, unsigned &RegWidth,
1239                            bool RestoreOnFailure = false);
1240   bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
1241                            unsigned &RegNum, unsigned &RegWidth,
1242                            SmallVectorImpl<AsmToken> &Tokens);
1243   unsigned ParseRegularReg(RegisterKind &RegKind, unsigned &RegNum,
1244                            unsigned &RegWidth,
1245                            SmallVectorImpl<AsmToken> &Tokens);
1246   unsigned ParseSpecialReg(RegisterKind &RegKind, unsigned &RegNum,
1247                            unsigned &RegWidth,
1248                            SmallVectorImpl<AsmToken> &Tokens);
1249   unsigned ParseRegList(RegisterKind &RegKind, unsigned &RegNum,
1250                         unsigned &RegWidth, SmallVectorImpl<AsmToken> &Tokens);
1251   bool ParseRegRange(unsigned& Num, unsigned& Width);
1252   unsigned getRegularReg(RegisterKind RegKind,
1253                          unsigned RegNum,
1254                          unsigned RegWidth,
1255                          SMLoc Loc);
1256 
1257   bool isRegister();
1258   bool isRegister(const AsmToken &Token, const AsmToken &NextToken) const;
1259   Optional<StringRef> getGprCountSymbolName(RegisterKind RegKind);
1260   void initializeGprCountSymbol(RegisterKind RegKind);
1261   bool updateGprCountSymbols(RegisterKind RegKind, unsigned DwordRegIndex,
1262                              unsigned RegWidth);
1263   void cvtMubufImpl(MCInst &Inst, const OperandVector &Operands,
1264                     bool IsAtomic, bool IsLds = false);
1265   void cvtDSImpl(MCInst &Inst, const OperandVector &Operands,
1266                  bool IsGdsHardcoded);
1267 
1268 public:
1269   enum AMDGPUMatchResultTy {
1270     Match_PreferE32 = FIRST_TARGET_MATCH_RESULT_TY
1271   };
1272   enum OperandMode {
1273     OperandMode_Default,
1274     OperandMode_NSA,
1275   };
1276 
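  // Maps an optional immediate operand's ImmTy to its index in the parsed
  // operand list; used by the cvt* converters when rebuilding the MCInst.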
1277   using OptionalImmIndexMap = std::map<AMDGPUOperand::ImmTy, unsigned>;
1278 
1279   AMDGPUAsmParser(const MCSubtargetInfo &STI, MCAsmParser &_Parser,
1280                const MCInstrInfo &MII,
1281                const MCTargetOptions &Options)
1282       : MCTargetAsmParser(Options, STI, MII), Parser(_Parser) {
1283     MCAsmParserExtension::Initialize(Parser);
1284 
1285     if (getFeatureBits().none()) {
1286       // Set default features.
1287       copySTI().ToggleFeature("southern-islands");
1288     }
1289 
1290     setAvailableFeatures(ComputeAvailableFeatures(getFeatureBits()));
1291 
1292     {
      // TODO: make these pre-defined variables read-only.
      // Currently there is no suitable machinery in the core llvm-mc for this.
      // MCSymbol::isRedefinable is intended for another purpose, and
      // AsmParser::parseDirectiveSet() cannot be specialized for a specific
      // target.
1297       AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
1298       MCContext &Ctx = getContext();
1299       if (ISA.Major >= 6 && isHsaAbiVersion3Or4(&getSTI())) {
1300         MCSymbol *Sym =
1301             Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_number"));
1302         Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
1303         Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_minor"));
1304         Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
1305         Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_stepping"));
1306         Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
1307       } else {
1308         MCSymbol *Sym =
1309             Ctx.getOrCreateSymbol(Twine(".option.machine_version_major"));
1310         Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
1311         Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_minor"));
1312         Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
1313         Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_stepping"));
1314         Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
1315       }
1316       if (ISA.Major >= 6 && isHsaAbiVersion3Or4(&getSTI())) {
1317         initializeGprCountSymbol(IS_VGPR);
1318         initializeGprCountSymbol(IS_SGPR);
1319       } else
1320         KernelScope.initialize(getContext());
1321     }
1322   }
1323 
1324   bool hasMIMG_R128() const {
1325     return AMDGPU::hasMIMG_R128(getSTI());
1326   }
1327 
1328   bool hasPackedD16() const {
1329     return AMDGPU::hasPackedD16(getSTI());
1330   }
1331 
1332   bool hasGFX10A16() const {
1333     return AMDGPU::hasGFX10A16(getSTI());
1334   }
1335 
1336   bool hasG16() const { return AMDGPU::hasG16(getSTI()); }
1337 
1338   bool isSI() const {
1339     return AMDGPU::isSI(getSTI());
1340   }
1341 
1342   bool isCI() const {
1343     return AMDGPU::isCI(getSTI());
1344   }
1345 
1346   bool isVI() const {
1347     return AMDGPU::isVI(getSTI());
1348   }
1349 
1350   bool isGFX9() const {
1351     return AMDGPU::isGFX9(getSTI());
1352   }
1353 
1354   bool isGFX90A() const {
1355     return AMDGPU::isGFX90A(getSTI());
1356   }
1357 
1358   bool isGFX9Plus() const {
1359     return AMDGPU::isGFX9Plus(getSTI());
1360   }
1361 
1362   bool isGFX10() const {
1363     return AMDGPU::isGFX10(getSTI());
1364   }
1365 
1366   bool isGFX10Plus() const { return AMDGPU::isGFX10Plus(getSTI()); }
1367 
1368   bool isGFX10_BEncoding() const {
1369     return AMDGPU::isGFX10_BEncoding(getSTI());
1370   }
1371 
1372   bool hasInv2PiInlineImm() const {
1373     return getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm];
1374   }
1375 
1376   bool hasFlatOffsets() const {
1377     return getFeatureBits()[AMDGPU::FeatureFlatInstOffsets];
1378   }
1379 
1380   bool hasArchitectedFlatScratch() const {
1381     return getFeatureBits()[AMDGPU::FeatureArchitectedFlatScratch];
1382   }
1383 
1384   bool hasSGPR102_SGPR103() const {
1385     return !isVI() && !isGFX9();
1386   }
1387 
1388   bool hasSGPR104_SGPR105() const { return isGFX10Plus(); }
1389 
1390   bool hasIntClamp() const {
1391     return getFeatureBits()[AMDGPU::FeatureIntClamp];
1392   }
1393 
1394   AMDGPUTargetStreamer &getTargetStreamer() {
1395     MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer();
1396     return static_cast<AMDGPUTargetStreamer &>(TS);
1397   }
1398 
1399   const MCRegisterInfo *getMRI() const {
    // We need this const_cast because getContext() is not const in
    // MCAsmParser.
1402     return const_cast<AMDGPUAsmParser*>(this)->getContext().getRegisterInfo();
1403   }
1404 
1405   const MCInstrInfo *getMII() const {
1406     return &MII;
1407   }
1408 
1409   const FeatureBitset &getFeatureBits() const {
1410     return getSTI().getFeatureBits();
1411   }
1412 
1413   void setForcedEncodingSize(unsigned Size) { ForcedEncodingSize = Size; }
1414   void setForcedDPP(bool ForceDPP_) { ForcedDPP = ForceDPP_; }
1415   void setForcedSDWA(bool ForceSDWA_) { ForcedSDWA = ForceSDWA_; }
1416 
1417   unsigned getForcedEncodingSize() const { return ForcedEncodingSize; }
1418   bool isForcedVOP3() const { return ForcedEncodingSize == 64; }
1419   bool isForcedDPP() const { return ForcedDPP; }
1420   bool isForcedSDWA() const { return ForcedSDWA; }
1421   ArrayRef<unsigned> getMatchedVariants() const;
1422   StringRef getMatchedVariantName() const;
1423 
1424   std::unique_ptr<AMDGPUOperand> parseRegister(bool RestoreOnFailure = false);
1425   bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc,
1426                      bool RestoreOnFailure);
1427   bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override;
1428   OperandMatchResultTy tryParseRegister(unsigned &RegNo, SMLoc &StartLoc,
1429                                         SMLoc &EndLoc) override;
1430   unsigned checkTargetMatchPredicate(MCInst &Inst) override;
1431   unsigned validateTargetOperandClass(MCParsedAsmOperand &Op,
1432                                       unsigned Kind) override;
1433   bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
1434                                OperandVector &Operands, MCStreamer &Out,
1435                                uint64_t &ErrorInfo,
1436                                bool MatchingInlineAsm) override;
1437   bool ParseDirective(AsmToken DirectiveID) override;
1438   OperandMatchResultTy parseOperand(OperandVector &Operands, StringRef Mnemonic,
1439                                     OperandMode Mode = OperandMode_Default);
1440   StringRef parseMnemonicSuffix(StringRef Name);
1441   bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
1442                         SMLoc NameLoc, OperandVector &Operands) override;
1443   //bool ProcessInstruction(MCInst &Inst);
1444 
1445   OperandMatchResultTy parseIntWithPrefix(const char *Prefix, int64_t &Int);
1446 
1447   OperandMatchResultTy
1448   parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
1449                      AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
1450                      bool (*ConvertResult)(int64_t &) = nullptr);
1451 
1452   OperandMatchResultTy
1453   parseOperandArrayWithPrefix(const char *Prefix,
1454                               OperandVector &Operands,
1455                               AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
1456                               bool (*ConvertResult)(int64_t&) = nullptr);
1457 
1458   OperandMatchResultTy
1459   parseNamedBit(StringRef Name, OperandVector &Operands,
1460                 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone);
1461   OperandMatchResultTy parseCPol(OperandVector &Operands);
1462   OperandMatchResultTy parseStringWithPrefix(StringRef Prefix,
1463                                              StringRef &Value,
1464                                              SMLoc &StringLoc);
1465 
1466   bool isModifier();
1467   bool isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1468   bool isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1469   bool isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1470   bool isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const;
1471   bool parseSP3NegModifier();
1472   OperandMatchResultTy parseImm(OperandVector &Operands, bool HasSP3AbsModifier = false);
1473   OperandMatchResultTy parseReg(OperandVector &Operands);
1474   OperandMatchResultTy parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod = false);
1475   OperandMatchResultTy parseRegOrImmWithFPInputMods(OperandVector &Operands, bool AllowImm = true);
1476   OperandMatchResultTy parseRegOrImmWithIntInputMods(OperandVector &Operands, bool AllowImm = true);
1477   OperandMatchResultTy parseRegWithFPInputMods(OperandVector &Operands);
1478   OperandMatchResultTy parseRegWithIntInputMods(OperandVector &Operands);
1479   OperandMatchResultTy parseVReg32OrOff(OperandVector &Operands);
1480   OperandMatchResultTy parseDfmtNfmt(int64_t &Format);
1481   OperandMatchResultTy parseUfmt(int64_t &Format);
1482   OperandMatchResultTy parseSymbolicSplitFormat(StringRef FormatStr, SMLoc Loc, int64_t &Format);
1483   OperandMatchResultTy parseSymbolicUnifiedFormat(StringRef FormatStr, SMLoc Loc, int64_t &Format);
1484   OperandMatchResultTy parseFORMAT(OperandVector &Operands);
1485   OperandMatchResultTy parseSymbolicOrNumericFormat(int64_t &Format);
1486   OperandMatchResultTy parseNumericFormat(int64_t &Format);
1487   bool tryParseFmt(const char *Pref, int64_t MaxVal, int64_t &Val);
1488   bool matchDfmtNfmt(int64_t &Dfmt, int64_t &Nfmt, StringRef FormatStr, SMLoc Loc);
1489 
1490   void cvtDSOffset01(MCInst &Inst, const OperandVector &Operands);
1491   void cvtDS(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, false); }
1492   void cvtDSGds(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, true); }
1493   void cvtExp(MCInst &Inst, const OperandVector &Operands);
1494 
1495   bool parseCnt(int64_t &IntVal);
1496   OperandMatchResultTy parseSWaitCntOps(OperandVector &Operands);
1497   OperandMatchResultTy parseHwreg(OperandVector &Operands);
1498 
1499 private:
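  // One parsed component of a structured operand such as hwreg(...) or
  // sendmsg(...): its source location, value, and whether it was given
  // symbolically and whether it was specified at all.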
1500   struct OperandInfoTy {
1501     SMLoc Loc;
1502     int64_t Id;
1503     bool IsSymbolic = false;
1504     bool IsDefined = false;
1505 
1506     OperandInfoTy(int64_t Id_) : Id(Id_) {}
1507   };
1508 
1509   bool parseSendMsgBody(OperandInfoTy &Msg, OperandInfoTy &Op, OperandInfoTy &Stream);
1510   bool validateSendMsg(const OperandInfoTy &Msg,
1511                        const OperandInfoTy &Op,
1512                        const OperandInfoTy &Stream);
1513 
1514   bool parseHwregBody(OperandInfoTy &HwReg,
1515                       OperandInfoTy &Offset,
1516                       OperandInfoTy &Width);
1517   bool validateHwreg(const OperandInfoTy &HwReg,
1518                      const OperandInfoTy &Offset,
1519                      const OperandInfoTy &Width);
1520 
1521   SMLoc getFlatOffsetLoc(const OperandVector &Operands) const;
1522   SMLoc getSMEMOffsetLoc(const OperandVector &Operands) const;
1523 
1524   SMLoc getOperandLoc(std::function<bool(const AMDGPUOperand&)> Test,
1525                       const OperandVector &Operands) const;
1526   SMLoc getImmLoc(AMDGPUOperand::ImmTy Type, const OperandVector &Operands) const;
1527   SMLoc getRegLoc(unsigned Reg, const OperandVector &Operands) const;
1528   SMLoc getLitLoc(const OperandVector &Operands) const;
1529   SMLoc getConstLoc(const OperandVector &Operands) const;
1530 
1531   bool validateInstruction(const MCInst &Inst, const SMLoc &IDLoc, const OperandVector &Operands);
1532   bool validateFlatOffset(const MCInst &Inst, const OperandVector &Operands);
1533   bool validateSMEMOffset(const MCInst &Inst, const OperandVector &Operands);
1534   bool validateSOPLiteral(const MCInst &Inst) const;
1535   bool validateConstantBusLimitations(const MCInst &Inst, const OperandVector &Operands);
1536   bool validateEarlyClobberLimitations(const MCInst &Inst, const OperandVector &Operands);
1537   bool validateIntClampSupported(const MCInst &Inst);
1538   bool validateMIMGAtomicDMask(const MCInst &Inst);
1539   bool validateMIMGGatherDMask(const MCInst &Inst);
1540   bool validateMovrels(const MCInst &Inst, const OperandVector &Operands);
1541   bool validateMIMGDataSize(const MCInst &Inst);
1542   bool validateMIMGAddrSize(const MCInst &Inst);
1543   bool validateMIMGD16(const MCInst &Inst);
1544   bool validateMIMGDim(const MCInst &Inst);
1545   bool validateMIMGMSAA(const MCInst &Inst);
1546   bool validateOpSel(const MCInst &Inst);
1547   bool validateDPP(const MCInst &Inst, const OperandVector &Operands);
1548   bool validateVccOperand(unsigned Reg) const;
1549   bool validateVOPLiteral(const MCInst &Inst, const OperandVector &Operands);
1550   bool validateMAIAccWrite(const MCInst &Inst, const OperandVector &Operands);
1551   bool validateMFMA(const MCInst &Inst, const OperandVector &Operands);
1552   bool validateAGPRLdSt(const MCInst &Inst) const;
1553   bool validateVGPRAlign(const MCInst &Inst) const;
1554   bool validateGWS(const MCInst &Inst, const OperandVector &Operands);
1555   bool validateDivScale(const MCInst &Inst);
1556   bool validateCoherencyBits(const MCInst &Inst, const OperandVector &Operands,
1557                              const SMLoc &IDLoc);
1558   Optional<StringRef> validateLdsDirect(const MCInst &Inst);
1559   unsigned getConstantBusLimit(unsigned Opcode) const;
1560   bool usesConstantBus(const MCInst &Inst, unsigned OpIdx);
1561   bool isInlineConstant(const MCInst &Inst, unsigned OpIdx) const;
1562   unsigned findImplicitSGPRReadInVOP(const MCInst &Inst) const;
1563 
1564   bool isSupportedMnemo(StringRef Mnemo,
1565                         const FeatureBitset &FBS);
1566   bool isSupportedMnemo(StringRef Mnemo,
1567                         const FeatureBitset &FBS,
1568                         ArrayRef<unsigned> Variants);
1569   bool checkUnsupportedInstruction(StringRef Name, const SMLoc &IDLoc);
1570 
1571   bool isId(const StringRef Id) const;
1572   bool isId(const AsmToken &Token, const StringRef Id) const;
1573   bool isToken(const AsmToken::TokenKind Kind) const;
1574   bool trySkipId(const StringRef Id);
1575   bool trySkipId(const StringRef Pref, const StringRef Id);
1576   bool trySkipId(const StringRef Id, const AsmToken::TokenKind Kind);
1577   bool trySkipToken(const AsmToken::TokenKind Kind);
1578   bool skipToken(const AsmToken::TokenKind Kind, const StringRef ErrMsg);
1579   bool parseString(StringRef &Val, const StringRef ErrMsg = "expected a string");
1580   bool parseId(StringRef &Val, const StringRef ErrMsg = "");
1581 
1582   void peekTokens(MutableArrayRef<AsmToken> Tokens);
1583   AsmToken::TokenKind getTokenKind() const;
1584   bool parseExpr(int64_t &Imm, StringRef Expected = "");
1585   bool parseExpr(OperandVector &Operands);
1586   StringRef getTokenStr() const;
1587   AsmToken peekToken();
1588   AsmToken getToken() const;
1589   SMLoc getLoc() const;
1590   void lex();
1591 
1592 public:
1593   void onBeginOfFile() override;
1594 
1595   OperandMatchResultTy parseOptionalOperand(OperandVector &Operands);
1596   OperandMatchResultTy parseOptionalOpr(OperandVector &Operands);
1597 
1598   OperandMatchResultTy parseExpTgt(OperandVector &Operands);
1599   OperandMatchResultTy parseSendMsgOp(OperandVector &Operands);
1600   OperandMatchResultTy parseInterpSlot(OperandVector &Operands);
1601   OperandMatchResultTy parseInterpAttr(OperandVector &Operands);
1602   OperandMatchResultTy parseSOppBrTarget(OperandVector &Operands);
1603   OperandMatchResultTy parseBoolReg(OperandVector &Operands);
1604 
1605   bool parseSwizzleOperand(int64_t &Op,
1606                            const unsigned MinVal,
1607                            const unsigned MaxVal,
1608                            const StringRef ErrMsg,
1609                            SMLoc &Loc);
1610   bool parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
1611                             const unsigned MinVal,
1612                             const unsigned MaxVal,
1613                             const StringRef ErrMsg);
1614   OperandMatchResultTy parseSwizzleOp(OperandVector &Operands);
1615   bool parseSwizzleOffset(int64_t &Imm);
1616   bool parseSwizzleMacro(int64_t &Imm);
1617   bool parseSwizzleQuadPerm(int64_t &Imm);
1618   bool parseSwizzleBitmaskPerm(int64_t &Imm);
1619   bool parseSwizzleBroadcast(int64_t &Imm);
1620   bool parseSwizzleSwap(int64_t &Imm);
1621   bool parseSwizzleReverse(int64_t &Imm);
1622 
1623   OperandMatchResultTy parseGPRIdxMode(OperandVector &Operands);
1624   int64_t parseGPRIdxMacro();
1625 
1626   void cvtMubuf(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false); }
1627   void cvtMubufAtomic(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true); }
1628   void cvtMubufLds(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, true); }
1629   void cvtMtbuf(MCInst &Inst, const OperandVector &Operands);
1630 
1631   AMDGPUOperand::Ptr defaultCPol() const;
1632 
1633   AMDGPUOperand::Ptr defaultSMRDOffset8() const;
1634   AMDGPUOperand::Ptr defaultSMEMOffset() const;
1635   AMDGPUOperand::Ptr defaultSMRDLiteralOffset() const;
1636   AMDGPUOperand::Ptr defaultFlatOffset() const;
1637 
1638   OperandMatchResultTy parseOModOperand(OperandVector &Operands);
1639 
1640   void cvtVOP3(MCInst &Inst, const OperandVector &Operands,
1641                OptionalImmIndexMap &OptionalIdx);
1642   void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands);
1643   void cvtVOP3(MCInst &Inst, const OperandVector &Operands);
1644   void cvtVOP3P(MCInst &Inst, const OperandVector &Operands);
1645   void cvtVOP3P(MCInst &Inst, const OperandVector &Operands,
1646                 OptionalImmIndexMap &OptionalIdx);
1647 
1648   void cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands);
1649 
1650   void cvtMIMG(MCInst &Inst, const OperandVector &Operands,
1651                bool IsAtomic = false);
1652   void cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands);
1653   void cvtIntersectRay(MCInst &Inst, const OperandVector &Operands);
1654 
1655   void cvtSMEMAtomic(MCInst &Inst, const OperandVector &Operands);
1656 
1657   bool parseDimId(unsigned &Encoding);
1658   OperandMatchResultTy parseDim(OperandVector &Operands);
1659   OperandMatchResultTy parseDPP8(OperandVector &Operands);
1660   OperandMatchResultTy parseDPPCtrl(OperandVector &Operands);
1661   bool isSupportedDPPCtrl(StringRef Ctrl, const OperandVector &Operands);
1662   int64_t parseDPPCtrlSel(StringRef Ctrl);
1663   int64_t parseDPPCtrlPerm();
1664   AMDGPUOperand::Ptr defaultRowMask() const;
1665   AMDGPUOperand::Ptr defaultBankMask() const;
1666   AMDGPUOperand::Ptr defaultBoundCtrl() const;
1667   AMDGPUOperand::Ptr defaultFI() const;
1668   void cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8 = false);
1669   void cvtDPP8(MCInst &Inst, const OperandVector &Operands) { cvtDPP(Inst, Operands, true); }
1670 
1671   OperandMatchResultTy parseSDWASel(OperandVector &Operands, StringRef Prefix,
1672                                     AMDGPUOperand::ImmTy Type);
1673   OperandMatchResultTy parseSDWADstUnused(OperandVector &Operands);
1674   void cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands);
1675   void cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands);
1676   void cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands);
1677   void cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands);
1678   void cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands);
1679   void cvtSDWA(MCInst &Inst, const OperandVector &Operands,
1680                uint64_t BasicInstType,
1681                bool SkipDstVcc = false,
1682                bool SkipSrcVcc = false);
1683 
1684   AMDGPUOperand::Ptr defaultBLGP() const;
1685   AMDGPUOperand::Ptr defaultCBSZ() const;
1686   AMDGPUOperand::Ptr defaultABID() const;
1687 
1688   OperandMatchResultTy parseEndpgmOp(OperandVector &Operands);
1689   AMDGPUOperand::Ptr defaultEndpgmImmOperands() const;
1690 };
1691 
1692 struct OptionalOperand {
1693   const char *Name;
1694   AMDGPUOperand::ImmTy Type;
1695   bool IsBit;
1696   bool (*ConvertResult)(int64_t&);
1697 };
1698 
1699 } // end anonymous namespace
1700 
1701 // May be called with integer type with equivalent bitwidth.
1702 static const fltSemantics *getFltSemantics(unsigned Size) {
1703   switch (Size) {
1704   case 4:
1705     return &APFloat::IEEEsingle();
1706   case 8:
1707     return &APFloat::IEEEdouble();
1708   case 2:
1709     return &APFloat::IEEEhalf();
1710   default:
1711     llvm_unreachable("unsupported fp type");
1712   }
1713 }
1714 
1715 static const fltSemantics *getFltSemantics(MVT VT) {
1716   return getFltSemantics(VT.getSizeInBits() / 8);
1717 }
1718 
1719 static const fltSemantics *getOpFltSemantics(uint8_t OperandType) {
1720   switch (OperandType) {
1721   case AMDGPU::OPERAND_REG_IMM_INT32:
1722   case AMDGPU::OPERAND_REG_IMM_FP32:
1723   case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED:
1724   case AMDGPU::OPERAND_REG_INLINE_C_INT32:
1725   case AMDGPU::OPERAND_REG_INLINE_C_FP32:
1726   case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
1727   case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
1728   case AMDGPU::OPERAND_REG_INLINE_C_V2FP32:
1729   case AMDGPU::OPERAND_REG_IMM_V2FP32:
1730   case AMDGPU::OPERAND_REG_INLINE_C_V2INT32:
1731   case AMDGPU::OPERAND_REG_IMM_V2INT32:
1732   case AMDGPU::OPERAND_KIMM32:
1733     return &APFloat::IEEEsingle();
1734   case AMDGPU::OPERAND_REG_IMM_INT64:
1735   case AMDGPU::OPERAND_REG_IMM_FP64:
1736   case AMDGPU::OPERAND_REG_INLINE_C_INT64:
1737   case AMDGPU::OPERAND_REG_INLINE_C_FP64:
1738   case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
1739     return &APFloat::IEEEdouble();
1740   case AMDGPU::OPERAND_REG_IMM_INT16:
1741   case AMDGPU::OPERAND_REG_IMM_FP16:
1742   case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED:
1743   case AMDGPU::OPERAND_REG_INLINE_C_INT16:
1744   case AMDGPU::OPERAND_REG_INLINE_C_FP16:
1745   case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
1746   case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
1747   case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
1748   case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
1749   case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
1750   case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
1751   case AMDGPU::OPERAND_REG_IMM_V2INT16:
1752   case AMDGPU::OPERAND_REG_IMM_V2FP16:
1753   case AMDGPU::OPERAND_KIMM16:
1754     return &APFloat::IEEEhalf();
1755   default:
1756     llvm_unreachable("unsupported fp type");
1757   }
1758 }
1759 
1760 //===----------------------------------------------------------------------===//
1761 // Operand
1762 //===----------------------------------------------------------------------===//
1763 
1764 static bool canLosslesslyConvertToFPType(APFloat &FPLiteral, MVT VT) {
1765   bool Lost;
1766 
  // Convert the literal to the FP semantics of the requested type
1768   APFloat::opStatus Status = FPLiteral.convert(*getFltSemantics(VT),
1769                                                APFloat::rmNearestTiesToEven,
1770                                                &Lost);
  // We allow precision loss but not overflow or underflow
1772   if (Status != APFloat::opOK &&
1773       Lost &&
1774       ((Status & APFloat::opOverflow)  != 0 ||
1775        (Status & APFloat::opUnderflow) != 0)) {
1776     return false;
1777   }
1778 
1779   return true;
1780 }
1781 
1782 static bool isSafeTruncation(int64_t Val, unsigned Size) {
1783   return isUIntN(Size, Val) || isIntN(Size, Val);
1784 }
1785 
1786 static bool isInlineableLiteralOp16(int64_t Val, MVT VT, bool HasInv2Pi) {
1787   if (VT.getScalarType() == MVT::i16) {
1788     // FP immediate values are broken.
1789     return isInlinableIntLiteral(Val);
1790   }
1791 
1792   // f16/v2f16 operands work correctly for all values.
1793   return AMDGPU::isInlinableLiteral16(Val, HasInv2Pi);
1794 }
1795 
1796 bool AMDGPUOperand::isInlinableImm(MVT type) const {
1797 
1798   // This is a hack to enable named inline values like
1799   // shared_base with both 32-bit and 64-bit operands.
1800   // Note that these values are defined as
1801   // 32-bit operands only.
1802   if (isInlineValue()) {
1803     return true;
1804   }
1805 
1806   if (!isImmTy(ImmTyNone)) {
1807     // Only plain immediates are inlinable (e.g. "clamp" attribute is not)
1808     return false;
1809   }
1810   // TODO: We should avoid using host float here. It would be better to
1811   // check the float bit values which is what a few other places do.
1812   // We've had bot failures before due to weird NaN support on mips hosts.
1813 
1814   APInt Literal(64, Imm.Val);
1815 
1816   if (Imm.IsFPImm) { // We got fp literal token
1817     if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
1818       return AMDGPU::isInlinableLiteral64(Imm.Val,
1819                                           AsmParser->hasInv2PiInlineImm());
1820     }
1821 
1822     APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
1823     if (!canLosslesslyConvertToFPType(FPLiteral, type))
1824       return false;
1825 
1826     if (type.getScalarSizeInBits() == 16) {
1827       return isInlineableLiteralOp16(
1828         static_cast<int16_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
1829         type, AsmParser->hasInv2PiInlineImm());
1830     }
1831 
1832     // Check if single precision literal is inlinable
1833     return AMDGPU::isInlinableLiteral32(
1834       static_cast<int32_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
1835       AsmParser->hasInv2PiInlineImm());
1836   }
1837 
1838   // We got int literal token.
1839   if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
1840     return AMDGPU::isInlinableLiteral64(Imm.Val,
1841                                         AsmParser->hasInv2PiInlineImm());
1842   }
1843 
1844   if (!isSafeTruncation(Imm.Val, type.getScalarSizeInBits())) {
1845     return false;
1846   }
1847 
1848   if (type.getScalarSizeInBits() == 16) {
1849     return isInlineableLiteralOp16(
1850       static_cast<int16_t>(Literal.getLoBits(16).getSExtValue()),
1851       type, AsmParser->hasInv2PiInlineImm());
1852   }
1853 
1854   return AMDGPU::isInlinableLiteral32(
1855     static_cast<int32_t>(Literal.getLoBits(32).getZExtValue()),
1856     AsmParser->hasInv2PiInlineImm());
1857 }
1858 
1859 bool AMDGPUOperand::isLiteralImm(MVT type) const {
1860   // Check that this immediate can be added as literal
1861   if (!isImmTy(ImmTyNone)) {
1862     return false;
1863   }
1864 
1865   if (!Imm.IsFPImm) {
1866     // We got int literal token.
1867 
1868     if (type == MVT::f64 && hasFPModifiers()) {
      // FP modifiers cannot be applied to int literals while preserving the
      // same semantics for VOP1/2/C and VOP3 because of integer truncation.
      // To avoid ambiguity, these cases are disabled.
1872       return false;
1873     }
1874 
1875     unsigned Size = type.getSizeInBits();
1876     if (Size == 64)
1877       Size = 32;
1878 
1879     // FIXME: 64-bit operands can zero extend, sign extend, or pad zeroes for FP
1880     // types.
1881     return isSafeTruncation(Imm.Val, Size);
1882   }
1883 
1884   // We got fp literal token
1885   if (type == MVT::f64) { // Expected 64-bit fp operand
    // The low 32 bits of the literal would be set to zero, but we accept
    // such literals.
1887     return true;
1888   }
1889 
1890   if (type == MVT::i64) { // Expected 64-bit int operand
1891     // We don't allow fp literals in 64-bit integer instructions. It is
1892     // unclear how we should encode them.
1893     return false;
1894   }
1895 
  // We allow fp literals with f16x2 operands assuming that the specified
  // literal goes into the lower half and the upper half is zero. We also
  // require that the literal can be losslessly converted to f16.
1899   MVT ExpectedType = (type == MVT::v2f16)? MVT::f16 :
1900                      (type == MVT::v2i16)? MVT::i16 :
1901                      (type == MVT::v2f32)? MVT::f32 : type;
1902 
1903   APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
1904   return canLosslesslyConvertToFPType(FPLiteral, ExpectedType);
1905 }
1906 
1907 bool AMDGPUOperand::isRegClass(unsigned RCID) const {
1908   return isRegKind() && AsmParser->getMRI()->getRegClass(RCID).contains(getReg());
1909 }
1910 
1911 bool AMDGPUOperand::isVRegWithInputMods() const {
1912   return isRegClass(AMDGPU::VGPR_32RegClassID) ||
1913          // GFX90A allows DPP on 64-bit operands.
1914          (isRegClass(AMDGPU::VReg_64RegClassID) &&
1915           AsmParser->getFeatureBits()[AMDGPU::Feature64BitDPP]);
1916 }
1917 
1918 bool AMDGPUOperand::isSDWAOperand(MVT type) const {
1919   if (AsmParser->isVI())
1920     return isVReg32();
1921   else if (AsmParser->isGFX9Plus())
1922     return isRegClass(AMDGPU::VS_32RegClassID) || isInlinableImm(type);
1923   else
1924     return false;
1925 }
1926 
1927 bool AMDGPUOperand::isSDWAFP16Operand() const {
1928   return isSDWAOperand(MVT::f16);
1929 }
1930 
1931 bool AMDGPUOperand::isSDWAFP32Operand() const {
1932   return isSDWAOperand(MVT::f32);
1933 }
1934 
1935 bool AMDGPUOperand::isSDWAInt16Operand() const {
1936   return isSDWAOperand(MVT::i16);
1937 }
1938 
1939 bool AMDGPUOperand::isSDWAInt32Operand() const {
1940   return isSDWAOperand(MVT::i32);
1941 }
1942 
1943 bool AMDGPUOperand::isBoolReg() const {
1944   auto FB = AsmParser->getFeatureBits();
1945   return isReg() && ((FB[AMDGPU::FeatureWavefrontSize64] && isSCSrcB64()) ||
1946                      (FB[AMDGPU::FeatureWavefrontSize32] && isSCSrcB32()));
1947 }
1948 
1949 uint64_t AMDGPUOperand::applyInputFPModifiers(uint64_t Val, unsigned Size) const
1950 {
1951   assert(isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
1952   assert(Size == 2 || Size == 4 || Size == 8);
1953 
1954   const uint64_t FpSignMask = (1ULL << (Size * 8 - 1));
1955 
1956   if (Imm.Mods.Abs) {
1957     Val &= ~FpSignMask;
1958   }
1959   if (Imm.Mods.Neg) {
1960     Val ^= FpSignMask;
1961   }
1962 
1963   return Val;
1964 }
1965 
1966 void AMDGPUOperand::addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers) const {
1967   if (AMDGPU::isSISrcOperand(AsmParser->getMII()->get(Inst.getOpcode()),
1968                              Inst.getNumOperands())) {
1969     addLiteralImmOperand(Inst, Imm.Val,
1970                          ApplyModifiers &
1971                          isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
1972   } else {
1973     assert(!isImmTy(ImmTyNone) || !hasModifiers());
1974     Inst.addOperand(MCOperand::createImm(Imm.Val));
1975     setImmKindNone();
1976   }
1977 }
1978 
1979 void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const {
1980   const auto& InstDesc = AsmParser->getMII()->get(Inst.getOpcode());
1981   auto OpNum = Inst.getNumOperands();
1982   // Check that this operand accepts literals
1983   assert(AMDGPU::isSISrcOperand(InstDesc, OpNum));
1984 
1985   if (ApplyModifiers) {
1986     assert(AMDGPU::isSISrcFPOperand(InstDesc, OpNum));
1987     const unsigned Size = Imm.IsFPImm ? sizeof(double) : getOperandSize(InstDesc, OpNum);
1988     Val = applyInputFPModifiers(Val, Size);
1989   }
1990 
1991   APInt Literal(64, Val);
1992   uint8_t OpTy = InstDesc.OpInfo[OpNum].OperandType;
1993 
1994   if (Imm.IsFPImm) { // We got fp literal token
1995     switch (OpTy) {
1996     case AMDGPU::OPERAND_REG_IMM_INT64:
1997     case AMDGPU::OPERAND_REG_IMM_FP64:
1998     case AMDGPU::OPERAND_REG_INLINE_C_INT64:
1999     case AMDGPU::OPERAND_REG_INLINE_C_FP64:
2000     case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
2001       if (AMDGPU::isInlinableLiteral64(Literal.getZExtValue(),
2002                                        AsmParser->hasInv2PiInlineImm())) {
2003         Inst.addOperand(MCOperand::createImm(Literal.getZExtValue()));
2004         setImmKindConst();
2005         return;
2006       }
2007 
2008       // Non-inlineable
2009       if (AMDGPU::isSISrcFPOperand(InstDesc, OpNum)) { // Expected 64-bit fp operand
2010         // For fp operands we check if low 32 bits are zeros
2011         if (Literal.getLoBits(32) != 0) {
2012           const_cast<AMDGPUAsmParser *>(AsmParser)->Warning(Inst.getLoc(),
2013           "Can't encode literal as exact 64-bit floating-point operand. "
2014           "Low 32-bits will be set to zero");
2015         }
2016 
2017         Inst.addOperand(MCOperand::createImm(Literal.lshr(32).getZExtValue()));
2018         setImmKindLiteral();
2019         return;
2020       }
2021 
2022       // We don't allow fp literals in 64-bit integer instructions. It is
2023       // unclear how we should encode them. This case should be checked earlier
2024       // in predicate methods (isLiteralImm())
2025       llvm_unreachable("fp literal in 64-bit integer instruction.");
2026 
2027     case AMDGPU::OPERAND_REG_IMM_INT32:
2028     case AMDGPU::OPERAND_REG_IMM_FP32:
2029     case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED:
2030     case AMDGPU::OPERAND_REG_INLINE_C_INT32:
2031     case AMDGPU::OPERAND_REG_INLINE_C_FP32:
2032     case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
2033     case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
2034     case AMDGPU::OPERAND_REG_IMM_INT16:
2035     case AMDGPU::OPERAND_REG_IMM_FP16:
2036     case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED:
2037     case AMDGPU::OPERAND_REG_INLINE_C_INT16:
2038     case AMDGPU::OPERAND_REG_INLINE_C_FP16:
2039     case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
2040     case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
2041     case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
2042     case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
2043     case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
2044     case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
2045     case AMDGPU::OPERAND_REG_IMM_V2INT16:
2046     case AMDGPU::OPERAND_REG_IMM_V2FP16:
2047     case AMDGPU::OPERAND_REG_INLINE_C_V2FP32:
2048     case AMDGPU::OPERAND_REG_IMM_V2FP32:
2049     case AMDGPU::OPERAND_REG_INLINE_C_V2INT32:
2050     case AMDGPU::OPERAND_REG_IMM_V2INT32:
2051     case AMDGPU::OPERAND_KIMM32:
2052     case AMDGPU::OPERAND_KIMM16: {
2053       bool lost;
2054       APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
      // Convert the literal to the FP type expected by this operand
2056       FPLiteral.convert(*getOpFltSemantics(OpTy),
2057                         APFloat::rmNearestTiesToEven, &lost);
      // We allow precision loss but not overflow or underflow. This should be
      // checked earlier in isLiteralImm()
2060 
2061       uint64_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue();
2062       Inst.addOperand(MCOperand::createImm(ImmVal));
2063       setImmKindLiteral();
2064       return;
2065     }
2066     default:
2067       llvm_unreachable("invalid operand size");
2068     }
2069 
2070     return;
2071   }
2072 
2073   // We got int literal token.
2074   // Only sign extend inline immediates.
2075   switch (OpTy) {
2076   case AMDGPU::OPERAND_REG_IMM_INT32:
2077   case AMDGPU::OPERAND_REG_IMM_FP32:
2078   case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED:
2079   case AMDGPU::OPERAND_REG_INLINE_C_INT32:
2080   case AMDGPU::OPERAND_REG_INLINE_C_FP32:
2081   case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
2082   case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
2083   case AMDGPU::OPERAND_REG_IMM_V2INT16:
2084   case AMDGPU::OPERAND_REG_IMM_V2FP16:
2085   case AMDGPU::OPERAND_REG_IMM_V2FP32:
2086   case AMDGPU::OPERAND_REG_INLINE_C_V2FP32:
2087   case AMDGPU::OPERAND_REG_IMM_V2INT32:
2088   case AMDGPU::OPERAND_REG_INLINE_C_V2INT32:
2089     if (isSafeTruncation(Val, 32) &&
2090         AMDGPU::isInlinableLiteral32(static_cast<int32_t>(Val),
2091                                      AsmParser->hasInv2PiInlineImm())) {
2092       Inst.addOperand(MCOperand::createImm(Val));
2093       setImmKindConst();
2094       return;
2095     }
2096 
2097     Inst.addOperand(MCOperand::createImm(Val & 0xffffffff));
2098     setImmKindLiteral();
2099     return;
2100 
2101   case AMDGPU::OPERAND_REG_IMM_INT64:
2102   case AMDGPU::OPERAND_REG_IMM_FP64:
2103   case AMDGPU::OPERAND_REG_INLINE_C_INT64:
2104   case AMDGPU::OPERAND_REG_INLINE_C_FP64:
2105   case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
2106     if (AMDGPU::isInlinableLiteral64(Val, AsmParser->hasInv2PiInlineImm())) {
2107       Inst.addOperand(MCOperand::createImm(Val));
2108       setImmKindConst();
2109       return;
2110     }
2111 
2112     Inst.addOperand(MCOperand::createImm(Lo_32(Val)));
2113     setImmKindLiteral();
2114     return;
2115 
2116   case AMDGPU::OPERAND_REG_IMM_INT16:
2117   case AMDGPU::OPERAND_REG_IMM_FP16:
2118   case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED:
2119   case AMDGPU::OPERAND_REG_INLINE_C_INT16:
2120   case AMDGPU::OPERAND_REG_INLINE_C_FP16:
2121   case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
2122   case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
2123     if (isSafeTruncation(Val, 16) &&
2124         AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val),
2125                                      AsmParser->hasInv2PiInlineImm())) {
2126       Inst.addOperand(MCOperand::createImm(Val));
2127       setImmKindConst();
2128       return;
2129     }
2130 
2131     Inst.addOperand(MCOperand::createImm(Val & 0xffff));
2132     setImmKindLiteral();
2133     return;
2134 
2135   case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
2136   case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
2137   case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
2138   case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: {
2139     assert(isSafeTruncation(Val, 16));
2140     assert(AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val),
2141                                         AsmParser->hasInv2PiInlineImm()));
2142 
2143     Inst.addOperand(MCOperand::createImm(Val));
2144     return;
2145   }
2146   case AMDGPU::OPERAND_KIMM32:
2147     Inst.addOperand(MCOperand::createImm(Literal.getLoBits(32).getZExtValue()));
2148     setImmKindNone();
2149     return;
2150   case AMDGPU::OPERAND_KIMM16:
2151     Inst.addOperand(MCOperand::createImm(Literal.getLoBits(16).getZExtValue()));
2152     setImmKindNone();
2153     return;
2154   default:
2155     llvm_unreachable("invalid operand size");
2156   }
2157 }
2158 
2159 template <unsigned Bitwidth>
2160 void AMDGPUOperand::addKImmFPOperands(MCInst &Inst, unsigned N) const {
2161   APInt Literal(64, Imm.Val);
2162   setImmKindNone();
2163 
2164   if (!Imm.IsFPImm) {
2165     // We got int literal token.
2166     Inst.addOperand(MCOperand::createImm(Literal.getLoBits(Bitwidth).getZExtValue()));
2167     return;
2168   }
2169 
2170   bool Lost;
2171   APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
2172   FPLiteral.convert(*getFltSemantics(Bitwidth / 8),
2173                     APFloat::rmNearestTiesToEven, &Lost);
2174   Inst.addOperand(MCOperand::createImm(FPLiteral.bitcastToAPInt().getZExtValue()));
2175 }
2176 
2177 void AMDGPUOperand::addRegOperands(MCInst &Inst, unsigned N) const {
2178   Inst.addOperand(MCOperand::createReg(AMDGPU::getMCReg(getReg(), AsmParser->getSTI())));
2179 }
2180 
2181 static bool isInlineValue(unsigned Reg) {
2182   switch (Reg) {
2183   case AMDGPU::SRC_SHARED_BASE:
2184   case AMDGPU::SRC_SHARED_LIMIT:
2185   case AMDGPU::SRC_PRIVATE_BASE:
2186   case AMDGPU::SRC_PRIVATE_LIMIT:
2187   case AMDGPU::SRC_POPS_EXITING_WAVE_ID:
2188     return true;
2189   case AMDGPU::SRC_VCCZ:
2190   case AMDGPU::SRC_EXECZ:
2191   case AMDGPU::SRC_SCC:
2192     return true;
2193   case AMDGPU::SGPR_NULL:
2194     return true;
2195   default:
2196     return false;
2197   }
2198 }
2199 
2200 bool AMDGPUOperand::isInlineValue() const {
2201   return isRegKind() && ::isInlineValue(getReg());
2202 }
2203 
2204 //===----------------------------------------------------------------------===//
2205 // AsmParser
2206 //===----------------------------------------------------------------------===//
2207 
2208 static int getRegClass(RegisterKind Is, unsigned RegWidth) {
2209   if (Is == IS_VGPR) {
2210     switch (RegWidth) {
2211       default: return -1;
2212       case 1: return AMDGPU::VGPR_32RegClassID;
2213       case 2: return AMDGPU::VReg_64RegClassID;
2214       case 3: return AMDGPU::VReg_96RegClassID;
2215       case 4: return AMDGPU::VReg_128RegClassID;
2216       case 5: return AMDGPU::VReg_160RegClassID;
2217       case 6: return AMDGPU::VReg_192RegClassID;
2218       case 7: return AMDGPU::VReg_224RegClassID;
2219       case 8: return AMDGPU::VReg_256RegClassID;
2220       case 16: return AMDGPU::VReg_512RegClassID;
2221       case 32: return AMDGPU::VReg_1024RegClassID;
2222     }
2223   } else if (Is == IS_TTMP) {
2224     switch (RegWidth) {
2225       default: return -1;
2226       case 1: return AMDGPU::TTMP_32RegClassID;
2227       case 2: return AMDGPU::TTMP_64RegClassID;
2228       case 4: return AMDGPU::TTMP_128RegClassID;
2229       case 8: return AMDGPU::TTMP_256RegClassID;
2230       case 16: return AMDGPU::TTMP_512RegClassID;
2231     }
2232   } else if (Is == IS_SGPR) {
2233     switch (RegWidth) {
2234       default: return -1;
2235       case 1: return AMDGPU::SGPR_32RegClassID;
2236       case 2: return AMDGPU::SGPR_64RegClassID;
2237       case 3: return AMDGPU::SGPR_96RegClassID;
2238       case 4: return AMDGPU::SGPR_128RegClassID;
2239       case 5: return AMDGPU::SGPR_160RegClassID;
2240       case 6: return AMDGPU::SGPR_192RegClassID;
2241       case 7: return AMDGPU::SGPR_224RegClassID;
2242       case 8: return AMDGPU::SGPR_256RegClassID;
2243       case 16: return AMDGPU::SGPR_512RegClassID;
2244     }
2245   } else if (Is == IS_AGPR) {
2246     switch (RegWidth) {
2247       default: return -1;
2248       case 1: return AMDGPU::AGPR_32RegClassID;
2249       case 2: return AMDGPU::AReg_64RegClassID;
2250       case 3: return AMDGPU::AReg_96RegClassID;
2251       case 4: return AMDGPU::AReg_128RegClassID;
2252       case 5: return AMDGPU::AReg_160RegClassID;
2253       case 6: return AMDGPU::AReg_192RegClassID;
2254       case 7: return AMDGPU::AReg_224RegClassID;
2255       case 8: return AMDGPU::AReg_256RegClassID;
2256       case 16: return AMDGPU::AReg_512RegClassID;
2257       case 32: return AMDGPU::AReg_1024RegClassID;
2258     }
2259   }
2260   return -1;
2261 }
2262 
2263 static unsigned getSpecialRegForName(StringRef RegName) {
2264   return StringSwitch<unsigned>(RegName)
2265     .Case("exec", AMDGPU::EXEC)
2266     .Case("vcc", AMDGPU::VCC)
2267     .Case("flat_scratch", AMDGPU::FLAT_SCR)
2268     .Case("xnack_mask", AMDGPU::XNACK_MASK)
2269     .Case("shared_base", AMDGPU::SRC_SHARED_BASE)
2270     .Case("src_shared_base", AMDGPU::SRC_SHARED_BASE)
2271     .Case("shared_limit", AMDGPU::SRC_SHARED_LIMIT)
2272     .Case("src_shared_limit", AMDGPU::SRC_SHARED_LIMIT)
2273     .Case("private_base", AMDGPU::SRC_PRIVATE_BASE)
2274     .Case("src_private_base", AMDGPU::SRC_PRIVATE_BASE)
2275     .Case("private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
2276     .Case("src_private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
2277     .Case("pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
2278     .Case("src_pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
2279     .Case("lds_direct", AMDGPU::LDS_DIRECT)
2280     .Case("src_lds_direct", AMDGPU::LDS_DIRECT)
2281     .Case("m0", AMDGPU::M0)
2282     .Case("vccz", AMDGPU::SRC_VCCZ)
2283     .Case("src_vccz", AMDGPU::SRC_VCCZ)
2284     .Case("execz", AMDGPU::SRC_EXECZ)
2285     .Case("src_execz", AMDGPU::SRC_EXECZ)
2286     .Case("scc", AMDGPU::SRC_SCC)
2287     .Case("src_scc", AMDGPU::SRC_SCC)
2288     .Case("tba", AMDGPU::TBA)
2289     .Case("tma", AMDGPU::TMA)
2290     .Case("flat_scratch_lo", AMDGPU::FLAT_SCR_LO)
2291     .Case("flat_scratch_hi", AMDGPU::FLAT_SCR_HI)
2292     .Case("xnack_mask_lo", AMDGPU::XNACK_MASK_LO)
2293     .Case("xnack_mask_hi", AMDGPU::XNACK_MASK_HI)
2294     .Case("vcc_lo", AMDGPU::VCC_LO)
2295     .Case("vcc_hi", AMDGPU::VCC_HI)
2296     .Case("exec_lo", AMDGPU::EXEC_LO)
2297     .Case("exec_hi", AMDGPU::EXEC_HI)
2298     .Case("tma_lo", AMDGPU::TMA_LO)
2299     .Case("tma_hi", AMDGPU::TMA_HI)
2300     .Case("tba_lo", AMDGPU::TBA_LO)
2301     .Case("tba_hi", AMDGPU::TBA_HI)
2302     .Case("pc", AMDGPU::PC_REG)
2303     .Case("null", AMDGPU::SGPR_NULL)
2304     .Default(AMDGPU::NoRegister);
2305 }
2306 
2307 bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
2308                                     SMLoc &EndLoc, bool RestoreOnFailure) {
2309   auto R = parseRegister();
2310   if (!R) return true;
2311   assert(R->isReg());
2312   RegNo = R->getReg();
2313   StartLoc = R->getStartLoc();
2314   EndLoc = R->getEndLoc();
2315   return false;
2316 }
2317 
2318 bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
2319                                     SMLoc &EndLoc) {
2320   return ParseRegister(RegNo, StartLoc, EndLoc, /*RestoreOnFailure=*/false);
2321 }
2322 
2323 OperandMatchResultTy AMDGPUAsmParser::tryParseRegister(unsigned &RegNo,
2324                                                        SMLoc &StartLoc,
2325                                                        SMLoc &EndLoc) {
2326   bool Result =
2327       ParseRegister(RegNo, StartLoc, EndLoc, /*RestoreOnFailure=*/true);
2328   bool PendingErrors = getParser().hasPendingError();
2329   getParser().clearPendingErrors();
2330   if (PendingErrors)
2331     return MatchOperand_ParseFail;
2332   if (Result)
2333     return MatchOperand_NoMatch;
2334   return MatchOperand_Success;
2335 }
2336 
2337 bool AMDGPUAsmParser::AddNextRegisterToList(unsigned &Reg, unsigned &RegWidth,
2338                                             RegisterKind RegKind, unsigned Reg1,
2339                                             SMLoc Loc) {
2340   switch (RegKind) {
2341   case IS_SPECIAL:
2342     if (Reg == AMDGPU::EXEC_LO && Reg1 == AMDGPU::EXEC_HI) {
2343       Reg = AMDGPU::EXEC;
2344       RegWidth = 2;
2345       return true;
2346     }
2347     if (Reg == AMDGPU::FLAT_SCR_LO && Reg1 == AMDGPU::FLAT_SCR_HI) {
2348       Reg = AMDGPU::FLAT_SCR;
2349       RegWidth = 2;
2350       return true;
2351     }
2352     if (Reg == AMDGPU::XNACK_MASK_LO && Reg1 == AMDGPU::XNACK_MASK_HI) {
2353       Reg = AMDGPU::XNACK_MASK;
2354       RegWidth = 2;
2355       return true;
2356     }
2357     if (Reg == AMDGPU::VCC_LO && Reg1 == AMDGPU::VCC_HI) {
2358       Reg = AMDGPU::VCC;
2359       RegWidth = 2;
2360       return true;
2361     }
2362     if (Reg == AMDGPU::TBA_LO && Reg1 == AMDGPU::TBA_HI) {
2363       Reg = AMDGPU::TBA;
2364       RegWidth = 2;
2365       return true;
2366     }
2367     if (Reg == AMDGPU::TMA_LO && Reg1 == AMDGPU::TMA_HI) {
2368       Reg = AMDGPU::TMA;
2369       RegWidth = 2;
2370       return true;
2371     }
2372     Error(Loc, "register does not fit in the list");
2373     return false;
2374   case IS_VGPR:
2375   case IS_SGPR:
2376   case IS_AGPR:
2377   case IS_TTMP:
2378     if (Reg1 != Reg + RegWidth) {
2379       Error(Loc, "registers in a list must have consecutive indices");
2380       return false;
2381     }
2382     RegWidth++;
2383     return true;
2384   default:
2385     llvm_unreachable("unexpected register kind");
2386   }
2387 }
2388 
2389 struct RegInfo {
2390   StringLiteral Name;
2391   RegisterKind Kind;
2392 };
2393 
2394 static constexpr RegInfo RegularRegisters[] = {
2395   {{"v"},    IS_VGPR},
2396   {{"s"},    IS_SGPR},
2397   {{"ttmp"}, IS_TTMP},
2398   {{"acc"},  IS_AGPR},
2399   {{"a"},    IS_AGPR},
2400 };
2401 
2402 static bool isRegularReg(RegisterKind Kind) {
2403   return Kind == IS_VGPR ||
2404          Kind == IS_SGPR ||
2405          Kind == IS_TTMP ||
2406          Kind == IS_AGPR;
2407 }
2408 
2409 static const RegInfo* getRegularRegInfo(StringRef Str) {
2410   for (const RegInfo &Reg : RegularRegisters)
2411     if (Str.startswith(Reg.Name))
2412       return &Reg;
2413   return nullptr;
2414 }
2415 
2416 static bool getRegNum(StringRef Str, unsigned& Num) {
2417   return !Str.getAsInteger(10, Num);
2418 }
2419 
2420 bool
2421 AMDGPUAsmParser::isRegister(const AsmToken &Token,
2422                             const AsmToken &NextToken) const {
2423 
2424   // A list of consecutive registers: [s0,s1,s2,s3]
2425   if (Token.is(AsmToken::LBrac))
2426     return true;
2427 
2428   if (!Token.is(AsmToken::Identifier))
2429     return false;
2430 
2431   // A single register like s0 or a range of registers like s[0:1]
2432 
2433   StringRef Str = Token.getString();
2434   const RegInfo *Reg = getRegularRegInfo(Str);
2435   if (Reg) {
2436     StringRef RegName = Reg->Name;
2437     StringRef RegSuffix = Str.substr(RegName.size());
2438     if (!RegSuffix.empty()) {
2439       unsigned Num;
2440       // A single register with an index: rXX
2441       if (getRegNum(RegSuffix, Num))
2442         return true;
2443     } else {
2444       // A range of registers: r[XX:YY].
2445       if (NextToken.is(AsmToken::LBrac))
2446         return true;
2447     }
2448   }
2449 
2450   return getSpecialRegForName(Str) != AMDGPU::NoRegister;
2451 }
2452 
2453 bool
2454 AMDGPUAsmParser::isRegister()
2455 {
2456   return isRegister(getToken(), peekToken());
2457 }
2458 
2459 unsigned
2460 AMDGPUAsmParser::getRegularReg(RegisterKind RegKind,
2461                                unsigned RegNum,
2462                                unsigned RegWidth,
2463                                SMLoc Loc) {
2464 
2465   assert(isRegularReg(RegKind));
2466 
2467   unsigned AlignSize = 1;
2468   if (RegKind == IS_SGPR || RegKind == IS_TTMP) {
2469     // SGPR and TTMP registers must be aligned.
2470     // Max required alignment is 4 dwords.
2471     AlignSize = std::min(RegWidth, 4u);
2472   }
2473 
2474   if (RegNum % AlignSize != 0) {
2475     Error(Loc, "invalid register alignment");
2476     return AMDGPU::NoRegister;
2477   }
2478 
2479   unsigned RegIdx = RegNum / AlignSize;
2480   int RCID = getRegClass(RegKind, RegWidth);
2481   if (RCID == -1) {
2482     Error(Loc, "invalid or unsupported register size");
2483     return AMDGPU::NoRegister;
2484   }
2485 
2486   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
2487   const MCRegisterClass RC = TRI->getRegClass(RCID);
2488   if (RegIdx >= RC.getNumRegs()) {
2489     Error(Loc, "register index is out of range");
2490     return AMDGPU::NoRegister;
2491   }
2492 
2493   return RC.getRegister(RegIdx);
2494 }
2495 
2496 bool
2497 AMDGPUAsmParser::ParseRegRange(unsigned& Num, unsigned& Width) {
2498   int64_t RegLo, RegHi;
2499   if (!skipToken(AsmToken::LBrac, "missing register index"))
2500     return false;
2501 
2502   SMLoc FirstIdxLoc = getLoc();
2503   SMLoc SecondIdxLoc;
2504 
2505   if (!parseExpr(RegLo))
2506     return false;
2507 
2508   if (trySkipToken(AsmToken::Colon)) {
2509     SecondIdxLoc = getLoc();
2510     if (!parseExpr(RegHi))
2511       return false;
2512   } else {
2513     RegHi = RegLo;
2514   }
2515 
2516   if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
2517     return false;
2518 
2519   if (!isUInt<32>(RegLo)) {
2520     Error(FirstIdxLoc, "invalid register index");
2521     return false;
2522   }
2523 
2524   if (!isUInt<32>(RegHi)) {
2525     Error(SecondIdxLoc, "invalid register index");
2526     return false;
2527   }
2528 
2529   if (RegLo > RegHi) {
2530     Error(FirstIdxLoc, "first register index should not exceed second index");
2531     return false;
2532   }
2533 
2534   Num = static_cast<unsigned>(RegLo);
2535   Width = (RegHi - RegLo) + 1;
2536   return true;
2537 }
2538 
2539 unsigned AMDGPUAsmParser::ParseSpecialReg(RegisterKind &RegKind,
2540                                           unsigned &RegNum, unsigned &RegWidth,
2541                                           SmallVectorImpl<AsmToken> &Tokens) {
2542   assert(isToken(AsmToken::Identifier));
2543   unsigned Reg = getSpecialRegForName(getTokenStr());
2544   if (Reg) {
2545     RegNum = 0;
2546     RegWidth = 1;
2547     RegKind = IS_SPECIAL;
2548     Tokens.push_back(getToken());
2549     lex(); // skip register name
2550   }
2551   return Reg;
2552 }
2553 
2554 unsigned AMDGPUAsmParser::ParseRegularReg(RegisterKind &RegKind,
2555                                           unsigned &RegNum, unsigned &RegWidth,
2556                                           SmallVectorImpl<AsmToken> &Tokens) {
2557   assert(isToken(AsmToken::Identifier));
2558   StringRef RegName = getTokenStr();
2559   auto Loc = getLoc();
2560 
2561   const RegInfo *RI = getRegularRegInfo(RegName);
2562   if (!RI) {
2563     Error(Loc, "invalid register name");
2564     return AMDGPU::NoRegister;
2565   }
2566 
2567   Tokens.push_back(getToken());
2568   lex(); // skip register name
2569 
2570   RegKind = RI->Kind;
2571   StringRef RegSuffix = RegName.substr(RI->Name.size());
2572   if (!RegSuffix.empty()) {
2573     // Single 32-bit register: vXX.
2574     if (!getRegNum(RegSuffix, RegNum)) {
2575       Error(Loc, "invalid register index");
2576       return AMDGPU::NoRegister;
2577     }
2578     RegWidth = 1;
2579   } else {
2580     // Range of registers: v[XX:YY]. ":YY" is optional.
2581     if (!ParseRegRange(RegNum, RegWidth))
2582       return AMDGPU::NoRegister;
2583   }
2584 
2585   return getRegularReg(RegKind, RegNum, RegWidth, Loc);
2586 }
2587 
2588 unsigned AMDGPUAsmParser::ParseRegList(RegisterKind &RegKind, unsigned &RegNum,
2589                                        unsigned &RegWidth,
2590                                        SmallVectorImpl<AsmToken> &Tokens) {
2591   unsigned Reg = AMDGPU::NoRegister;
2592   auto ListLoc = getLoc();
2593 
2594   if (!skipToken(AsmToken::LBrac,
2595                  "expected a register or a list of registers")) {
2596     return AMDGPU::NoRegister;
2597   }
2598 
2599   // List of consecutive registers, e.g.: [s0,s1,s2,s3]
2600 
2601   auto Loc = getLoc();
2602   if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth))
2603     return AMDGPU::NoRegister;
2604   if (RegWidth != 1) {
2605     Error(Loc, "expected a single 32-bit register");
2606     return AMDGPU::NoRegister;
2607   }
2608 
2609   for (; trySkipToken(AsmToken::Comma); ) {
2610     RegisterKind NextRegKind;
2611     unsigned NextReg, NextRegNum, NextRegWidth;
2612     Loc = getLoc();
2613 
2614     if (!ParseAMDGPURegister(NextRegKind, NextReg,
2615                              NextRegNum, NextRegWidth,
2616                              Tokens)) {
2617       return AMDGPU::NoRegister;
2618     }
2619     if (NextRegWidth != 1) {
2620       Error(Loc, "expected a single 32-bit register");
2621       return AMDGPU::NoRegister;
2622     }
2623     if (NextRegKind != RegKind) {
2624       Error(Loc, "registers in a list must be of the same kind");
2625       return AMDGPU::NoRegister;
2626     }
2627     if (!AddNextRegisterToList(Reg, RegWidth, RegKind, NextReg, Loc))
2628       return AMDGPU::NoRegister;
2629   }
2630 
2631   if (!skipToken(AsmToken::RBrac,
2632                  "expected a comma or a closing square bracket")) {
2633     return AMDGPU::NoRegister;
2634   }
2635 
2636   if (isRegularReg(RegKind))
2637     Reg = getRegularReg(RegKind, RegNum, RegWidth, ListLoc);
2638 
2639   return Reg;
2640 }
2641 
2642 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
2643                                           unsigned &RegNum, unsigned &RegWidth,
2644                                           SmallVectorImpl<AsmToken> &Tokens) {
2645   auto Loc = getLoc();
2646   Reg = AMDGPU::NoRegister;
2647 
2648   if (isToken(AsmToken::Identifier)) {
2649     Reg = ParseSpecialReg(RegKind, RegNum, RegWidth, Tokens);
2650     if (Reg == AMDGPU::NoRegister)
2651       Reg = ParseRegularReg(RegKind, RegNum, RegWidth, Tokens);
2652   } else {
2653     Reg = ParseRegList(RegKind, RegNum, RegWidth, Tokens);
2654   }
2655 
2656   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
2657   if (Reg == AMDGPU::NoRegister) {
2658     assert(Parser.hasPendingError());
2659     return false;
2660   }
2661 
2662   if (!subtargetHasRegister(*TRI, Reg)) {
2663     if (Reg == AMDGPU::SGPR_NULL) {
2664       Error(Loc, "'null' operand is not supported on this GPU");
2665     } else {
2666       Error(Loc, "register not available on this GPU");
2667     }
2668     return false;
2669   }
2670 
2671   return true;
2672 }
2673 
2674 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
2675                                           unsigned &RegNum, unsigned &RegWidth,
2676                                           bool RestoreOnFailure /*=false*/) {
2677   Reg = AMDGPU::NoRegister;
2678 
2679   SmallVector<AsmToken, 1> Tokens;
2680   if (ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, Tokens)) {
2681     if (RestoreOnFailure) {
2682       while (!Tokens.empty()) {
2683         getLexer().UnLex(Tokens.pop_back_val());
2684       }
2685     }
2686     return true;
2687   }
2688   return false;
2689 }
2690 
2691 Optional<StringRef>
2692 AMDGPUAsmParser::getGprCountSymbolName(RegisterKind RegKind) {
2693   switch (RegKind) {
2694   case IS_VGPR:
2695     return StringRef(".amdgcn.next_free_vgpr");
2696   case IS_SGPR:
2697     return StringRef(".amdgcn.next_free_sgpr");
2698   default:
2699     return None;
2700   }
2701 }
2702 
2703 void AMDGPUAsmParser::initializeGprCountSymbol(RegisterKind RegKind) {
2704   auto SymbolName = getGprCountSymbolName(RegKind);
2705   assert(SymbolName && "initializing invalid register kind");
2706   MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);
2707   Sym->setVariableValue(MCConstantExpr::create(0, getContext()));
2708 }
2709 
2710 bool AMDGPUAsmParser::updateGprCountSymbols(RegisterKind RegKind,
2711                                             unsigned DwordRegIndex,
2712                                             unsigned RegWidth) {
2713   // Symbols are only defined for GCN targets
2714   if (AMDGPU::getIsaVersion(getSTI().getCPU()).Major < 6)
2715     return true;
2716 
2717   auto SymbolName = getGprCountSymbolName(RegKind);
2718   if (!SymbolName)
2719     return true;
2720   MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);
2721 
2722   int64_t NewMax = DwordRegIndex + RegWidth - 1;
2723   int64_t OldCount;
2724 
2725   if (!Sym->isVariable())
2726     return !Error(getLoc(),
2727                   ".amdgcn.next_free_{v,s}gpr symbols must be variable");
2728   if (!Sym->getVariableValue(false)->evaluateAsAbsolute(OldCount))
2729     return !Error(
2730         getLoc(),
2731         ".amdgcn.next_free_{v,s}gpr symbols must be absolute expressions");
2732 
2733   if (OldCount <= NewMax)
2734     Sym->setVariableValue(MCConstantExpr::create(NewMax + 1, getContext()));
2735 
2736   return true;
2737 }
2738 
2739 std::unique_ptr<AMDGPUOperand>
2740 AMDGPUAsmParser::parseRegister(bool RestoreOnFailure) {
2741   const auto &Tok = getToken();
2742   SMLoc StartLoc = Tok.getLoc();
2743   SMLoc EndLoc = Tok.getEndLoc();
2744   RegisterKind RegKind;
2745   unsigned Reg, RegNum, RegWidth;
2746 
2747   if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth)) {
2748     return nullptr;
2749   }
2750   if (isHsaAbiVersion3Or4(&getSTI())) {
2751     if (!updateGprCountSymbols(RegKind, RegNum, RegWidth))
2752       return nullptr;
2753   } else
2754     KernelScope.usesRegister(RegKind, RegNum, RegWidth);
2755   return AMDGPUOperand::CreateReg(this, Reg, StartLoc, EndLoc);
2756 }
2757 
2758 OperandMatchResultTy
2759 AMDGPUAsmParser::parseImm(OperandVector &Operands, bool HasSP3AbsModifier) {
2760   // TODO: add syntactic sugar for 1/(2*PI)
2761 
2762   assert(!isRegister());
2763   assert(!isModifier());
2764 
2765   const auto& Tok = getToken();
2766   const auto& NextTok = peekToken();
2767   bool IsReal = Tok.is(AsmToken::Real);
2768   SMLoc S = getLoc();
2769   bool Negate = false;
2770 
2771   if (!IsReal && Tok.is(AsmToken::Minus) && NextTok.is(AsmToken::Real)) {
2772     lex();
2773     IsReal = true;
2774     Negate = true;
2775   }
2776 
2777   if (IsReal) {
    // Floating-point expressions are not supported.
    // Only floating-point literals with an optional
    // sign are allowed.
2781 
2782     StringRef Num = getTokenStr();
2783     lex();
2784 
2785     APFloat RealVal(APFloat::IEEEdouble());
2786     auto roundMode = APFloat::rmNearestTiesToEven;
2787     if (errorToBool(RealVal.convertFromString(Num, roundMode).takeError())) {
2788       return MatchOperand_ParseFail;
2789     }
2790     if (Negate)
2791       RealVal.changeSign();
2792 
2793     Operands.push_back(
2794       AMDGPUOperand::CreateImm(this, RealVal.bitcastToAPInt().getZExtValue(), S,
2795                                AMDGPUOperand::ImmTyNone, true));
2796 
2797     return MatchOperand_Success;
2798 
2799   } else {
2800     int64_t IntVal;
2801     const MCExpr *Expr;
2802     SMLoc S = getLoc();
2803 
2804     if (HasSP3AbsModifier) {
2805       // This is a workaround for handling expressions
2806       // as arguments of SP3 'abs' modifier, for example:
2807       //     |1.0|
2808       //     |-1|
2809       //     |1+x|
2810       // This syntax is not compatible with syntax of standard
2811       // MC expressions (due to the trailing '|').
2812       SMLoc EndLoc;
2813       if (getParser().parsePrimaryExpr(Expr, EndLoc, nullptr))
2814         return MatchOperand_ParseFail;
2815     } else {
2816       if (Parser.parseExpression(Expr))
2817         return MatchOperand_ParseFail;
2818     }
2819 
2820     if (Expr->evaluateAsAbsolute(IntVal)) {
2821       Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S));
2822     } else {
2823       Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S));
2824     }
2825 
2826     return MatchOperand_Success;
2827   }
2828 
2829   return MatchOperand_NoMatch;
2830 }
2831 
2832 OperandMatchResultTy
2833 AMDGPUAsmParser::parseReg(OperandVector &Operands) {
2834   if (!isRegister())
2835     return MatchOperand_NoMatch;
2836 
2837   if (auto R = parseRegister()) {
2838     assert(R->isReg());
2839     Operands.push_back(std::move(R));
2840     return MatchOperand_Success;
2841   }
2842   return MatchOperand_ParseFail;
2843 }
2844 
2845 OperandMatchResultTy
2846 AMDGPUAsmParser::parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod) {
2847   auto res = parseReg(Operands);
2848   if (res != MatchOperand_NoMatch) {
2849     return res;
2850   } else if (isModifier()) {
2851     return MatchOperand_NoMatch;
2852   } else {
2853     return parseImm(Operands, HasSP3AbsMod);
2854   }
2855 }
2856 
2857 bool
2858 AMDGPUAsmParser::isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
2859   if (Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::LParen)) {
2860     const auto &str = Token.getString();
2861     return str == "abs" || str == "neg" || str == "sext";
2862   }
2863   return false;
2864 }
2865 
2866 bool
2867 AMDGPUAsmParser::isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const {
2868   return Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::Colon);
2869 }
2870 
2871 bool
2872 AMDGPUAsmParser::isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
2873   return isNamedOperandModifier(Token, NextToken) || Token.is(AsmToken::Pipe);
2874 }
2875 
2876 bool
2877 AMDGPUAsmParser::isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
2878   return isRegister(Token, NextToken) || isOperandModifier(Token, NextToken);
2879 }
2880 
// Check if this is an operand modifier or an opcode modifier
// which may look like an expression but is not. We should
2883 // avoid parsing these modifiers as expressions. Currently
2884 // recognized sequences are:
2885 //   |...|
2886 //   abs(...)
2887 //   neg(...)
2888 //   sext(...)
2889 //   -reg
2890 //   -|...|
2891 //   -abs(...)
2892 //   name:...
2893 // Note that simple opcode modifiers like 'gds' may be parsed as
2894 // expressions; this is a special case. See getExpressionAsToken.
2895 //
2896 bool
2897 AMDGPUAsmParser::isModifier() {
2898 
2899   AsmToken Tok = getToken();
2900   AsmToken NextToken[2];
2901   peekTokens(NextToken);
2902 
2903   return isOperandModifier(Tok, NextToken[0]) ||
2904          (Tok.is(AsmToken::Minus) && isRegOrOperandModifier(NextToken[0], NextToken[1])) ||
2905          isOpcodeModifierWithVal(Tok, NextToken[0]);
2906 }
2907 
2908 // Check if the current token is an SP3 'neg' modifier.
// Currently this modifier is allowed in the following contexts:
2910 //
2911 // 1. Before a register, e.g. "-v0", "-v[...]" or "-[v0,v1]".
2912 // 2. Before an 'abs' modifier: -abs(...)
2913 // 3. Before an SP3 'abs' modifier: -|...|
2914 //
2915 // In all other cases "-" is handled as a part
2916 // of an expression that follows the sign.
2917 //
// Note: When "-" is followed by an integer literal,
// this is interpreted as integer negation rather
// than a floating-point NEG modifier applied to the literal.
// Besides being counter-intuitive, such use of a floating-point
// NEG modifier would have resulted in different meanings
// of integer literals used with VOP1/2/C and VOP3,
// for example:
//    v_exp_f32_e32 v5, -1 // VOP1: src0 = 0xFFFFFFFF
//    v_exp_f32_e64 v5, -1 // VOP3: src0 = 0x80000001
// Negative fp literals with a preceding "-" are
// handled likewise for uniformity.
2929 //
2930 bool
2931 AMDGPUAsmParser::parseSP3NegModifier() {
2932 
2933   AsmToken NextToken[2];
2934   peekTokens(NextToken);
2935 
2936   if (isToken(AsmToken::Minus) &&
2937       (isRegister(NextToken[0], NextToken[1]) ||
2938        NextToken[0].is(AsmToken::Pipe) ||
2939        isId(NextToken[0], "abs"))) {
2940     lex();
2941     return true;
2942   }
2943 
2944   return false;
2945 }
2946 
2947 OperandMatchResultTy
2948 AMDGPUAsmParser::parseRegOrImmWithFPInputMods(OperandVector &Operands,
2949                                               bool AllowImm) {
2950   bool Neg, SP3Neg;
2951   bool Abs, SP3Abs;
2952   SMLoc Loc;
2953 
2954   // Disable ambiguous constructs like '--1' etc. Should use neg(-1) instead.
2955   if (isToken(AsmToken::Minus) && peekToken().is(AsmToken::Minus)) {
2956     Error(getLoc(), "invalid syntax, expected 'neg' modifier");
2957     return MatchOperand_ParseFail;
2958   }
2959 
2960   SP3Neg = parseSP3NegModifier();
2961 
2962   Loc = getLoc();
2963   Neg = trySkipId("neg");
2964   if (Neg && SP3Neg) {
2965     Error(Loc, "expected register or immediate");
2966     return MatchOperand_ParseFail;
2967   }
2968   if (Neg && !skipToken(AsmToken::LParen, "expected left paren after neg"))
2969     return MatchOperand_ParseFail;
2970 
2971   Abs = trySkipId("abs");
2972   if (Abs && !skipToken(AsmToken::LParen, "expected left paren after abs"))
2973     return MatchOperand_ParseFail;
2974 
2975   Loc = getLoc();
2976   SP3Abs = trySkipToken(AsmToken::Pipe);
2977   if (Abs && SP3Abs) {
2978     Error(Loc, "expected register or immediate");
2979     return MatchOperand_ParseFail;
2980   }
2981 
2982   OperandMatchResultTy Res;
2983   if (AllowImm) {
2984     Res = parseRegOrImm(Operands, SP3Abs);
2985   } else {
2986     Res = parseReg(Operands);
2987   }
2988   if (Res != MatchOperand_Success) {
2989     return (SP3Neg || Neg || SP3Abs || Abs)? MatchOperand_ParseFail : Res;
2990   }
2991 
2992   if (SP3Abs && !skipToken(AsmToken::Pipe, "expected vertical bar"))
2993     return MatchOperand_ParseFail;
2994   if (Abs && !skipToken(AsmToken::RParen, "expected closing parentheses"))
2995     return MatchOperand_ParseFail;
2996   if (Neg && !skipToken(AsmToken::RParen, "expected closing parentheses"))
2997     return MatchOperand_ParseFail;
2998 
2999   AMDGPUOperand::Modifiers Mods;
3000   Mods.Abs = Abs || SP3Abs;
3001   Mods.Neg = Neg || SP3Neg;
3002 
3003   if (Mods.hasFPModifiers()) {
3004     AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
3005     if (Op.isExpr()) {
3006       Error(Op.getStartLoc(), "expected an absolute expression");
3007       return MatchOperand_ParseFail;
3008     }
3009     Op.setModifiers(Mods);
3010   }
3011   return MatchOperand_Success;
3012 }
3013 
3014 OperandMatchResultTy
3015 AMDGPUAsmParser::parseRegOrImmWithIntInputMods(OperandVector &Operands,
3016                                                bool AllowImm) {
3017   bool Sext = trySkipId("sext");
3018   if (Sext && !skipToken(AsmToken::LParen, "expected left paren after sext"))
3019     return MatchOperand_ParseFail;
3020 
3021   OperandMatchResultTy Res;
3022   if (AllowImm) {
3023     Res = parseRegOrImm(Operands);
3024   } else {
3025     Res = parseReg(Operands);
3026   }
3027   if (Res != MatchOperand_Success) {
3028     return Sext? MatchOperand_ParseFail : Res;
3029   }
3030 
3031   if (Sext && !skipToken(AsmToken::RParen, "expected closing parentheses"))
3032     return MatchOperand_ParseFail;
3033 
3034   AMDGPUOperand::Modifiers Mods;
3035   Mods.Sext = Sext;
3036 
3037   if (Mods.hasIntModifiers()) {
3038     AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
3039     if (Op.isExpr()) {
3040       Error(Op.getStartLoc(), "expected an absolute expression");
3041       return MatchOperand_ParseFail;
3042     }
3043     Op.setModifiers(Mods);
3044   }
3045 
3046   return MatchOperand_Success;
3047 }
3048 
3049 OperandMatchResultTy
3050 AMDGPUAsmParser::parseRegWithFPInputMods(OperandVector &Operands) {
3051   return parseRegOrImmWithFPInputMods(Operands, false);
3052 }
3053 
3054 OperandMatchResultTy
3055 AMDGPUAsmParser::parseRegWithIntInputMods(OperandVector &Operands) {
3056   return parseRegOrImmWithIntInputMods(Operands, false);
3057 }
3058 
3059 OperandMatchResultTy AMDGPUAsmParser::parseVReg32OrOff(OperandVector &Operands) {
3060   auto Loc = getLoc();
3061   if (trySkipId("off")) {
3062     Operands.push_back(AMDGPUOperand::CreateImm(this, 0, Loc,
3063                                                 AMDGPUOperand::ImmTyOff, false));
3064     return MatchOperand_Success;
3065   }
3066 
3067   if (!isRegister())
3068     return MatchOperand_NoMatch;
3069 
3070   std::unique_ptr<AMDGPUOperand> Reg = parseRegister();
3071   if (Reg) {
3072     Operands.push_back(std::move(Reg));
3073     return MatchOperand_Success;
3074   }
3075 
3076   return MatchOperand_ParseFail;
3077 
3078 }
3079 
3080 unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) {
3081   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
3082 
3083   if ((getForcedEncodingSize() == 32 && (TSFlags & SIInstrFlags::VOP3)) ||
3084       (getForcedEncodingSize() == 64 && !(TSFlags & SIInstrFlags::VOP3)) ||
3085       (isForcedDPP() && !(TSFlags & SIInstrFlags::DPP)) ||
3086       (isForcedSDWA() && !(TSFlags & SIInstrFlags::SDWA)) )
3087     return Match_InvalidOperand;
3088 
3089   if ((TSFlags & SIInstrFlags::VOP3) &&
3090       (TSFlags & SIInstrFlags::VOPAsmPrefer32Bit) &&
3091       getForcedEncodingSize() != 64)
3092     return Match_PreferE32;
3093 
3094   if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
3095       Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) {
3096     // v_mac_f32/16 allow only dst_sel == DWORD;
3097     auto OpNum =
3098         AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::dst_sel);
3099     const auto &Op = Inst.getOperand(OpNum);
3100     if (!Op.isImm() || Op.getImm() != AMDGPU::SDWA::SdwaSel::DWORD) {
3101       return Match_InvalidOperand;
3102     }
3103   }
3104 
3105   return Match_Success;
3106 }
3107 
3108 static ArrayRef<unsigned> getAllVariants() {
3109   static const unsigned Variants[] = {
3110     AMDGPUAsmVariants::DEFAULT, AMDGPUAsmVariants::VOP3,
3111     AMDGPUAsmVariants::SDWA, AMDGPUAsmVariants::SDWA9, AMDGPUAsmVariants::DPP
3112   };
3113 
3114   return makeArrayRef(Variants);
3115 }
3116 
3117 // What asm variants we should check
3118 ArrayRef<unsigned> AMDGPUAsmParser::getMatchedVariants() const {
3119   if (getForcedEncodingSize() == 32) {
3120     static const unsigned Variants[] = {AMDGPUAsmVariants::DEFAULT};
3121     return makeArrayRef(Variants);
3122   }
3123 
3124   if (isForcedVOP3()) {
3125     static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3};
3126     return makeArrayRef(Variants);
3127   }
3128 
3129   if (isForcedSDWA()) {
3130     static const unsigned Variants[] = {AMDGPUAsmVariants::SDWA,
3131                                         AMDGPUAsmVariants::SDWA9};
3132     return makeArrayRef(Variants);
3133   }
3134 
3135   if (isForcedDPP()) {
3136     static const unsigned Variants[] = {AMDGPUAsmVariants::DPP};
3137     return makeArrayRef(Variants);
3138   }
3139 
3140   return getAllVariants();
3141 }
3142 
3143 StringRef AMDGPUAsmParser::getMatchedVariantName() const {
3144   if (getForcedEncodingSize() == 32)
3145     return "e32";
3146 
3147   if (isForcedVOP3())
3148     return "e64";
3149 
3150   if (isForcedSDWA())
3151     return "sdwa";
3152 
3153   if (isForcedDPP())
3154     return "dpp";
3155 
3156   return "";
3157 }
3158 
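// Return the first implicitly read SGPR (FLAT_SCR, VCC or M0) of a VOP
// instruction, or NoRegister if there is none.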
3159 unsigned AMDGPUAsmParser::findImplicitSGPRReadInVOP(const MCInst &Inst) const {
3160   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
3161   const unsigned Num = Desc.getNumImplicitUses();
3162   for (unsigned i = 0; i < Num; ++i) {
3163     unsigned Reg = Desc.ImplicitUses[i];
3164     switch (Reg) {
3165     case AMDGPU::FLAT_SCR:
3166     case AMDGPU::VCC:
3167     case AMDGPU::VCC_LO:
3168     case AMDGPU::VCC_HI:
3169     case AMDGPU::M0:
3170       return Reg;
3171     default:
3172       break;
3173     }
3174   }
3175   return AMDGPU::NoRegister;
3176 }
3177 
// NB: This code is correct only when used to check constant
// bus limitations because GFX7 supports no f16 inline constants.
// Note that there are no cases when a GFX7 opcode violates
// constant bus limitations due to the use of an f16 constant.
3182 bool AMDGPUAsmParser::isInlineConstant(const MCInst &Inst,
3183                                        unsigned OpIdx) const {
3184   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
3185 
3186   if (!AMDGPU::isSISrcOperand(Desc, OpIdx)) {
3187     return false;
3188   }
3189 
3190   const MCOperand &MO = Inst.getOperand(OpIdx);
3191 
3192   int64_t Val = MO.getImm();
3193   auto OpSize = AMDGPU::getOperandSize(Desc, OpIdx);
3194 
3195   switch (OpSize) { // expected operand size
3196   case 8:
3197     return AMDGPU::isInlinableLiteral64(Val, hasInv2PiInlineImm());
3198   case 4:
3199     return AMDGPU::isInlinableLiteral32(Val, hasInv2PiInlineImm());
3200   case 2: {
3201     const unsigned OperandType = Desc.OpInfo[OpIdx].OperandType;
3202     if (OperandType == AMDGPU::OPERAND_REG_IMM_INT16 ||
3203         OperandType == AMDGPU::OPERAND_REG_INLINE_C_INT16 ||
3204         OperandType == AMDGPU::OPERAND_REG_INLINE_AC_INT16)
3205       return AMDGPU::isInlinableIntLiteral(Val);
3206 
3207     if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2INT16 ||
3208         OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2INT16 ||
3209         OperandType == AMDGPU::OPERAND_REG_IMM_V2INT16)
3210       return AMDGPU::isInlinableIntLiteralV216(Val);
3211 
3212     if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2FP16 ||
3213         OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2FP16 ||
3214         OperandType == AMDGPU::OPERAND_REG_IMM_V2FP16)
3215       return AMDGPU::isInlinableLiteralV216(Val, hasInv2PiInlineImm());
3216 
3217     return AMDGPU::isInlinableLiteral16(Val, hasInv2PiInlineImm());
3218   }
3219   default:
3220     llvm_unreachable("invalid operand size");
3221   }
3222 }
3223 
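// GFX10+ generally allows two scalar (constant bus) inputs per VALU
// instruction; 64-bit shifts and all earlier GPUs allow only one.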
3224 unsigned AMDGPUAsmParser::getConstantBusLimit(unsigned Opcode) const {
3225   if (!isGFX10Plus())
3226     return 1;
3227 
3228   switch (Opcode) {
3229   // 64-bit shift instructions can use only one scalar value input
3230   case AMDGPU::V_LSHLREV_B64_e64:
3231   case AMDGPU::V_LSHLREV_B64_gfx10:
3232   case AMDGPU::V_LSHRREV_B64_e64:
3233   case AMDGPU::V_LSHRREV_B64_gfx10:
3234   case AMDGPU::V_ASHRREV_I64_e64:
3235   case AMDGPU::V_ASHRREV_I64_gfx10:
3236   case AMDGPU::V_LSHL_B64_e64:
3237   case AMDGPU::V_LSHR_B64_e64:
3238   case AMDGPU::V_ASHR_I64_e64:
3239     return 1;
3240   default:
3241     return 2;
3242   }
3243 }
3244 
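// An operand occupies a constant bus slot if it is a literal that is not
// an inline constant, an SGPR other than null, or an expression.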
3245 bool AMDGPUAsmParser::usesConstantBus(const MCInst &Inst, unsigned OpIdx) {
3246   const MCOperand &MO = Inst.getOperand(OpIdx);
3247   if (MO.isImm()) {
3248     return !isInlineConstant(Inst, OpIdx);
3249   } else if (MO.isReg()) {
3250     auto Reg = MO.getReg();
3251     const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3252     auto PReg = mc2PseudoReg(Reg);
3253     return isSGPR(PReg, TRI) && PReg != SGPR_NULL;
3254   } else {
3255     return true;
3256   }
3257 }
3258 
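// For example, on targets with a single constant bus slot an instruction
// such as "v_add_f32_e64 v0, s0, s1" is rejected because it reads two
// different SGPRs.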
3259 bool
3260 AMDGPUAsmParser::validateConstantBusLimitations(const MCInst &Inst,
3261                                                 const OperandVector &Operands) {
3262   const unsigned Opcode = Inst.getOpcode();
3263   const MCInstrDesc &Desc = MII.get(Opcode);
3264   unsigned LastSGPR = AMDGPU::NoRegister;
3265   unsigned ConstantBusUseCount = 0;
3266   unsigned NumLiterals = 0;
3267   unsigned LiteralSize;
3268 
3269   if (Desc.TSFlags &
3270       (SIInstrFlags::VOPC |
3271        SIInstrFlags::VOP1 | SIInstrFlags::VOP2 |
3272        SIInstrFlags::VOP3 | SIInstrFlags::VOP3P |
3273        SIInstrFlags::SDWA)) {
3274     // Check special imm operands (used by madmk, etc)
3275     if (AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm) != -1) {
3276       ++NumLiterals;
3277       LiteralSize = 4;
3278     }
3279 
3280     SmallDenseSet<unsigned> SGPRsUsed;
3281     unsigned SGPRUsed = findImplicitSGPRReadInVOP(Inst);
3282     if (SGPRUsed != AMDGPU::NoRegister) {
3283       SGPRsUsed.insert(SGPRUsed);
3284       ++ConstantBusUseCount;
3285     }
3286 
3287     const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
3288     const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
3289     const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
3290 
3291     const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx };
3292 
3293     for (int OpIdx : OpIndices) {
3294       if (OpIdx == -1) break;
3295 
3296       const MCOperand &MO = Inst.getOperand(OpIdx);
3297       if (usesConstantBus(Inst, OpIdx)) {
3298         if (MO.isReg()) {
3299           LastSGPR = mc2PseudoReg(MO.getReg());
          // Pairs of registers with a partial intersection like these:
          //   s0, s[0:1]
          //   flat_scratch_lo, flat_scratch
          //   flat_scratch_lo, flat_scratch_hi
          // are theoretically valid but they are disabled anyway.
          // Note that this code mimics SIInstrInfo::verifyInstruction.
3306           if (!SGPRsUsed.count(LastSGPR)) {
3307             SGPRsUsed.insert(LastSGPR);
3308             ++ConstantBusUseCount;
3309           }
3310         } else { // Expression or a literal
3311 
3312           if (Desc.OpInfo[OpIdx].OperandType == MCOI::OPERAND_IMMEDIATE)
3313             continue; // special operand like VINTERP attr_chan
3314 
3315           // An instruction may use only one literal.
          // This has been validated in a previous step.
3317           // See validateVOPLiteral.
3318           // This literal may be used as more than one operand.
3319           // If all these operands are of the same size,
3320           // this literal counts as one scalar value.
3321           // Otherwise it counts as 2 scalar values.
3322           // See "GFX10 Shader Programming", section 3.6.2.3.
3323 
3324           unsigned Size = AMDGPU::getOperandSize(Desc, OpIdx);
3325           if (Size < 4) Size = 4;
3326 
3327           if (NumLiterals == 0) {
3328             NumLiterals = 1;
3329             LiteralSize = Size;
3330           } else if (LiteralSize != Size) {
3331             NumLiterals = 2;
3332           }
3333         }
3334       }
3335     }
3336   }
3337   ConstantBusUseCount += NumLiterals;
3338 
3339   if (ConstantBusUseCount <= getConstantBusLimit(Opcode))
3340     return true;
3341 
3342   SMLoc LitLoc = getLitLoc(Operands);
3343   SMLoc RegLoc = getRegLoc(LastSGPR, Operands);
3344   SMLoc Loc = (LitLoc.getPointer() < RegLoc.getPointer()) ? RegLoc : LitLoc;
3345   Error(Loc, "invalid operand (violates constant bus restrictions)");
3346   return false;
3347 }
3348 
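// If the destination operand is marked as early-clobber, it must not
// overlap any of the source registers.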
3349 bool
3350 AMDGPUAsmParser::validateEarlyClobberLimitations(const MCInst &Inst,
3351                                                  const OperandVector &Operands) {
3352   const unsigned Opcode = Inst.getOpcode();
3353   const MCInstrDesc &Desc = MII.get(Opcode);
3354 
3355   const int DstIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vdst);
3356   if (DstIdx == -1 ||
3357       Desc.getOperandConstraint(DstIdx, MCOI::EARLY_CLOBBER) == -1) {
3358     return true;
3359   }
3360 
3361   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3362 
3363   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
3364   const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
3365   const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
3366 
3367   assert(DstIdx != -1);
3368   const MCOperand &Dst = Inst.getOperand(DstIdx);
3369   assert(Dst.isReg());
3370   const unsigned DstReg = mc2PseudoReg(Dst.getReg());
3371 
3372   const int SrcIndices[] = { Src0Idx, Src1Idx, Src2Idx };
3373 
3374   for (int SrcIdx : SrcIndices) {
3375     if (SrcIdx == -1) break;
3376     const MCOperand &Src = Inst.getOperand(SrcIdx);
3377     if (Src.isReg()) {
3378       const unsigned SrcReg = mc2PseudoReg(Src.getReg());
3379       if (isRegIntersect(DstReg, SrcReg, TRI)) {
3380         Error(getRegLoc(SrcReg, Operands),
3381           "destination must be different than all sources");
3382         return false;
3383       }
3384     }
3385   }
3386 
3387   return true;
3388 }
3389 
3390 bool AMDGPUAsmParser::validateIntClampSupported(const MCInst &Inst) {
3391 
3392   const unsigned Opc = Inst.getOpcode();
3393   const MCInstrDesc &Desc = MII.get(Opc);
3394 
3395   if ((Desc.TSFlags & SIInstrFlags::IntClamp) != 0 && !hasIntClamp()) {
3396     int ClampIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp);
3397     assert(ClampIdx != -1);
3398     return Inst.getOperand(ClampIdx).getImm() == 0;
3399   }
3400 
3401   return true;
3402 }
3403 
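// The number of data VGPRs must match the dmask popcount (4 for gather4),
// halved and rounded up for packed d16, plus one more register if tfe is
// enabled.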
3404 bool AMDGPUAsmParser::validateMIMGDataSize(const MCInst &Inst) {
3405 
3406   const unsigned Opc = Inst.getOpcode();
3407   const MCInstrDesc &Desc = MII.get(Opc);
3408 
3409   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3410     return true;
3411 
3412   int VDataIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdata);
3413   int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
3414   int TFEIdx   = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::tfe);
3415 
3416   assert(VDataIdx != -1);
3417 
3418   if (DMaskIdx == -1 || TFEIdx == -1) // intersect_ray
3419     return true;
3420 
3421   unsigned VDataSize = AMDGPU::getRegOperandSize(getMRI(), Desc, VDataIdx);
3422   unsigned TFESize = (TFEIdx != -1 && Inst.getOperand(TFEIdx).getImm()) ? 1 : 0;
3423   unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
3424   if (DMask == 0)
3425     DMask = 1;
3426 
3427   unsigned DataSize =
3428     (Desc.TSFlags & SIInstrFlags::Gather4) ? 4 : countPopulation(DMask);
3429   if (hasPackedD16()) {
3430     int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
3431     if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm())
3432       DataSize = (DataSize + 1) / 2;
3433   }
3434 
3435   return (VDataSize / 4) == DataSize + TFESize;
3436 }
3437 
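// On GFX10+, check that the number of address VGPRs matches the count
// implied by the dim, a16 and g16 settings.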
3438 bool AMDGPUAsmParser::validateMIMGAddrSize(const MCInst &Inst) {
3439   const unsigned Opc = Inst.getOpcode();
3440   const MCInstrDesc &Desc = MII.get(Opc);
3441 
3442   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0 || !isGFX10Plus())
3443     return true;
3444 
3445   const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc);
3446 
3447   const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
3448       AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode);
3449   int VAddr0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr0);
3450   int SrsrcIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::srsrc);
3451   int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
3452   int A16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::a16);
3453 
3454   assert(VAddr0Idx != -1);
3455   assert(SrsrcIdx != -1);
3456   assert(SrsrcIdx > VAddr0Idx);
3457 
3458   if (DimIdx == -1)
3459     return true; // intersect_ray
3460 
3461   unsigned Dim = Inst.getOperand(DimIdx).getImm();
3462   const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim);
3463   bool IsNSA = SrsrcIdx - VAddr0Idx > 1;
3464   unsigned ActualAddrSize =
3465       IsNSA ? SrsrcIdx - VAddr0Idx
3466             : AMDGPU::getRegOperandSize(getMRI(), Desc, VAddr0Idx) / 4;
3467   bool IsA16 = (A16Idx != -1 && Inst.getOperand(A16Idx).getImm());
3468 
3469   unsigned ExpectedAddrSize =
3470       AMDGPU::getAddrSizeMIMGOp(BaseOpcode, DimInfo, IsA16, hasG16());
3471 
3472   if (!IsNSA) {
3473     if (ExpectedAddrSize > 8)
3474       ExpectedAddrSize = 16;
3475 
3476     // Allow oversized 8 VGPR vaddr when only 5/6/7 VGPRs are required.
3477     // This provides backward compatibility for assembly created
3478     // before 160b/192b/224b types were directly supported.
3479     if (ActualAddrSize == 8 && (ExpectedAddrSize >= 5 && ExpectedAddrSize <= 7))
3480       return true;
3481   }
3482 
3483   return ActualAddrSize == ExpectedAddrSize;
3484 }
3485 
3486 bool AMDGPUAsmParser::validateMIMGAtomicDMask(const MCInst &Inst) {
3487 
3488   const unsigned Opc = Inst.getOpcode();
3489   const MCInstrDesc &Desc = MII.get(Opc);
3490 
3491   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3492     return true;
3493   if (!Desc.mayLoad() || !Desc.mayStore())
3494     return true; // Not atomic
3495 
3496   int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
3497   unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
3498 
3499   // This is an incomplete check because image_atomic_cmpswap
3500   // may only use 0x3 and 0xf while other atomic operations
  // may use 0x1 and 0x3. However, these limitations are
3502   // verified when we check that dmask matches dst size.
3503   return DMask == 0x1 || DMask == 0x3 || DMask == 0xf;
3504 }
3505 
3506 bool AMDGPUAsmParser::validateMIMGGatherDMask(const MCInst &Inst) {
3507 
3508   const unsigned Opc = Inst.getOpcode();
3509   const MCInstrDesc &Desc = MII.get(Opc);
3510 
3511   if ((Desc.TSFlags & SIInstrFlags::Gather4) == 0)
3512     return true;
3513 
3514   int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
3515   unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
3516 
3517   // GATHER4 instructions use dmask in a different fashion compared to
3518   // other MIMG instructions. The only useful DMASK values are
3519   // 1=red, 2=green, 4=blue, 8=alpha. (e.g. 1 returns
3520   // (red,red,red,red) etc.) The ISA document doesn't mention
3521   // this.
3522   return DMask == 0x1 || DMask == 0x2 || DMask == 0x4 || DMask == 0x8;
3523 }
3524 
3525 bool AMDGPUAsmParser::validateMIMGMSAA(const MCInst &Inst) {
3526   const unsigned Opc = Inst.getOpcode();
3527   const MCInstrDesc &Desc = MII.get(Opc);
3528 
3529   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3530     return true;
3531 
3532   const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc);
3533   const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
3534       AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode);
3535 
3536   if (!BaseOpcode->MSAA)
3537     return true;
3538 
3539   int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
3540   assert(DimIdx != -1);
3541 
3542   unsigned Dim = Inst.getOperand(DimIdx).getImm();
3543   const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim);
3544 
3545   return DimInfo->MSAA;
3546 }
3547 
3548 static bool IsMovrelsSDWAOpcode(const unsigned Opcode)
3549 {
3550   switch (Opcode) {
3551   case AMDGPU::V_MOVRELS_B32_sdwa_gfx10:
3552   case AMDGPU::V_MOVRELSD_B32_sdwa_gfx10:
3553   case AMDGPU::V_MOVRELSD_2_B32_sdwa_gfx10:
3554     return true;
3555   default:
3556     return false;
3557   }
3558 }
3559 
// movrels* opcodes should only allow VGPRs as src0.
3561 // This is specified in .td description for vop1/vop3,
3562 // but sdwa is handled differently. See isSDWAOperand.
3563 bool AMDGPUAsmParser::validateMovrels(const MCInst &Inst,
3564                                       const OperandVector &Operands) {
3565 
3566   const unsigned Opc = Inst.getOpcode();
3567   const MCInstrDesc &Desc = MII.get(Opc);
3568 
3569   if ((Desc.TSFlags & SIInstrFlags::SDWA) == 0 || !IsMovrelsSDWAOpcode(Opc))
3570     return true;
3571 
3572   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
3573   assert(Src0Idx != -1);
3574 
3575   SMLoc ErrLoc;
3576   const MCOperand &Src0 = Inst.getOperand(Src0Idx);
3577   if (Src0.isReg()) {
3578     auto Reg = mc2PseudoReg(Src0.getReg());
3579     const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3580     if (!isSGPR(Reg, TRI))
3581       return true;
3582     ErrLoc = getRegLoc(Reg, Operands);
3583   } else {
3584     ErrLoc = getConstLoc(Operands);
3585   }
3586 
3587   Error(ErrLoc, "source operand must be a VGPR");
3588   return false;
3589 }
3590 
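// v_accvgpr_write accepts only a VGPR or an inline constant as src0.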
3591 bool AMDGPUAsmParser::validateMAIAccWrite(const MCInst &Inst,
3592                                           const OperandVector &Operands) {
3593 
3594   const unsigned Opc = Inst.getOpcode();
3595 
3596   if (Opc != AMDGPU::V_ACCVGPR_WRITE_B32_vi)
3597     return true;
3598 
3599   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
3600   assert(Src0Idx != -1);
3601 
3602   const MCOperand &Src0 = Inst.getOperand(Src0Idx);
3603   if (!Src0.isReg())
3604     return true;
3605 
3606   auto Reg = mc2PseudoReg(Src0.getReg());
3607   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3608   if (isSGPR(Reg, TRI)) {
3609     Error(getRegLoc(Reg, Operands),
3610           "source operand must be either a VGPR or an inline constant");
3611     return false;
3612   }
3613 
3614   return true;
3615 }
3616 
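// For MAI instructions with a result wider than 128 bits, src2 must
// either be the same register as the destination or not overlap it.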
3617 bool AMDGPUAsmParser::validateMFMA(const MCInst &Inst,
3618                                    const OperandVector &Operands) {
3619   const unsigned Opc = Inst.getOpcode();
3620   const MCInstrDesc &Desc = MII.get(Opc);
3621 
3622   if ((Desc.TSFlags & SIInstrFlags::IsMAI) == 0)
3623     return true;
3624 
3625   const int Src2Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2);
3626   if (Src2Idx == -1)
3627     return true;
3628 
3629   const MCOperand &Src2 = Inst.getOperand(Src2Idx);
3630   if (!Src2.isReg())
3631     return true;
3632 
3633   MCRegister Src2Reg = Src2.getReg();
3634   MCRegister DstReg = Inst.getOperand(0).getReg();
3635   if (Src2Reg == DstReg)
3636     return true;
3637 
3638   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3639   if (TRI->getRegClass(Desc.OpInfo[0].RegClass).getSizeInBits() <= 128)
3640     return true;
3641 
3642   if (isRegIntersect(Src2Reg, DstReg, TRI)) {
3643     Error(getRegLoc(mc2PseudoReg(Src2Reg), Operands),
3644           "source 2 operand must not partially overlap with dst");
3645     return false;
3646   }
3647 
3648   return true;
3649 }
3650 
3651 bool AMDGPUAsmParser::validateDivScale(const MCInst &Inst) {
3652   switch (Inst.getOpcode()) {
3653   default:
3654     return true;
3655   case V_DIV_SCALE_F32_gfx6_gfx7:
3656   case V_DIV_SCALE_F32_vi:
3657   case V_DIV_SCALE_F32_gfx10:
3658   case V_DIV_SCALE_F64_gfx6_gfx7:
3659   case V_DIV_SCALE_F64_vi:
3660   case V_DIV_SCALE_F64_gfx10:
3661     break;
3662   }
3663 
3664   // TODO: Check that src0 = src1 or src2.
3665 
  for (auto Name : {AMDGPU::OpName::src0_modifiers,
                    AMDGPU::OpName::src1_modifiers,
                    AMDGPU::OpName::src2_modifiers}) {
3669     if (Inst.getOperand(AMDGPU::getNamedOperandIdx(Inst.getOpcode(), Name))
3670             .getImm() &
3671         SISrcMods::ABS) {
3672       return false;
3673     }
3674   }
3675 
3676   return true;
3677 }
3678 
3679 bool AMDGPUAsmParser::validateMIMGD16(const MCInst &Inst) {
3680 
3681   const unsigned Opc = Inst.getOpcode();
3682   const MCInstrDesc &Desc = MII.get(Opc);
3683 
3684   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3685     return true;
3686 
3687   int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
3688   if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm()) {
3689     if (isCI() || isSI())
3690       return false;
3691   }
3692 
3693   return true;
3694 }
3695 
3696 bool AMDGPUAsmParser::validateMIMGDim(const MCInst &Inst) {
3697   const unsigned Opc = Inst.getOpcode();
3698   const MCInstrDesc &Desc = MII.get(Opc);
3699 
3700   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3701     return true;
3702 
3703   int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
3704   if (DimIdx < 0)
3705     return true;
3706 
3707   long Imm = Inst.getOperand(DimIdx).getImm();
3708   if (Imm < 0 || Imm >= 8)
3709     return false;
3710 
3711   return true;
3712 }
3713 
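// Opcodes that take their operands in reversed order. lds_direct cannot
// be used with these (see validateLdsDirect).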
3714 static bool IsRevOpcode(const unsigned Opcode)
3715 {
3716   switch (Opcode) {
3717   case AMDGPU::V_SUBREV_F32_e32:
3718   case AMDGPU::V_SUBREV_F32_e64:
3719   case AMDGPU::V_SUBREV_F32_e32_gfx10:
3720   case AMDGPU::V_SUBREV_F32_e32_gfx6_gfx7:
3721   case AMDGPU::V_SUBREV_F32_e32_vi:
3722   case AMDGPU::V_SUBREV_F32_e64_gfx10:
3723   case AMDGPU::V_SUBREV_F32_e64_gfx6_gfx7:
3724   case AMDGPU::V_SUBREV_F32_e64_vi:
3725 
3726   case AMDGPU::V_SUBREV_CO_U32_e32:
3727   case AMDGPU::V_SUBREV_CO_U32_e64:
3728   case AMDGPU::V_SUBREV_I32_e32_gfx6_gfx7:
3729   case AMDGPU::V_SUBREV_I32_e64_gfx6_gfx7:
3730 
3731   case AMDGPU::V_SUBBREV_U32_e32:
3732   case AMDGPU::V_SUBBREV_U32_e64:
3733   case AMDGPU::V_SUBBREV_U32_e32_gfx6_gfx7:
3734   case AMDGPU::V_SUBBREV_U32_e32_vi:
3735   case AMDGPU::V_SUBBREV_U32_e64_gfx6_gfx7:
3736   case AMDGPU::V_SUBBREV_U32_e64_vi:
3737 
3738   case AMDGPU::V_SUBREV_U32_e32:
3739   case AMDGPU::V_SUBREV_U32_e64:
3740   case AMDGPU::V_SUBREV_U32_e32_gfx9:
3741   case AMDGPU::V_SUBREV_U32_e32_vi:
3742   case AMDGPU::V_SUBREV_U32_e64_gfx9:
3743   case AMDGPU::V_SUBREV_U32_e64_vi:
3744 
3745   case AMDGPU::V_SUBREV_F16_e32:
3746   case AMDGPU::V_SUBREV_F16_e64:
3747   case AMDGPU::V_SUBREV_F16_e32_gfx10:
3748   case AMDGPU::V_SUBREV_F16_e32_vi:
3749   case AMDGPU::V_SUBREV_F16_e64_gfx10:
3750   case AMDGPU::V_SUBREV_F16_e64_vi:
3751 
3752   case AMDGPU::V_SUBREV_U16_e32:
3753   case AMDGPU::V_SUBREV_U16_e64:
3754   case AMDGPU::V_SUBREV_U16_e32_vi:
3755   case AMDGPU::V_SUBREV_U16_e64_vi:
3756 
3757   case AMDGPU::V_SUBREV_CO_U32_e32_gfx9:
3758   case AMDGPU::V_SUBREV_CO_U32_e64_gfx10:
3759   case AMDGPU::V_SUBREV_CO_U32_e64_gfx9:
3760 
3761   case AMDGPU::V_SUBBREV_CO_U32_e32_gfx9:
3762   case AMDGPU::V_SUBBREV_CO_U32_e64_gfx9:
3763 
3764   case AMDGPU::V_SUBREV_NC_U32_e32_gfx10:
3765   case AMDGPU::V_SUBREV_NC_U32_e64_gfx10:
3766 
3767   case AMDGPU::V_SUBREV_CO_CI_U32_e32_gfx10:
3768   case AMDGPU::V_SUBREV_CO_CI_U32_e64_gfx10:
3769 
3770   case AMDGPU::V_LSHRREV_B32_e32:
3771   case AMDGPU::V_LSHRREV_B32_e64:
3772   case AMDGPU::V_LSHRREV_B32_e32_gfx6_gfx7:
3773   case AMDGPU::V_LSHRREV_B32_e64_gfx6_gfx7:
3774   case AMDGPU::V_LSHRREV_B32_e32_vi:
3775   case AMDGPU::V_LSHRREV_B32_e64_vi:
3776   case AMDGPU::V_LSHRREV_B32_e32_gfx10:
3777   case AMDGPU::V_LSHRREV_B32_e64_gfx10:
3778 
3779   case AMDGPU::V_ASHRREV_I32_e32:
3780   case AMDGPU::V_ASHRREV_I32_e64:
3781   case AMDGPU::V_ASHRREV_I32_e32_gfx10:
3782   case AMDGPU::V_ASHRREV_I32_e32_gfx6_gfx7:
3783   case AMDGPU::V_ASHRREV_I32_e32_vi:
3784   case AMDGPU::V_ASHRREV_I32_e64_gfx10:
3785   case AMDGPU::V_ASHRREV_I32_e64_gfx6_gfx7:
3786   case AMDGPU::V_ASHRREV_I32_e64_vi:
3787 
3788   case AMDGPU::V_LSHLREV_B32_e32:
3789   case AMDGPU::V_LSHLREV_B32_e64:
3790   case AMDGPU::V_LSHLREV_B32_e32_gfx10:
3791   case AMDGPU::V_LSHLREV_B32_e32_gfx6_gfx7:
3792   case AMDGPU::V_LSHLREV_B32_e32_vi:
3793   case AMDGPU::V_LSHLREV_B32_e64_gfx10:
3794   case AMDGPU::V_LSHLREV_B32_e64_gfx6_gfx7:
3795   case AMDGPU::V_LSHLREV_B32_e64_vi:
3796 
3797   case AMDGPU::V_LSHLREV_B16_e32:
3798   case AMDGPU::V_LSHLREV_B16_e64:
3799   case AMDGPU::V_LSHLREV_B16_e32_vi:
3800   case AMDGPU::V_LSHLREV_B16_e64_vi:
3801   case AMDGPU::V_LSHLREV_B16_gfx10:
3802 
3803   case AMDGPU::V_LSHRREV_B16_e32:
3804   case AMDGPU::V_LSHRREV_B16_e64:
3805   case AMDGPU::V_LSHRREV_B16_e32_vi:
3806   case AMDGPU::V_LSHRREV_B16_e64_vi:
3807   case AMDGPU::V_LSHRREV_B16_gfx10:
3808 
3809   case AMDGPU::V_ASHRREV_I16_e32:
3810   case AMDGPU::V_ASHRREV_I16_e64:
3811   case AMDGPU::V_ASHRREV_I16_e32_vi:
3812   case AMDGPU::V_ASHRREV_I16_e64_vi:
3813   case AMDGPU::V_ASHRREV_I16_gfx10:
3814 
3815   case AMDGPU::V_LSHLREV_B64_e64:
3816   case AMDGPU::V_LSHLREV_B64_gfx10:
3817   case AMDGPU::V_LSHLREV_B64_vi:
3818 
3819   case AMDGPU::V_LSHRREV_B64_e64:
3820   case AMDGPU::V_LSHRREV_B64_gfx10:
3821   case AMDGPU::V_LSHRREV_B64_vi:
3822 
3823   case AMDGPU::V_ASHRREV_I64_e64:
3824   case AMDGPU::V_ASHRREV_I64_gfx10:
3825   case AMDGPU::V_ASHRREV_I64_vi:
3826 
3827   case AMDGPU::V_PK_LSHLREV_B16:
3828   case AMDGPU::V_PK_LSHLREV_B16_gfx10:
3829   case AMDGPU::V_PK_LSHLREV_B16_vi:
3830 
3831   case AMDGPU::V_PK_LSHRREV_B16:
3832   case AMDGPU::V_PK_LSHRREV_B16_gfx10:
3833   case AMDGPU::V_PK_LSHRREV_B16_vi:
3834   case AMDGPU::V_PK_ASHRREV_I16:
3835   case AMDGPU::V_PK_ASHRREV_I16_gfx10:
3836   case AMDGPU::V_PK_ASHRREV_I16_vi:
3837     return true;
3838   default:
3839     return false;
3840   }
3841 }
3842 
3843 Optional<StringRef> AMDGPUAsmParser::validateLdsDirect(const MCInst &Inst) {
3844 
3845   using namespace SIInstrFlags;
3846   const unsigned Opcode = Inst.getOpcode();
3847   const MCInstrDesc &Desc = MII.get(Opcode);
3848 
  // The lds_direct register is defined so that it can be used
  // with 9-bit operands only. Ignore encodings that do not accept these.
3851   const auto Enc = VOP1 | VOP2 | VOP3 | VOPC | VOP3P | SIInstrFlags::SDWA;
3852   if ((Desc.TSFlags & Enc) == 0)
3853     return None;
3854 
3855   for (auto SrcName : {OpName::src0, OpName::src1, OpName::src2}) {
3856     auto SrcIdx = getNamedOperandIdx(Opcode, SrcName);
3857     if (SrcIdx == -1)
3858       break;
3859     const auto &Src = Inst.getOperand(SrcIdx);
3860     if (Src.isReg() && Src.getReg() == LDS_DIRECT) {
3861 
3862       if (isGFX90A())
3863         return StringRef("lds_direct is not supported on this GPU");
3864 
3865       if (IsRevOpcode(Opcode) || (Desc.TSFlags & SIInstrFlags::SDWA))
3866         return StringRef("lds_direct cannot be used with this instruction");
3867 
3868       if (SrcName != OpName::src0)
3869         return StringRef("lds_direct may be used as src0 only");
3870     }
3871   }
3872 
3873   return None;
3874 }
3875 
3876 SMLoc AMDGPUAsmParser::getFlatOffsetLoc(const OperandVector &Operands) const {
3877   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
3878     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
3879     if (Op.isFlatOffset())
3880       return Op.getStartLoc();
3881   }
3882   return getLoc();
3883 }
3884 
3885 bool AMDGPUAsmParser::validateFlatOffset(const MCInst &Inst,
3886                                          const OperandVector &Operands) {
3887   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
3888   if ((TSFlags & SIInstrFlags::FLAT) == 0)
3889     return true;
3890 
3891   auto Opcode = Inst.getOpcode();
3892   auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
3893   assert(OpNum != -1);
3894 
3895   const auto &Op = Inst.getOperand(OpNum);
3896   if (!hasFlatOffsets() && Op.getImm() != 0) {
3897     Error(getFlatOffsetLoc(Operands),
3898           "flat offset modifier is not supported on this GPU");
3899     return false;
3900   }
3901 
  // GLOBAL and SCRATCH instructions accept a signed offset.
  // For plain FLAT the offset must be positive;
  // the MSB is ignored and forced to zero.
3904   if (TSFlags & (SIInstrFlags::FlatGlobal | SIInstrFlags::FlatScratch)) {
3905     unsigned OffsetSize = AMDGPU::getNumFlatOffsetBits(getSTI(), true);
3906     if (!isIntN(OffsetSize, Op.getImm())) {
3907       Error(getFlatOffsetLoc(Operands),
3908             Twine("expected a ") + Twine(OffsetSize) + "-bit signed offset");
3909       return false;
3910     }
3911   } else {
3912     unsigned OffsetSize = AMDGPU::getNumFlatOffsetBits(getSTI(), false);
3913     if (!isUIntN(OffsetSize, Op.getImm())) {
3914       Error(getFlatOffsetLoc(Operands),
3915             Twine("expected a ") + Twine(OffsetSize) + "-bit unsigned offset");
3916       return false;
3917     }
3918   }
3919 
3920   return true;
3921 }
3922 
3923 SMLoc AMDGPUAsmParser::getSMEMOffsetLoc(const OperandVector &Operands) const {
  // Start with the second operand because the SMEM offset cannot be dst or src0.
3925   for (unsigned i = 2, e = Operands.size(); i != e; ++i) {
3926     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
3927     if (Op.isSMEMOffset())
3928       return Op.getStartLoc();
3929   }
3930   return getLoc();
3931 }
3932 
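// Check that the SMEM offset is encodable: roughly 20 unsigned bits on VI
// and for buffer instructions, 21 signed bits otherwise.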
3933 bool AMDGPUAsmParser::validateSMEMOffset(const MCInst &Inst,
3934                                          const OperandVector &Operands) {
3935   if (isCI() || isSI())
3936     return true;
3937 
3938   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
3939   if ((TSFlags & SIInstrFlags::SMRD) == 0)
3940     return true;
3941 
3942   auto Opcode = Inst.getOpcode();
3943   auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
3944   if (OpNum == -1)
3945     return true;
3946 
3947   const auto &Op = Inst.getOperand(OpNum);
3948   if (!Op.isImm())
3949     return true;
3950 
3951   uint64_t Offset = Op.getImm();
3952   bool IsBuffer = AMDGPU::getSMEMIsBuffer(Opcode);
3953   if (AMDGPU::isLegalSMRDEncodedUnsignedOffset(getSTI(), Offset) ||
3954       AMDGPU::isLegalSMRDEncodedSignedOffset(getSTI(), Offset, IsBuffer))
3955     return true;
3956 
3957   Error(getSMEMOffsetLoc(Operands),
3958         (isVI() || IsBuffer) ? "expected a 20-bit unsigned offset" :
3959                                "expected a 21-bit signed offset");
3960 
3961   return false;
3962 }
3963 
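// SOP2/SOPC instructions may use at most one unique literal or
// expression across src0 and src1.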
3964 bool AMDGPUAsmParser::validateSOPLiteral(const MCInst &Inst) const {
3965   unsigned Opcode = Inst.getOpcode();
3966   const MCInstrDesc &Desc = MII.get(Opcode);
3967   if (!(Desc.TSFlags & (SIInstrFlags::SOP2 | SIInstrFlags::SOPC)))
3968     return true;
3969 
3970   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
3971   const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
3972 
3973   const int OpIndices[] = { Src0Idx, Src1Idx };
3974 
3975   unsigned NumExprs = 0;
3976   unsigned NumLiterals = 0;
3977   uint32_t LiteralValue;
3978 
3979   for (int OpIdx : OpIndices) {
3980     if (OpIdx == -1) break;
3981 
3982     const MCOperand &MO = Inst.getOperand(OpIdx);
3983     // Exclude special imm operands (like that used by s_set_gpr_idx_on)
3984     if (AMDGPU::isSISrcOperand(Desc, OpIdx)) {
3985       if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) {
3986         uint32_t Value = static_cast<uint32_t>(MO.getImm());
3987         if (NumLiterals == 0 || LiteralValue != Value) {
3988           LiteralValue = Value;
3989           ++NumLiterals;
3990         }
3991       } else if (MO.isExpr()) {
3992         ++NumExprs;
3993       }
3994     }
3995   }
3996 
3997   return NumLiterals + NumExprs <= 1;
3998 }
3999 
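// For v_permlane16/v_permlanex16, only the two low op_sel bits may be set.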
4000 bool AMDGPUAsmParser::validateOpSel(const MCInst &Inst) {
4001   const unsigned Opc = Inst.getOpcode();
4002   if (Opc == AMDGPU::V_PERMLANE16_B32_gfx10 ||
4003       Opc == AMDGPU::V_PERMLANEX16_B32_gfx10) {
4004     int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
4005     unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
4006 
4007     if (OpSel & ~3)
4008       return false;
4009   }
4010   return true;
4011 }
4012 
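// Reject dpp_ctrl values that are not legal for 64-bit DPP when src0 is
// a 64-bit register (only row_newbcast is supported there).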
4013 bool AMDGPUAsmParser::validateDPP(const MCInst &Inst,
4014                                   const OperandVector &Operands) {
4015   const unsigned Opc = Inst.getOpcode();
4016   int DppCtrlIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dpp_ctrl);
4017   if (DppCtrlIdx < 0)
4018     return true;
4019   unsigned DppCtrl = Inst.getOperand(DppCtrlIdx).getImm();
4020 
4021   if (!AMDGPU::isLegal64BitDPPControl(DppCtrl)) {
4022     // DPP64 is supported for row_newbcast only.
4023     int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
4024     if (Src0Idx >= 0 &&
4025         getMRI()->getSubReg(Inst.getOperand(Src0Idx).getReg(), AMDGPU::sub1)) {
4026       SMLoc S = getImmLoc(AMDGPUOperand::ImmTyDppCtrl, Operands);
4027       Error(S, "64 bit dpp only supports row_newbcast");
4028       return false;
4029     }
4030   }
4031 
4032   return true;
4033 }
4034 
4035 // Check if VCC register matches wavefront size
4036 bool AMDGPUAsmParser::validateVccOperand(unsigned Reg) const {
4037   auto FB = getFeatureBits();
4038   return (FB[AMDGPU::FeatureWavefrontSize64] && Reg == AMDGPU::VCC) ||
4039     (FB[AMDGPU::FeatureWavefrontSize32] && Reg == AMDGPU::VCC_LO);
4040 }
4041 
// At most one unique literal may be used. A VOP3 literal is only allowed on GFX10+.
4043 bool AMDGPUAsmParser::validateVOPLiteral(const MCInst &Inst,
4044                                          const OperandVector &Operands) {
4045   unsigned Opcode = Inst.getOpcode();
4046   const MCInstrDesc &Desc = MII.get(Opcode);
4047   const int ImmIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm);
4048   if (!(Desc.TSFlags & (SIInstrFlags::VOP3 | SIInstrFlags::VOP3P)) &&
4049       ImmIdx == -1)
4050     return true;
4051 
4052   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
4053   const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
4054   const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
4055 
4056   const int OpIndices[] = {Src0Idx, Src1Idx, Src2Idx, ImmIdx};
4057 
4058   unsigned NumExprs = 0;
4059   unsigned NumLiterals = 0;
4060   uint32_t LiteralValue;
4061 
4062   for (int OpIdx : OpIndices) {
4063     if (OpIdx == -1)
4064       continue;
4065 
4066     const MCOperand &MO = Inst.getOperand(OpIdx);
4067     if (!MO.isImm() && !MO.isExpr())
4068       continue;
4069     if (!AMDGPU::isSISrcOperand(Desc, OpIdx))
4070       continue;
4071 
4072     if (OpIdx == Src2Idx && (Desc.TSFlags & SIInstrFlags::IsMAI) &&
4073         getFeatureBits()[AMDGPU::FeatureMFMAInlineLiteralBug]) {
4074       Error(getConstLoc(Operands),
4075             "inline constants are not allowed for this operand");
4076       return false;
4077     }
4078 
4079     if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) {
4080       uint32_t Value = static_cast<uint32_t>(MO.getImm());
4081       if (NumLiterals == 0 || LiteralValue != Value) {
4082         LiteralValue = Value;
4083         ++NumLiterals;
4084       }
4085     } else if (MO.isExpr()) {
4086       ++NumExprs;
4087     }
4088   }
4089   NumLiterals += NumExprs;
4090 
4091   if (!NumLiterals)
4092     return true;
4093 
4094   if (ImmIdx == -1 && !getFeatureBits()[AMDGPU::FeatureVOP3Literal]) {
4095     Error(getLitLoc(Operands), "literal operands are not supported");
4096     return false;
4097   }
4098 
4099   if (NumLiterals > 1) {
4100     Error(getLitLoc(Operands), "only one literal operand is allowed");
4101     return false;
4102   }
4103 
4104   return true;
4105 }
4106 
4107 // Returns -1 if not a register, 0 if VGPR and 1 if AGPR.
4108 static int IsAGPROperand(const MCInst &Inst, uint16_t NameIdx,
4109                          const MCRegisterInfo *MRI) {
4110   int OpIdx = AMDGPU::getNamedOperandIdx(Inst.getOpcode(), NameIdx);
4111   if (OpIdx < 0)
4112     return -1;
4113 
4114   const MCOperand &Op = Inst.getOperand(OpIdx);
4115   if (!Op.isReg())
4116     return -1;
4117 
4118   unsigned Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0);
4119   auto Reg = Sub ? Sub : Op.getReg();
4120   const MCRegisterClass &AGPR32 = MRI->getRegClass(AMDGPU::AGPR_32RegClassID);
4121   return AGPR32.contains(Reg) ? 1 : 0;
4122 }
4123 
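// For FLAT, MUBUF, MTBUF, MIMG and DS memory instructions, the data and
// destination operands must agree: all VGPRs, or on gfx90a all AGPRs.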
4124 bool AMDGPUAsmParser::validateAGPRLdSt(const MCInst &Inst) const {
4125   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4126   if ((TSFlags & (SIInstrFlags::FLAT | SIInstrFlags::MUBUF |
4127                   SIInstrFlags::MTBUF | SIInstrFlags::MIMG |
4128                   SIInstrFlags::DS)) == 0)
4129     return true;
4130 
4131   uint16_t DataNameIdx = (TSFlags & SIInstrFlags::DS) ? AMDGPU::OpName::data0
4132                                                       : AMDGPU::OpName::vdata;
4133 
4134   const MCRegisterInfo *MRI = getMRI();
4135   int DstAreg = IsAGPROperand(Inst, AMDGPU::OpName::vdst, MRI);
4136   int DataAreg = IsAGPROperand(Inst, DataNameIdx, MRI);
4137 
4138   if ((TSFlags & SIInstrFlags::DS) && DataAreg >= 0) {
4139     int Data2Areg = IsAGPROperand(Inst, AMDGPU::OpName::data1, MRI);
4140     if (Data2Areg >= 0 && Data2Areg != DataAreg)
4141       return false;
4142   }
4143 
4144   auto FB = getFeatureBits();
4145   if (FB[AMDGPU::FeatureGFX90AInsts]) {
4146     if (DataAreg < 0 || DstAreg < 0)
4147       return true;
4148     return DstAreg == DataAreg;
4149   }
4150 
4151   return DstAreg < 1 && DataAreg < 1;
4152 }
4153 
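// On gfx90a, VGPR and AGPR tuples must start at an even-numbered register.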
4154 bool AMDGPUAsmParser::validateVGPRAlign(const MCInst &Inst) const {
4155   auto FB = getFeatureBits();
4156   if (!FB[AMDGPU::FeatureGFX90AInsts])
4157     return true;
4158 
4159   const MCRegisterInfo *MRI = getMRI();
4160   const MCRegisterClass &VGPR32 = MRI->getRegClass(AMDGPU::VGPR_32RegClassID);
4161   const MCRegisterClass &AGPR32 = MRI->getRegClass(AMDGPU::AGPR_32RegClassID);
4162   for (unsigned I = 0, E = Inst.getNumOperands(); I != E; ++I) {
4163     const MCOperand &Op = Inst.getOperand(I);
4164     if (!Op.isReg())
4165       continue;
4166 
4167     unsigned Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0);
4168     if (!Sub)
4169       continue;
4170 
4171     if (VGPR32.contains(Sub) && ((Sub - AMDGPU::VGPR0) & 1))
4172       return false;
4173     if (AGPR32.contains(Sub) && ((Sub - AMDGPU::AGPR0) & 1))
4174       return false;
4175   }
4176 
4177   return true;
4178 }
4179 
// gfx90a has an undocumented limitation:
// DS_GWS opcodes must use even-aligned registers.
4182 bool AMDGPUAsmParser::validateGWS(const MCInst &Inst,
4183                                   const OperandVector &Operands) {
4184   if (!getFeatureBits()[AMDGPU::FeatureGFX90AInsts])
4185     return true;
4186 
4187   int Opc = Inst.getOpcode();
4188   if (Opc != AMDGPU::DS_GWS_INIT_vi && Opc != AMDGPU::DS_GWS_BARRIER_vi &&
4189       Opc != AMDGPU::DS_GWS_SEMA_BR_vi)
4190     return true;
4191 
4192   const MCRegisterInfo *MRI = getMRI();
4193   const MCRegisterClass &VGPR32 = MRI->getRegClass(AMDGPU::VGPR_32RegClassID);
4194   int Data0Pos =
4195       AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::data0);
4196   assert(Data0Pos != -1);
4197   auto Reg = Inst.getOperand(Data0Pos).getReg();
4198   auto RegIdx = Reg - (VGPR32.contains(Reg) ? AMDGPU::VGPR0 : AMDGPU::AGPR0);
4199   if (RegIdx & 1) {
4200     SMLoc RegLoc = getRegLoc(Reg, Operands);
4201     Error(RegLoc, "vgpr must be even aligned");
4202     return false;
4203   }
4204 
4205   return true;
4206 }
4207 
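// Validate the cache policy bits: SMRD accepts only glc and dlc, scc is
// not available on gfx90a, non-MIMG returning atomics must set glc, and
// non-returning atomics must not set glc.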
4208 bool AMDGPUAsmParser::validateCoherencyBits(const MCInst &Inst,
4209                                             const OperandVector &Operands,
4210                                             const SMLoc &IDLoc) {
4211   int CPolPos = AMDGPU::getNamedOperandIdx(Inst.getOpcode(),
4212                                            AMDGPU::OpName::cpol);
4213   if (CPolPos == -1)
4214     return true;
4215 
4216   unsigned CPol = Inst.getOperand(CPolPos).getImm();
4217 
4218   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4219   if ((TSFlags & (SIInstrFlags::SMRD)) &&
4220       (CPol & ~(AMDGPU::CPol::GLC | AMDGPU::CPol::DLC))) {
4221     Error(IDLoc, "invalid cache policy for SMRD instruction");
4222     return false;
4223   }
4224 
4225   if (isGFX90A() && (CPol & CPol::SCC)) {
4226     SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
4227     StringRef CStr(S.getPointer());
4228     S = SMLoc::getFromPointer(&CStr.data()[CStr.find("scc")]);
4229     Error(S, "scc is not supported on this GPU");
4230     return false;
4231   }
4232 
4233   if (!(TSFlags & (SIInstrFlags::IsAtomicNoRet | SIInstrFlags::IsAtomicRet)))
4234     return true;
4235 
4236   if (TSFlags & SIInstrFlags::IsAtomicRet) {
4237     if (!(TSFlags & SIInstrFlags::MIMG) && !(CPol & CPol::GLC)) {
4238       Error(IDLoc, "instruction must use glc");
4239       return false;
4240     }
4241   } else {
4242     if (CPol & CPol::GLC) {
4243       SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
4244       StringRef CStr(S.getPointer());
4245       S = SMLoc::getFromPointer(&CStr.data()[CStr.find("glc")]);
4246       Error(S, "instruction must not use glc");
4247       return false;
4248     }
4249   }
4250 
4251   return true;
4252 }
4253 
4254 bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst,
4255                                           const SMLoc &IDLoc,
4256                                           const OperandVector &Operands) {
4257   if (auto ErrMsg = validateLdsDirect(Inst)) {
4258     Error(getRegLoc(LDS_DIRECT, Operands), *ErrMsg);
4259     return false;
4260   }
4261   if (!validateSOPLiteral(Inst)) {
4262     Error(getLitLoc(Operands),
4263       "only one literal operand is allowed");
4264     return false;
4265   }
4266   if (!validateVOPLiteral(Inst, Operands)) {
4267     return false;
4268   }
4269   if (!validateConstantBusLimitations(Inst, Operands)) {
4270     return false;
4271   }
4272   if (!validateEarlyClobberLimitations(Inst, Operands)) {
4273     return false;
4274   }
4275   if (!validateIntClampSupported(Inst)) {
4276     Error(getImmLoc(AMDGPUOperand::ImmTyClampSI, Operands),
4277       "integer clamping is not supported on this GPU");
4278     return false;
4279   }
4280   if (!validateOpSel(Inst)) {
4281     Error(getImmLoc(AMDGPUOperand::ImmTyOpSel, Operands),
4282       "invalid op_sel operand");
4283     return false;
4284   }
4285   if (!validateDPP(Inst, Operands)) {
4286     return false;
4287   }
  // For MUBUF/MTBUF, d16 is a part of the opcode, so there is nothing to validate.
4289   if (!validateMIMGD16(Inst)) {
4290     Error(getImmLoc(AMDGPUOperand::ImmTyD16, Operands),
4291       "d16 modifier is not supported on this GPU");
4292     return false;
4293   }
4294   if (!validateMIMGDim(Inst)) {
4295     Error(IDLoc, "dim modifier is required on this GPU");
4296     return false;
4297   }
4298   if (!validateMIMGMSAA(Inst)) {
4299     Error(getImmLoc(AMDGPUOperand::ImmTyDim, Operands),
4300           "invalid dim; must be MSAA type");
4301     return false;
4302   }
4303   if (!validateMIMGDataSize(Inst)) {
4304     Error(IDLoc,
4305       "image data size does not match dmask and tfe");
4306     return false;
4307   }
4308   if (!validateMIMGAddrSize(Inst)) {
4309     Error(IDLoc,
4310       "image address size does not match dim and a16");
4311     return false;
4312   }
4313   if (!validateMIMGAtomicDMask(Inst)) {
4314     Error(getImmLoc(AMDGPUOperand::ImmTyDMask, Operands),
4315       "invalid atomic image dmask");
4316     return false;
4317   }
4318   if (!validateMIMGGatherDMask(Inst)) {
4319     Error(getImmLoc(AMDGPUOperand::ImmTyDMask, Operands),
4320       "invalid image_gather dmask: only one bit must be set");
4321     return false;
4322   }
4323   if (!validateMovrels(Inst, Operands)) {
4324     return false;
4325   }
4326   if (!validateFlatOffset(Inst, Operands)) {
4327     return false;
4328   }
4329   if (!validateSMEMOffset(Inst, Operands)) {
4330     return false;
4331   }
4332   if (!validateMAIAccWrite(Inst, Operands)) {
4333     return false;
4334   }
4335   if (!validateMFMA(Inst, Operands)) {
4336     return false;
4337   }
4338   if (!validateCoherencyBits(Inst, Operands, IDLoc)) {
4339     return false;
4340   }
4341 
4342   if (!validateAGPRLdSt(Inst)) {
4343     Error(IDLoc, getFeatureBits()[AMDGPU::FeatureGFX90AInsts]
4344     ? "invalid register class: data and dst should be all VGPR or AGPR"
4345     : "invalid register class: agpr loads and stores not supported on this GPU"
4346     );
4347     return false;
4348   }
4349   if (!validateVGPRAlign(Inst)) {
4350     Error(IDLoc,
4351       "invalid register class: vgpr tuples must be 64 bit aligned");
4352     return false;
4353   }
4354   if (!validateGWS(Inst, Operands)) {
4355     return false;
4356   }
4357 
4358   if (!validateDivScale(Inst)) {
4359     Error(IDLoc, "ABS not allowed in VOP3B instructions");
4360     return false;
4361   }
4365 
4366   return true;
4367 }
4368 
4369 static std::string AMDGPUMnemonicSpellCheck(StringRef S,
4370                                             const FeatureBitset &FBS,
4371                                             unsigned VariantID = 0);
4372 
4373 static bool AMDGPUCheckMnemonic(StringRef Mnemonic,
4374                                 const FeatureBitset &AvailableFeatures,
4375                                 unsigned VariantID);
4376 
4377 bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo,
4378                                        const FeatureBitset &FBS) {
4379   return isSupportedMnemo(Mnemo, FBS, getAllVariants());
4380 }
4381 
4382 bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo,
4383                                        const FeatureBitset &FBS,
4384                                        ArrayRef<unsigned> Variants) {
4385   for (auto Variant : Variants) {
4386     if (AMDGPUCheckMnemonic(Mnemo, FBS, Variant))
4387       return true;
4388   }
4389 
4390   return false;
4391 }
4392 
4393 bool AMDGPUAsmParser::checkUnsupportedInstruction(StringRef Mnemo,
4394                                                   const SMLoc &IDLoc) {
4395   FeatureBitset FBS = ComputeAvailableFeatures(getSTI().getFeatureBits());
4396 
4397   // Check if requested instruction variant is supported.
4398   if (isSupportedMnemo(Mnemo, FBS, getMatchedVariants()))
4399     return false;
4400 
4401   // This instruction is not supported.
4402   // Clear any other pending errors because they are no longer relevant.
4403   getParser().clearPendingErrors();
4404 
4405   // Requested instruction variant is not supported.
4406   // Check if any other variants are supported.
4407   StringRef VariantName = getMatchedVariantName();
4408   if (!VariantName.empty() && isSupportedMnemo(Mnemo, FBS)) {
4409     return Error(IDLoc,
4410                  Twine(VariantName,
4411                        " variant of this instruction is not supported"));
4412   }
4413 
4414   // Finally check if this instruction is supported on any other GPU.
4415   if (isSupportedMnemo(Mnemo, FeatureBitset().set())) {
4416     return Error(IDLoc, "instruction not supported on this GPU");
4417   }
4418 
4419   // Instruction not supported on any GPU. Probably a typo.
4420   std::string Suggestion = AMDGPUMnemonicSpellCheck(Mnemo, FBS);
4421   return Error(IDLoc, "invalid instruction" + Suggestion);
4422 }
4423 
4424 bool AMDGPUAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
4425                                               OperandVector &Operands,
4426                                               MCStreamer &Out,
4427                                               uint64_t &ErrorInfo,
4428                                               bool MatchingInlineAsm) {
4429   MCInst Inst;
4430   unsigned Result = Match_Success;
4431   for (auto Variant : getMatchedVariants()) {
4432     uint64_t EI;
4433     auto R = MatchInstructionImpl(Operands, Inst, EI, MatchingInlineAsm,
4434                                   Variant);
    // We order match statuses from least to most specific and keep the
    // most specific status seen so far as the result:
4437     // Match_MnemonicFail < Match_InvalidOperand < Match_MissingFeature < Match_PreferE32
4438     if ((R == Match_Success) ||
4439         (R == Match_PreferE32) ||
4440         (R == Match_MissingFeature && Result != Match_PreferE32) ||
4441         (R == Match_InvalidOperand && Result != Match_MissingFeature
4442                                    && Result != Match_PreferE32) ||
4443         (R == Match_MnemonicFail   && Result != Match_InvalidOperand
4444                                    && Result != Match_MissingFeature
4445                                    && Result != Match_PreferE32)) {
4446       Result = R;
4447       ErrorInfo = EI;
4448     }
4449     if (R == Match_Success)
4450       break;
4451   }
4452 
4453   if (Result == Match_Success) {
4454     if (!validateInstruction(Inst, IDLoc, Operands)) {
4455       return true;
4456     }
4457     Inst.setLoc(IDLoc);
4458     Out.emitInstruction(Inst, getSTI());
4459     return false;
4460   }
4461 
4462   StringRef Mnemo = ((AMDGPUOperand &)*Operands[0]).getToken();
4463   if (checkUnsupportedInstruction(Mnemo, IDLoc)) {
4464     return true;
4465   }
4466 
4467   switch (Result) {
4468   default: break;
4469   case Match_MissingFeature:
4470     // It has been verified that the specified instruction
4471     // mnemonic is valid. A match was found but it requires
4472     // features which are not supported on this GPU.
4473     return Error(IDLoc, "operands are not valid for this GPU or mode");
4474 
4475   case Match_InvalidOperand: {
4476     SMLoc ErrorLoc = IDLoc;
4477     if (ErrorInfo != ~0ULL) {
4478       if (ErrorInfo >= Operands.size()) {
4479         return Error(IDLoc, "too few operands for instruction");
4480       }
4481       ErrorLoc = ((AMDGPUOperand &)*Operands[ErrorInfo]).getStartLoc();
4482       if (ErrorLoc == SMLoc())
4483         ErrorLoc = IDLoc;
4484     }
4485     return Error(ErrorLoc, "invalid operand for instruction");
4486   }
4487 
4488   case Match_PreferE32:
4489     return Error(IDLoc, "internal error: instruction without _e64 suffix "
4490                         "should be encoded as e32");
4491   case Match_MnemonicFail:
4492     llvm_unreachable("Invalid instructions should have been handled already");
4493   }
4494   llvm_unreachable("Implement any new match types added!");
4495 }
4496 
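// Parse an absolute expression and return its low 32 bits in Ret.
// Returns true on error.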
4497 bool AMDGPUAsmParser::ParseAsAbsoluteExpression(uint32_t &Ret) {
4498   int64_t Tmp = -1;
4499   if (!isToken(AsmToken::Integer) && !isToken(AsmToken::Identifier)) {
4500     return true;
4501   }
4502   if (getParser().parseAbsoluteExpression(Tmp)) {
4503     return true;
4504   }
4505   Ret = static_cast<uint32_t>(Tmp);
4506   return false;
4507 }
4508 
4509 bool AMDGPUAsmParser::ParseDirectiveMajorMinor(uint32_t &Major,
4510                                                uint32_t &Minor) {
4511   if (ParseAsAbsoluteExpression(Major))
4512     return TokError("invalid major version");
4513 
4514   if (!trySkipToken(AsmToken::Comma))
4515     return TokError("minor version number required, comma expected");
4516 
4517   if (ParseAsAbsoluteExpression(Minor))
4518     return TokError("invalid minor version");
4519 
4520   return false;
4521 }
4522 
4523 bool AMDGPUAsmParser::ParseDirectiveAMDGCNTarget() {
4524   if (getSTI().getTargetTriple().getArch() != Triple::amdgcn)
4525     return TokError("directive only supported for amdgcn architecture");
4526 
4527   std::string TargetIDDirective;
4528   SMLoc TargetStart = getTok().getLoc();
4529   if (getParser().parseEscapedString(TargetIDDirective))
4530     return true;
4531 
4532   SMRange TargetRange = SMRange(TargetStart, getTok().getLoc());
4533   if (getTargetStreamer().getTargetID()->toString() != TargetIDDirective)
4534     return getParser().Error(TargetRange.Start,
4535         (Twine(".amdgcn_target directive's target id ") +
4536          Twine(TargetIDDirective) +
4537          Twine(" does not match the specified target id ") +
4538          Twine(getTargetStreamer().getTargetID()->toString())).str());
4539 
4540   return false;
4541 }
4542 
4543 bool AMDGPUAsmParser::OutOfRangeError(SMRange Range) {
4544   return Error(Range.Start, "value out of range", Range);
4545 }
4546 
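// Convert the raw VGPR/SGPR counts supplied by .amdhsa_ directives into
// the granulated register block counts encoded in the kernel descriptor.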
4547 bool AMDGPUAsmParser::calculateGPRBlocks(
4548     const FeatureBitset &Features, bool VCCUsed, bool FlatScrUsed,
4549     bool XNACKUsed, Optional<bool> EnableWavefrontSize32, unsigned NextFreeVGPR,
4550     SMRange VGPRRange, unsigned NextFreeSGPR, SMRange SGPRRange,
4551     unsigned &VGPRBlocks, unsigned &SGPRBlocks) {
4552   // TODO(scott.linder): These calculations are duplicated from
4553   // AMDGPUAsmPrinter::getSIProgramInfo and could be unified.
4554   IsaVersion Version = getIsaVersion(getSTI().getCPU());
4555 
4556   unsigned NumVGPRs = NextFreeVGPR;
4557   unsigned NumSGPRs = NextFreeSGPR;
4558 
4559   if (Version.Major >= 10)
4560     NumSGPRs = 0;
4561   else {
4562     unsigned MaxAddressableNumSGPRs =
4563         IsaInfo::getAddressableNumSGPRs(&getSTI());
4564 
4565     if (Version.Major >= 8 && !Features.test(FeatureSGPRInitBug) &&
4566         NumSGPRs > MaxAddressableNumSGPRs)
4567       return OutOfRangeError(SGPRRange);
4568 
4569     NumSGPRs +=
4570         IsaInfo::getNumExtraSGPRs(&getSTI(), VCCUsed, FlatScrUsed, XNACKUsed);
4571 
4572     if ((Version.Major <= 7 || Features.test(FeatureSGPRInitBug)) &&
4573         NumSGPRs > MaxAddressableNumSGPRs)
4574       return OutOfRangeError(SGPRRange);
4575 
4576     if (Features.test(FeatureSGPRInitBug))
4577       NumSGPRs = IsaInfo::FIXED_NUM_SGPRS_FOR_INIT_BUG;
4578   }
4579 
4580   VGPRBlocks =
4581       IsaInfo::getNumVGPRBlocks(&getSTI(), NumVGPRs, EnableWavefrontSize32);
4582   SGPRBlocks = IsaInfo::getNumSGPRBlocks(&getSTI(), NumSGPRs);
4583 
4584   return false;
4585 }
4586 
4587 bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
4588   if (getSTI().getTargetTriple().getArch() != Triple::amdgcn)
4589     return TokError("directive only supported for amdgcn architecture");
4590 
4591   if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA)
4592     return TokError("directive only supported for amdhsa OS");
4593 
4594   StringRef KernelName;
4595   if (getParser().parseIdentifier(KernelName))
4596     return true;
4597 
4598   kernel_descriptor_t KD = getDefaultAmdhsaKernelDescriptor(&getSTI());
4599 
4600   StringSet<> Seen;
4601 
4602   IsaVersion IVersion = getIsaVersion(getSTI().getCPU());
4603 
4604   SMRange VGPRRange;
4605   uint64_t NextFreeVGPR = 0;
4606   uint64_t AccumOffset = 0;
4607   SMRange SGPRRange;
4608   uint64_t NextFreeSGPR = 0;
4609 
  // Count the number of user SGPRs implied by the enabled feature bits.
4611   unsigned ImpliedUserSGPRCount = 0;
4612 
4613   // Track if the asm explicitly contains the directive for the user SGPR
4614   // count.
4615   Optional<unsigned> ExplicitUserSGPRCount;
4616   bool ReserveVCC = true;
4617   bool ReserveFlatScr = true;
4618   Optional<bool> EnableWavefrontSize32;
4619 
4620   while (true) {
4621     while (trySkipToken(AsmToken::EndOfStatement));
4622 
4623     StringRef ID;
4624     SMRange IDRange = getTok().getLocRange();
4625     if (!parseId(ID, "expected .amdhsa_ directive or .end_amdhsa_kernel"))
4626       return true;
4627 
4628     if (ID == ".end_amdhsa_kernel")
4629       break;
4630 
4631     if (Seen.find(ID) != Seen.end())
4632       return TokError(".amdhsa_ directives cannot be repeated");
4633     Seen.insert(ID);
4634 
4635     SMLoc ValStart = getLoc();
4636     int64_t IVal;
4637     if (getParser().parseAbsoluteExpression(IVal))
4638       return true;
4639     SMLoc ValEnd = getLoc();
4640     SMRange ValRange = SMRange(ValStart, ValEnd);
4641 
4642     if (IVal < 0)
4643       return OutOfRangeError(ValRange);
4644 
4645     uint64_t Val = IVal;
4646 
4647 #define PARSE_BITS_ENTRY(FIELD, ENTRY, VALUE, RANGE)                           \
4648   if (!isUInt<ENTRY##_WIDTH>(VALUE))                                           \
4649     return OutOfRangeError(RANGE);                                             \
4650   AMDHSA_BITS_SET(FIELD, ENTRY, VALUE);
4651 
4652     if (ID == ".amdhsa_group_segment_fixed_size") {
4653       if (!isUInt<sizeof(KD.group_segment_fixed_size) * CHAR_BIT>(Val))
4654         return OutOfRangeError(ValRange);
4655       KD.group_segment_fixed_size = Val;
4656     } else if (ID == ".amdhsa_private_segment_fixed_size") {
4657       if (!isUInt<sizeof(KD.private_segment_fixed_size) * CHAR_BIT>(Val))
4658         return OutOfRangeError(ValRange);
4659       KD.private_segment_fixed_size = Val;
4660     } else if (ID == ".amdhsa_kernarg_size") {
4661       if (!isUInt<sizeof(KD.kernarg_size) * CHAR_BIT>(Val))
4662         return OutOfRangeError(ValRange);
4663       KD.kernarg_size = Val;
4664     } else if (ID == ".amdhsa_user_sgpr_count") {
4665       ExplicitUserSGPRCount = Val;
4666     } else if (ID == ".amdhsa_user_sgpr_private_segment_buffer") {
4667       if (hasArchitectedFlatScratch())
4668         return Error(IDRange.Start,
4669                      "directive is not supported with architected flat scratch",
4670                      IDRange);
4671       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4672                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER,
4673                        Val, ValRange);
4674       if (Val)
4675         ImpliedUserSGPRCount += 4;
4676     } else if (ID == ".amdhsa_user_sgpr_dispatch_ptr") {
4677       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4678                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR, Val,
4679                        ValRange);
4680       if (Val)
4681         ImpliedUserSGPRCount += 2;
4682     } else if (ID == ".amdhsa_user_sgpr_queue_ptr") {
4683       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4684                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR, Val,
4685                        ValRange);
4686       if (Val)
4687         ImpliedUserSGPRCount += 2;
4688     } else if (ID == ".amdhsa_user_sgpr_kernarg_segment_ptr") {
4689       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4690                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR,
4691                        Val, ValRange);
4692       if (Val)
4693         ImpliedUserSGPRCount += 2;
4694     } else if (ID == ".amdhsa_user_sgpr_dispatch_id") {
4695       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4696                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID, Val,
4697                        ValRange);
4698       if (Val)
4699         ImpliedUserSGPRCount += 2;
4700     } else if (ID == ".amdhsa_user_sgpr_flat_scratch_init") {
4701       if (hasArchitectedFlatScratch())
4702         return Error(IDRange.Start,
4703                      "directive is not supported with architected flat scratch",
4704                      IDRange);
4705       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4706                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT, Val,
4707                        ValRange);
4708       if (Val)
4709         ImpliedUserSGPRCount += 2;
4710     } else if (ID == ".amdhsa_user_sgpr_private_segment_size") {
4711       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4712                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE,
4713                        Val, ValRange);
4714       if (Val)
4715         ImpliedUserSGPRCount += 1;
4716     } else if (ID == ".amdhsa_wavefront_size32") {
4717       if (IVersion.Major < 10)
4718         return Error(IDRange.Start, "directive requires gfx10+", IDRange);
4719       EnableWavefrontSize32 = Val;
4720       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4721                        KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32,
4722                        Val, ValRange);
4723     } else if (ID == ".amdhsa_system_sgpr_private_segment_wavefront_offset") {
4724       if (hasArchitectedFlatScratch())
4725         return Error(IDRange.Start,
4726                      "directive is not supported with architected flat scratch",
4727                      IDRange);
4728       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4729                        COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, Val, ValRange);
4730     } else if (ID == ".amdhsa_enable_private_segment") {
4731       if (!hasArchitectedFlatScratch())
4732         return Error(
4733             IDRange.Start,
4734             "directive is not supported without architected flat scratch",
4735             IDRange);
4736       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4737                        COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, Val, ValRange);
4738     } else if (ID == ".amdhsa_system_sgpr_workgroup_id_x") {
4739       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4740                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, Val,
4741                        ValRange);
4742     } else if (ID == ".amdhsa_system_sgpr_workgroup_id_y") {
4743       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4744                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y, Val,
4745                        ValRange);
4746     } else if (ID == ".amdhsa_system_sgpr_workgroup_id_z") {
4747       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4748                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z, Val,
4749                        ValRange);
4750     } else if (ID == ".amdhsa_system_sgpr_workgroup_info") {
4751       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4752                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO, Val,
4753                        ValRange);
4754     } else if (ID == ".amdhsa_system_vgpr_workitem_id") {
4755       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4756                        COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID, Val,
4757                        ValRange);
4758     } else if (ID == ".amdhsa_next_free_vgpr") {
4759       VGPRRange = ValRange;
4760       NextFreeVGPR = Val;
4761     } else if (ID == ".amdhsa_next_free_sgpr") {
4762       SGPRRange = ValRange;
4763       NextFreeSGPR = Val;
4764     } else if (ID == ".amdhsa_accum_offset") {
4765       if (!isGFX90A())
4766         return Error(IDRange.Start, "directive requires gfx90a+", IDRange);
4767       AccumOffset = Val;
4768     } else if (ID == ".amdhsa_reserve_vcc") {
4769       if (!isUInt<1>(Val))
4770         return OutOfRangeError(ValRange);
4771       ReserveVCC = Val;
4772     } else if (ID == ".amdhsa_reserve_flat_scratch") {
4773       if (IVersion.Major < 7)
4774         return Error(IDRange.Start, "directive requires gfx7+", IDRange);
4775       if (hasArchitectedFlatScratch())
4776         return Error(IDRange.Start,
4777                      "directive is not supported with architected flat scratch",
4778                      IDRange);
4779       if (!isUInt<1>(Val))
4780         return OutOfRangeError(ValRange);
4781       ReserveFlatScr = Val;
4782     } else if (ID == ".amdhsa_reserve_xnack_mask") {
4783       if (IVersion.Major < 8)
4784         return Error(IDRange.Start, "directive requires gfx8+", IDRange);
4785       if (!isUInt<1>(Val))
4786         return OutOfRangeError(ValRange);
4787       if (Val != getTargetStreamer().getTargetID()->isXnackOnOrAny())
4788         return getParser().Error(IDRange.Start, ".amdhsa_reserve_xnack_mask does not match target id",
4789                                  IDRange);
4790     } else if (ID == ".amdhsa_float_round_mode_32") {
4791       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
4792                        COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32, Val, ValRange);
4793     } else if (ID == ".amdhsa_float_round_mode_16_64") {
4794       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
4795                        COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64, Val, ValRange);
4796     } else if (ID == ".amdhsa_float_denorm_mode_32") {
4797       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
4798                        COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32, Val, ValRange);
4799     } else if (ID == ".amdhsa_float_denorm_mode_16_64") {
4800       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
4801                        COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64, Val,
4802                        ValRange);
4803     } else if (ID == ".amdhsa_dx10_clamp") {
4804       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
4805                        COMPUTE_PGM_RSRC1_ENABLE_DX10_CLAMP, Val, ValRange);
4806     } else if (ID == ".amdhsa_ieee_mode") {
4807       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_ENABLE_IEEE_MODE,
4808                        Val, ValRange);
4809     } else if (ID == ".amdhsa_fp16_overflow") {
4810       if (IVersion.Major < 9)
4811         return Error(IDRange.Start, "directive requires gfx9+", IDRange);
4812       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FP16_OVFL, Val,
4813                        ValRange);
4814     } else if (ID == ".amdhsa_tg_split") {
4815       if (!isGFX90A())
4816         return Error(IDRange.Start, "directive requires gfx90a+", IDRange);
4817       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc3, COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT, Val,
4818                        ValRange);
4819     } else if (ID == ".amdhsa_workgroup_processor_mode") {
4820       if (IVersion.Major < 10)
4821         return Error(IDRange.Start, "directive requires gfx10+", IDRange);
4822       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_WGP_MODE, Val,
4823                        ValRange);
4824     } else if (ID == ".amdhsa_memory_ordered") {
4825       if (IVersion.Major < 10)
4826         return Error(IDRange.Start, "directive requires gfx10+", IDRange);
4827       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_MEM_ORDERED, Val,
4828                        ValRange);
4829     } else if (ID == ".amdhsa_forward_progress") {
4830       if (IVersion.Major < 10)
4831         return Error(IDRange.Start, "directive requires gfx10+", IDRange);
4832       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FWD_PROGRESS, Val,
4833                        ValRange);
4834     } else if (ID == ".amdhsa_exception_fp_ieee_invalid_op") {
4835       PARSE_BITS_ENTRY(
4836           KD.compute_pgm_rsrc2,
4837           COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION, Val,
4838           ValRange);
4839     } else if (ID == ".amdhsa_exception_fp_denorm_src") {
4840       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4841                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE,
4842                        Val, ValRange);
4843     } else if (ID == ".amdhsa_exception_fp_ieee_div_zero") {
4844       PARSE_BITS_ENTRY(
4845           KD.compute_pgm_rsrc2,
4846           COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO, Val,
4847           ValRange);
4848     } else if (ID == ".amdhsa_exception_fp_ieee_overflow") {
4849       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4850                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW,
4851                        Val, ValRange);
4852     } else if (ID == ".amdhsa_exception_fp_ieee_underflow") {
4853       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4854                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW,
4855                        Val, ValRange);
4856     } else if (ID == ".amdhsa_exception_fp_ieee_inexact") {
4857       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4858                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT,
4859                        Val, ValRange);
4860     } else if (ID == ".amdhsa_exception_int_div_zero") {
4861       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4862                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO,
4863                        Val, ValRange);
4864     } else {
4865       return Error(IDRange.Start, "unknown .amdhsa_kernel directive", IDRange);
4866     }
4867 
4868 #undef PARSE_BITS_ENTRY
4869   }
4870 
4871   if (Seen.find(".amdhsa_next_free_vgpr") == Seen.end())
4872     return TokError(".amdhsa_next_free_vgpr directive is required");
4873 
4874   if (Seen.find(".amdhsa_next_free_sgpr") == Seen.end())
4875     return TokError(".amdhsa_next_free_sgpr directive is required");
4876 
4877   unsigned VGPRBlocks;
4878   unsigned SGPRBlocks;
4879   if (calculateGPRBlocks(getFeatureBits(), ReserveVCC, ReserveFlatScr,
4880                          getTargetStreamer().getTargetID()->isXnackOnOrAny(),
4881                          EnableWavefrontSize32, NextFreeVGPR,
4882                          VGPRRange, NextFreeSGPR, SGPRRange, VGPRBlocks,
4883                          SGPRBlocks))
4884     return true;
4885 
4886   if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_WIDTH>(
4887           VGPRBlocks))
4888     return OutOfRangeError(VGPRRange);
4889   AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
4890                   COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT, VGPRBlocks);
4891 
4892   if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_WIDTH>(
4893           SGPRBlocks))
4894     return OutOfRangeError(SGPRRange);
4895   AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
4896                   COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT,
4897                   SGPRBlocks);
4898 
4899   if (ExplicitUserSGPRCount && ImpliedUserSGPRCount > *ExplicitUserSGPRCount)
4900     return TokError("amdgpu_user_sgpr_count smaller than implied by "
4901                     "enabled user SGPRs");
4902 
4903   unsigned UserSGPRCount =
4904       ExplicitUserSGPRCount ? *ExplicitUserSGPRCount : ImpliedUserSGPRCount;
4905 
4906   if (!isUInt<COMPUTE_PGM_RSRC2_USER_SGPR_COUNT_WIDTH>(UserSGPRCount))
4907     return TokError("too many user SGPRs enabled");
4908   AMDHSA_BITS_SET(KD.compute_pgm_rsrc2, COMPUTE_PGM_RSRC2_USER_SGPR_COUNT,
4909                   UserSGPRCount);
4910 
4911   if (isGFX90A()) {
4912     if (Seen.find(".amdhsa_accum_offset") == Seen.end())
4913       return TokError(".amdhsa_accum_offset directive is required");
4914     if (AccumOffset < 4 || AccumOffset > 256 || (AccumOffset & 3))
4915       return TokError("accum_offset should be in range [4..256] in "
4916                       "increments of 4");
4917     if (AccumOffset > alignTo(std::max((uint64_t)1, NextFreeVGPR), 4))
4918       return TokError("accum_offset exceeds total VGPR allocation");
4919     AMDHSA_BITS_SET(KD.compute_pgm_rsrc3, COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET,
4920                     (AccumOffset / 4 - 1));
4921   }
4922 
4923   getTargetStreamer().EmitAmdhsaKernelDescriptor(
4924       getSTI(), KernelName, KD, NextFreeVGPR, NextFreeSGPR, ReserveVCC,
4925       ReserveFlatScr);
4926   return false;
4927 }
4928 
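// Parse the legacy ".hsa_code_object_version <major>,<minor>" directive,
// e.g. ".hsa_code_object_version 2,1".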
4929 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectVersion() {
4930   uint32_t Major;
4931   uint32_t Minor;
4932 
4933   if (ParseDirectiveMajorMinor(Major, Minor))
4934     return true;
4935 
4936   getTargetStreamer().EmitDirectiveHSACodeObjectVersion(Major, Minor);
4937   return false;
4938 }
4939 
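// Parse the legacy ".hsa_code_object_isa" directive. With no arguments the
// ISA version of the targeted GPU is used; otherwise the expected form is,
// e.g., .hsa_code_object_isa 7,0,0,"AMD","AMDGPU".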
4940 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectISA() {
4941   uint32_t Major;
4942   uint32_t Minor;
4943   uint32_t Stepping;
4944   StringRef VendorName;
4945   StringRef ArchName;
4946 
4947   // If this directive has no arguments, then use the ISA version for the
4948   // targeted GPU.
4949   if (isToken(AsmToken::EndOfStatement)) {
4950     AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
4951     getTargetStreamer().EmitDirectiveHSACodeObjectISAV2(ISA.Major, ISA.Minor,
4952                                                         ISA.Stepping,
4953                                                         "AMD", "AMDGPU");
4954     return false;
4955   }
4956 
4957   if (ParseDirectiveMajorMinor(Major, Minor))
4958     return true;
4959 
4960   if (!trySkipToken(AsmToken::Comma))
4961     return TokError("stepping version number required, comma expected");
4962 
4963   if (ParseAsAbsoluteExpression(Stepping))
4964     return TokError("invalid stepping version");
4965 
4966   if (!trySkipToken(AsmToken::Comma))
4967     return TokError("vendor name required, comma expected");
4968 
4969   if (!parseString(VendorName, "invalid vendor name"))
4970     return true;
4971 
4972   if (!trySkipToken(AsmToken::Comma))
4973     return TokError("arch name required, comma expected");
4974 
4975   if (!parseString(ArchName, "invalid arch name"))
4976     return true;
4977 
4978   getTargetStreamer().EmitDirectiveHSACodeObjectISAV2(Major, Minor, Stepping,
4979                                                       VendorName, ArchName);
4980   return false;
4981 }
4982 
4983 bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID,
4984                                                amd_kernel_code_t &Header) {
4985   // max_scratch_backing_memory_byte_size is deprecated. Ignore it while parsing
4986   // assembly for backwards compatibility.
4987   if (ID == "max_scratch_backing_memory_byte_size") {
4988     Parser.eatToEndOfStatement();
4989     return false;
4990   }
4991 
4992   SmallString<40> ErrStr;
4993   raw_svector_ostream Err(ErrStr);
4994   if (!parseAmdKernelCodeField(ID, getParser(), Header, Err)) {
4995     return TokError(Err.str());
4996   }
4997   Lex();
4998 
4999   if (ID == "enable_wavefront_size32") {
5000     if (Header.code_properties & AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32) {
5001       if (!isGFX10Plus())
5002         return TokError("enable_wavefront_size32=1 is only allowed on GFX10+");
5003       if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32])
5004         return TokError("enable_wavefront_size32=1 requires +WavefrontSize32");
5005     } else {
5006       if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64])
5007         return TokError("enable_wavefront_size32=0 requires +WavefrontSize64");
5008     }
5009   }
5010 
5011   if (ID == "wavefront_size") {
5012     if (Header.wavefront_size == 5) {
5013       if (!isGFX10Plus())
5014         return TokError("wavefront_size=5 is only allowed on GFX10+");
5015       if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32])
5016         return TokError("wavefront_size=5 requires +WavefrontSize32");
5017     } else if (Header.wavefront_size == 6) {
5018       if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64])
5019         return TokError("wavefront_size=6 requires +WavefrontSize64");
5020     }
5021   }
5022 
5023   if (ID == "enable_wgp_mode") {
5024     if (G_00B848_WGP_MODE(Header.compute_pgm_resource_registers) &&
5025         !isGFX10Plus())
5026       return TokError("enable_wgp_mode=1 is only allowed on GFX10+");
5027   }
5028 
5029   if (ID == "enable_mem_ordered") {
5030     if (G_00B848_MEM_ORDERED(Header.compute_pgm_resource_registers) &&
5031         !isGFX10Plus())
5032       return TokError("enable_mem_ordered=1 is only allowed on GFX10+");
5033   }
5034 
5035   if (ID == "enable_fwd_progress") {
5036     if (G_00B848_FWD_PROGRESS(Header.compute_pgm_resource_registers) &&
5037         !isGFX10Plus())
5038       return TokError("enable_fwd_progress=1 is only allowed on GFX10+");
5039   }
5040 
5041   return false;
5042 }
5043 
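// Parse the body of a ".amd_kernel_code_t" directive: a sequence of
// "<field> = <value>" assignments terminated by ".end_amd_kernel_code_t",
// for example:
//   .amd_kernel_code_t
//     wavefront_size = 6
//   .end_amd_kernel_code_t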
5044 bool AMDGPUAsmParser::ParseDirectiveAMDKernelCodeT() {
5045   amd_kernel_code_t Header;
5046   AMDGPU::initDefaultAMDKernelCodeT(Header, &getSTI());
5047 
5048   while (true) {
5049     // Lex EndOfStatement.  This is in a while loop, because lexing a comment
5050     // will set the current token to EndOfStatement.
5051     while (trySkipToken(AsmToken::EndOfStatement));
5052 
5053     StringRef ID;
5054     if (!parseId(ID, "expected value identifier or .end_amd_kernel_code_t"))
5055       return true;
5056 
5057     if (ID == ".end_amd_kernel_code_t")
5058       break;
5059 
5060     if (ParseAMDKernelCodeTValue(ID, Header))
5061       return true;
5062   }
5063 
5064   getTargetStreamer().EmitAMDKernelCodeT(Header);
5065 
5066   return false;
5067 }
5068 
5069 bool AMDGPUAsmParser::ParseDirectiveAMDGPUHsaKernel() {
5070   StringRef KernelName;
5071   if (!parseId(KernelName, "expected symbol name"))
5072     return true;
5073 
5074   getTargetStreamer().EmitAMDGPUSymbolType(KernelName,
5075                                            ELF::STT_AMDGPU_HSA_KERNEL);
5076 
5077   KernelScope.initialize(getContext());
5078   return false;
5079 }
5080 
5081 bool AMDGPUAsmParser::ParseDirectiveISAVersion() {
5082   if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) {
5083     return Error(getLoc(),
5084                  ".amd_amdgpu_isa directive is not available on non-amdgcn "
5085                  "architectures");
5086   }
5087 
5088   auto TargetIDDirective = getLexer().getTok().getStringContents();
5089   if (getTargetStreamer().getTargetID()->toString() != TargetIDDirective)
5090     return Error(getParser().getTok().getLoc(), "target id must match options");
5091 
5092   getTargetStreamer().EmitISAVersion();
5093   Lex();
5094 
5095   return false;
5096 }
5097 
5098 bool AMDGPUAsmParser::ParseDirectiveHSAMetadata() {
5099   const char *AssemblerDirectiveBegin;
5100   const char *AssemblerDirectiveEnd;
5101   std::tie(AssemblerDirectiveBegin, AssemblerDirectiveEnd) =
5102       isHsaAbiVersion3Or4(&getSTI())
5103           ? std::make_tuple(HSAMD::V3::AssemblerDirectiveBegin,
5104                             HSAMD::V3::AssemblerDirectiveEnd)
5105           : std::make_tuple(HSAMD::AssemblerDirectiveBegin,
5106                             HSAMD::AssemblerDirectiveEnd);
5107 
5108   if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA) {
5109     return Error(getLoc(),
5110                  (Twine(AssemblerDirectiveBegin) + Twine(" directive is "
5111                  "not available on non-amdhsa OSes")).str());
5112   }
5113 
5114   std::string HSAMetadataString;
5115   if (ParseToEndDirective(AssemblerDirectiveBegin, AssemblerDirectiveEnd,
5116                           HSAMetadataString))
5117     return true;
5118 
5119   if (isHsaAbiVersion3Or4(&getSTI())) {
5120     if (!getTargetStreamer().EmitHSAMetadataV3(HSAMetadataString))
5121       return Error(getLoc(), "invalid HSA metadata");
5122   } else {
5123     if (!getTargetStreamer().EmitHSAMetadataV2(HSAMetadataString))
5124       return Error(getLoc(), "invalid HSA metadata");
5125   }
5126 
5127   return false;
5128 }
5129 
5130 /// Common code to parse out a block of text (typically YAML) between start and
5131 /// end directives.
5132 bool AMDGPUAsmParser::ParseToEndDirective(const char *AssemblerDirectiveBegin,
5133                                           const char *AssemblerDirectiveEnd,
5134                                           std::string &CollectString) {
5135 
5136   raw_string_ostream CollectStream(CollectString);
5137 
5138   getLexer().setSkipSpace(false);
5139 
5140   bool FoundEnd = false;
5141   while (!isToken(AsmToken::Eof)) {
5142     while (isToken(AsmToken::Space)) {
5143       CollectStream << getTokenStr();
5144       Lex();
5145     }
5146 
5147     if (trySkipId(AssemblerDirectiveEnd)) {
5148       FoundEnd = true;
5149       break;
5150     }
5151 
5152     CollectStream << Parser.parseStringToEndOfStatement()
5153                   << getContext().getAsmInfo()->getSeparatorString();
5154 
5155     Parser.eatToEndOfStatement();
5156   }
5157 
5158   getLexer().setSkipSpace(true);
5159 
5160   if (isToken(AsmToken::Eof) && !FoundEnd) {
5161     return TokError(Twine("expected directive ") +
5162                     Twine(AssemblerDirectiveEnd) + Twine(" not found"));
5163   }
5164 
5165   CollectStream.flush();
5166   return false;
5167 }
5168 
5169 /// Parse the assembler directive for new MsgPack-format PAL metadata.
5170 bool AMDGPUAsmParser::ParseDirectivePALMetadataBegin() {
5171   std::string String;
5172   if (ParseToEndDirective(AMDGPU::PALMD::AssemblerDirectiveBegin,
5173                           AMDGPU::PALMD::AssemblerDirectiveEnd, String))
5174     return true;
5175 
5176   auto PALMetadata = getTargetStreamer().getPALMetadata();
5177   if (!PALMetadata->setFromString(String))
5178     return Error(getLoc(), "invalid PAL metadata");
5179   return false;
5180 }
5181 
5182 /// Parse the assembler directive for old linear-format PAL metadata.
5183 bool AMDGPUAsmParser::ParseDirectivePALMetadata() {
5184   if (getSTI().getTargetTriple().getOS() != Triple::AMDPAL) {
5185     return Error(getLoc(),
5186                  (Twine(PALMD::AssemblerDirective) + Twine(" directive is "
5187                  "not available on non-amdpal OSes")).str());
5188   }
5189 
5190   auto PALMetadata = getTargetStreamer().getPALMetadata();
5191   PALMetadata->setLegacy();
5192   for (;;) {
5193     uint32_t Key, Value;
5194     if (ParseAsAbsoluteExpression(Key)) {
5195       return TokError(Twine("invalid value in ") +
5196                       Twine(PALMD::AssemblerDirective));
5197     }
5198     if (!trySkipToken(AsmToken::Comma)) {
5199       return TokError(Twine("expected an even number of values in ") +
5200                       Twine(PALMD::AssemblerDirective));
5201     }
5202     if (ParseAsAbsoluteExpression(Value)) {
5203       return TokError(Twine("invalid value in ") +
5204                       Twine(PALMD::AssemblerDirective));
5205     }
5206     PALMetadata->setRegister(Key, Value);
5207     if (!trySkipToken(AsmToken::Comma))
5208       break;
5209   }
5210   return false;
5211 }
5212 
5213 /// ParseDirectiveAMDGPULDS
5214 ///  ::= .amdgpu_lds identifier ',' size_expression [',' align_expression]
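///  e.g. ".amdgpu_lds my_lds_var, 1024, 16" (symbol name illustrative)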
5215 bool AMDGPUAsmParser::ParseDirectiveAMDGPULDS() {
5216   if (getParser().checkForValidSection())
5217     return true;
5218 
5219   StringRef Name;
5220   SMLoc NameLoc = getLoc();
5221   if (getParser().parseIdentifier(Name))
5222     return TokError("expected identifier in directive");
5223 
5224   MCSymbol *Symbol = getContext().getOrCreateSymbol(Name);
5225   if (parseToken(AsmToken::Comma, "expected ','"))
5226     return true;
5227 
5228   unsigned LocalMemorySize = AMDGPU::IsaInfo::getLocalMemorySize(&getSTI());
5229 
5230   int64_t Size;
5231   SMLoc SizeLoc = getLoc();
5232   if (getParser().parseAbsoluteExpression(Size))
5233     return true;
5234   if (Size < 0)
5235     return Error(SizeLoc, "size must be non-negative");
5236   if (Size > LocalMemorySize)
5237     return Error(SizeLoc, "size is too large");
5238 
5239   int64_t Alignment = 4;
5240   if (trySkipToken(AsmToken::Comma)) {
5241     SMLoc AlignLoc = getLoc();
5242     if (getParser().parseAbsoluteExpression(Alignment))
5243       return true;
5244     if (Alignment < 0 || !isPowerOf2_64(Alignment))
5245       return Error(AlignLoc, "alignment must be a power of two");
5246 
5247     // Alignment larger than the size of LDS is possible in theory, as long
5248     // as the linker manages to place the symbol at address 0, but we do want
5249     // to make sure the alignment fits nicely into a 32-bit integer.
5250     if (Alignment >= 1u << 31)
5251       return Error(AlignLoc, "alignment is too large");
5252   }
5253 
5254   if (parseToken(AsmToken::EndOfStatement,
5255                  "unexpected token in '.amdgpu_lds' directive"))
5256     return true;
5257 
5258   Symbol->redefineIfPossible();
5259   if (!Symbol->isUndefined())
5260     return Error(NameLoc, "invalid symbol redefinition");
5261 
5262   getTargetStreamer().emitAMDGPULDS(Symbol, Size, Align(Alignment));
5263   return false;
5264 }
5265 
5266 bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) {
5267   StringRef IDVal = DirectiveID.getString();
5268 
5269   if (isHsaAbiVersion3Or4(&getSTI())) {
5270     if (IDVal == ".amdhsa_kernel")
5271       return ParseDirectiveAMDHSAKernel();
5272 
5273     // TODO: Restructure/combine with PAL metadata directive.
5274     if (IDVal == AMDGPU::HSAMD::V3::AssemblerDirectiveBegin)
5275       return ParseDirectiveHSAMetadata();
5276   } else {
5277     if (IDVal == ".hsa_code_object_version")
5278       return ParseDirectiveHSACodeObjectVersion();
5279 
5280     if (IDVal == ".hsa_code_object_isa")
5281       return ParseDirectiveHSACodeObjectISA();
5282 
5283     if (IDVal == ".amd_kernel_code_t")
5284       return ParseDirectiveAMDKernelCodeT();
5285 
5286     if (IDVal == ".amdgpu_hsa_kernel")
5287       return ParseDirectiveAMDGPUHsaKernel();
5288 
5289     if (IDVal == ".amd_amdgpu_isa")
5290       return ParseDirectiveISAVersion();
5291 
5292     if (IDVal == AMDGPU::HSAMD::AssemblerDirectiveBegin)
5293       return ParseDirectiveHSAMetadata();
5294   }
5295 
5296   if (IDVal == ".amdgcn_target")
5297     return ParseDirectiveAMDGCNTarget();
5298 
5299   if (IDVal == ".amdgpu_lds")
5300     return ParseDirectiveAMDGPULDS();
5301 
5302   if (IDVal == PALMD::AssemblerDirectiveBegin)
5303     return ParseDirectivePALMetadataBegin();
5304 
5305   if (IDVal == PALMD::AssemblerDirective)
5306     return ParseDirectivePALMetadata();
5307 
5308   return true;
5309 }
5310 
5311 bool AMDGPUAsmParser::subtargetHasRegister(const MCRegisterInfo &MRI,
5312                                            unsigned RegNo) {
5313 
5314   for (MCRegAliasIterator R(AMDGPU::TTMP12_TTMP13_TTMP14_TTMP15, &MRI, true);
5315        R.isValid(); ++R) {
5316     if (*R == RegNo)
5317       return isGFX9Plus();
5318   }
5319 
5320   // GFX10 has 2 more SGPRs, 104 and 105.
5321   for (MCRegAliasIterator R(AMDGPU::SGPR104_SGPR105, &MRI, true);
5322        R.isValid(); ++R) {
5323     if (*R == RegNo)
5324       return hasSGPR104_SGPR105();
5325   }
5326 
5327   switch (RegNo) {
5328   case AMDGPU::SRC_SHARED_BASE:
5329   case AMDGPU::SRC_SHARED_LIMIT:
5330   case AMDGPU::SRC_PRIVATE_BASE:
5331   case AMDGPU::SRC_PRIVATE_LIMIT:
5332   case AMDGPU::SRC_POPS_EXITING_WAVE_ID:
5333     return isGFX9Plus();
5334   case AMDGPU::TBA:
5335   case AMDGPU::TBA_LO:
5336   case AMDGPU::TBA_HI:
5337   case AMDGPU::TMA:
5338   case AMDGPU::TMA_LO:
5339   case AMDGPU::TMA_HI:
5340     return !isGFX9Plus();
5341   case AMDGPU::XNACK_MASK:
5342   case AMDGPU::XNACK_MASK_LO:
5343   case AMDGPU::XNACK_MASK_HI:
5344     return (isVI() || isGFX9()) && getTargetStreamer().getTargetID()->isXnackSupported();
5345   case AMDGPU::SGPR_NULL:
5346     return isGFX10Plus();
5347   default:
5348     break;
5349   }
5350 
5351   if (isCI())
5352     return true;
5353 
5354   if (isSI() || isGFX10Plus()) {
5355     // No flat_scr on SI.
5356     // On GFX10 flat scratch is not a valid register operand and can only be
5357     // accessed with s_setreg/s_getreg.
5358     switch (RegNo) {
5359     case AMDGPU::FLAT_SCR:
5360     case AMDGPU::FLAT_SCR_LO:
5361     case AMDGPU::FLAT_SCR_HI:
5362       return false;
5363     default:
5364       return true;
5365     }
5366   }
5367 
5368   // VI only has 102 SGPRs, so make sure we aren't trying to use the 2 more that
5369   // SI/CI have.
5370   for (MCRegAliasIterator R(AMDGPU::SGPR102_SGPR103, &MRI, true);
5371        R.isValid(); ++R) {
5372     if (*R == RegNo)
5373       return hasSGPR102_SGPR103();
5374   }
5375 
5376   return true;
5377 }
5378 
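// Parse a single instruction operand. In NSA (non-sequential address) mode,
// used for GFX10+ image instructions, a bracketed register list such as
// "[v4, v9, v16]" is accepted; the registers are recorded individually and,
// when more than one is parsed, framed by '[' and ']' token operands.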
5379 OperandMatchResultTy
5380 AMDGPUAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic,
5381                               OperandMode Mode) {
5382   // Try to parse with a custom parser
5383   OperandMatchResultTy ResTy = MatchOperandParserImpl(Operands, Mnemonic);
5384 
5385   // If we successfully parsed the operand or if there was an error parsing,
5386   // we are done.
5387   //
5388   // If we are parsing after we reach EndOfStatement then this means we
5389   // are appending default values to the Operands list.  This is only done
5390   // by custom parser, so we shouldn't continue on to the generic parsing.
5391   if (ResTy == MatchOperand_Success || ResTy == MatchOperand_ParseFail ||
5392       isToken(AsmToken::EndOfStatement))
5393     return ResTy;
5394 
5395   SMLoc RBraceLoc;
5396   SMLoc LBraceLoc = getLoc();
5397   if (Mode == OperandMode_NSA && trySkipToken(AsmToken::LBrac)) {
5398     unsigned Prefix = Operands.size();
5399 
5400     for (;;) {
5401       auto Loc = getLoc();
5402       ResTy = parseReg(Operands);
5403       if (ResTy == MatchOperand_NoMatch)
5404         Error(Loc, "expected a register");
5405       if (ResTy != MatchOperand_Success)
5406         return MatchOperand_ParseFail;
5407 
5408       RBraceLoc = getLoc();
5409       if (trySkipToken(AsmToken::RBrac))
5410         break;
5411 
5412       if (!skipToken(AsmToken::Comma,
5413                      "expected a comma or a closing square bracket")) {
5414         return MatchOperand_ParseFail;
5415       }
5416     }
5417 
5418     if (Operands.size() - Prefix > 1) {
5419       Operands.insert(Operands.begin() + Prefix,
5420                       AMDGPUOperand::CreateToken(this, "[", LBraceLoc));
5421       Operands.push_back(AMDGPUOperand::CreateToken(this, "]", RBraceLoc));
5422     }
5423 
5424     return MatchOperand_Success;
5425   }
5426 
5427   return parseRegOrImm(Operands);
5428 }
5429 
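// Strip a forced-encoding suffix from the mnemonic and record it, e.g.
// "v_add_f32_e64" selects the 64-bit (VOP3) encoding of "v_add_f32".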
5430 StringRef AMDGPUAsmParser::parseMnemonicSuffix(StringRef Name) {
5431   // Clear any forced encodings from the previous instruction.
5432   setForcedEncodingSize(0);
5433   setForcedDPP(false);
5434   setForcedSDWA(false);
5435 
5436   if (Name.endswith("_e64")) {
5437     setForcedEncodingSize(64);
5438     return Name.substr(0, Name.size() - 4);
5439   } else if (Name.endswith("_e32")) {
5440     setForcedEncodingSize(32);
5441     return Name.substr(0, Name.size() - 4);
5442   } else if (Name.endswith("_dpp")) {
5443     setForcedDPP(true);
5444     return Name.substr(0, Name.size() - 4);
5445   } else if (Name.endswith("_sdwa")) {
5446     setForcedSDWA(true);
5447     return Name.substr(0, Name.size() - 5);
5448   }
5449   return Name;
5450 }
5451 
5452 bool AMDGPUAsmParser::ParseInstruction(ParseInstructionInfo &Info,
5453                                        StringRef Name,
5454                                        SMLoc NameLoc, OperandVector &Operands) {
5455   // Add the instruction mnemonic
5456   Name = parseMnemonicSuffix(Name);
5457   Operands.push_back(AMDGPUOperand::CreateToken(this, Name, NameLoc));
5458 
5459   bool IsMIMG = Name.startswith("image_");
5460 
5461   while (!trySkipToken(AsmToken::EndOfStatement)) {
5462     OperandMode Mode = OperandMode_Default;
5463     if (IsMIMG && isGFX10Plus() && Operands.size() == 2)
5464       Mode = OperandMode_NSA;
5465     CPolSeen = 0;
5466     OperandMatchResultTy Res = parseOperand(Operands, Name, Mode);
5467 
5468     if (Res != MatchOperand_Success) {
5469       checkUnsupportedInstruction(Name, NameLoc);
5470       if (!Parser.hasPendingError()) {
5471         // FIXME: use real operand location rather than the current location.
5472         StringRef Msg =
5473           (Res == MatchOperand_ParseFail) ? "failed parsing operand." :
5474                                             "not a valid operand.";
5475         Error(getLoc(), Msg);
5476       }
5477       while (!trySkipToken(AsmToken::EndOfStatement)) {
5478         lex();
5479       }
5480       return true;
5481     }
5482 
5483     // Eat the comma or space if there is one.
5484     trySkipToken(AsmToken::Comma);
5485   }
5486 
5487   return false;
5488 }
5489 
5490 //===----------------------------------------------------------------------===//
5491 // Utility functions
5492 //===----------------------------------------------------------------------===//
5493 
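// Parse an integer written as "<Prefix>:<value>", e.g. "offset:16".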
5494 OperandMatchResultTy
5495 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, int64_t &IntVal) {
5496 
5497   if (!trySkipId(Prefix, AsmToken::Colon))
5498     return MatchOperand_NoMatch;
5499 
5500   return parseExpr(IntVal) ? MatchOperand_Success : MatchOperand_ParseFail;
5501 }
5502 
5503 OperandMatchResultTy
5504 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
5505                                     AMDGPUOperand::ImmTy ImmTy,
5506                                     bool (*ConvertResult)(int64_t&)) {
5507   SMLoc S = getLoc();
5508   int64_t Value = 0;
5509 
5510   OperandMatchResultTy Res = parseIntWithPrefix(Prefix, Value);
5511   if (Res != MatchOperand_Success)
5512     return Res;
5513 
5514   if (ConvertResult && !ConvertResult(Value)) {
5515     Error(S, "invalid " + StringRef(Prefix) + " value.");
5516   }
5517 
5518   Operands.push_back(AMDGPUOperand::CreateImm(this, Value, S, ImmTy));
5519   return MatchOperand_Success;
5520 }
5521 
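// Parse an operand written as a prefixed, bracketed list of 0/1 values,
// e.g. "op_sel:[0,1,1,0]". Up to four elements are accepted and packed into
// a single immediate, one bit per element.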
5522 OperandMatchResultTy
5523 AMDGPUAsmParser::parseOperandArrayWithPrefix(const char *Prefix,
5524                                              OperandVector &Operands,
5525                                              AMDGPUOperand::ImmTy ImmTy,
5526                                              bool (*ConvertResult)(int64_t&)) {
5527   SMLoc S = getLoc();
5528   if (!trySkipId(Prefix, AsmToken::Colon))
5529     return MatchOperand_NoMatch;
5530 
5531   if (!skipToken(AsmToken::LBrac, "expected a left square bracket"))
5532     return MatchOperand_ParseFail;
5533 
5534   unsigned Val = 0;
5535   const unsigned MaxSize = 4;
5536 
5537   // FIXME: How to verify the number of elements matches the number of src
5538   // operands?
5539   for (int I = 0; ; ++I) {
5540     int64_t Op;
5541     SMLoc Loc = getLoc();
5542     if (!parseExpr(Op))
5543       return MatchOperand_ParseFail;
5544 
5545     if (Op != 0 && Op != 1) {
5546       Error(Loc, "invalid " + StringRef(Prefix) + " value.");
5547       return MatchOperand_ParseFail;
5548     }
5549 
5550     Val |= (Op << I);
5551 
5552     if (trySkipToken(AsmToken::RBrac))
5553       break;
5554 
5555     if (I + 1 == MaxSize) {
5556       Error(getLoc(), "expected a closing square bracket");
5557       return MatchOperand_ParseFail;
5558     }
5559 
5560     if (!skipToken(AsmToken::Comma, "expected a comma"))
5561       return MatchOperand_ParseFail;
5562   }
5563 
5564   Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S, ImmTy));
5565   return MatchOperand_Success;
5566 }
5567 
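// Parse a named single-bit modifier, either the bare name (bit = 1) or the
// name with a "no" prefix (bit = 0), e.g. "r128" vs. "nor128".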
5568 OperandMatchResultTy
5569 AMDGPUAsmParser::parseNamedBit(StringRef Name, OperandVector &Operands,
5570                                AMDGPUOperand::ImmTy ImmTy) {
5571   int64_t Bit;
5572   SMLoc S = getLoc();
5573 
5574   if (trySkipId(Name)) {
5575     Bit = 1;
5576   } else if (trySkipId("no", Name)) {
5577     Bit = 0;
5578   } else {
5579     return MatchOperand_NoMatch;
5580   }
5581 
5582   if (Name == "r128" && !hasMIMG_R128()) {
5583     Error(S, "r128 modifier is not supported on this GPU");
5584     return MatchOperand_ParseFail;
5585   }
5586   if (Name == "a16" && !isGFX9() && !hasGFX10A16()) {
5587     Error(S, "a16 modifier is not supported on this GPU");
5588     return MatchOperand_ParseFail;
5589   }
5590 
5591   if (isGFX9() && ImmTy == AMDGPUOperand::ImmTyA16)
5592     ImmTy = AMDGPUOperand::ImmTyR128A16;
5593 
5594   Operands.push_back(AMDGPUOperand::CreateImm(this, Bit, S, ImmTy));
5595   return MatchOperand_Success;
5596 }
5597 
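// Parse a cache policy modifier (glc, slc, dlc or scc) or its negated "no"
// form (e.g. "noglc"), merging the modifiers of one instruction into a single
// CPol immediate and rejecting duplicates.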
5598 OperandMatchResultTy
5599 AMDGPUAsmParser::parseCPol(OperandVector &Operands) {
5600   unsigned CPolOn = 0;
5601   unsigned CPolOff = 0;
5602   SMLoc S = getLoc();
5603 
5604   if (trySkipId("glc"))
5605     CPolOn = AMDGPU::CPol::GLC;
5606   else if (trySkipId("noglc"))
5607     CPolOff = AMDGPU::CPol::GLC;
5608   else if (trySkipId("slc"))
5609     CPolOn = AMDGPU::CPol::SLC;
5610   else if (trySkipId("noslc"))
5611     CPolOff = AMDGPU::CPol::SLC;
5612   else if (trySkipId("dlc"))
5613     CPolOn = AMDGPU::CPol::DLC;
5614   else if (trySkipId("nodlc"))
5615     CPolOff = AMDGPU::CPol::DLC;
5616   else if (trySkipId("scc"))
5617     CPolOn = AMDGPU::CPol::SCC;
5618   else if (trySkipId("noscc"))
5619     CPolOff = AMDGPU::CPol::SCC;
5620   else
5621     return MatchOperand_NoMatch;
5622 
5623   if (!isGFX10Plus() && ((CPolOn | CPolOff) & AMDGPU::CPol::DLC)) {
5624     Error(S, "dlc modifier is not supported on this GPU");
5625     return MatchOperand_ParseFail;
5626   }
5627 
5628   if (!isGFX90A() && ((CPolOn | CPolOff) & AMDGPU::CPol::SCC)) {
5629     Error(S, "scc modifier is not supported on this GPU");
5630     return MatchOperand_ParseFail;
5631   }
5632 
5633   if (CPolSeen & (CPolOn | CPolOff)) {
5634     Error(S, "duplicate cache policy modifier");
5635     return MatchOperand_ParseFail;
5636   }
5637 
5638   CPolSeen |= (CPolOn | CPolOff);
5639 
5640   for (unsigned I = 1; I != Operands.size(); ++I) {
5641     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
5642     if (Op.isCPol()) {
5643       Op.setImm((Op.getImm() | CPolOn) & ~CPolOff);
5644       return MatchOperand_Success;
5645     }
5646   }
5647 
5648   Operands.push_back(AMDGPUOperand::CreateImm(this, CPolOn, S,
5649                                               AMDGPUOperand::ImmTyCPol));
5650 
5651   return MatchOperand_Success;
5652 }
5653 
5654 static void addOptionalImmOperand(
5655   MCInst& Inst, const OperandVector& Operands,
5656   AMDGPUAsmParser::OptionalImmIndexMap& OptionalIdx,
5657   AMDGPUOperand::ImmTy ImmT,
5658   int64_t Default = 0) {
5659   auto i = OptionalIdx.find(ImmT);
5660   if (i != OptionalIdx.end()) {
5661     unsigned Idx = i->second;
5662     ((AMDGPUOperand &)*Operands[Idx]).addImmOperands(Inst, 1);
5663   } else {
5664     Inst.addOperand(MCOperand::createImm(Default));
5665   }
5666 }
5667 
5668 OperandMatchResultTy
5669 AMDGPUAsmParser::parseStringWithPrefix(StringRef Prefix,
5670                                        StringRef &Value,
5671                                        SMLoc &StringLoc) {
5672   if (!trySkipId(Prefix, AsmToken::Colon))
5673     return MatchOperand_NoMatch;
5674 
5675   StringLoc = getLoc();
5676   return parseId(Value, "expected an identifier") ? MatchOperand_Success
5677                                                   : MatchOperand_ParseFail;
5678 }
5679 
5680 //===----------------------------------------------------------------------===//
5681 // MTBUF format
5682 //===----------------------------------------------------------------------===//
5683 
5684 bool AMDGPUAsmParser::tryParseFmt(const char *Pref,
5685                                   int64_t MaxVal,
5686                                   int64_t &Fmt) {
5687   int64_t Val;
5688   SMLoc Loc = getLoc();
5689 
5690   auto Res = parseIntWithPrefix(Pref, Val);
5691   if (Res == MatchOperand_ParseFail)
5692     return false;
5693   if (Res == MatchOperand_NoMatch)
5694     return true;
5695 
5696   if (Val < 0 || Val > MaxVal) {
5697     Error(Loc, Twine("out of range ", StringRef(Pref)));
5698     return false;
5699   }
5700 
5701   Fmt = Val;
5702   return true;
5703 }
5704 
5705 // dfmt and nfmt (in a tbuffer instruction) are parsed as one to allow their
5706 // values to live in a joint format operand in the MCInst encoding.
5707 OperandMatchResultTy
5708 AMDGPUAsmParser::parseDfmtNfmt(int64_t &Format) {
5709   using namespace llvm::AMDGPU::MTBUFFormat;
5710 
5711   int64_t Dfmt = DFMT_UNDEF;
5712   int64_t Nfmt = NFMT_UNDEF;
5713 
5714   // dfmt and nfmt can appear in either order, and each is optional.
5715   for (int I = 0; I < 2; ++I) {
5716     if (Dfmt == DFMT_UNDEF && !tryParseFmt("dfmt", DFMT_MAX, Dfmt))
5717       return MatchOperand_ParseFail;
5718 
5719     if (Nfmt == NFMT_UNDEF && !tryParseFmt("nfmt", NFMT_MAX, Nfmt)) {
5720       return MatchOperand_ParseFail;
5721     }
5722     // Skip the optional comma between dfmt and nfmt,
5723     // but guard against two commas following each other.
5724     if ((Dfmt == DFMT_UNDEF) != (Nfmt == NFMT_UNDEF) &&
5725         !peekToken().is(AsmToken::Comma)) {
5726       trySkipToken(AsmToken::Comma);
5727     }
5728   }
5729 
5730   if (Dfmt == DFMT_UNDEF && Nfmt == NFMT_UNDEF)
5731     return MatchOperand_NoMatch;
5732 
5733   Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt;
5734   Nfmt = (Nfmt == NFMT_UNDEF) ? NFMT_DEFAULT : Nfmt;
5735 
5736   Format = encodeDfmtNfmt(Dfmt, Nfmt);
5737   return MatchOperand_Success;
5738 }
5739 
5740 OperandMatchResultTy
5741 AMDGPUAsmParser::parseUfmt(int64_t &Format) {
5742   using namespace llvm::AMDGPU::MTBUFFormat;
5743 
5744   int64_t Fmt = UFMT_UNDEF;
5745 
5746   if (!tryParseFmt("format", UFMT_MAX, Fmt))
5747     return MatchOperand_ParseFail;
5748 
5749   if (Fmt == UFMT_UNDEF)
5750     return MatchOperand_NoMatch;
5751 
5752   Format = Fmt;
5753   return MatchOperand_Success;
5754 }
5755 
5756 bool AMDGPUAsmParser::matchDfmtNfmt(int64_t &Dfmt,
5757                                     int64_t &Nfmt,
5758                                     StringRef FormatStr,
5759                                     SMLoc Loc) {
5760   using namespace llvm::AMDGPU::MTBUFFormat;
5761   int64_t Format;
5762 
5763   Format = getDfmt(FormatStr);
5764   if (Format != DFMT_UNDEF) {
5765     Dfmt = Format;
5766     return true;
5767   }
5768 
5769   Format = getNfmt(FormatStr, getSTI());
5770   if (Format != NFMT_UNDEF) {
5771     Nfmt = Format;
5772     return true;
5773   }
5774 
5775   Error(Loc, "unsupported format");
5776   return false;
5777 }
5778 
5779 OperandMatchResultTy
5780 AMDGPUAsmParser::parseSymbolicSplitFormat(StringRef FormatStr,
5781                                           SMLoc FormatLoc,
5782                                           int64_t &Format) {
5783   using namespace llvm::AMDGPU::MTBUFFormat;
5784 
5785   int64_t Dfmt = DFMT_UNDEF;
5786   int64_t Nfmt = NFMT_UNDEF;
5787   if (!matchDfmtNfmt(Dfmt, Nfmt, FormatStr, FormatLoc))
5788     return MatchOperand_ParseFail;
5789 
5790   if (trySkipToken(AsmToken::Comma)) {
5791     StringRef Str;
5792     SMLoc Loc = getLoc();
5793     if (!parseId(Str, "expected a format string") ||
5794         !matchDfmtNfmt(Dfmt, Nfmt, Str, Loc)) {
5795       return MatchOperand_ParseFail;
5796     }
5797     if (Dfmt == DFMT_UNDEF) {
5798       Error(Loc, "duplicate numeric format");
5799       return MatchOperand_ParseFail;
5800     } else if (Nfmt == NFMT_UNDEF) {
5801       Error(Loc, "duplicate data format");
5802       return MatchOperand_ParseFail;
5803     }
5804   }
5805 
5806   Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt;
5807   Nfmt = (Nfmt == NFMT_UNDEF) ? NFMT_DEFAULT : Nfmt;
5808 
5809   if (isGFX10Plus()) {
5810     auto Ufmt = convertDfmtNfmt2Ufmt(Dfmt, Nfmt);
5811     if (Ufmt == UFMT_UNDEF) {
5812       Error(FormatLoc, "unsupported format");
5813       return MatchOperand_ParseFail;
5814     }
5815     Format = Ufmt;
5816   } else {
5817     Format = encodeDfmtNfmt(Dfmt, Nfmt);
5818   }
5819 
5820   return MatchOperand_Success;
5821 }
5822 
5823 OperandMatchResultTy
5824 AMDGPUAsmParser::parseSymbolicUnifiedFormat(StringRef FormatStr,
5825                                             SMLoc Loc,
5826                                             int64_t &Format) {
5827   using namespace llvm::AMDGPU::MTBUFFormat;
5828 
5829   auto Id = getUnifiedFormat(FormatStr);
5830   if (Id == UFMT_UNDEF)
5831     return MatchOperand_NoMatch;
5832 
5833   if (!isGFX10Plus()) {
5834     Error(Loc, "unified format is not supported on this GPU");
5835     return MatchOperand_ParseFail;
5836   }
5837 
5838   Format = Id;
5839   return MatchOperand_Success;
5840 }
5841 
5842 OperandMatchResultTy
5843 AMDGPUAsmParser::parseNumericFormat(int64_t &Format) {
5844   using namespace llvm::AMDGPU::MTBUFFormat;
5845   SMLoc Loc = getLoc();
5846 
5847   if (!parseExpr(Format))
5848     return MatchOperand_ParseFail;
5849   if (!isValidFormatEncoding(Format, getSTI())) {
5850     Error(Loc, "out of range format");
5851     return MatchOperand_ParseFail;
5852   }
5853 
5854   return MatchOperand_Success;
5855 }
5856 
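// Parse "format:<expr>" or the symbolic form "format:[<name>{,<name>}]",
// where names are MTBUF data/numeric format symbols (or a unified format
// symbol on GFX10+), e.g. "format:[BUF_DATA_FORMAT_32, BUF_NUM_FORMAT_FLOAT]".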
5857 OperandMatchResultTy
5858 AMDGPUAsmParser::parseSymbolicOrNumericFormat(int64_t &Format) {
5859   using namespace llvm::AMDGPU::MTBUFFormat;
5860 
5861   if (!trySkipId("format", AsmToken::Colon))
5862     return MatchOperand_NoMatch;
5863 
5864   if (trySkipToken(AsmToken::LBrac)) {
5865     StringRef FormatStr;
5866     SMLoc Loc = getLoc();
5867     if (!parseId(FormatStr, "expected a format string"))
5868       return MatchOperand_ParseFail;
5869 
5870     auto Res = parseSymbolicUnifiedFormat(FormatStr, Loc, Format);
5871     if (Res == MatchOperand_NoMatch)
5872       Res = parseSymbolicSplitFormat(FormatStr, Loc, Format);
5873     if (Res != MatchOperand_Success)
5874       return Res;
5875 
5876     if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
5877       return MatchOperand_ParseFail;
5878 
5879     return MatchOperand_Success;
5880   }
5881 
5882   return parseNumericFormat(Format);
5883 }
5884 
5885 OperandMatchResultTy
5886 AMDGPUAsmParser::parseFORMAT(OperandVector &Operands) {
5887   using namespace llvm::AMDGPU::MTBUFFormat;
5888 
5889   int64_t Format = getDefaultFormatEncoding(getSTI());
5890   OperandMatchResultTy Res;
5891   SMLoc Loc = getLoc();
5892 
5893   // Parse legacy format syntax.
5894   Res = isGFX10Plus() ? parseUfmt(Format) : parseDfmtNfmt(Format);
5895   if (Res == MatchOperand_ParseFail)
5896     return Res;
5897 
5898   bool FormatFound = (Res == MatchOperand_Success);
5899 
5900   Operands.push_back(
5901     AMDGPUOperand::CreateImm(this, Format, Loc, AMDGPUOperand::ImmTyFORMAT));
5902 
5903   if (FormatFound)
5904     trySkipToken(AsmToken::Comma);
5905 
5906   if (isToken(AsmToken::EndOfStatement)) {
5907     // We are expecting an soffset operand,
5908     // but let the matcher handle the error.
5909     return MatchOperand_Success;
5910   }
5911 
5912   // Parse soffset.
5913   Res = parseRegOrImm(Operands);
5914   if (Res != MatchOperand_Success)
5915     return Res;
5916 
5917   trySkipToken(AsmToken::Comma);
5918 
5919   if (!FormatFound) {
5920     Res = parseSymbolicOrNumericFormat(Format);
5921     if (Res == MatchOperand_ParseFail)
5922       return Res;
5923     if (Res == MatchOperand_Success) {
5924       auto Size = Operands.size();
5925       AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands[Size - 2]);
5926       assert(Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyFORMAT);
5927       Op.setImm(Format);
5928     }
5929     return MatchOperand_Success;
5930   }
5931 
5932   if (isId("format") && peekToken().is(AsmToken::Colon)) {
5933     Error(getLoc(), "duplicate format");
5934     return MatchOperand_ParseFail;
5935   }
5936   return MatchOperand_Success;
5937 }
5938 
5939 //===----------------------------------------------------------------------===//
5940 // ds
5941 //===----------------------------------------------------------------------===//
5942 
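// Convert parsed operands of DS instructions that take a pair of 8-bit
// offsets, e.g. "ds_write2_b32 v1, v2, v3 offset0:4 offset1:8".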
5943 void AMDGPUAsmParser::cvtDSOffset01(MCInst &Inst,
5944                                     const OperandVector &Operands) {
5945   OptionalImmIndexMap OptionalIdx;
5946 
5947   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
5948     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
5949 
5950     // Add the register arguments
5951     if (Op.isReg()) {
5952       Op.addRegOperands(Inst, 1);
5953       continue;
5954     }
5955 
5956     // Handle optional arguments
5957     OptionalIdx[Op.getImmTy()] = i;
5958   }
5959 
5960   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset0);
5961   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset1);
5962   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS);
5963 
5964   Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0
5965 }
5966 
5967 void AMDGPUAsmParser::cvtDSImpl(MCInst &Inst, const OperandVector &Operands,
5968                                 bool IsGdsHardcoded) {
5969   OptionalImmIndexMap OptionalIdx;
5970 
5971   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
5972     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
5973 
5974     // Add the register arguments
5975     if (Op.isReg()) {
5976       Op.addRegOperands(Inst, 1);
5977       continue;
5978     }
5979 
5980     if (Op.isToken() && Op.getToken() == "gds") {
5981       IsGdsHardcoded = true;
5982       continue;
5983     }
5984 
5985     // Handle optional arguments
5986     OptionalIdx[Op.getImmTy()] = i;
5987   }
5988 
5989   AMDGPUOperand::ImmTy OffsetType =
5990     (Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_gfx10 ||
5991      Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_gfx6_gfx7 ||
5992      Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_vi) ? AMDGPUOperand::ImmTySwizzle :
5993                                                       AMDGPUOperand::ImmTyOffset;
5994 
5995   addOptionalImmOperand(Inst, Operands, OptionalIdx, OffsetType);
5996 
5997   if (!IsGdsHardcoded) {
5998     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS);
5999   }
6000   Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0
6001 }
6002 
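// Convert parsed operands of export instructions, deriving the enable mask
// from which sources are "off", e.g. "exp mrt0 v0, v1, v2, v3 done vm".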
6003 void AMDGPUAsmParser::cvtExp(MCInst &Inst, const OperandVector &Operands) {
6004   OptionalImmIndexMap OptionalIdx;
6005 
6006   unsigned OperandIdx[4];
6007   unsigned EnMask = 0;
6008   int SrcIdx = 0;
6009 
6010   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
6011     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
6012 
6013     // Add the register arguments
6014     if (Op.isReg()) {
6015       assert(SrcIdx < 4);
6016       OperandIdx[SrcIdx] = Inst.size();
6017       Op.addRegOperands(Inst, 1);
6018       ++SrcIdx;
6019       continue;
6020     }
6021 
6022     if (Op.isOff()) {
6023       assert(SrcIdx < 4);
6024       OperandIdx[SrcIdx] = Inst.size();
6025       Inst.addOperand(MCOperand::createReg(AMDGPU::NoRegister));
6026       ++SrcIdx;
6027       continue;
6028     }
6029 
6030     if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyExpTgt) {
6031       Op.addImmOperands(Inst, 1);
6032       continue;
6033     }
6034 
6035     if (Op.isToken() && Op.getToken() == "done")
6036       continue;
6037 
6038     // Handle optional arguments
6039     OptionalIdx[Op.getImmTy()] = i;
6040   }
6041 
6042   assert(SrcIdx == 4);
6043 
6044   bool Compr = false;
6045   if (OptionalIdx.find(AMDGPUOperand::ImmTyExpCompr) != OptionalIdx.end()) {
6046     Compr = true;
6047     Inst.getOperand(OperandIdx[1]) = Inst.getOperand(OperandIdx[2]);
6048     Inst.getOperand(OperandIdx[2]).setReg(AMDGPU::NoRegister);
6049     Inst.getOperand(OperandIdx[3]).setReg(AMDGPU::NoRegister);
6050   }
6051 
6052   for (auto i = 0; i < SrcIdx; ++i) {
6053     if (Inst.getOperand(OperandIdx[i]).getReg() != AMDGPU::NoRegister) {
6054       EnMask |= Compr ? (0x3 << i * 2) : (0x1 << i);
6055     }
6056   }
6057 
6058   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpVM);
6059   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpCompr);
6060 
6061   Inst.addOperand(MCOperand::createImm(EnMask));
6062 }
6063 
6064 //===----------------------------------------------------------------------===//
6065 // s_waitcnt
6066 //===----------------------------------------------------------------------===//
6067 
6068 static bool
6069 encodeCnt(
6070   const AMDGPU::IsaVersion ISA,
6071   int64_t &IntVal,
6072   int64_t CntVal,
6073   bool Saturate,
6074   unsigned (*encode)(const IsaVersion &Version, unsigned, unsigned),
6075   unsigned (*decode)(const IsaVersion &Version, unsigned))
6076 {
6077   bool Failed = false;
6078 
6079   IntVal = encode(ISA, IntVal, CntVal);
6080   if (CntVal != decode(ISA, IntVal)) {
6081     if (Saturate) {
6082       IntVal = encode(ISA, IntVal, -1);
6083     } else {
6084       Failed = true;
6085     }
6086   }
6087   return Failed;
6088 }
6089 
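// Parse one counter specification of an s_waitcnt operand, e.g. "vmcnt(0)"
// in "s_waitcnt vmcnt(0) & lgkmcnt(0)". A counter name may carry a "_sat"
// suffix to clamp an out-of-range value instead of reporting an error.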
6090 bool AMDGPUAsmParser::parseCnt(int64_t &IntVal) {
6091 
6092   SMLoc CntLoc = getLoc();
6093   StringRef CntName = getTokenStr();
6094 
6095   if (!skipToken(AsmToken::Identifier, "expected a counter name") ||
6096       !skipToken(AsmToken::LParen, "expected a left parenthesis"))
6097     return false;
6098 
6099   int64_t CntVal;
6100   SMLoc ValLoc = getLoc();
6101   if (!parseExpr(CntVal))
6102     return false;
6103 
6104   AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
6105 
6106   bool Failed = true;
6107   bool Sat = CntName.endswith("_sat");
6108 
6109   if (CntName == "vmcnt" || CntName == "vmcnt_sat") {
6110     Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeVmcnt, decodeVmcnt);
6111   } else if (CntName == "expcnt" || CntName == "expcnt_sat") {
6112     Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeExpcnt, decodeExpcnt);
6113   } else if (CntName == "lgkmcnt" || CntName == "lgkmcnt_sat") {
6114     Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeLgkmcnt, decodeLgkmcnt);
6115   } else {
6116     Error(CntLoc, "invalid counter name " + CntName);
6117     return false;
6118   }
6119 
6120   if (Failed) {
6121     Error(ValLoc, "too large value for " + CntName);
6122     return false;
6123   }
6124 
6125   if (!skipToken(AsmToken::RParen, "expected a closing parenthesis"))
6126     return false;
6127 
6128   if (trySkipToken(AsmToken::Amp) || trySkipToken(AsmToken::Comma)) {
6129     if (isToken(AsmToken::EndOfStatement)) {
6130       Error(getLoc(), "expected a counter name");
6131       return false;
6132     }
6133   }
6134 
6135   return true;
6136 }
6137 
6138 OperandMatchResultTy
6139 AMDGPUAsmParser::parseSWaitCntOps(OperandVector &Operands) {
6140   AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
6141   int64_t Waitcnt = getWaitcntBitMask(ISA);
6142   SMLoc S = getLoc();
6143 
6144   if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) {
6145     while (!isToken(AsmToken::EndOfStatement)) {
6146       if (!parseCnt(Waitcnt))
6147         return MatchOperand_ParseFail;
6148     }
6149   } else {
6150     if (!parseExpr(Waitcnt))
6151       return MatchOperand_ParseFail;
6152   }
6153 
6154   Operands.push_back(AMDGPUOperand::CreateImm(this, Waitcnt, S));
6155   return MatchOperand_Success;
6156 }
6157 
6158 bool
6159 AMDGPUOperand::isSWaitCnt() const {
6160   return isImm();
6161 }
6162 
6163 //===----------------------------------------------------------------------===//
6164 // hwreg
6165 //===----------------------------------------------------------------------===//
6166 
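// A hwreg operand has the form hwreg(<name|id>[, <offset>, <width>]); a plain
// 16-bit immediate is also accepted. Illustrative example (register names and
// availability depend on the subtarget):
//   s_getreg_b32 s2, hwreg(HW_REG_LDS_ALLOC, 8, 23)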
6167 bool
6168 AMDGPUAsmParser::parseHwregBody(OperandInfoTy &HwReg,
6169                                 OperandInfoTy &Offset,
6170                                 OperandInfoTy &Width) {
6171   using namespace llvm::AMDGPU::Hwreg;
6172 
6173   // The register may be specified by name or using a numeric code
6174   HwReg.Loc = getLoc();
6175   if (isToken(AsmToken::Identifier) &&
6176       (HwReg.Id = getHwregId(getTokenStr())) >= 0) {
6177     HwReg.IsSymbolic = true;
6178     lex(); // skip register name
6179   } else if (!parseExpr(HwReg.Id, "a register name")) {
6180     return false;
6181   }
6182 
6183   if (trySkipToken(AsmToken::RParen))
6184     return true;
6185 
6186   // parse optional params
6187   if (!skipToken(AsmToken::Comma, "expected a comma or a closing parenthesis"))
6188     return false;
6189 
6190   Offset.Loc = getLoc();
6191   if (!parseExpr(Offset.Id))
6192     return false;
6193 
6194   if (!skipToken(AsmToken::Comma, "expected a comma"))
6195     return false;
6196 
6197   Width.Loc = getLoc();
6198   return parseExpr(Width.Id) &&
6199          skipToken(AsmToken::RParen, "expected a closing parenthesis");
6200 }
6201 
6202 bool
6203 AMDGPUAsmParser::validateHwreg(const OperandInfoTy &HwReg,
6204                                const OperandInfoTy &Offset,
6205                                const OperandInfoTy &Width) {
6206 
6207   using namespace llvm::AMDGPU::Hwreg;
6208 
6209   if (HwReg.IsSymbolic && !isValidHwreg(HwReg.Id, getSTI())) {
6210     Error(HwReg.Loc,
6211           "specified hardware register is not supported on this GPU");
6212     return false;
6213   }
6214   if (!isValidHwreg(HwReg.Id)) {
6215     Error(HwReg.Loc,
6216           "invalid code of hardware register: only 6-bit values are legal");
6217     return false;
6218   }
6219   if (!isValidHwregOffset(Offset.Id)) {
6220     Error(Offset.Loc, "invalid bit offset: only 5-bit values are legal");
6221     return false;
6222   }
6223   if (!isValidHwregWidth(Width.Id)) {
6224     Error(Width.Loc,
6225           "invalid bitfield width: only values from 1 to 32 are legal");
6226     return false;
6227   }
6228   return true;
6229 }
6230 
6231 OperandMatchResultTy
6232 AMDGPUAsmParser::parseHwreg(OperandVector &Operands) {
6233   using namespace llvm::AMDGPU::Hwreg;
6234 
6235   int64_t ImmVal = 0;
6236   SMLoc Loc = getLoc();
6237 
6238   if (trySkipId("hwreg", AsmToken::LParen)) {
6239     OperandInfoTy HwReg(ID_UNKNOWN_);
6240     OperandInfoTy Offset(OFFSET_DEFAULT_);
6241     OperandInfoTy Width(WIDTH_DEFAULT_);
6242     if (parseHwregBody(HwReg, Offset, Width) &&
6243         validateHwreg(HwReg, Offset, Width)) {
6244       ImmVal = encodeHwreg(HwReg.Id, Offset.Id, Width.Id);
6245     } else {
6246       return MatchOperand_ParseFail;
6247     }
6248   } else if (parseExpr(ImmVal, "a hwreg macro")) {
6249     if (ImmVal < 0 || !isUInt<16>(ImmVal)) {
6250       Error(Loc, "invalid immediate: only 16-bit values are legal");
6251       return MatchOperand_ParseFail;
6252     }
6253   } else {
6254     return MatchOperand_ParseFail;
6255   }
6256 
6257   Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTyHwreg));
6258   return MatchOperand_Success;
6259 }
6260 
6261 bool AMDGPUOperand::isHwreg() const {
6262   return isImmTy(ImmTyHwreg);
6263 }
6264 
6265 //===----------------------------------------------------------------------===//
6266 // sendmsg
6267 //===----------------------------------------------------------------------===//
6268 
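// A sendmsg operand has the form sendmsg(<msg>[, <op>[, <stream>]]); a plain
// 16-bit immediate is also accepted. Illustrative example (message and
// operation names depend on the subtarget):
//   s_sendmsg sendmsg(MSG_GS, GS_OP_EMIT, 0)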
6269 bool
6270 AMDGPUAsmParser::parseSendMsgBody(OperandInfoTy &Msg,
6271                                   OperandInfoTy &Op,
6272                                   OperandInfoTy &Stream) {
6273   using namespace llvm::AMDGPU::SendMsg;
6274 
6275   Msg.Loc = getLoc();
6276   if (isToken(AsmToken::Identifier) && (Msg.Id = getMsgId(getTokenStr())) >= 0) {
6277     Msg.IsSymbolic = true;
6278     lex(); // skip message name
6279   } else if (!parseExpr(Msg.Id, "a message name")) {
6280     return false;
6281   }
6282 
6283   if (trySkipToken(AsmToken::Comma)) {
6284     Op.IsDefined = true;
6285     Op.Loc = getLoc();
6286     if (isToken(AsmToken::Identifier) &&
6287         (Op.Id = getMsgOpId(Msg.Id, getTokenStr())) >= 0) {
6288       lex(); // skip operation name
6289     } else if (!parseExpr(Op.Id, "an operation name")) {
6290       return false;
6291     }
6292 
6293     if (trySkipToken(AsmToken::Comma)) {
6294       Stream.IsDefined = true;
6295       Stream.Loc = getLoc();
6296       if (!parseExpr(Stream.Id))
6297         return false;
6298     }
6299   }
6300 
6301   return skipToken(AsmToken::RParen, "expected a closing parenthesis");
6302 }
6303 
6304 bool
6305 AMDGPUAsmParser::validateSendMsg(const OperandInfoTy &Msg,
6306                                  const OperandInfoTy &Op,
6307                                  const OperandInfoTy &Stream) {
6308   using namespace llvm::AMDGPU::SendMsg;
6309 
6310   // Validation strictness depends on whether the message is specified
6311   // in a symbolic or in a numeric form. In the latter case,
6312   // only whether the value can be encoded is checked.
6313   bool Strict = Msg.IsSymbolic;
6314 
6315   if (!isValidMsgId(Msg.Id, getSTI(), Strict)) {
6316     Error(Msg.Loc, "invalid message id");
6317     return false;
6318   }
6319   if (Strict && (msgRequiresOp(Msg.Id) != Op.IsDefined)) {
6320     if (Op.IsDefined) {
6321       Error(Op.Loc, "message does not support operations");
6322     } else {
6323       Error(Msg.Loc, "missing message operation");
6324     }
6325     return false;
6326   }
6327   if (!isValidMsgOp(Msg.Id, Op.Id, getSTI(), Strict)) {
6328     Error(Op.Loc, "invalid operation id");
6329     return false;
6330   }
6331   if (Strict && !msgSupportsStream(Msg.Id, Op.Id) && Stream.IsDefined) {
6332     Error(Stream.Loc, "message operation does not support streams");
6333     return false;
6334   }
6335   if (!isValidMsgStream(Msg.Id, Op.Id, Stream.Id, getSTI(), Strict)) {
6336     Error(Stream.Loc, "invalid message stream id");
6337     return false;
6338   }
6339   return true;
6340 }
6341 
6342 OperandMatchResultTy
6343 AMDGPUAsmParser::parseSendMsgOp(OperandVector &Operands) {
6344   using namespace llvm::AMDGPU::SendMsg;
6345 
6346   int64_t ImmVal = 0;
6347   SMLoc Loc = getLoc();
6348 
6349   if (trySkipId("sendmsg", AsmToken::LParen)) {
6350     OperandInfoTy Msg(ID_UNKNOWN_);
6351     OperandInfoTy Op(OP_NONE_);
6352     OperandInfoTy Stream(STREAM_ID_NONE_);
6353     if (parseSendMsgBody(Msg, Op, Stream) &&
6354         validateSendMsg(Msg, Op, Stream)) {
6355       ImmVal = encodeMsg(Msg.Id, Op.Id, Stream.Id);
6356     } else {
6357       return MatchOperand_ParseFail;
6358     }
6359   } else if (parseExpr(ImmVal, "a sendmsg macro")) {
6360     if (ImmVal < 0 || !isUInt<16>(ImmVal)) {
6361       Error(Loc, "invalid immediate: only 16-bit values are legal");
6362       return MatchOperand_ParseFail;
6363     }
6364   } else {
6365     return MatchOperand_ParseFail;
6366   }
6367 
6368   Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTySendMsg));
6369   return MatchOperand_Success;
6370 }
6371 
6372 bool AMDGPUOperand::isSendMsg() const {
6373   return isImmTy(ImmTySendMsg);
6374 }
6375 
6376 //===----------------------------------------------------------------------===//
6377 // v_interp
6378 //===----------------------------------------------------------------------===//
6379 
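// Interpolation operands are parsed from identifiers: p10/p20/p0 select the
// parameter slot and attr<N>.<chan> selects the attribute and channel.
// Illustrative examples:
//   v_interp_p1_f32 v0, v1, attr0.x
//   v_interp_mov_f32 v0, p10, attr1.y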
6380 OperandMatchResultTy AMDGPUAsmParser::parseInterpSlot(OperandVector &Operands) {
6381   StringRef Str;
6382   SMLoc S = getLoc();
6383 
6384   if (!parseId(Str))
6385     return MatchOperand_NoMatch;
6386 
6387   int Slot = StringSwitch<int>(Str)
6388     .Case("p10", 0)
6389     .Case("p20", 1)
6390     .Case("p0", 2)
6391     .Default(-1);
6392 
6393   if (Slot == -1) {
6394     Error(S, "invalid interpolation slot");
6395     return MatchOperand_ParseFail;
6396   }
6397 
6398   Operands.push_back(AMDGPUOperand::CreateImm(this, Slot, S,
6399                                               AMDGPUOperand::ImmTyInterpSlot));
6400   return MatchOperand_Success;
6401 }
6402 
6403 OperandMatchResultTy AMDGPUAsmParser::parseInterpAttr(OperandVector &Operands) {
6404   StringRef Str;
6405   SMLoc S = getLoc();
6406 
6407   if (!parseId(Str))
6408     return MatchOperand_NoMatch;
6409 
6410   if (!Str.startswith("attr")) {
6411     Error(S, "invalid interpolation attribute");
6412     return MatchOperand_ParseFail;
6413   }
6414 
6415   StringRef Chan = Str.take_back(2);
6416   int AttrChan = StringSwitch<int>(Chan)
6417     .Case(".x", 0)
6418     .Case(".y", 1)
6419     .Case(".z", 2)
6420     .Case(".w", 3)
6421     .Default(-1);
6422   if (AttrChan == -1) {
6423     Error(S, "invalid or missing interpolation attribute channel");
6424     return MatchOperand_ParseFail;
6425   }
6426 
6427   Str = Str.drop_back(2).drop_front(4);
6428 
6429   uint8_t Attr;
6430   if (Str.getAsInteger(10, Attr)) {
6431     Error(S, "invalid or missing interpolation attribute number");
6432     return MatchOperand_ParseFail;
6433   }
6434 
6435   if (Attr > 63) {
6436     Error(S, "out of bounds interpolation attribute number");
6437     return MatchOperand_ParseFail;
6438   }
6439 
6440   SMLoc SChan = SMLoc::getFromPointer(Chan.data());
6441 
6442   Operands.push_back(AMDGPUOperand::CreateImm(this, Attr, S,
6443                                               AMDGPUOperand::ImmTyInterpAttr));
6444   Operands.push_back(AMDGPUOperand::CreateImm(this, AttrChan, SChan,
6445                                               AMDGPUOperand::ImmTyAttrChan));
6446   return MatchOperand_Success;
6447 }
6448 
6449 //===----------------------------------------------------------------------===//
6450 // exp
6451 //===----------------------------------------------------------------------===//
6452 
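// Export targets are parsed by name (e.g. mrt0..mrt7, mrtz, null, pos0..,
// param0..param31); availability depends on the subtarget. Illustrative
// example:
//   exp mrt0 v0, v0, v0, v0 done vm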
6453 OperandMatchResultTy AMDGPUAsmParser::parseExpTgt(OperandVector &Operands) {
6454   using namespace llvm::AMDGPU::Exp;
6455 
6456   StringRef Str;
6457   SMLoc S = getLoc();
6458 
6459   if (!parseId(Str))
6460     return MatchOperand_NoMatch;
6461 
6462   unsigned Id = getTgtId(Str);
6463   if (Id == ET_INVALID || !isSupportedTgtId(Id, getSTI())) {
6464     Error(S, (Id == ET_INVALID) ?
6465                 "invalid exp target" :
6466                 "exp target is not supported on this GPU");
6467     return MatchOperand_ParseFail;
6468   }
6469 
6470   Operands.push_back(AMDGPUOperand::CreateImm(this, Id, S,
6471                                               AMDGPUOperand::ImmTyExpTgt));
6472   return MatchOperand_Success;
6473 }
6474 
6475 //===----------------------------------------------------------------------===//
6476 // parser helpers
6477 //===----------------------------------------------------------------------===//
6478 
6479 bool
6480 AMDGPUAsmParser::isId(const AsmToken &Token, const StringRef Id) const {
6481   return Token.is(AsmToken::Identifier) && Token.getString() == Id;
6482 }
6483 
6484 bool
6485 AMDGPUAsmParser::isId(const StringRef Id) const {
6486   return isId(getToken(), Id);
6487 }
6488 
6489 bool
6490 AMDGPUAsmParser::isToken(const AsmToken::TokenKind Kind) const {
6491   return getTokenKind() == Kind;
6492 }
6493 
6494 bool
6495 AMDGPUAsmParser::trySkipId(const StringRef Id) {
6496   if (isId(Id)) {
6497     lex();
6498     return true;
6499   }
6500   return false;
6501 }
6502 
6503 bool
6504 AMDGPUAsmParser::trySkipId(const StringRef Pref, const StringRef Id) {
6505   if (isToken(AsmToken::Identifier)) {
6506     StringRef Tok = getTokenStr();
6507     if (Tok.startswith(Pref) && Tok.drop_front(Pref.size()) == Id) {
6508       lex();
6509       return true;
6510     }
6511   }
6512   return false;
6513 }
6514 
6515 bool
6516 AMDGPUAsmParser::trySkipId(const StringRef Id, const AsmToken::TokenKind Kind) {
6517   if (isId(Id) && peekToken().is(Kind)) {
6518     lex();
6519     lex();
6520     return true;
6521   }
6522   return false;
6523 }
6524 
6525 bool
6526 AMDGPUAsmParser::trySkipToken(const AsmToken::TokenKind Kind) {
6527   if (isToken(Kind)) {
6528     lex();
6529     return true;
6530   }
6531   return false;
6532 }
6533 
6534 bool
6535 AMDGPUAsmParser::skipToken(const AsmToken::TokenKind Kind,
6536                            const StringRef ErrMsg) {
6537   if (!trySkipToken(Kind)) {
6538     Error(getLoc(), ErrMsg);
6539     return false;
6540   }
6541   return true;
6542 }
6543 
6544 bool
6545 AMDGPUAsmParser::parseExpr(int64_t &Imm, StringRef Expected) {
6546   SMLoc S = getLoc();
6547 
6548   const MCExpr *Expr;
6549   if (Parser.parseExpression(Expr))
6550     return false;
6551 
6552   if (Expr->evaluateAsAbsolute(Imm))
6553     return true;
6554 
6555   if (Expected.empty()) {
6556     Error(S, "expected absolute expression");
6557   } else {
6558     Error(S, Twine("expected ", Expected) +
6559              Twine(" or an absolute expression"));
6560   }
6561   return false;
6562 }
6563 
6564 bool
6565 AMDGPUAsmParser::parseExpr(OperandVector &Operands) {
6566   SMLoc S = getLoc();
6567 
6568   const MCExpr *Expr;
6569   if (Parser.parseExpression(Expr))
6570     return false;
6571 
6572   int64_t IntVal;
6573   if (Expr->evaluateAsAbsolute(IntVal)) {
6574     Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S));
6575   } else {
6576     Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S));
6577   }
6578   return true;
6579 }
6580 
6581 bool
6582 AMDGPUAsmParser::parseString(StringRef &Val, const StringRef ErrMsg) {
6583   if (isToken(AsmToken::String)) {
6584     Val = getToken().getStringContents();
6585     lex();
6586     return true;
6587   } else {
6588     Error(getLoc(), ErrMsg);
6589     return false;
6590   }
6591 }
6592 
6593 bool
6594 AMDGPUAsmParser::parseId(StringRef &Val, const StringRef ErrMsg) {
6595   if (isToken(AsmToken::Identifier)) {
6596     Val = getTokenStr();
6597     lex();
6598     return true;
6599   } else {
6600     if (!ErrMsg.empty())
6601       Error(getLoc(), ErrMsg);
6602     return false;
6603   }
6604 }
6605 
6606 AsmToken
6607 AMDGPUAsmParser::getToken() const {
6608   return Parser.getTok();
6609 }
6610 
6611 AsmToken
6612 AMDGPUAsmParser::peekToken() {
6613   return isToken(AsmToken::EndOfStatement) ? getToken() : getLexer().peekTok();
6614 }
6615 
6616 void
6617 AMDGPUAsmParser::peekTokens(MutableArrayRef<AsmToken> Tokens) {
6618   auto TokCount = getLexer().peekTokens(Tokens);
6619 
6620   for (auto Idx = TokCount; Idx < Tokens.size(); ++Idx)
6621     Tokens[Idx] = AsmToken(AsmToken::Error, "");
6622 }
6623 
6624 AsmToken::TokenKind
6625 AMDGPUAsmParser::getTokenKind() const {
6626   return getLexer().getKind();
6627 }
6628 
6629 SMLoc
6630 AMDGPUAsmParser::getLoc() const {
6631   return getToken().getLoc();
6632 }
6633 
6634 StringRef
6635 AMDGPUAsmParser::getTokenStr() const {
6636   return getToken().getString();
6637 }
6638 
6639 void
6640 AMDGPUAsmParser::lex() {
6641   Parser.Lex();
6642 }
6643 
6644 SMLoc
6645 AMDGPUAsmParser::getOperandLoc(std::function<bool(const AMDGPUOperand&)> Test,
6646                                const OperandVector &Operands) const {
6647   for (unsigned i = Operands.size() - 1; i > 0; --i) {
6648     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
6649     if (Test(Op))
6650       return Op.getStartLoc();
6651   }
6652   return ((AMDGPUOperand &)*Operands[0]).getStartLoc();
6653 }
6654 
6655 SMLoc
6656 AMDGPUAsmParser::getImmLoc(AMDGPUOperand::ImmTy Type,
6657                            const OperandVector &Operands) const {
6658   auto Test = [=](const AMDGPUOperand& Op) { return Op.isImmTy(Type); };
6659   return getOperandLoc(Test, Operands);
6660 }
6661 
6662 SMLoc
6663 AMDGPUAsmParser::getRegLoc(unsigned Reg,
6664                            const OperandVector &Operands) const {
6665   auto Test = [=](const AMDGPUOperand& Op) {
6666     return Op.isRegKind() && Op.getReg() == Reg;
6667   };
6668   return getOperandLoc(Test, Operands);
6669 }
6670 
6671 SMLoc
6672 AMDGPUAsmParser::getLitLoc(const OperandVector &Operands) const {
6673   auto Test = [](const AMDGPUOperand& Op) {
6674     return Op.IsImmKindLiteral() || Op.isExpr();
6675   };
6676   return getOperandLoc(Test, Operands);
6677 }
6678 
6679 SMLoc
6680 AMDGPUAsmParser::getConstLoc(const OperandVector &Operands) const {
6681   auto Test = [](const AMDGPUOperand& Op) {
6682     return Op.isImmKindConst();
6683   };
6684   return getOperandLoc(Test, Operands);
6685 }
6686 
6687 //===----------------------------------------------------------------------===//
6688 // swizzle
6689 //===----------------------------------------------------------------------===//
6690 
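// The swizzle operand is either a raw 16-bit offset or a swizzle(...) macro.
// Illustrative examples:
//   ds_swizzle_b32 v8, v2 offset:swizzle(QUAD_PERM, 0, 1, 2, 3)
//   ds_swizzle_b32 v8, v2 offset:swizzle(BITMASK_PERM, "01pi0")
// In bitmask-perm mode each lane reads from lane
// ((lane_id & AndMask) | OrMask) ^ XorMask.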
6691 LLVM_READNONE
6692 static unsigned
6693 encodeBitmaskPerm(const unsigned AndMask,
6694                   const unsigned OrMask,
6695                   const unsigned XorMask) {
6696   using namespace llvm::AMDGPU::Swizzle;
6697 
6698   return BITMASK_PERM_ENC |
6699          (AndMask << BITMASK_AND_SHIFT) |
6700          (OrMask  << BITMASK_OR_SHIFT)  |
6701          (XorMask << BITMASK_XOR_SHIFT);
6702 }
6703 
6704 bool
6705 AMDGPUAsmParser::parseSwizzleOperand(int64_t &Op,
6706                                      const unsigned MinVal,
6707                                      const unsigned MaxVal,
6708                                      const StringRef ErrMsg,
6709                                      SMLoc &Loc) {
6710   if (!skipToken(AsmToken::Comma, "expected a comma")) {
6711     return false;
6712   }
6713   Loc = getLoc();
6714   if (!parseExpr(Op)) {
6715     return false;
6716   }
6717   if (Op < MinVal || Op > MaxVal) {
6718     Error(Loc, ErrMsg);
6719     return false;
6720   }
6721 
6722   return true;
6723 }
6724 
6725 bool
6726 AMDGPUAsmParser::parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
6727                                       const unsigned MinVal,
6728                                       const unsigned MaxVal,
6729                                       const StringRef ErrMsg) {
6730   SMLoc Loc;
6731   for (unsigned i = 0; i < OpNum; ++i) {
6732     if (!parseSwizzleOperand(Op[i], MinVal, MaxVal, ErrMsg, Loc))
6733       return false;
6734   }
6735 
6736   return true;
6737 }
6738 
6739 bool
6740 AMDGPUAsmParser::parseSwizzleQuadPerm(int64_t &Imm) {
6741   using namespace llvm::AMDGPU::Swizzle;
6742 
6743   int64_t Lane[LANE_NUM];
6744   if (parseSwizzleOperands(LANE_NUM, Lane, 0, LANE_MAX,
6745                            "expected a 2-bit lane id")) {
6746     Imm = QUAD_PERM_ENC;
6747     for (unsigned I = 0; I < LANE_NUM; ++I) {
6748       Imm |= Lane[I] << (LANE_SHIFT * I);
6749     }
6750     return true;
6751   }
6752   return false;
6753 }
6754 
6755 bool
6756 AMDGPUAsmParser::parseSwizzleBroadcast(int64_t &Imm) {
6757   using namespace llvm::AMDGPU::Swizzle;
6758 
6759   SMLoc Loc;
6760   int64_t GroupSize;
6761   int64_t LaneIdx;
6762 
6763   if (!parseSwizzleOperand(GroupSize,
6764                            2, 32,
6765                            "group size must be in the interval [2,32]",
6766                            Loc)) {
6767     return false;
6768   }
6769   if (!isPowerOf2_64(GroupSize)) {
6770     Error(Loc, "group size must be a power of two");
6771     return false;
6772   }
6773   if (parseSwizzleOperand(LaneIdx,
6774                           0, GroupSize - 1,
6775                           "lane id must be in the interval [0,group size - 1]",
6776                           Loc)) {
6777     Imm = encodeBitmaskPerm(BITMASK_MAX - GroupSize + 1, LaneIdx, 0);
6778     return true;
6779   }
6780   return false;
6781 }
6782 
6783 bool
6784 AMDGPUAsmParser::parseSwizzleReverse(int64_t &Imm) {
6785   using namespace llvm::AMDGPU::Swizzle;
6786 
6787   SMLoc Loc;
6788   int64_t GroupSize;
6789 
6790   if (!parseSwizzleOperand(GroupSize,
6791                            2, 32,
6792                            "group size must be in the interval [2,32]",
6793                            Loc)) {
6794     return false;
6795   }
6796   if (!isPowerOf2_64(GroupSize)) {
6797     Error(Loc, "group size must be a power of two");
6798     return false;
6799   }
6800 
6801   Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize - 1);
6802   return true;
6803 }
6804 
6805 bool
6806 AMDGPUAsmParser::parseSwizzleSwap(int64_t &Imm) {
6807   using namespace llvm::AMDGPU::Swizzle;
6808 
6809   SMLoc Loc;
6810   int64_t GroupSize;
6811 
6812   if (!parseSwizzleOperand(GroupSize,
6813                            1, 16,
6814                            "group size must be in the interval [1,16]",
6815                            Loc)) {
6816     return false;
6817   }
6818   if (!isPowerOf2_64(GroupSize)) {
6819     Error(Loc, "group size must be a power of two");
6820     return false;
6821   }
6822 
6823   Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize);
6824   return true;
6825 }
6826 
6827 bool
6828 AMDGPUAsmParser::parseSwizzleBitmaskPerm(int64_t &Imm) {
6829   using namespace llvm::AMDGPU::Swizzle;
6830 
6831   if (!skipToken(AsmToken::Comma, "expected a comma")) {
6832     return false;
6833   }
6834 
6835   StringRef Ctl;
6836   SMLoc StrLoc = getLoc();
6837   if (!parseString(Ctl)) {
6838     return false;
6839   }
6840   if (Ctl.size() != BITMASK_WIDTH) {
6841     Error(StrLoc, "expected a 5-character mask");
6842     return false;
6843   }
6844 
6845   unsigned AndMask = 0;
6846   unsigned OrMask = 0;
6847   unsigned XorMask = 0;
6848 
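  // Each character of the control string selects one lane-id bit, most
  // significant bit first: '0' forces the bit to 0, '1' forces it to 1,
  // 'p' preserves it and 'i' inverts it.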
6849   for (size_t i = 0; i < Ctl.size(); ++i) {
6850     unsigned Mask = 1 << (BITMASK_WIDTH - 1 - i);
6851     switch(Ctl[i]) {
6852     default:
6853       Error(StrLoc, "invalid mask");
6854       return false;
6855     case '0':
6856       break;
6857     case '1':
6858       OrMask |= Mask;
6859       break;
6860     case 'p':
6861       AndMask |= Mask;
6862       break;
6863     case 'i':
6864       AndMask |= Mask;
6865       XorMask |= Mask;
6866       break;
6867     }
6868   }
6869 
6870   Imm = encodeBitmaskPerm(AndMask, OrMask, XorMask);
6871   return true;
6872 }
6873 
6874 bool
6875 AMDGPUAsmParser::parseSwizzleOffset(int64_t &Imm) {
6876 
6877   SMLoc OffsetLoc = getLoc();
6878 
6879   if (!parseExpr(Imm, "a swizzle macro")) {
6880     return false;
6881   }
6882   if (!isUInt<16>(Imm)) {
6883     Error(OffsetLoc, "expected a 16-bit offset");
6884     return false;
6885   }
6886   return true;
6887 }
6888 
6889 bool
6890 AMDGPUAsmParser::parseSwizzleMacro(int64_t &Imm) {
6891   using namespace llvm::AMDGPU::Swizzle;
6892 
6893   if (skipToken(AsmToken::LParen, "expected a left parenthesis")) {
6894 
6895     SMLoc ModeLoc = getLoc();
6896     bool Ok = false;
6897 
6898     if (trySkipId(IdSymbolic[ID_QUAD_PERM])) {
6899       Ok = parseSwizzleQuadPerm(Imm);
6900     } else if (trySkipId(IdSymbolic[ID_BITMASK_PERM])) {
6901       Ok = parseSwizzleBitmaskPerm(Imm);
6902     } else if (trySkipId(IdSymbolic[ID_BROADCAST])) {
6903       Ok = parseSwizzleBroadcast(Imm);
6904     } else if (trySkipId(IdSymbolic[ID_SWAP])) {
6905       Ok = parseSwizzleSwap(Imm);
6906     } else if (trySkipId(IdSymbolic[ID_REVERSE])) {
6907       Ok = parseSwizzleReverse(Imm);
6908     } else {
6909       Error(ModeLoc, "expected a swizzle mode");
6910     }
6911 
6912     return Ok && skipToken(AsmToken::RParen, "expected a closing parenthesis");
6913   }
6914 
6915   return false;
6916 }
6917 
6918 OperandMatchResultTy
6919 AMDGPUAsmParser::parseSwizzleOp(OperandVector &Operands) {
6920   SMLoc S = getLoc();
6921   int64_t Imm = 0;
6922 
6923   if (trySkipId("offset")) {
6924 
6925     bool Ok = false;
6926     if (skipToken(AsmToken::Colon, "expected a colon")) {
6927       if (trySkipId("swizzle")) {
6928         Ok = parseSwizzleMacro(Imm);
6929       } else {
6930         Ok = parseSwizzleOffset(Imm);
6931       }
6932     }
6933 
6934     Operands.push_back(AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTySwizzle));
6935 
6936     return Ok? MatchOperand_Success : MatchOperand_ParseFail;
6937   } else {
6938     // Swizzle "offset" operand is optional.
6939     // If it is omitted, try parsing other optional operands.
6940     return parseOptionalOpr(Operands);
6941   }
6942 }
6943 
6944 bool
6945 AMDGPUOperand::isSwizzle() const {
6946   return isImmTy(ImmTySwizzle);
6947 }
6948 
6949 //===----------------------------------------------------------------------===//
6950 // VGPR Index Mode
6951 //===----------------------------------------------------------------------===//
6952 
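// The gpr_idx operand is either a raw 4-bit immediate or a gpr_idx(...) macro
// listing the enabled modes. Illustrative example:
//   s_set_gpr_idx_on s0, gpr_idx(SRC0,SRC1,SRC2,DST)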
6953 int64_t AMDGPUAsmParser::parseGPRIdxMacro() {
6954 
6955   using namespace llvm::AMDGPU::VGPRIndexMode;
6956 
6957   if (trySkipToken(AsmToken::RParen)) {
6958     return OFF;
6959   }
6960 
6961   int64_t Imm = 0;
6962 
6963   while (true) {
6964     unsigned Mode = 0;
6965     SMLoc S = getLoc();
6966 
6967     for (unsigned ModeId = ID_MIN; ModeId <= ID_MAX; ++ModeId) {
6968       if (trySkipId(IdSymbolic[ModeId])) {
6969         Mode = 1 << ModeId;
6970         break;
6971       }
6972     }
6973 
6974     if (Mode == 0) {
6975       Error(S, (Imm == 0)?
6976                "expected a VGPR index mode or a closing parenthesis" :
6977                "expected a VGPR index mode");
6978       return UNDEF;
6979     }
6980 
6981     if (Imm & Mode) {
6982       Error(S, "duplicate VGPR index mode");
6983       return UNDEF;
6984     }
6985     Imm |= Mode;
6986 
6987     if (trySkipToken(AsmToken::RParen))
6988       break;
6989     if (!skipToken(AsmToken::Comma,
6990                    "expected a comma or a closing parenthesis"))
6991       return UNDEF;
6992   }
6993 
6994   return Imm;
6995 }
6996 
6997 OperandMatchResultTy
6998 AMDGPUAsmParser::parseGPRIdxMode(OperandVector &Operands) {
6999 
7000   using namespace llvm::AMDGPU::VGPRIndexMode;
7001 
7002   int64_t Imm = 0;
7003   SMLoc S = getLoc();
7004 
7005   if (trySkipId("gpr_idx", AsmToken::LParen)) {
7006     Imm = parseGPRIdxMacro();
7007     if (Imm == UNDEF)
7008       return MatchOperand_ParseFail;
7009   } else {
7010     if (getParser().parseAbsoluteExpression(Imm))
7011       return MatchOperand_ParseFail;
7012     if (Imm < 0 || !isUInt<4>(Imm)) {
7013       Error(S, "invalid immediate: only 4-bit values are legal");
7014       return MatchOperand_ParseFail;
7015     }
7016   }
7017 
7018   Operands.push_back(
7019       AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyGprIdxMode));
7020   return MatchOperand_Success;
7021 }
7022 
7023 bool AMDGPUOperand::isGPRIdxMode() const {
7024   return isImmTy(ImmTyGprIdxMode);
7025 }
7026 
7027 //===----------------------------------------------------------------------===//
7028 // sopp branch targets
7029 //===----------------------------------------------------------------------===//
7030 
7031 OperandMatchResultTy
7032 AMDGPUAsmParser::parseSOppBrTarget(OperandVector &Operands) {
7033 
7034   // Make sure we are not parsing something
7035   // that looks like a label or an expression but is not.
7036   // This will improve error messages.
7037   if (isRegister() || isModifier())
7038     return MatchOperand_NoMatch;
7039 
7040   if (!parseExpr(Operands))
7041     return MatchOperand_ParseFail;
7042 
7043   AMDGPUOperand &Opr = ((AMDGPUOperand &)*Operands[Operands.size() - 1]);
7044   assert(Opr.isImm() || Opr.isExpr());
7045   SMLoc Loc = Opr.getStartLoc();
7046 
7047   // Currently we do not support arbitrary expressions as branch targets.
7048   // Only labels and absolute expressions are accepted.
7049   if (Opr.isExpr() && !Opr.isSymbolRefExpr()) {
7050     Error(Loc, "expected an absolute expression or a label");
7051   } else if (Opr.isImm() && !Opr.isS16Imm()) {
7052     Error(Loc, "expected a 16-bit signed jump offset");
7053   }
7054 
7055   return MatchOperand_Success;
7056 }
7057 
7058 //===----------------------------------------------------------------------===//
7059 // Boolean holding registers
7060 //===----------------------------------------------------------------------===//
7061 
7062 OperandMatchResultTy
7063 AMDGPUAsmParser::parseBoolReg(OperandVector &Operands) {
7064   return parseReg(Operands);
7065 }
7066 
7067 //===----------------------------------------------------------------------===//
7068 // mubuf
7069 //===----------------------------------------------------------------------===//
7070 
7071 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultCPol() const {
7072   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyCPol);
7073 }
7074 
7075 void AMDGPUAsmParser::cvtMubufImpl(MCInst &Inst,
7076                                    const OperandVector &Operands,
7077                                    bool IsAtomic,
7078                                    bool IsLds) {
7079   bool IsLdsOpcode = IsLds;
7080   bool HasLdsModifier = false;
7081   OptionalImmIndexMap OptionalIdx;
7082   unsigned FirstOperandIdx = 1;
7083   bool IsAtomicReturn = false;
7084 
7085   if (IsAtomic) {
7086     for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) {
7087       AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
7088       if (!Op.isCPol())
7089         continue;
7090       IsAtomicReturn = Op.getImm() & AMDGPU::CPol::GLC;
7091       break;
7092     }
7093 
7094     if (!IsAtomicReturn) {
7095       int NewOpc = AMDGPU::getAtomicNoRetOp(Inst.getOpcode());
7096       if (NewOpc != -1)
7097         Inst.setOpcode(NewOpc);
7098     }
7099 
7100     IsAtomicReturn =  MII.get(Inst.getOpcode()).TSFlags &
7101                       SIInstrFlags::IsAtomicRet;
7102   }
7103 
7104   for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) {
7105     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
7106 
7107     // Add the register arguments
7108     if (Op.isReg()) {
7109       Op.addRegOperands(Inst, 1);
7110       // Insert a tied src for the atomic return dst.
7111       // This cannot be postponed as subsequent calls to
7112       // addImmOperands rely on the correct number of MC operands.
7113       if (IsAtomicReturn && i == FirstOperandIdx)
7114         Op.addRegOperands(Inst, 1);
7115       continue;
7116     }
7117 
7118     // Handle the case where soffset is an immediate
7119     if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
7120       Op.addImmOperands(Inst, 1);
7121       continue;
7122     }
7123 
7124     HasLdsModifier |= Op.isLDS();
7125 
7126     // Handle tokens like 'offen' which are sometimes hard-coded into the
7127     // asm string.  There are no MCInst operands for these.
7128     if (Op.isToken()) {
7129       continue;
7130     }
7131     assert(Op.isImm());
7132 
7133     // Handle optional arguments
7134     OptionalIdx[Op.getImmTy()] = i;
7135   }
7136 
7137   // This is a workaround for an llvm quirk which may result in an
7138   // incorrect instruction selection. Lds and non-lds versions of
7139   // MUBUF instructions are identical except that lds versions
7140   // have a mandatory 'lds' modifier. However, this modifier follows
7141   // optional modifiers, and the llvm asm matcher regards this 'lds'
7142   // modifier as an optional one. As a result, an lds version
7143   // of the opcode may be selected even if it has no 'lds' modifier.
7144   if (IsLdsOpcode && !HasLdsModifier) {
7145     int NoLdsOpcode = AMDGPU::getMUBUFNoLdsInst(Inst.getOpcode());
7146     if (NoLdsOpcode != -1) { // Got lds version - correct it.
7147       Inst.setOpcode(NoLdsOpcode);
7148       IsLdsOpcode = false;
7149     }
7150   }
7151 
7152   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset);
7153   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol, 0);
7154 
7155   if (!IsLdsOpcode) { // tfe is not legal with lds opcodes
7156     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
7157   }
7158   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySWZ);
7159 }
7160 
7161 void AMDGPUAsmParser::cvtMtbuf(MCInst &Inst, const OperandVector &Operands) {
7162   OptionalImmIndexMap OptionalIdx;
7163 
7164   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
7165     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
7166 
7167     // Add the register arguments
7168     if (Op.isReg()) {
7169       Op.addRegOperands(Inst, 1);
7170       continue;
7171     }
7172 
7173     // Handle the case where soffset is an immediate
7174     if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
7175       Op.addImmOperands(Inst, 1);
7176       continue;
7177     }
7178 
7179     // Handle tokens like 'offen' which are sometimes hard-coded into the
7180     // asm string.  There are no MCInst operands for these.
7181     if (Op.isToken()) {
7182       continue;
7183     }
7184     assert(Op.isImm());
7185 
7186     // Handle optional arguments
7187     OptionalIdx[Op.getImmTy()] = i;
7188   }
7189 
7190   addOptionalImmOperand(Inst, Operands, OptionalIdx,
7191                         AMDGPUOperand::ImmTyOffset);
7192   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyFORMAT);
7193   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol, 0);
7194   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
7195   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySWZ);
7196 }
7197 
7198 //===----------------------------------------------------------------------===//
7199 // mimg
7200 //===----------------------------------------------------------------------===//
7201 
7202 void AMDGPUAsmParser::cvtMIMG(MCInst &Inst, const OperandVector &Operands,
7203                               bool IsAtomic) {
7204   unsigned I = 1;
7205   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
7206   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
7207     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
7208   }
7209 
7210   if (IsAtomic) {
7211     // Add src, same as dst
7212     assert(Desc.getNumDefs() == 1);
7213     ((AMDGPUOperand &)*Operands[I - 1]).addRegOperands(Inst, 1);
7214   }
7215 
7216   OptionalImmIndexMap OptionalIdx;
7217 
7218   for (unsigned E = Operands.size(); I != E; ++I) {
7219     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
7220 
7221     // Add the register arguments
7222     if (Op.isReg()) {
7223       Op.addRegOperands(Inst, 1);
7224     } else if (Op.isImmModifier()) {
7225       OptionalIdx[Op.getImmTy()] = I;
7226     } else if (!Op.isToken()) {
7227       llvm_unreachable("unexpected operand type");
7228     }
7229   }
7230 
7231   bool IsGFX10Plus = isGFX10Plus();
7232 
7233   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDMask);
7234   if (IsGFX10Plus)
7235     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDim, -1);
7236   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyUNorm);
7237   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol);
7238   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyR128A16);
7239   if (IsGFX10Plus)
7240     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyA16);
7241   if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::tfe) != -1)
7242     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
7243   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyLWE);
7244   if (!IsGFX10Plus)
7245     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDA);
7246   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyD16);
7247 }
7248 
7249 void AMDGPUAsmParser::cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands) {
7250   cvtMIMG(Inst, Operands, true);
7251 }
7252 
7253 void AMDGPUAsmParser::cvtSMEMAtomic(MCInst &Inst, const OperandVector &Operands) {
7254   OptionalImmIndexMap OptionalIdx;
7255   bool IsAtomicReturn = false;
7256 
7257   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
7258     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
7259     if (!Op.isCPol())
7260       continue;
7261     IsAtomicReturn = Op.getImm() & AMDGPU::CPol::GLC;
7262     break;
7263   }
7264 
7265   if (!IsAtomicReturn) {
7266     int NewOpc = AMDGPU::getAtomicNoRetOp(Inst.getOpcode());
7267     if (NewOpc != -1)
7268       Inst.setOpcode(NewOpc);
7269   }
7270 
7271   IsAtomicReturn =  MII.get(Inst.getOpcode()).TSFlags &
7272                     SIInstrFlags::IsAtomicRet;
7273 
7274   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
7275     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
7276 
7277     // Add the register arguments
7278     if (Op.isReg()) {
7279       Op.addRegOperands(Inst, 1);
7280       if (IsAtomicReturn && i == 1)
7281         Op.addRegOperands(Inst, 1);
7282       continue;
7283     }
7284 
7285     // Handle the case where soffset is an immediate
7286     if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
7287       Op.addImmOperands(Inst, 1);
7288       continue;
7289     }
7290 
7291     // Handle tokens like 'offen' which are sometimes hard-coded into the
7292     // asm string.  There are no MCInst operands for these.
7293     if (Op.isToken()) {
7294       continue;
7295     }
7296     assert(Op.isImm());
7297 
7298     // Handle optional arguments
7299     OptionalIdx[Op.getImmTy()] = i;
7300   }
7301 
7302   if ((int)Inst.getNumOperands() <=
7303       AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::offset))
7304     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset);
7305   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol, 0);
7306 }
7307 
7308 void AMDGPUAsmParser::cvtIntersectRay(MCInst &Inst,
7309                                       const OperandVector &Operands) {
7310   for (unsigned I = 1; I < Operands.size(); ++I) {
7311     auto &Operand = (AMDGPUOperand &)*Operands[I];
7312     if (Operand.isReg())
7313       Operand.addRegOperands(Inst, 1);
7314   }
7315 
7316   Inst.addOperand(MCOperand::createImm(1)); // a16
7317 }
7318 
7319 //===----------------------------------------------------------------------===//
7320 // smrd
7321 //===----------------------------------------------------------------------===//
7322 
7323 bool AMDGPUOperand::isSMRDOffset8() const {
7324   return isImm() && isUInt<8>(getImm());
7325 }
7326 
7327 bool AMDGPUOperand::isSMEMOffset() const {
7328   return isImm(); // Offset range is checked later by validator.
7329 }
7330 
7331 bool AMDGPUOperand::isSMRDLiteralOffset() const {
7332   // 32-bit literals are only supported on CI and we only want to use them
7333   // when the offset does not fit in 8 bits.
7334   return isImm() && !isUInt<8>(getImm()) && isUInt<32>(getImm());
7335 }
7336 
7337 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDOffset8() const {
7338   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
7339 }
7340 
7341 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMEMOffset() const {
7342   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
7343 }
7344 
7345 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDLiteralOffset() const {
7346   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
7347 }
7348 
7349 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultFlatOffset() const {
7350   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
7351 }
7352 
7353 //===----------------------------------------------------------------------===//
7354 // vop3
7355 //===----------------------------------------------------------------------===//
7356 
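// The omod (output modifier) field encodes 0 = none, 1 = *2, 2 = *4, 3 = /2.
// The helpers below convert the values written with mul:/div: syntax into
// this encoding.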
7357 static bool ConvertOmodMul(int64_t &Mul) {
7358   if (Mul != 1 && Mul != 2 && Mul != 4)
7359     return false;
7360 
7361   Mul >>= 1;
7362   return true;
7363 }
7364 
7365 static bool ConvertOmodDiv(int64_t &Div) {
7366   if (Div == 1) {
7367     Div = 0;
7368     return true;
7369   }
7370 
7371   if (Div == 2) {
7372     Div = 3;
7373     return true;
7374   }
7375 
7376   return false;
7377 }
7378 
7379 // Both bound_ctrl:0 and bound_ctrl:1 are encoded as 1.
7380 // This is intentional and ensures compatibility with sp3.
7381 // See bug 35397 for details.
7382 static bool ConvertBoundCtrl(int64_t &BoundCtrl) {
7383   if (BoundCtrl == 0 || BoundCtrl == 1) {
7384     BoundCtrl = 1;
7385     return true;
7386   }
7387   return false;
7388 }
7389 
7390 // Note: the order in this table matches the order of operands in AsmString.
7391 static const OptionalOperand AMDGPUOptionalOperandTable[] = {
7392   {"offen",   AMDGPUOperand::ImmTyOffen, true, nullptr},
7393   {"idxen",   AMDGPUOperand::ImmTyIdxen, true, nullptr},
7394   {"addr64",  AMDGPUOperand::ImmTyAddr64, true, nullptr},
7395   {"offset0", AMDGPUOperand::ImmTyOffset0, false, nullptr},
7396   {"offset1", AMDGPUOperand::ImmTyOffset1, false, nullptr},
7397   {"gds",     AMDGPUOperand::ImmTyGDS, true, nullptr},
7398   {"lds",     AMDGPUOperand::ImmTyLDS, true, nullptr},
7399   {"offset",  AMDGPUOperand::ImmTyOffset, false, nullptr},
7400   {"inst_offset", AMDGPUOperand::ImmTyInstOffset, false, nullptr},
7401   {"",        AMDGPUOperand::ImmTyCPol, false, nullptr},
7402   {"swz",     AMDGPUOperand::ImmTySWZ, true, nullptr},
7403   {"tfe",     AMDGPUOperand::ImmTyTFE, true, nullptr},
7404   {"d16",     AMDGPUOperand::ImmTyD16, true, nullptr},
7405   {"high",    AMDGPUOperand::ImmTyHigh, true, nullptr},
7406   {"clamp",   AMDGPUOperand::ImmTyClampSI, true, nullptr},
7407   {"omod",    AMDGPUOperand::ImmTyOModSI, false, ConvertOmodMul},
7408   {"unorm",   AMDGPUOperand::ImmTyUNorm, true, nullptr},
7409   {"da",      AMDGPUOperand::ImmTyDA,    true, nullptr},
7410   {"r128",    AMDGPUOperand::ImmTyR128A16,  true, nullptr},
7411   {"a16",     AMDGPUOperand::ImmTyA16,  true, nullptr},
7412   {"lwe",     AMDGPUOperand::ImmTyLWE,   true, nullptr},
7413   {"d16",     AMDGPUOperand::ImmTyD16,   true, nullptr},
7414   {"dmask",   AMDGPUOperand::ImmTyDMask, false, nullptr},
7415   {"dim",     AMDGPUOperand::ImmTyDim,   false, nullptr},
7416   {"row_mask",   AMDGPUOperand::ImmTyDppRowMask, false, nullptr},
7417   {"bank_mask",  AMDGPUOperand::ImmTyDppBankMask, false, nullptr},
7418   {"bound_ctrl", AMDGPUOperand::ImmTyDppBoundCtrl, false, ConvertBoundCtrl},
7419   {"fi",         AMDGPUOperand::ImmTyDppFi, false, nullptr},
7420   {"dst_sel",    AMDGPUOperand::ImmTySdwaDstSel, false, nullptr},
7421   {"src0_sel",   AMDGPUOperand::ImmTySdwaSrc0Sel, false, nullptr},
7422   {"src1_sel",   AMDGPUOperand::ImmTySdwaSrc1Sel, false, nullptr},
7423   {"dst_unused", AMDGPUOperand::ImmTySdwaDstUnused, false, nullptr},
7424   {"compr", AMDGPUOperand::ImmTyExpCompr, true, nullptr },
7425   {"vm", AMDGPUOperand::ImmTyExpVM, true, nullptr},
7426   {"op_sel", AMDGPUOperand::ImmTyOpSel, false, nullptr},
7427   {"op_sel_hi", AMDGPUOperand::ImmTyOpSelHi, false, nullptr},
7428   {"neg_lo", AMDGPUOperand::ImmTyNegLo, false, nullptr},
7429   {"neg_hi", AMDGPUOperand::ImmTyNegHi, false, nullptr},
7430   {"blgp", AMDGPUOperand::ImmTyBLGP, false, nullptr},
7431   {"cbsz", AMDGPUOperand::ImmTyCBSZ, false, nullptr},
7432   {"abid", AMDGPUOperand::ImmTyABID, false, nullptr}
7433 };
7434 
7435 void AMDGPUAsmParser::onBeginOfFile() {
7436   if (!getParser().getStreamer().getTargetStreamer() ||
7437       getSTI().getTargetTriple().getArch() == Triple::r600)
7438     return;
7439 
7440   if (!getTargetStreamer().getTargetID())
7441     getTargetStreamer().initializeTargetID(getSTI(), getSTI().getFeatureString());
7442 
7443   if (isHsaAbiVersion3Or4(&getSTI()))
7444     getTargetStreamer().EmitDirectiveAMDGCNTarget();
7445 }
7446 
7447 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOperand(OperandVector &Operands) {
7448 
7449   OperandMatchResultTy res = parseOptionalOpr(Operands);
7450 
7451   // This is a hack to enable hardcoded mandatory operands which follow
7452   // optional operands.
7453   //
7454   // The current design assumes that all operands after the first optional
7455   // operand are also optional. However, the implementation of some instructions
7456   // violates this rule (see e.g. flat/global atomics which have hardcoded 'glc' operands).
7457   //
7458   // To alleviate this problem, we have to (implicitly) parse extra operands
7459   // to make sure the autogenerated parser of custom operands never hits
7460   // hardcoded mandatory operands.
7461 
7462   for (unsigned i = 0; i < MAX_OPR_LOOKAHEAD; ++i) {
7463     if (res != MatchOperand_Success ||
7464         isToken(AsmToken::EndOfStatement))
7465       break;
7466 
7467     trySkipToken(AsmToken::Comma);
7468     res = parseOptionalOpr(Operands);
7469   }
7470 
7471   return res;
7472 }
7473 
7474 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOpr(OperandVector &Operands) {
7475   OperandMatchResultTy res;
7476   for (const OptionalOperand &Op : AMDGPUOptionalOperandTable) {
7477     // try to parse any optional operand here
7478     if (Op.IsBit) {
7479       res = parseNamedBit(Op.Name, Operands, Op.Type);
7480     } else if (Op.Type == AMDGPUOperand::ImmTyOModSI) {
7481       res = parseOModOperand(Operands);
7482     } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstSel ||
7483                Op.Type == AMDGPUOperand::ImmTySdwaSrc0Sel ||
7484                Op.Type == AMDGPUOperand::ImmTySdwaSrc1Sel) {
7485       res = parseSDWASel(Operands, Op.Name, Op.Type);
7486     } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstUnused) {
7487       res = parseSDWADstUnused(Operands);
7488     } else if (Op.Type == AMDGPUOperand::ImmTyOpSel ||
7489                Op.Type == AMDGPUOperand::ImmTyOpSelHi ||
7490                Op.Type == AMDGPUOperand::ImmTyNegLo ||
7491                Op.Type == AMDGPUOperand::ImmTyNegHi) {
7492       res = parseOperandArrayWithPrefix(Op.Name, Operands, Op.Type,
7493                                         Op.ConvertResult);
7494     } else if (Op.Type == AMDGPUOperand::ImmTyDim) {
7495       res = parseDim(Operands);
7496     } else if (Op.Type == AMDGPUOperand::ImmTyCPol) {
7497       res = parseCPol(Operands);
7498     } else {
7499       res = parseIntWithPrefix(Op.Name, Operands, Op.Type, Op.ConvertResult);
7500     }
7501     if (res != MatchOperand_NoMatch) {
7502       return res;
7503     }
7504   }
7505   return MatchOperand_NoMatch;
7506 }
7507 
7508 OperandMatchResultTy AMDGPUAsmParser::parseOModOperand(OperandVector &Operands) {
7509   StringRef Name = getTokenStr();
7510   if (Name == "mul") {
7511     return parseIntWithPrefix("mul", Operands,
7512                               AMDGPUOperand::ImmTyOModSI, ConvertOmodMul);
7513   }
7514 
7515   if (Name == "div") {
7516     return parseIntWithPrefix("div", Operands,
7517                               AMDGPUOperand::ImmTyOModSI, ConvertOmodDiv);
7518   }
7519 
7520   return MatchOperand_NoMatch;
7521 }
7522 
7523 void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands) {
7524   cvtVOP3P(Inst, Operands);
7525 
7526   int Opc = Inst.getOpcode();
7527 
7528   int SrcNum;
7529   const int Ops[] = { AMDGPU::OpName::src0,
7530                       AMDGPU::OpName::src1,
7531                       AMDGPU::OpName::src2 };
7532   for (SrcNum = 0;
7533        SrcNum < 3 && AMDGPU::getNamedOperandIdx(Opc, Ops[SrcNum]) != -1;
7534        ++SrcNum);
7535   assert(SrcNum > 0);
7536 
7537   int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
7538   unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
7539 
7540   if ((OpSel & (1 << SrcNum)) != 0) {
7541     int ModIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers);
7542     uint32_t ModVal = Inst.getOperand(ModIdx).getImm();
7543     Inst.getOperand(ModIdx).setImm(ModVal | SISrcMods::DST_OP_SEL);
7544   }
7545 }
7546 
7547 static bool isRegOrImmWithInputMods(const MCInstrDesc &Desc, unsigned OpNum) {
7548       // 1. This operand is an input modifier
7549   return Desc.OpInfo[OpNum].OperandType == AMDGPU::OPERAND_INPUT_MODS
7550       // 2. This is not the last operand
7551       && Desc.NumOperands > (OpNum + 1)
7552       // 3. The next operand is a register class
7553       && Desc.OpInfo[OpNum + 1].RegClass != -1
7554       // 4. The next register is not tied to any other operand
7555       && Desc.getOperandConstraint(OpNum + 1, MCOI::OperandConstraint::TIED_TO) == -1;
7556 }
7557 
7558 void AMDGPUAsmParser::cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands)
7559 {
7560   OptionalImmIndexMap OptionalIdx;
7561   unsigned Opc = Inst.getOpcode();
7562 
7563   unsigned I = 1;
7564   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
7565   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
7566     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
7567   }
7568 
7569   for (unsigned E = Operands.size(); I != E; ++I) {
7570     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
7571     if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
7572       Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
7573     } else if (Op.isInterpSlot() ||
7574                Op.isInterpAttr() ||
7575                Op.isAttrChan()) {
7576       Inst.addOperand(MCOperand::createImm(Op.getImm()));
7577     } else if (Op.isImmModifier()) {
7578       OptionalIdx[Op.getImmTy()] = I;
7579     } else {
7580       llvm_unreachable("unhandled operand type");
7581     }
7582   }
7583 
7584   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::high) != -1) {
7585     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyHigh);
7586   }
7587 
7588   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) {
7589     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI);
7590   }
7591 
7592   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) {
7593     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI);
7594   }
7595 }
7596 
7597 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands,
7598                               OptionalImmIndexMap &OptionalIdx) {
7599   unsigned Opc = Inst.getOpcode();
7600 
7601   unsigned I = 1;
7602   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
7603   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
7604     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
7605   }
7606 
7607   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers) != -1) {
7608     // This instruction has src modifiers
7609     for (unsigned E = Operands.size(); I != E; ++I) {
7610       AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
7611       if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
7612         Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
7613       } else if (Op.isImmModifier()) {
7614         OptionalIdx[Op.getImmTy()] = I;
7615       } else if (Op.isRegOrImm()) {
7616         Op.addRegOrImmOperands(Inst, 1);
7617       } else {
7618         llvm_unreachable("unhandled operand type");
7619       }
7620     }
7621   } else {
7622     // No src modifiers
7623     for (unsigned E = Operands.size(); I != E; ++I) {
7624       AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
7625       if (Op.isMod()) {
7626         OptionalIdx[Op.getImmTy()] = I;
7627       } else {
7628         Op.addRegOrImmOperands(Inst, 1);
7629       }
7630     }
7631   }
7632 
7633   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) {
7634     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI);
7635   }
7636 
7637   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) {
7638     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI);
7639   }
7640 
7641   // Special case v_mac_{f16, f32} and v_fmac_{f16, f32} (gfx906/gfx10+):
7642   // these opcodes have a src2 register operand that is tied to the dst operand.
7643   // We do not allow modifiers for this operand in the assembler, so
7644   // src2_modifiers should be 0.
7645   if (Opc == AMDGPU::V_MAC_F32_e64_gfx6_gfx7 ||
7646       Opc == AMDGPU::V_MAC_F32_e64_gfx10 ||
7647       Opc == AMDGPU::V_MAC_F32_e64_vi ||
7648       Opc == AMDGPU::V_MAC_LEGACY_F32_e64_gfx6_gfx7 ||
7649       Opc == AMDGPU::V_MAC_LEGACY_F32_e64_gfx10 ||
7650       Opc == AMDGPU::V_MAC_F16_e64_vi ||
7651       Opc == AMDGPU::V_FMAC_F64_e64_gfx90a ||
7652       Opc == AMDGPU::V_FMAC_F32_e64_gfx10 ||
7653       Opc == AMDGPU::V_FMAC_F32_e64_vi ||
7654       Opc == AMDGPU::V_FMAC_LEGACY_F32_e64_gfx10 ||
7655       Opc == AMDGPU::V_FMAC_F16_e64_gfx10) {
7656     auto it = Inst.begin();
7657     std::advance(it, AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2_modifiers));
7658     it = Inst.insert(it, MCOperand::createImm(0)); // no modifiers for src2
7659     ++it;
7660     // Copy the operand to ensure it's not invalidated when Inst grows.
7661     Inst.insert(it, MCOperand(Inst.getOperand(0))); // src2 = dst
7662   }
7663 }
7664 
7665 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands) {
7666   OptionalImmIndexMap OptionalIdx;
7667   cvtVOP3(Inst, Operands, OptionalIdx);
7668 }
7669 
7670 void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands,
7671                                OptionalImmIndexMap &OptIdx) {
7672   const int Opc = Inst.getOpcode();
7673   const MCInstrDesc &Desc = MII.get(Opc);
7674 
7675   const bool IsPacked = (Desc.TSFlags & SIInstrFlags::IsPacked) != 0;
7676 
7677   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst_in) != -1) {
7678     assert(!IsPacked);
7679     Inst.addOperand(Inst.getOperand(0));
7680   }
7681 
7682   // FIXME: This is messy. Parse the modifiers as if it were a normal VOP3
7683   // instruction, and then figure out where to actually put the modifiers.
7684 
7685   int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
7686   if (OpSelIdx != -1) {
7687     addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSel);
7688   }
7689 
7690   int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi);
7691   if (OpSelHiIdx != -1) {
7692     int DefaultVal = IsPacked ? -1 : 0;
7693     addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSelHi,
7694                           DefaultVal);
7695   }
7696 
7697   int NegLoIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_lo);
7698   if (NegLoIdx != -1) {
7699     addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegLo);
7700     addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegHi);
7701   }
7702 
7703   const int Ops[] = { AMDGPU::OpName::src0,
7704                       AMDGPU::OpName::src1,
7705                       AMDGPU::OpName::src2 };
7706   const int ModOps[] = { AMDGPU::OpName::src0_modifiers,
7707                          AMDGPU::OpName::src1_modifiers,
7708                          AMDGPU::OpName::src2_modifiers };
7709 
7710   unsigned OpSel = 0;
7711   unsigned OpSelHi = 0;
7712   unsigned NegLo = 0;
7713   unsigned NegHi = 0;
7714 
7715   if (OpSelIdx != -1)
7716     OpSel = Inst.getOperand(OpSelIdx).getImm();
7717 
7718   if (OpSelHiIdx != -1)
7719     OpSelHi = Inst.getOperand(OpSelHiIdx).getImm();
7720 
7721   if (NegLoIdx != -1) {
7722     int NegHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_hi);
7723     NegLo = Inst.getOperand(NegLoIdx).getImm();
7724     NegHi = Inst.getOperand(NegHiIdx).getImm();
7725   }
7726 
7727   for (int J = 0; J < 3; ++J) {
7728     int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]);
7729     if (OpIdx == -1)
7730       break;
7731 
7732     uint32_t ModVal = 0;
7733 
7734     if ((OpSel & (1 << J)) != 0)
7735       ModVal |= SISrcMods::OP_SEL_0;
7736 
7737     if ((OpSelHi & (1 << J)) != 0)
7738       ModVal |= SISrcMods::OP_SEL_1;
7739 
7740     if ((NegLo & (1 << J)) != 0)
7741       ModVal |= SISrcMods::NEG;
7742 
7743     if ((NegHi & (1 << J)) != 0)
7744       ModVal |= SISrcMods::NEG_HI;
7745 
7746     int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]);
7747 
7748     Inst.getOperand(ModIdx).setImm(Inst.getOperand(ModIdx).getImm() | ModVal);
7749   }
7750 }
7751 
7752 void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands) {
7753   OptionalImmIndexMap OptIdx;
7754   cvtVOP3(Inst, Operands, OptIdx);
7755   cvtVOP3P(Inst, Operands, OptIdx);
7756 }
7757 
7758 //===----------------------------------------------------------------------===//
7759 // dpp
7760 //===----------------------------------------------------------------------===//
7761 
7762 bool AMDGPUOperand::isDPP8() const {
7763   return isImmTy(ImmTyDPP8);
7764 }
7765 
7766 bool AMDGPUOperand::isDPPCtrl() const {
7767   using namespace AMDGPU::DPP;
7768 
7769   bool result = isImm() && getImmTy() == ImmTyDppCtrl && isUInt<9>(getImm());
7770   if (result) {
7771     int64_t Imm = getImm();
7772     return (Imm >= DppCtrl::QUAD_PERM_FIRST && Imm <= DppCtrl::QUAD_PERM_LAST) ||
7773            (Imm >= DppCtrl::ROW_SHL_FIRST && Imm <= DppCtrl::ROW_SHL_LAST) ||
7774            (Imm >= DppCtrl::ROW_SHR_FIRST && Imm <= DppCtrl::ROW_SHR_LAST) ||
7775            (Imm >= DppCtrl::ROW_ROR_FIRST && Imm <= DppCtrl::ROW_ROR_LAST) ||
7776            (Imm == DppCtrl::WAVE_SHL1) ||
7777            (Imm == DppCtrl::WAVE_ROL1) ||
7778            (Imm == DppCtrl::WAVE_SHR1) ||
7779            (Imm == DppCtrl::WAVE_ROR1) ||
7780            (Imm == DppCtrl::ROW_MIRROR) ||
7781            (Imm == DppCtrl::ROW_HALF_MIRROR) ||
7782            (Imm == DppCtrl::BCAST15) ||
7783            (Imm == DppCtrl::BCAST31) ||
7784            (Imm >= DppCtrl::ROW_SHARE_FIRST && Imm <= DppCtrl::ROW_SHARE_LAST) ||
7785            (Imm >= DppCtrl::ROW_XMASK_FIRST && Imm <= DppCtrl::ROW_XMASK_LAST);
7786   }
7787   return false;
7788 }
7789 
7790 //===----------------------------------------------------------------------===//
7791 // mAI
7792 //===----------------------------------------------------------------------===//
7793 
7794 bool AMDGPUOperand::isBLGP() const {
7795   return isImm() && getImmTy() == ImmTyBLGP && isUInt<3>(getImm());
7796 }
7797 
7798 bool AMDGPUOperand::isCBSZ() const {
7799   return isImm() && getImmTy() == ImmTyCBSZ && isUInt<3>(getImm());
7800 }
7801 
7802 bool AMDGPUOperand::isABID() const {
7803   return isImm() && getImmTy() == ImmTyABID && isUInt<4>(getImm());
7804 }
7805 
7806 bool AMDGPUOperand::isS16Imm() const {
7807   return isImm() && (isInt<16>(getImm()) || isUInt<16>(getImm()));
7808 }
7809 
7810 bool AMDGPUOperand::isU16Imm() const {
7811   return isImm() && isUInt<16>(getImm());
7812 }
7813 
7814 //===----------------------------------------------------------------------===//
7815 // dim
7816 //===----------------------------------------------------------------------===//
7817 
7818 bool AMDGPUAsmParser::parseDimId(unsigned &Encoding) {
7819   // We want to allow "dim:1D" etc.,
7820   // but the initial 1 is tokenized as an integer.
7821   std::string Token;
7822   if (isToken(AsmToken::Integer)) {
7823     SMLoc Loc = getToken().getEndLoc();
7824     Token = std::string(getTokenStr());
7825     lex();
7826     if (getLoc() != Loc)
7827       return false;
7828   }
7829 
7830   StringRef Suffix;
7831   if (!parseId(Suffix))
7832     return false;
7833   Token += Suffix;
7834 
7835   StringRef DimId = Token;
7836   if (DimId.startswith("SQ_RSRC_IMG_"))
7837     DimId = DimId.drop_front(12);
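  // At this point both the short form and the full form (illustrative:
  // "dim:2D" and "dim:SQ_RSRC_IMG_2D") have been reduced to the same asm
  // suffix, e.g. "2D".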
7838 
7839   const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByAsmSuffix(DimId);
7840   if (!DimInfo)
7841     return false;
7842 
7843   Encoding = DimInfo->Encoding;
7844   return true;
7845 }
7846 
7847 OperandMatchResultTy AMDGPUAsmParser::parseDim(OperandVector &Operands) {
7848   if (!isGFX10Plus())
7849     return MatchOperand_NoMatch;
7850 
7851   SMLoc S = getLoc();
7852 
7853   if (!trySkipId("dim", AsmToken::Colon))
7854     return MatchOperand_NoMatch;
7855 
7856   unsigned Encoding;
7857   SMLoc Loc = getLoc();
7858   if (!parseDimId(Encoding)) {
7859     Error(Loc, "invalid dim value");
7860     return MatchOperand_ParseFail;
7861   }
7862 
7863   Operands.push_back(AMDGPUOperand::CreateImm(this, Encoding, S,
7864                                               AMDGPUOperand::ImmTyDim));
7865   return MatchOperand_Success;
7866 }
7867 
7868 //===----------------------------------------------------------------------===//
7869 // dpp
7870 //===----------------------------------------------------------------------===//
7871 
7872 OperandMatchResultTy AMDGPUAsmParser::parseDPP8(OperandVector &Operands) {
7873   SMLoc S = getLoc();
7874 
7875   if (!isGFX10Plus() || !trySkipId("dpp8", AsmToken::Colon))
7876     return MatchOperand_NoMatch;
7877 
7878   // dpp8:[%d,%d,%d,%d,%d,%d,%d,%d]
7879 
7880   int64_t Sels[8];
7881 
7882   if (!skipToken(AsmToken::LBrac, "expected an opening square bracket"))
7883     return MatchOperand_ParseFail;
7884 
7885   for (size_t i = 0; i < 8; ++i) {
7886     if (i > 0 && !skipToken(AsmToken::Comma, "expected a comma"))
7887       return MatchOperand_ParseFail;
7888 
7889     SMLoc Loc = getLoc();
7890     if (getParser().parseAbsoluteExpression(Sels[i]))
7891       return MatchOperand_ParseFail;
7892     if (Sels[i] < 0 || Sels[i] > 7) {
7893       Error(Loc, "expected a 3-bit value");
7894       return MatchOperand_ParseFail;
7895     }
7896   }
7897 
7898   if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
7899     return MatchOperand_ParseFail;
7900 
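  // Pack the eight 3-bit selects into a single immediate: lane i within each
  // group of eight lanes reads from lane Sels[i]. For example (illustrative),
  // dpp8:[0,1,2,3,4,5,6,7] is the identity selection.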
7901   unsigned DPP8 = 0;
7902   for (size_t i = 0; i < 8; ++i)
7903     DPP8 |= (Sels[i] << (i * 3));
7904 
7905   Operands.push_back(AMDGPUOperand::CreateImm(this, DPP8, S, AMDGPUOperand::ImmTyDPP8));
7906   return MatchOperand_Success;
7907 }
7908 
7909 bool
7910 AMDGPUAsmParser::isSupportedDPPCtrl(StringRef Ctrl,
7911                                     const OperandVector &Operands) {
7912   if (Ctrl == "row_newbcast")
7913     return isGFX90A();
7914 
7915   if (Ctrl == "row_share" ||
7916       Ctrl == "row_xmask")
7917     return isGFX10Plus();
7918 
7919   if (Ctrl == "wave_shl" ||
7920       Ctrl == "wave_shr" ||
7921       Ctrl == "wave_rol" ||
7922       Ctrl == "wave_ror" ||
7923       Ctrl == "row_bcast")
7924     return isVI() || isGFX9();
7925 
7926   return Ctrl == "row_mirror" ||
7927          Ctrl == "row_half_mirror" ||
7928          Ctrl == "quad_perm" ||
7929          Ctrl == "row_shl" ||
7930          Ctrl == "row_shr" ||
7931          Ctrl == "row_ror";
7932 }
7933 
7934 int64_t
7935 AMDGPUAsmParser::parseDPPCtrlPerm() {
7936   // quad_perm:[%d,%d,%d,%d]
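  // Entry i selects the source lane within each quad and is packed into bits
  // [2*i+1 : 2*i] below. For example (illustrative), quad_perm:[0,1,2,3] is
  // the identity and quad_perm:[3,2,1,0] reverses each quad.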
7937 
7938   if (!skipToken(AsmToken::LBrac, "expected an opening square bracket"))
7939     return -1;
7940 
7941   int64_t Val = 0;
7942   for (int i = 0; i < 4; ++i) {
7943     if (i > 0 && !skipToken(AsmToken::Comma, "expected a comma"))
7944       return -1;
7945 
7946     int64_t Temp;
7947     SMLoc Loc = getLoc();
7948     if (getParser().parseAbsoluteExpression(Temp))
7949       return -1;
7950     if (Temp < 0 || Temp > 3) {
7951       Error(Loc, "expected a 2-bit value");
7952       return -1;
7953     }
7954 
7955     Val += (Temp << i * 2);
7956   }
7957 
7958   if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
7959     return -1;
7960 
7961   return Val;
7962 }
7963 
7964 int64_t
7965 AMDGPUAsmParser::parseDPPCtrlSel(StringRef Ctrl) {
7966   using namespace AMDGPU::DPP;
7967 
7968   // sel:%d
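  // The accepted range depends on the control name, as encoded in the table
  // below. For example (illustrative), row_shl/row_shr/row_ror accept 1..15,
  // row_share and row_xmask accept 0..15, and row_bcast accepts only 15 or 31.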
7969 
7970   int64_t Val;
7971   SMLoc Loc = getLoc();
7972 
7973   if (getParser().parseAbsoluteExpression(Val))
7974     return -1;
7975 
7976   struct DppCtrlCheck {
7977     int64_t Ctrl;
7978     int Lo;
7979     int Hi;
7980   };
7981 
7982   DppCtrlCheck Check = StringSwitch<DppCtrlCheck>(Ctrl)
7983     .Case("wave_shl",  {DppCtrl::WAVE_SHL1,       1,  1})
7984     .Case("wave_rol",  {DppCtrl::WAVE_ROL1,       1,  1})
7985     .Case("wave_shr",  {DppCtrl::WAVE_SHR1,       1,  1})
7986     .Case("wave_ror",  {DppCtrl::WAVE_ROR1,       1,  1})
7987     .Case("row_shl",   {DppCtrl::ROW_SHL0,        1, 15})
7988     .Case("row_shr",   {DppCtrl::ROW_SHR0,        1, 15})
7989     .Case("row_ror",   {DppCtrl::ROW_ROR0,        1, 15})
7990     .Case("row_share", {DppCtrl::ROW_SHARE_FIRST, 0, 15})
7991     .Case("row_xmask", {DppCtrl::ROW_XMASK_FIRST, 0, 15})
7992     .Case("row_newbcast", {DppCtrl::ROW_NEWBCAST_FIRST, 0, 15})
7993     .Default({-1, 0, 0});
7994 
7995   bool Valid;
7996   if (Check.Ctrl == -1) {
7997     Valid = (Ctrl == "row_bcast" && (Val == 15 || Val == 31));
7998     Val = (Val == 15) ? DppCtrl::BCAST15 : DppCtrl::BCAST31;
7999   } else {
8000     Valid = Check.Lo <= Val && Val <= Check.Hi;
8001     Val = (Check.Lo == Check.Hi) ? Check.Ctrl : (Check.Ctrl | Val);
8002   }
8003 
8004   if (!Valid) {
8005     Error(Loc, Twine("invalid ", Ctrl) + Twine(" value"));
8006     return -1;
8007   }
8008 
8009   return Val;
8010 }
8011 
8012 OperandMatchResultTy
8013 AMDGPUAsmParser::parseDPPCtrl(OperandVector &Operands) {
8014   using namespace AMDGPU::DPP;
8015 
8016   if (!isToken(AsmToken::Identifier) ||
8017       !isSupportedDPPCtrl(getTokenStr(), Operands))
8018     return MatchOperand_NoMatch;
8019 
8020   SMLoc S = getLoc();
8021   int64_t Val = -1;
8022   StringRef Ctrl;
8023 
8024   parseId(Ctrl);
8025 
8026   if (Ctrl == "row_mirror") {
8027     Val = DppCtrl::ROW_MIRROR;
8028   } else if (Ctrl == "row_half_mirror") {
8029     Val = DppCtrl::ROW_HALF_MIRROR;
8030   } else {
8031     if (skipToken(AsmToken::Colon, "expected a colon")) {
8032       if (Ctrl == "quad_perm") {
8033         Val = parseDPPCtrlPerm();
8034       } else {
8035         Val = parseDPPCtrlSel(Ctrl);
8036       }
8037     }
8038   }
8039 
8040   if (Val == -1)
8041     return MatchOperand_ParseFail;
8042 
8043   Operands.push_back(
8044     AMDGPUOperand::CreateImm(this, Val, S, AMDGPUOperand::ImmTyDppCtrl));
8045   return MatchOperand_Success;
8046 }
8047 
8048 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultRowMask() const {
8049   return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppRowMask);
8050 }
8051 
8052 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultEndpgmImmOperands() const {
8053   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyEndpgm);
8054 }
8055 
8056 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBankMask() const {
8057   return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppBankMask);
8058 }
8059 
8060 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBoundCtrl() const {
8061   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppBoundCtrl);
8062 }
8063 
8064 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultFI() const {
8065   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppFi);
8066 }
8067 
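// Convert parsed DPP operands into an MCInst. IsDPP8 selects the dpp8:[...]
// form; otherwise the dpp_ctrl form with row_mask/bank_mask/bound_ctrl (and
// fi where present) is emitted.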
8068 void AMDGPUAsmParser::cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8) {
8069   OptionalImmIndexMap OptionalIdx;
8070 
8071   unsigned Opc = Inst.getOpcode();
8072   bool HasModifiers =
8073       AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers) != -1;
8074   unsigned I = 1;
8075   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
8076   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
8077     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
8078   }
8079 
8080   int Fi = 0;
8081   for (unsigned E = Operands.size(); I != E; ++I) {
8082     auto TiedTo = Desc.getOperandConstraint(Inst.getNumOperands(),
8083                                             MCOI::TIED_TO);
8084     if (TiedTo != -1) {
8085       assert((unsigned)TiedTo < Inst.getNumOperands());
8086       // Handle the tied 'old' or src2 operand for MAC instructions.
8087       Inst.addOperand(Inst.getOperand(TiedTo));
8088     }
8089     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
8090     // Add the register arguments
8091     if (Op.isReg() && validateVccOperand(Op.getReg())) {
8092       // VOP2b (v_add_u32, v_sub_u32, ...) dpp uses the "vcc" token.
8093       // Skip it.
8094       continue;
8095     }
8096 
8097     if (IsDPP8) {
8098       if (Op.isDPP8()) {
8099         Op.addImmOperands(Inst, 1);
8100       } else if (HasModifiers &&
8101                  isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
8102         Op.addRegWithFPInputModsOperands(Inst, 2);
8103       } else if (Op.isFI()) {
8104         Fi = Op.getImm();
8105       } else if (Op.isReg()) {
8106         Op.addRegOperands(Inst, 1);
8107       } else {
8108         llvm_unreachable("Invalid operand type");
8109       }
8110     } else {
8111       if (HasModifiers &&
8112           isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
8113         Op.addRegWithFPInputModsOperands(Inst, 2);
8114       } else if (Op.isReg()) {
8115         Op.addRegOperands(Inst, 1);
8116       } else if (Op.isDPPCtrl()) {
8117         Op.addImmOperands(Inst, 1);
8118       } else if (Op.isImm()) {
8119         // Handle optional arguments
8120         OptionalIdx[Op.getImmTy()] = I;
8121       } else {
8122         llvm_unreachable("Invalid operand type");
8123       }
8124     }
8125   }
8126 
8127   if (IsDPP8) {
8128     using namespace llvm::AMDGPU::DPP;
8129     Inst.addOperand(MCOperand::createImm(Fi ? DPP8_FI_1 : DPP8_FI_0));
8130   } else {
8131     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppRowMask, 0xf);
8132     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBankMask, 0xf);
8133     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBoundCtrl);
8134     if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::fi) != -1) {
8135       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppFi);
8136     }
8137   }
8138 }
8139 
8140 //===----------------------------------------------------------------------===//
8141 // sdwa
8142 //===----------------------------------------------------------------------===//
8143 
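// Parses an SDWA byte/word select operand such as (illustrative)
// "dst_sel:BYTE_0" or "src0_sel:WORD_1"; the actual prefix is supplied by
// the caller.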
8144 OperandMatchResultTy
8145 AMDGPUAsmParser::parseSDWASel(OperandVector &Operands, StringRef Prefix,
8146                               AMDGPUOperand::ImmTy Type) {
8147   using namespace llvm::AMDGPU::SDWA;
8148 
8149   SMLoc S = getLoc();
8150   StringRef Value;
8151   OperandMatchResultTy res;
8152 
8153   SMLoc StringLoc;
8154   res = parseStringWithPrefix(Prefix, Value, StringLoc);
8155   if (res != MatchOperand_Success) {
8156     return res;
8157   }
8158 
8159   int64_t Int;
8160   Int = StringSwitch<int64_t>(Value)
8161         .Case("BYTE_0", SdwaSel::BYTE_0)
8162         .Case("BYTE_1", SdwaSel::BYTE_1)
8163         .Case("BYTE_2", SdwaSel::BYTE_2)
8164         .Case("BYTE_3", SdwaSel::BYTE_3)
8165         .Case("WORD_0", SdwaSel::WORD_0)
8166         .Case("WORD_1", SdwaSel::WORD_1)
8167         .Case("DWORD", SdwaSel::DWORD)
8168         .Default(0xffffffff);
8169 
8170   if (Int == 0xffffffff) {
8171     Error(StringLoc, "invalid " + Twine(Prefix) + " value");
8172     return MatchOperand_ParseFail;
8173   }
8174 
8175   Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, Type));
8176   return MatchOperand_Success;
8177 }
8178 
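// Parses the SDWA dst_unused operand, e.g. (illustrative)
// "dst_unused:UNUSED_PRESERVE".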
8179 OperandMatchResultTy
8180 AMDGPUAsmParser::parseSDWADstUnused(OperandVector &Operands) {
8181   using namespace llvm::AMDGPU::SDWA;
8182 
8183   SMLoc S = getLoc();
8184   StringRef Value;
8185   OperandMatchResultTy res;
8186 
8187   SMLoc StringLoc;
8188   res = parseStringWithPrefix("dst_unused", Value, StringLoc);
8189   if (res != MatchOperand_Success) {
8190     return res;
8191   }
8192 
8193   int64_t Int;
8194   Int = StringSwitch<int64_t>(Value)
8195         .Case("UNUSED_PAD", DstUnused::UNUSED_PAD)
8196         .Case("UNUSED_SEXT", DstUnused::UNUSED_SEXT)
8197         .Case("UNUSED_PRESERVE", DstUnused::UNUSED_PRESERVE)
8198         .Default(0xffffffff);
8199 
8200   if (Int == 0xffffffff) {
8201     Error(StringLoc, "invalid dst_unused value");
8202     return MatchOperand_ParseFail;
8203   }
8204 
8205   Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, AMDGPUOperand::ImmTySdwaDstUnused));
8206   return MatchOperand_Success;
8207 }
8208 
8209 void AMDGPUAsmParser::cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands) {
8210   cvtSDWA(Inst, Operands, SIInstrFlags::VOP1);
8211 }
8212 
8213 void AMDGPUAsmParser::cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands) {
8214   cvtSDWA(Inst, Operands, SIInstrFlags::VOP2);
8215 }
8216 
8217 void AMDGPUAsmParser::cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands) {
8218   cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, true, true);
8219 }
8220 
8221 void AMDGPUAsmParser::cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands) {
8222   cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, false, true);
8223 }
8224 
8225 void AMDGPUAsmParser::cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands) {
8226   cvtSDWA(Inst, Operands, SIInstrFlags::VOPC, isVI());
8227 }
8228 
8229 void AMDGPUAsmParser::cvtSDWA(MCInst &Inst, const OperandVector &Operands,
8230                               uint64_t BasicInstType,
8231                               bool SkipDstVcc,
8232                               bool SkipSrcVcc) {
8233   using namespace llvm::AMDGPU::SDWA;
8234 
8235   OptionalImmIndexMap OptionalIdx;
8236   bool SkipVcc = SkipDstVcc || SkipSrcVcc;
8237   bool SkippedVcc = false;
8238 
8239   unsigned I = 1;
8240   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
8241   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
8242     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
8243   }
8244 
8245   for (unsigned E = Operands.size(); I != E; ++I) {
8246     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
8247     if (SkipVcc && !SkippedVcc && Op.isReg() &&
8248         (Op.getReg() == AMDGPU::VCC || Op.getReg() == AMDGPU::VCC_LO)) {
8249       // VOP2b (v_add_u32, v_sub_u32, ...) sdwa uses the "vcc" token as dst.
8250       // Skip it if it is the 2nd (e.g. v_add_i32_sdwa v1, vcc, v2, v3)
8251       // or the 4th (v_addc_u32_sdwa v1, vcc, v2, v3, vcc) operand.
8252       // Skip VCC only if we didn't skip it on the previous iteration.
8253       // Note that src0 and src1 occupy 2 slots each because of modifiers.
8254       if (BasicInstType == SIInstrFlags::VOP2 &&
8255           ((SkipDstVcc && Inst.getNumOperands() == 1) ||
8256            (SkipSrcVcc && Inst.getNumOperands() == 5))) {
8257         SkippedVcc = true;
8258         continue;
8259       } else if (BasicInstType == SIInstrFlags::VOPC &&
8260                  Inst.getNumOperands() == 0) {
8261         SkippedVcc = true;
8262         continue;
8263       }
8264     }
8265     if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
8266       Op.addRegOrImmWithInputModsOperands(Inst, 2);
8267     } else if (Op.isImm()) {
8268       // Handle optional arguments
8269       OptionalIdx[Op.getImmTy()] = I;
8270     } else {
8271       llvm_unreachable("Invalid operand type");
8272     }
8273     SkippedVcc = false;
8274   }
8275 
8276   if (Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx10 &&
8277       Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx9 &&
8278       Inst.getOpcode() != AMDGPU::V_NOP_sdwa_vi) {
8279     // V_NOP_sdwa_vi/gfx9/gfx10 have no optional sdwa arguments.
8280     switch (BasicInstType) {
8281     case SIInstrFlags::VOP1:
8282       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
8283       if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) {
8284         addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
8285       }
8286       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD);
8287       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE);
8288       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
8289       break;
8290 
8291     case SIInstrFlags::VOP2:
8292       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
8293       if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) {
8294         addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
8295       }
8296       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD);
8297       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE);
8298       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
8299       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD);
8300       break;
8301 
8302     case SIInstrFlags::VOPC:
8303       if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::clamp) != -1)
8304         addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
8305       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
8306       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD);
8307       break;
8308 
8309     default:
8310       llvm_unreachable("Invalid instruction type. Only VOP1, VOP2 and VOPC allowed");
8311     }
8312   }
8313 
8314   // Special case for v_mac_{f16, f32}:
8315   // it has a src2 register operand that is tied to the dst operand.
8316   if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
8317       Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi)  {
8318     auto it = Inst.begin();
8319     std::advance(
8320       it, AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::src2));
8321     Inst.insert(it, Inst.getOperand(0)); // src2 = dst
8322   }
8323 }
8324 
8325 //===----------------------------------------------------------------------===//
8326 // mAI
8327 //===----------------------------------------------------------------------===//
8328 
8329 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBLGP() const {
8330   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyBLGP);
8331 }
8332 
8333 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultCBSZ() const {
8334   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyCBSZ);
8335 }
8336 
8337 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultABID() const {
8338   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyABID);
8339 }
8340 
8341 /// Force static initialization.
8342 extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUAsmParser() {
8343   RegisterMCAsmParser<AMDGPUAsmParser> A(getTheAMDGPUTarget());
8344   RegisterMCAsmParser<AMDGPUAsmParser> B(getTheGCNTarget());
8345 }
8346 
8347 #define GET_REGISTER_MATCHER
8348 #define GET_MATCHER_IMPLEMENTATION
8349 #define GET_MNEMONIC_SPELL_CHECKER
8350 #define GET_MNEMONIC_CHECKER
8351 #include "AMDGPUGenAsmMatcher.inc"
8352 
8353 // This function should be defined after the auto-generated include so that
8354 // the MatchClassKind enum is defined.
8355 unsigned AMDGPUAsmParser::validateTargetOperandClass(MCParsedAsmOperand &Op,
8356                                                      unsigned Kind) {
8357   // Tokens like "glc" would be parsed as immediate operands in ParseOperand().
8358   // But MatchInstructionImpl() expects to meet a token and fails to validate
8359   // the operand. This method checks whether we were given an immediate operand
8360   // but expected the corresponding token.
8361   AMDGPUOperand &Operand = (AMDGPUOperand&)Op;
8362   switch (Kind) {
8363   case MCK_addr64:
8364     return Operand.isAddr64() ? Match_Success : Match_InvalidOperand;
8365   case MCK_gds:
8366     return Operand.isGDS() ? Match_Success : Match_InvalidOperand;
8367   case MCK_lds:
8368     return Operand.isLDS() ? Match_Success : Match_InvalidOperand;
8369   case MCK_idxen:
8370     return Operand.isIdxen() ? Match_Success : Match_InvalidOperand;
8371   case MCK_offen:
8372     return Operand.isOffen() ? Match_Success : Match_InvalidOperand;
8373   case MCK_SSrcB32:
8374     // When operands have expression values, they will return true for isToken,
8375     // because it is not possible to distinguish between a token and an
8376     // expression at parse time. MatchInstructionImpl() will always try to
8377     // match an operand as a token when isToken returns true, and when the
8378     // name of the expression is not a valid token, the match will fail,
8379     // so we need to handle it here.
8380     return Operand.isSSrcB32() ? Match_Success : Match_InvalidOperand;
8381   case MCK_SSrcF32:
8382     return Operand.isSSrcF32() ? Match_Success : Match_InvalidOperand;
8383   case MCK_SoppBrTarget:
8384     return Operand.isSoppBrTarget() ? Match_Success : Match_InvalidOperand;
8385   case MCK_VReg32OrOff:
8386     return Operand.isVReg32OrOff() ? Match_Success : Match_InvalidOperand;
8387   case MCK_InterpSlot:
8388     return Operand.isInterpSlot() ? Match_Success : Match_InvalidOperand;
8389   case MCK_Attr:
8390     return Operand.isInterpAttr() ? Match_Success : Match_InvalidOperand;
8391   case MCK_AttrChan:
8392     return Operand.isAttrChan() ? Match_Success : Match_InvalidOperand;
8393   case MCK_ImmSMEMOffset:
8394     return Operand.isSMEMOffset() ? Match_Success : Match_InvalidOperand;
8395   case MCK_SReg_64:
8396   case MCK_SReg_64_XEXEC:
8397     // Null is defined as a 32-bit register but
8398     // it should also be enabled with 64-bit operands.
8399     // The following code enables it for SReg_64 operands
8400     // used as source and destination. Remaining source
8401     // operands are handled in isInlinableImm.
8402     return Operand.isNull() ? Match_Success : Match_InvalidOperand;
8403   default:
8404     return Match_InvalidOperand;
8405   }
8406 }
8407 
8408 //===----------------------------------------------------------------------===//
8409 // endpgm
8410 //===----------------------------------------------------------------------===//
8411 
8412 OperandMatchResultTy AMDGPUAsmParser::parseEndpgmOp(OperandVector &Operands) {
8413   SMLoc S = getLoc();
8414   int64_t Imm = 0;
8415 
8416   if (!parseExpr(Imm)) {
8417     // The operand is optional; if not present, default to 0.
8418     Imm = 0;
8419   }
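  // Both the bare mnemonic and an explicit immediate are accepted, e.g.
  // (illustrative) "s_endpgm" and "s_endpgm 3".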
8420 
8421   if (!isUInt<16>(Imm)) {
8422     Error(S, "expected a 16-bit value");
8423     return MatchOperand_ParseFail;
8424   }
8425 
8426   Operands.push_back(
8427       AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyEndpgm));
8428   return MatchOperand_Success;
8429 }
8430 
8431 bool AMDGPUOperand::isEndpgm() const { return isImmTy(ImmTyEndpgm); }
8432