1 //===- AMDGPUAsmParser.cpp - Parse SI asm to MCInst instructions ----------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "AMDKernelCodeT.h"
10 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
11 #include "MCTargetDesc/AMDGPUTargetStreamer.h"
12 #include "SIDefines.h"
13 #include "SIInstrInfo.h"
14 #include "TargetInfo/AMDGPUTargetInfo.h"
15 #include "Utils/AMDGPUAsmUtils.h"
16 #include "Utils/AMDGPUBaseInfo.h"
17 #include "Utils/AMDKernelCodeTUtils.h"
18 #include "llvm/ADT/APFloat.h"
19 #include "llvm/ADT/SmallBitVector.h"
20 #include "llvm/ADT/StringSet.h"
21 #include "llvm/ADT/Twine.h"
22 #include "llvm/MC/MCAsmInfo.h"
23 #include "llvm/MC/MCContext.h"
24 #include "llvm/MC/MCExpr.h"
25 #include "llvm/MC/MCInst.h"
26 #include "llvm/MC/MCParser/MCAsmParser.h"
27 #include "llvm/MC/MCParser/MCParsedAsmOperand.h"
28 #include "llvm/MC/MCParser/MCTargetAsmParser.h"
29 #include "llvm/MC/MCSymbol.h"
30 #include "llvm/Support/AMDGPUMetadata.h"
31 #include "llvm/Support/AMDHSAKernelDescriptor.h"
32 #include "llvm/Support/Casting.h"
33 #include "llvm/Support/MachineValueType.h"
34 #include "llvm/Support/TargetParser.h"
35 #include "llvm/Support/TargetRegistry.h"
36 
37 using namespace llvm;
38 using namespace llvm::AMDGPU;
39 using namespace llvm::amdhsa;
40 
41 namespace {
42 
43 class AMDGPUAsmParser;
44 
45 enum RegisterKind { IS_UNKNOWN, IS_VGPR, IS_SGPR, IS_AGPR, IS_TTMP, IS_SPECIAL };
46 
47 //===----------------------------------------------------------------------===//
48 // Operand
49 //===----------------------------------------------------------------------===//
50 
51 class AMDGPUOperand : public MCParsedAsmOperand {
52   enum KindTy {
53     Token,
54     Immediate,
55     Register,
56     Expression
57   } Kind;
58 
59   SMLoc StartLoc, EndLoc;
60   const AMDGPUAsmParser *AsmParser;
61 
62 public:
63   AMDGPUOperand(KindTy Kind_, const AMDGPUAsmParser *AsmParser_)
64     : MCParsedAsmOperand(), Kind(Kind_), AsmParser(AsmParser_) {}
65 
66   using Ptr = std::unique_ptr<AMDGPUOperand>;
67 
68   struct Modifiers {
69     bool Abs = false;
70     bool Neg = false;
71     bool Sext = false;
72 
73     bool hasFPModifiers() const { return Abs || Neg; }
74     bool hasIntModifiers() const { return Sext; }
75     bool hasModifiers() const { return hasFPModifiers() || hasIntModifiers(); }
76 
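    // Pack the FP source modifiers into the SISrcMods bit layout used by the
    // src*_modifiers operands.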
77     int64_t getFPModifiersOperand() const {
78       int64_t Operand = 0;
79       Operand |= Abs ? SISrcMods::ABS : 0u;
80       Operand |= Neg ? SISrcMods::NEG : 0u;
81       return Operand;
82     }
83 
84     int64_t getIntModifiersOperand() const {
85       int64_t Operand = 0;
86       Operand |= Sext ? SISrcMods::SEXT : 0u;
87       return Operand;
88     }
89 
90     int64_t getModifiersOperand() const {
91       assert(!(hasFPModifiers() && hasIntModifiers())
92            && "fp and int modifiers should not be used simultaneously");
93       if (hasFPModifiers()) {
94         return getFPModifiersOperand();
95       } else if (hasIntModifiers()) {
96         return getIntModifiersOperand();
97       } else {
98         return 0;
99       }
100     }
101 
102     friend raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods);
103   };
104 
105   enum ImmTy {
106     ImmTyNone,
107     ImmTyGDS,
108     ImmTyLDS,
109     ImmTyOffen,
110     ImmTyIdxen,
111     ImmTyAddr64,
112     ImmTyOffset,
113     ImmTyInstOffset,
114     ImmTyOffset0,
115     ImmTyOffset1,
116     ImmTyDLC,
117     ImmTyGLC,
118     ImmTySLC,
119     ImmTySWZ,
120     ImmTyTFE,
121     ImmTyD16,
122     ImmTyClampSI,
123     ImmTyOModSI,
124     ImmTyDPP8,
125     ImmTyDppCtrl,
126     ImmTyDppRowMask,
127     ImmTyDppBankMask,
128     ImmTyDppBoundCtrl,
129     ImmTyDppFi,
130     ImmTySdwaDstSel,
131     ImmTySdwaSrc0Sel,
132     ImmTySdwaSrc1Sel,
133     ImmTySdwaDstUnused,
134     ImmTyDMask,
135     ImmTyDim,
136     ImmTyUNorm,
137     ImmTyDA,
138     ImmTyR128A16,
139     ImmTyA16,
140     ImmTyLWE,
141     ImmTyExpTgt,
142     ImmTyExpCompr,
143     ImmTyExpVM,
144     ImmTyFORMAT,
145     ImmTyHwreg,
146     ImmTyOff,
147     ImmTySendMsg,
148     ImmTyInterpSlot,
149     ImmTyInterpAttr,
150     ImmTyAttrChan,
151     ImmTyOpSel,
152     ImmTyOpSelHi,
153     ImmTyNegLo,
154     ImmTyNegHi,
155     ImmTySwizzle,
156     ImmTyGprIdxMode,
157     ImmTyHigh,
158     ImmTyBLGP,
159     ImmTyCBSZ,
160     ImmTyABID,
161     ImmTyEndpgm,
162   };
163 
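  // Distinguishes immediates encoded as literal constants from those encoded
  // as inline constants.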
164   enum ImmKindTy {
165     ImmKindTyNone,
166     ImmKindTyLiteral,
167     ImmKindTyConst,
168   };
169 
170 private:
171   struct TokOp {
172     const char *Data;
173     unsigned Length;
174   };
175 
176   struct ImmOp {
177     int64_t Val;
178     ImmTy Type;
179     bool IsFPImm;
180     mutable ImmKindTy Kind;
181     Modifiers Mods;
182   };
183 
184   struct RegOp {
185     unsigned RegNo;
186     Modifiers Mods;
187   };
188 
189   union {
190     TokOp Tok;
191     ImmOp Imm;
192     RegOp Reg;
193     const MCExpr *Expr;
194   };
195 
196 public:
197   bool isToken() const override {
198     if (Kind == Token)
199       return true;
200 
201     // When parsing operands, we can't always tell if something was meant to be
202     // a token, like 'gds', or an expression that references a global variable.
203     // In this case, we assume the string is an expression, and if we need to
204     // interpret it as a token, then we treat the symbol name as the token.
205     return isSymbolRefExpr();
206   }
207 
208   bool isSymbolRefExpr() const {
209     return isExpr() && Expr && isa<MCSymbolRefExpr>(Expr);
210   }
211 
212   bool isImm() const override {
213     return Kind == Immediate;
214   }
215 
216   void setImmKindNone() const {
217     assert(isImm());
218     Imm.Kind = ImmKindTyNone;
219   }
220 
221   void setImmKindLiteral() const {
222     assert(isImm());
223     Imm.Kind = ImmKindTyLiteral;
224   }
225 
226   void setImmKindConst() const {
227     assert(isImm());
228     Imm.Kind = ImmKindTyConst;
229   }
230 
231   bool IsImmKindLiteral() const {
232     return isImm() && Imm.Kind == ImmKindTyLiteral;
233   }
234 
235   bool isImmKindConst() const {
236     return isImm() && Imm.Kind == ImmKindTyConst;
237   }
238 
239   bool isInlinableImm(MVT type) const;
240   bool isLiteralImm(MVT type) const;
241 
242   bool isRegKind() const {
243     return Kind == Register;
244   }
245 
246   bool isReg() const override {
247     return isRegKind() && !hasModifiers();
248   }
249 
250   bool isRegOrImmWithInputMods(unsigned RCID, MVT type) const {
251     return isRegClass(RCID) || isInlinableImm(type) || isLiteralImm(type);
252   }
253 
254   bool isRegOrImmWithInt16InputMods() const {
255     return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i16);
256   }
257 
258   bool isRegOrImmWithInt32InputMods() const {
259     return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i32);
260   }
261 
262   bool isRegOrImmWithInt64InputMods() const {
263     return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::i64);
264   }
265 
266   bool isRegOrImmWithFP16InputMods() const {
267     return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f16);
268   }
269 
270   bool isRegOrImmWithFP32InputMods() const {
271     return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f32);
272   }
273 
274   bool isRegOrImmWithFP64InputMods() const {
275     return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::f64);
276   }
277 
278   bool isVReg() const {
279     return isRegClass(AMDGPU::VGPR_32RegClassID) ||
280            isRegClass(AMDGPU::VReg_64RegClassID) ||
281            isRegClass(AMDGPU::VReg_96RegClassID) ||
282            isRegClass(AMDGPU::VReg_128RegClassID) ||
283            isRegClass(AMDGPU::VReg_160RegClassID) ||
284            isRegClass(AMDGPU::VReg_192RegClassID) ||
285            isRegClass(AMDGPU::VReg_256RegClassID) ||
286            isRegClass(AMDGPU::VReg_512RegClassID) ||
287            isRegClass(AMDGPU::VReg_1024RegClassID);
288   }
289 
290   bool isVReg32() const {
291     return isRegClass(AMDGPU::VGPR_32RegClassID);
292   }
293 
294   bool isVReg32OrOff() const {
295     return isOff() || isVReg32();
296   }
297 
298   bool isNull() const {
299     return isRegKind() && getReg() == AMDGPU::SGPR_NULL;
300   }
301 
302   bool isSDWAOperand(MVT type) const;
303   bool isSDWAFP16Operand() const;
304   bool isSDWAFP32Operand() const;
305   bool isSDWAInt16Operand() const;
306   bool isSDWAInt32Operand() const;
307 
308   bool isImmTy(ImmTy ImmT) const {
309     return isImm() && Imm.Type == ImmT;
310   }
311 
312   bool isImmModifier() const {
313     return isImm() && Imm.Type != ImmTyNone;
314   }
315 
316   bool isClampSI() const { return isImmTy(ImmTyClampSI); }
317   bool isOModSI() const { return isImmTy(ImmTyOModSI); }
318   bool isDMask() const { return isImmTy(ImmTyDMask); }
319   bool isDim() const { return isImmTy(ImmTyDim); }
320   bool isUNorm() const { return isImmTy(ImmTyUNorm); }
321   bool isDA() const { return isImmTy(ImmTyDA); }
322   bool isR128A16() const { return isImmTy(ImmTyR128A16); }
323   bool isGFX10A16() const { return isImmTy(ImmTyA16); }
324   bool isLWE() const { return isImmTy(ImmTyLWE); }
325   bool isOff() const { return isImmTy(ImmTyOff); }
326   bool isExpTgt() const { return isImmTy(ImmTyExpTgt); }
327   bool isExpVM() const { return isImmTy(ImmTyExpVM); }
328   bool isExpCompr() const { return isImmTy(ImmTyExpCompr); }
329   bool isOffen() const { return isImmTy(ImmTyOffen); }
330   bool isIdxen() const { return isImmTy(ImmTyIdxen); }
331   bool isAddr64() const { return isImmTy(ImmTyAddr64); }
332   bool isOffset() const { return isImmTy(ImmTyOffset) && isUInt<16>(getImm()); }
333   bool isOffset0() const { return isImmTy(ImmTyOffset0) && isUInt<8>(getImm()); }
334   bool isOffset1() const { return isImmTy(ImmTyOffset1) && isUInt<8>(getImm()); }
335 
336   bool isFlatOffset() const { return isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset); }
337   bool isGDS() const { return isImmTy(ImmTyGDS); }
338   bool isLDS() const { return isImmTy(ImmTyLDS); }
339   bool isDLC() const { return isImmTy(ImmTyDLC); }
340   bool isGLC() const { return isImmTy(ImmTyGLC); }
341   // "GLC_1" is the MatchClass of the GLC_1 operand: a GLC operand whose value
342   // is forced to 1 (with a matching default).
343   bool isGLC_1() const { return isImmTy(ImmTyGLC); }
344   bool isSLC() const { return isImmTy(ImmTySLC); }
345   bool isSWZ() const { return isImmTy(ImmTySWZ); }
346   bool isTFE() const { return isImmTy(ImmTyTFE); }
347   bool isD16() const { return isImmTy(ImmTyD16); }
348   bool isFORMAT() const { return isImmTy(ImmTyFORMAT) && isUInt<7>(getImm()); }
349   bool isBankMask() const { return isImmTy(ImmTyDppBankMask); }
350   bool isRowMask() const { return isImmTy(ImmTyDppRowMask); }
351   bool isBoundCtrl() const { return isImmTy(ImmTyDppBoundCtrl); }
352   bool isFI() const { return isImmTy(ImmTyDppFi); }
353   bool isSDWADstSel() const { return isImmTy(ImmTySdwaDstSel); }
354   bool isSDWASrc0Sel() const { return isImmTy(ImmTySdwaSrc0Sel); }
355   bool isSDWASrc1Sel() const { return isImmTy(ImmTySdwaSrc1Sel); }
356   bool isSDWADstUnused() const { return isImmTy(ImmTySdwaDstUnused); }
357   bool isInterpSlot() const { return isImmTy(ImmTyInterpSlot); }
358   bool isInterpAttr() const { return isImmTy(ImmTyInterpAttr); }
359   bool isAttrChan() const { return isImmTy(ImmTyAttrChan); }
360   bool isOpSel() const { return isImmTy(ImmTyOpSel); }
361   bool isOpSelHi() const { return isImmTy(ImmTyOpSelHi); }
362   bool isNegLo() const { return isImmTy(ImmTyNegLo); }
363   bool isNegHi() const { return isImmTy(ImmTyNegHi); }
364   bool isHigh() const { return isImmTy(ImmTyHigh); }
365 
366   bool isMod() const {
367     return isClampSI() || isOModSI();
368   }
369 
370   bool isRegOrImm() const {
371     return isReg() || isImm();
372   }
373 
374   bool isRegClass(unsigned RCID) const;
375 
376   bool isInlineValue() const;
377 
378   bool isRegOrInlineNoMods(unsigned RCID, MVT type) const {
379     return (isRegClass(RCID) || isInlinableImm(type)) && !hasModifiers();
380   }
381 
382   bool isSCSrcB16() const {
383     return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i16);
384   }
385 
386   bool isSCSrcV2B16() const {
387     return isSCSrcB16();
388   }
389 
390   bool isSCSrcB32() const {
391     return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i32);
392   }
393 
394   bool isSCSrcB64() const {
395     return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::i64);
396   }
397 
398   bool isBoolReg() const;
399 
400   bool isSCSrcF16() const {
401     return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f16);
402   }
403 
404   bool isSCSrcV2F16() const {
405     return isSCSrcF16();
406   }
407 
408   bool isSCSrcF32() const {
409     return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f32);
410   }
411 
412   bool isSCSrcF64() const {
413     return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::f64);
414   }
415 
416   bool isSSrcB32() const {
417     return isSCSrcB32() || isLiteralImm(MVT::i32) || isExpr();
418   }
419 
420   bool isSSrcB16() const {
421     return isSCSrcB16() || isLiteralImm(MVT::i16);
422   }
423 
424   bool isSSrcV2B16() const {
425     llvm_unreachable("cannot happen");
426     return isSSrcB16();
427   }
428 
429   bool isSSrcB64() const {
430     // TODO: Find out how SALU supports extension of 32-bit literals to 64 bits.
431     // See isVSrc64().
432     return isSCSrcB64() || isLiteralImm(MVT::i64);
433   }
434 
435   bool isSSrcF32() const {
436     return isSCSrcB32() || isLiteralImm(MVT::f32) || isExpr();
437   }
438 
439   bool isSSrcF64() const {
440     return isSCSrcB64() || isLiteralImm(MVT::f64);
441   }
442 
443   bool isSSrcF16() const {
444     return isSCSrcB16() || isLiteralImm(MVT::f16);
445   }
446 
447   bool isSSrcV2F16() const {
448     llvm_unreachable("cannot happen");
449     return isSSrcF16();
450   }
451 
452   bool isSSrcOrLdsB32() const {
453     return isRegOrInlineNoMods(AMDGPU::SRegOrLds_32RegClassID, MVT::i32) ||
454            isLiteralImm(MVT::i32) || isExpr();
455   }
456 
457   bool isVCSrcB32() const {
458     return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i32);
459   }
460 
461   bool isVCSrcB64() const {
462     return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::i64);
463   }
464 
465   bool isVCSrcB16() const {
466     return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i16);
467   }
468 
469   bool isVCSrcV2B16() const {
470     return isVCSrcB16();
471   }
472 
473   bool isVCSrcF32() const {
474     return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f32);
475   }
476 
477   bool isVCSrcF64() const {
478     return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::f64);
479   }
480 
481   bool isVCSrcF16() const {
482     return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f16);
483   }
484 
485   bool isVCSrcV2F16() const {
486     return isVCSrcF16();
487   }
488 
489   bool isVSrcB32() const {
490     return isVCSrcF32() || isLiteralImm(MVT::i32) || isExpr();
491   }
492 
493   bool isVSrcB64() const {
494     return isVCSrcF64() || isLiteralImm(MVT::i64);
495   }
496 
497   bool isVSrcB16() const {
498     return isVCSrcB16() || isLiteralImm(MVT::i16);
499   }
500 
501   bool isVSrcV2B16() const {
502     return isVSrcB16() || isLiteralImm(MVT::v2i16);
503   }
504 
505   bool isVSrcF32() const {
506     return isVCSrcF32() || isLiteralImm(MVT::f32) || isExpr();
507   }
508 
509   bool isVSrcF64() const {
510     return isVCSrcF64() || isLiteralImm(MVT::f64);
511   }
512 
513   bool isVSrcF16() const {
514     return isVCSrcF16() || isLiteralImm(MVT::f16);
515   }
516 
517   bool isVSrcV2F16() const {
518     return isVSrcF16() || isLiteralImm(MVT::v2f16);
519   }
520 
521   bool isVISrcB32() const {
522     return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i32);
523   }
524 
525   bool isVISrcB16() const {
526     return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i16);
527   }
528 
529   bool isVISrcV2B16() const {
530     return isVISrcB16();
531   }
532 
533   bool isVISrcF32() const {
534     return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f32);
535   }
536 
537   bool isVISrcF16() const {
538     return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f16);
539   }
540 
541   bool isVISrcV2F16() const {
542     return isVISrcF16() || isVISrcB32();
543   }
544 
545   bool isAISrcB32() const {
546     return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i32);
547   }
548 
549   bool isAISrcB16() const {
550     return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i16);
551   }
552 
553   bool isAISrcV2B16() const {
554     return isAISrcB16();
555   }
556 
557   bool isAISrcF32() const {
558     return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f32);
559   }
560 
561   bool isAISrcF16() const {
562     return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f16);
563   }
564 
565   bool isAISrcV2F16() const {
566     return isAISrcF16() || isAISrcB32();
567   }
568 
569   bool isAISrc_128B32() const {
570     return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i32);
571   }
572 
573   bool isAISrc_128B16() const {
574     return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i16);
575   }
576 
577   bool isAISrc_128V2B16() const {
578     return isAISrc_128B16();
579   }
580 
581   bool isAISrc_128F32() const {
582     return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f32);
583   }
584 
585   bool isAISrc_128F16() const {
586     return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f16);
587   }
588 
589   bool isAISrc_128V2F16() const {
590     return isAISrc_128F16() || isAISrc_128B32();
591   }
592 
593   bool isAISrc_512B32() const {
594     return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i32);
595   }
596 
597   bool isAISrc_512B16() const {
598     return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i16);
599   }
600 
601   bool isAISrc_512V2B16() const {
602     return isAISrc_512B16();
603   }
604 
605   bool isAISrc_512F32() const {
606     return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f32);
607   }
608 
609   bool isAISrc_512F16() const {
610     return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f16);
611   }
612 
613   bool isAISrc_512V2F16() const {
614     return isAISrc_512F16() || isAISrc_512B32();
615   }
616 
617   bool isAISrc_1024B32() const {
618     return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i32);
619   }
620 
621   bool isAISrc_1024B16() const {
622     return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i16);
623   }
624 
625   bool isAISrc_1024V2B16() const {
626     return isAISrc_1024B16();
627   }
628 
629   bool isAISrc_1024F32() const {
630     return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f32);
631   }
632 
633   bool isAISrc_1024F16() const {
634     return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f16);
635   }
636 
637   bool isAISrc_1024V2F16() const {
638     return isAISrc_1024F16() || isAISrc_1024B32();
639   }
640 
641   bool isKImmFP32() const {
642     return isLiteralImm(MVT::f32);
643   }
644 
645   bool isKImmFP16() const {
646     return isLiteralImm(MVT::f16);
647   }
648 
649   bool isMem() const override {
650     return false;
651   }
652 
653   bool isExpr() const {
654     return Kind == Expression;
655   }
656 
657   bool isSoppBrTarget() const {
658     return isExpr() || isImm();
659   }
660 
661   bool isSWaitCnt() const;
662   bool isHwreg() const;
663   bool isSendMsg() const;
664   bool isSwizzle() const;
665   bool isSMRDOffset8() const;
666   bool isSMEMOffset() const;
667   bool isSMRDLiteralOffset() const;
668   bool isDPP8() const;
669   bool isDPPCtrl() const;
670   bool isBLGP() const;
671   bool isCBSZ() const;
672   bool isABID() const;
673   bool isGPRIdxMode() const;
674   bool isS16Imm() const;
675   bool isU16Imm() const;
676   bool isEndpgm() const;
677 
678   StringRef getExpressionAsToken() const {
679     assert(isExpr());
680     const MCSymbolRefExpr *S = cast<MCSymbolRefExpr>(Expr);
681     return S->getSymbol().getName();
682   }
683 
684   StringRef getToken() const {
685     assert(isToken());
686 
687     if (Kind == Expression)
688       return getExpressionAsToken();
689 
690     return StringRef(Tok.Data, Tok.Length);
691   }
692 
693   int64_t getImm() const {
694     assert(isImm());
695     return Imm.Val;
696   }
697 
698   void setImm(int64_t Val) {
699     assert(isImm());
700     Imm.Val = Val;
701   }
702 
703   ImmTy getImmTy() const {
704     assert(isImm());
705     return Imm.Type;
706   }
707 
708   unsigned getReg() const override {
709     assert(isRegKind());
710     return Reg.RegNo;
711   }
712 
713   SMLoc getStartLoc() const override {
714     return StartLoc;
715   }
716 
717   SMLoc getEndLoc() const override {
718     return EndLoc;
719   }
720 
721   SMRange getLocRange() const {
722     return SMRange(StartLoc, EndLoc);
723   }
724 
725   Modifiers getModifiers() const {
726     assert(isRegKind() || isImmTy(ImmTyNone));
727     return isRegKind() ? Reg.Mods : Imm.Mods;
728   }
729 
730   void setModifiers(Modifiers Mods) {
731     assert(isRegKind() || isImmTy(ImmTyNone));
732     if (isRegKind())
733       Reg.Mods = Mods;
734     else
735       Imm.Mods = Mods;
736   }
737 
738   bool hasModifiers() const {
739     return getModifiers().hasModifiers();
740   }
741 
742   bool hasFPModifiers() const {
743     return getModifiers().hasFPModifiers();
744   }
745 
746   bool hasIntModifiers() const {
747     return getModifiers().hasIntModifiers();
748   }
749 
750   uint64_t applyInputFPModifiers(uint64_t Val, unsigned Size) const;
751 
752   void addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers = true) const;
753 
754   void addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const;
755 
756   template <unsigned Bitwidth>
757   void addKImmFPOperands(MCInst &Inst, unsigned N) const;
758 
759   void addKImmFP16Operands(MCInst &Inst, unsigned N) const {
760     addKImmFPOperands<16>(Inst, N);
761   }
762 
763   void addKImmFP32Operands(MCInst &Inst, unsigned N) const {
764     addKImmFPOperands<32>(Inst, N);
765   }
766 
767   void addRegOperands(MCInst &Inst, unsigned N) const;
768 
769   void addBoolRegOperands(MCInst &Inst, unsigned N) const {
770     addRegOperands(Inst, N);
771   }
772 
773   void addRegOrImmOperands(MCInst &Inst, unsigned N) const {
774     if (isRegKind())
775       addRegOperands(Inst, N);
776     else if (isExpr())
777       Inst.addOperand(MCOperand::createExpr(Expr));
778     else
779       addImmOperands(Inst, N);
780   }
781 
782   void addRegOrImmWithInputModsOperands(MCInst &Inst, unsigned N) const {
783     Modifiers Mods = getModifiers();
784     Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
785     if (isRegKind()) {
786       addRegOperands(Inst, N);
787     } else {
788       addImmOperands(Inst, N, false);
789     }
790   }
791 
792   void addRegOrImmWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
793     assert(!hasIntModifiers());
794     addRegOrImmWithInputModsOperands(Inst, N);
795   }
796 
797   void addRegOrImmWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
798     assert(!hasFPModifiers());
799     addRegOrImmWithInputModsOperands(Inst, N);
800   }
801 
802   void addRegWithInputModsOperands(MCInst &Inst, unsigned N) const {
803     Modifiers Mods = getModifiers();
804     Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
805     assert(isRegKind());
806     addRegOperands(Inst, N);
807   }
808 
809   void addRegWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
810     assert(!hasIntModifiers());
811     addRegWithInputModsOperands(Inst, N);
812   }
813 
814   void addRegWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
815     assert(!hasFPModifiers());
816     addRegWithInputModsOperands(Inst, N);
817   }
818 
819   void addSoppBrTargetOperands(MCInst &Inst, unsigned N) const {
820     if (isImm())
821       addImmOperands(Inst, N);
822     else {
823       assert(isExpr());
824       Inst.addOperand(MCOperand::createExpr(Expr));
825     }
826   }
827 
828   static void printImmTy(raw_ostream& OS, ImmTy Type) {
829     switch (Type) {
830     case ImmTyNone: OS << "None"; break;
831     case ImmTyGDS: OS << "GDS"; break;
832     case ImmTyLDS: OS << "LDS"; break;
833     case ImmTyOffen: OS << "Offen"; break;
834     case ImmTyIdxen: OS << "Idxen"; break;
835     case ImmTyAddr64: OS << "Addr64"; break;
836     case ImmTyOffset: OS << "Offset"; break;
837     case ImmTyInstOffset: OS << "InstOffset"; break;
838     case ImmTyOffset0: OS << "Offset0"; break;
839     case ImmTyOffset1: OS << "Offset1"; break;
840     case ImmTyDLC: OS << "DLC"; break;
841     case ImmTyGLC: OS << "GLC"; break;
842     case ImmTySLC: OS << "SLC"; break;
843     case ImmTySWZ: OS << "SWZ"; break;
844     case ImmTyTFE: OS << "TFE"; break;
845     case ImmTyD16: OS << "D16"; break;
846     case ImmTyFORMAT: OS << "FORMAT"; break;
847     case ImmTyClampSI: OS << "ClampSI"; break;
848     case ImmTyOModSI: OS << "OModSI"; break;
849     case ImmTyDPP8: OS << "DPP8"; break;
850     case ImmTyDppCtrl: OS << "DppCtrl"; break;
851     case ImmTyDppRowMask: OS << "DppRowMask"; break;
852     case ImmTyDppBankMask: OS << "DppBankMask"; break;
853     case ImmTyDppBoundCtrl: OS << "DppBoundCtrl"; break;
854     case ImmTyDppFi: OS << "FI"; break;
855     case ImmTySdwaDstSel: OS << "SdwaDstSel"; break;
856     case ImmTySdwaSrc0Sel: OS << "SdwaSrc0Sel"; break;
857     case ImmTySdwaSrc1Sel: OS << "SdwaSrc1Sel"; break;
858     case ImmTySdwaDstUnused: OS << "SdwaDstUnused"; break;
859     case ImmTyDMask: OS << "DMask"; break;
860     case ImmTyDim: OS << "Dim"; break;
861     case ImmTyUNorm: OS << "UNorm"; break;
862     case ImmTyDA: OS << "DA"; break;
863     case ImmTyR128A16: OS << "R128A16"; break;
864     case ImmTyA16: OS << "A16"; break;
865     case ImmTyLWE: OS << "LWE"; break;
866     case ImmTyOff: OS << "Off"; break;
867     case ImmTyExpTgt: OS << "ExpTgt"; break;
868     case ImmTyExpCompr: OS << "ExpCompr"; break;
869     case ImmTyExpVM: OS << "ExpVM"; break;
870     case ImmTyHwreg: OS << "Hwreg"; break;
871     case ImmTySendMsg: OS << "SendMsg"; break;
872     case ImmTyInterpSlot: OS << "InterpSlot"; break;
873     case ImmTyInterpAttr: OS << "InterpAttr"; break;
874     case ImmTyAttrChan: OS << "AttrChan"; break;
875     case ImmTyOpSel: OS << "OpSel"; break;
876     case ImmTyOpSelHi: OS << "OpSelHi"; break;
877     case ImmTyNegLo: OS << "NegLo"; break;
878     case ImmTyNegHi: OS << "NegHi"; break;
879     case ImmTySwizzle: OS << "Swizzle"; break;
880     case ImmTyGprIdxMode: OS << "GprIdxMode"; break;
881     case ImmTyHigh: OS << "High"; break;
882     case ImmTyBLGP: OS << "BLGP"; break;
883     case ImmTyCBSZ: OS << "CBSZ"; break;
884     case ImmTyABID: OS << "ABID"; break;
885     case ImmTyEndpgm: OS << "Endpgm"; break;
886     }
887   }
888 
889   void print(raw_ostream &OS) const override {
890     switch (Kind) {
891     case Register:
892       OS << "<register " << getReg() << " mods: " << Reg.Mods << '>';
893       break;
894     case Immediate:
895       OS << '<' << getImm();
896       if (getImmTy() != ImmTyNone) {
897         OS << " type: "; printImmTy(OS, getImmTy());
898       }
899       OS << " mods: " << Imm.Mods << '>';
900       break;
901     case Token:
902       OS << '\'' << getToken() << '\'';
903       break;
904     case Expression:
905       OS << "<expr " << *Expr << '>';
906       break;
907     }
908   }
909 
910   static AMDGPUOperand::Ptr CreateImm(const AMDGPUAsmParser *AsmParser,
911                                       int64_t Val, SMLoc Loc,
912                                       ImmTy Type = ImmTyNone,
913                                       bool IsFPImm = false) {
914     auto Op = std::make_unique<AMDGPUOperand>(Immediate, AsmParser);
915     Op->Imm.Val = Val;
916     Op->Imm.IsFPImm = IsFPImm;
917     Op->Imm.Kind = ImmKindTyNone;
918     Op->Imm.Type = Type;
919     Op->Imm.Mods = Modifiers();
920     Op->StartLoc = Loc;
921     Op->EndLoc = Loc;
922     return Op;
923   }
924 
925   static AMDGPUOperand::Ptr CreateToken(const AMDGPUAsmParser *AsmParser,
926                                         StringRef Str, SMLoc Loc,
927                                         bool HasExplicitEncodingSize = true) {
928     auto Res = std::make_unique<AMDGPUOperand>(Token, AsmParser);
929     Res->Tok.Data = Str.data();
930     Res->Tok.Length = Str.size();
931     Res->StartLoc = Loc;
932     Res->EndLoc = Loc;
933     return Res;
934   }
935 
936   static AMDGPUOperand::Ptr CreateReg(const AMDGPUAsmParser *AsmParser,
937                                       unsigned RegNo, SMLoc S,
938                                       SMLoc E) {
939     auto Op = std::make_unique<AMDGPUOperand>(Register, AsmParser);
940     Op->Reg.RegNo = RegNo;
941     Op->Reg.Mods = Modifiers();
942     Op->StartLoc = S;
943     Op->EndLoc = E;
944     return Op;
945   }
946 
947   static AMDGPUOperand::Ptr CreateExpr(const AMDGPUAsmParser *AsmParser,
948                                        const class MCExpr *Expr, SMLoc S) {
949     auto Op = std::make_unique<AMDGPUOperand>(Expression, AsmParser);
950     Op->Expr = Expr;
951     Op->StartLoc = S;
952     Op->EndLoc = S;
953     return Op;
954   }
955 };
956 
957 raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods) {
958   OS << "abs:" << Mods.Abs << " neg:" << Mods.Neg << " sext:" << Mods.Sext;
959   return OS;
960 }
961 
962 //===----------------------------------------------------------------------===//
963 // AsmParser
964 //===----------------------------------------------------------------------===//
965 
966 // Holds info related to the current kernel, e.g. the count of SGPRs used.
967 // A kernel scope begins at an .amdgpu_hsa_kernel directive and ends at the
968 // next .amdgpu_hsa_kernel directive or at EOF.
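// For example, given a hypothetical kernel:
//   .amdgpu_hsa_kernel my_kernel
//   my_kernel:
//     v_mov_b32 v3, 0
// parsing v3 raises .kernel.vgpr_count to at least 4 for that scope.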
969 class KernelScopeInfo {
970   int SgprIndexUnusedMin = -1;
971   int VgprIndexUnusedMin = -1;
972   MCContext *Ctx = nullptr;
973 
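  // Record that SGPR index i is in use and update the running count exposed
  // through the .kernel.sgpr_count symbol.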
974   void usesSgprAt(int i) {
975     if (i >= SgprIndexUnusedMin) {
976       SgprIndexUnusedMin = ++i;
977       if (Ctx) {
978         MCSymbol * const Sym = Ctx->getOrCreateSymbol(Twine(".kernel.sgpr_count"));
979         Sym->setVariableValue(MCConstantExpr::create(SgprIndexUnusedMin, *Ctx));
980       }
981     }
982   }
983 
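  // Same as usesSgprAt, but for VGPRs and the .kernel.vgpr_count symbol.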
984   void usesVgprAt(int i) {
985     if (i >= VgprIndexUnusedMin) {
986       VgprIndexUnusedMin = ++i;
987       if (Ctx) {
988         MCSymbol * const Sym = Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count"));
989         Sym->setVariableValue(MCConstantExpr::create(VgprIndexUnusedMin, *Ctx));
990       }
991     }
992   }
993 
994 public:
995   KernelScopeInfo() = default;
996 
997   void initialize(MCContext &Context) {
998     Ctx = &Context;
999     usesSgprAt(SgprIndexUnusedMin = -1);
1000     usesVgprAt(VgprIndexUnusedMin = -1);
1001   }
1002 
1003   void usesRegister(RegisterKind RegKind, unsigned DwordRegIndex, unsigned RegWidth) {
1004     switch (RegKind) {
1005       case IS_SGPR: usesSgprAt(DwordRegIndex + RegWidth - 1); break;
1006       case IS_AGPR: // fall through
1007       case IS_VGPR: usesVgprAt(DwordRegIndex + RegWidth - 1); break;
1008       default: break;
1009     }
1010   }
1011 };
1012 
1013 class AMDGPUAsmParser : public MCTargetAsmParser {
1014   MCAsmParser &Parser;
1015 
1016   // Number of extra operands parsed after the first optional operand.
1017   // This may be necessary to skip hardcoded mandatory operands.
1018   static const unsigned MAX_OPR_LOOKAHEAD = 8;
1019 
1020   unsigned ForcedEncodingSize = 0;
1021   bool ForcedDPP = false;
1022   bool ForcedSDWA = false;
1023   KernelScopeInfo KernelScope;
1024 
1025   /// @name Auto-generated Match Functions
1026   /// {
1027 
1028 #define GET_ASSEMBLER_HEADER
1029 #include "AMDGPUGenAsmMatcher.inc"
1030 
1031   /// }
1032 
1033 private:
1034   bool ParseAsAbsoluteExpression(uint32_t &Ret);
1035   bool OutOfRangeError(SMRange Range);
1036   /// Calculate VGPR/SGPR blocks required for given target, reserved
1037   /// registers, and user-specified NextFreeXGPR values.
1038   ///
1039   /// \param Features [in] Target features, used for bug corrections.
1040   /// \param VCCUsed [in] Whether VCC special SGPR is reserved.
1041   /// \param FlatScrUsed [in] Whether FLAT_SCRATCH special SGPR is reserved.
1042   /// \param XNACKUsed [in] Whether XNACK_MASK special SGPR is reserved.
1043   /// \param EnableWavefrontSize32 [in] Value of ENABLE_WAVEFRONT_SIZE32 kernel
1044   /// descriptor field, if valid.
1045   /// \param NextFreeVGPR [in] Max VGPR number referenced, plus one.
1046   /// \param VGPRRange [in] Token range, used for VGPR diagnostics.
1047   /// \param NextFreeSGPR [in] Max SGPR number referenced, plus one.
1048   /// \param SGPRRange [in] Token range, used for SGPR diagnostics.
1049   /// \param VGPRBlocks [out] Result VGPR block count.
1050   /// \param SGPRBlocks [out] Result SGPR block count.
1051   bool calculateGPRBlocks(const FeatureBitset &Features, bool VCCUsed,
1052                           bool FlatScrUsed, bool XNACKUsed,
1053                           Optional<bool> EnableWavefrontSize32, unsigned NextFreeVGPR,
1054                           SMRange VGPRRange, unsigned NextFreeSGPR,
1055                           SMRange SGPRRange, unsigned &VGPRBlocks,
1056                           unsigned &SGPRBlocks);
1057   bool ParseDirectiveAMDGCNTarget();
1058   bool ParseDirectiveAMDHSAKernel();
1059   bool ParseDirectiveMajorMinor(uint32_t &Major, uint32_t &Minor);
1060   bool ParseDirectiveHSACodeObjectVersion();
1061   bool ParseDirectiveHSACodeObjectISA();
1062   bool ParseAMDKernelCodeTValue(StringRef ID, amd_kernel_code_t &Header);
1063   bool ParseDirectiveAMDKernelCodeT();
1064   bool subtargetHasRegister(const MCRegisterInfo &MRI, unsigned RegNo) const;
1065   bool ParseDirectiveAMDGPUHsaKernel();
1066 
1067   bool ParseDirectiveISAVersion();
1068   bool ParseDirectiveHSAMetadata();
1069   bool ParseDirectivePALMetadataBegin();
1070   bool ParseDirectivePALMetadata();
1071   bool ParseDirectiveAMDGPULDS();
1072 
1073   /// Common code to parse out a block of text (typically YAML) between start and
1074   /// end directives.
1075   bool ParseToEndDirective(const char *AssemblerDirectiveBegin,
1076                            const char *AssemblerDirectiveEnd,
1077                            std::string &CollectString);
1078 
1079   bool AddNextRegisterToList(unsigned& Reg, unsigned& RegWidth,
1080                              RegisterKind RegKind, unsigned Reg1, SMLoc Loc);
1081   bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
1082                            unsigned &RegNum, unsigned &RegWidth,
1083                            bool RestoreOnFailure = false);
1084   bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
1085                            unsigned &RegNum, unsigned &RegWidth,
1086                            SmallVectorImpl<AsmToken> &Tokens);
1087   unsigned ParseRegularReg(RegisterKind &RegKind, unsigned &RegNum,
1088                            unsigned &RegWidth,
1089                            SmallVectorImpl<AsmToken> &Tokens);
1090   unsigned ParseSpecialReg(RegisterKind &RegKind, unsigned &RegNum,
1091                            unsigned &RegWidth,
1092                            SmallVectorImpl<AsmToken> &Tokens);
1093   unsigned ParseRegList(RegisterKind &RegKind, unsigned &RegNum,
1094                         unsigned &RegWidth, SmallVectorImpl<AsmToken> &Tokens);
1095   bool ParseRegRange(unsigned& Num, unsigned& Width);
1096   unsigned getRegularReg(RegisterKind RegKind,
1097                          unsigned RegNum,
1098                          unsigned RegWidth,
1099                          SMLoc Loc);
1100 
1101   bool isRegister();
1102   bool isRegister(const AsmToken &Token, const AsmToken &NextToken) const;
1103   Optional<StringRef> getGprCountSymbolName(RegisterKind RegKind);
1104   void initializeGprCountSymbol(RegisterKind RegKind);
1105   bool updateGprCountSymbols(RegisterKind RegKind, unsigned DwordRegIndex,
1106                              unsigned RegWidth);
1107   void cvtMubufImpl(MCInst &Inst, const OperandVector &Operands,
1108                     bool IsAtomic, bool IsAtomicReturn, bool IsLds = false);
1109   void cvtDSImpl(MCInst &Inst, const OperandVector &Operands,
1110                  bool IsGdsHardcoded);
1111 
1112 public:
1113   enum AMDGPUMatchResultTy {
1114     Match_PreferE32 = FIRST_TARGET_MATCH_RESULT_TY
1115   };
1116   enum OperandMode {
1117     OperandMode_Default,
1118     OperandMode_NSA,
1119   };
1120 
1121   using OptionalImmIndexMap = std::map<AMDGPUOperand::ImmTy, unsigned>;
1122 
1123   AMDGPUAsmParser(const MCSubtargetInfo &STI, MCAsmParser &_Parser,
1124                const MCInstrInfo &MII,
1125                const MCTargetOptions &Options)
1126       : MCTargetAsmParser(Options, STI, MII), Parser(_Parser) {
1127     MCAsmParserExtension::Initialize(Parser);
1128 
1129     if (getFeatureBits().none()) {
1130       // Set default features.
1131       copySTI().ToggleFeature("southern-islands");
1132     }
1133 
1134     setAvailableFeatures(ComputeAvailableFeatures(getFeatureBits()));
1135 
1136     {
1137       // TODO: make those pre-defined variables read-only.
1138       // Currently there is no suitable machinery in core llvm-mc for this.
1139       // MCSymbol::isRedefinable is intended for another purpose, and
1140       // AsmParser::parseDirectiveSet() cannot be specialized per target.
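      // These symbols can be tested from assembly, e.g.:
      //   .if .amdgcn.gfx_generation_number >= 9
      // (illustrative; which symbols are defined depends on the ABI, see below).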
1141       AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
1142       MCContext &Ctx = getContext();
1143       if (ISA.Major >= 6 && isHsaAbiVersion3(&getSTI())) {
1144         MCSymbol *Sym =
1145             Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_number"));
1146         Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
1147         Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_minor"));
1148         Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
1149         Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_stepping"));
1150         Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
1151       } else {
1152         MCSymbol *Sym =
1153             Ctx.getOrCreateSymbol(Twine(".option.machine_version_major"));
1154         Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
1155         Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_minor"));
1156         Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
1157         Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_stepping"));
1158         Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
1159       }
1160       if (ISA.Major >= 6 && isHsaAbiVersion3(&getSTI())) {
1161         initializeGprCountSymbol(IS_VGPR);
1162         initializeGprCountSymbol(IS_SGPR);
1163       } else
1164         KernelScope.initialize(getContext());
1165     }
1166   }
1167 
1168   bool hasXNACK() const {
1169     return AMDGPU::hasXNACK(getSTI());
1170   }
1171 
1172   bool hasMIMG_R128() const {
1173     return AMDGPU::hasMIMG_R128(getSTI());
1174   }
1175 
1176   bool hasPackedD16() const {
1177     return AMDGPU::hasPackedD16(getSTI());
1178   }
1179 
1180   bool hasGFX10A16() const {
1181     return AMDGPU::hasGFX10A16(getSTI());
1182   }
1183 
1184   bool isSI() const {
1185     return AMDGPU::isSI(getSTI());
1186   }
1187 
1188   bool isCI() const {
1189     return AMDGPU::isCI(getSTI());
1190   }
1191 
1192   bool isVI() const {
1193     return AMDGPU::isVI(getSTI());
1194   }
1195 
1196   bool isGFX9() const {
1197     return AMDGPU::isGFX9(getSTI());
1198   }
1199 
1200   bool isGFX9Plus() const {
1201     return AMDGPU::isGFX9Plus(getSTI());
1202   }
1203 
1204   bool isGFX10() const {
1205     return AMDGPU::isGFX10(getSTI());
1206   }
1207 
1208   bool isGFX10Plus() const { return AMDGPU::isGFX10Plus(getSTI()); }
1209 
1210   bool isGFX10_BEncoding() const {
1211     return AMDGPU::isGFX10_BEncoding(getSTI());
1212   }
1213 
1214   bool hasInv2PiInlineImm() const {
1215     return getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm];
1216   }
1217 
1218   bool hasFlatOffsets() const {
1219     return getFeatureBits()[AMDGPU::FeatureFlatInstOffsets];
1220   }
1221 
1222   bool hasSGPR102_SGPR103() const {
1223     return !isVI() && !isGFX9();
1224   }
1225 
1226   bool hasSGPR104_SGPR105() const { return isGFX10Plus(); }
1227 
1228   bool hasIntClamp() const {
1229     return getFeatureBits()[AMDGPU::FeatureIntClamp];
1230   }
1231 
1232   AMDGPUTargetStreamer &getTargetStreamer() {
1233     MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer();
1234     return static_cast<AMDGPUTargetStreamer &>(TS);
1235   }
1236 
1237   const MCRegisterInfo *getMRI() const {
1238     // We need this const_cast because for some reason getContext() is not const
1239     // in MCAsmParser.
1240     return const_cast<AMDGPUAsmParser*>(this)->getContext().getRegisterInfo();
1241   }
1242 
1243   const MCInstrInfo *getMII() const {
1244     return &MII;
1245   }
1246 
1247   const FeatureBitset &getFeatureBits() const {
1248     return getSTI().getFeatureBits();
1249   }
1250 
1251   void setForcedEncodingSize(unsigned Size) { ForcedEncodingSize = Size; }
1252   void setForcedDPP(bool ForceDPP_) { ForcedDPP = ForceDPP_; }
1253   void setForcedSDWA(bool ForceSDWA_) { ForcedSDWA = ForceSDWA_; }
1254 
1255   unsigned getForcedEncodingSize() const { return ForcedEncodingSize; }
1256   bool isForcedVOP3() const { return ForcedEncodingSize == 64; }
1257   bool isForcedDPP() const { return ForcedDPP; }
1258   bool isForcedSDWA() const { return ForcedSDWA; }
1259   ArrayRef<unsigned> getMatchedVariants() const;
1260   StringRef getMatchedVariantName() const;
1261 
1262   std::unique_ptr<AMDGPUOperand> parseRegister(bool RestoreOnFailure = false);
1263   bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc,
1264                      bool RestoreOnFailure);
1265   bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override;
1266   OperandMatchResultTy tryParseRegister(unsigned &RegNo, SMLoc &StartLoc,
1267                                         SMLoc &EndLoc) override;
1268   unsigned checkTargetMatchPredicate(MCInst &Inst) override;
1269   unsigned validateTargetOperandClass(MCParsedAsmOperand &Op,
1270                                       unsigned Kind) override;
1271   bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
1272                                OperandVector &Operands, MCStreamer &Out,
1273                                uint64_t &ErrorInfo,
1274                                bool MatchingInlineAsm) override;
1275   bool ParseDirective(AsmToken DirectiveID) override;
1276   OperandMatchResultTy parseOperand(OperandVector &Operands, StringRef Mnemonic,
1277                                     OperandMode Mode = OperandMode_Default);
1278   StringRef parseMnemonicSuffix(StringRef Name);
1279   bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
1280                         SMLoc NameLoc, OperandVector &Operands) override;
1281   //bool ProcessInstruction(MCInst &Inst);
1282 
1283   OperandMatchResultTy parseIntWithPrefix(const char *Prefix, int64_t &Int);
1284 
1285   OperandMatchResultTy
1286   parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
1287                      AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
1288                      bool (*ConvertResult)(int64_t &) = nullptr);
1289 
1290   OperandMatchResultTy
1291   parseOperandArrayWithPrefix(const char *Prefix,
1292                               OperandVector &Operands,
1293                               AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
1294                               bool (*ConvertResult)(int64_t&) = nullptr);
1295 
1296   OperandMatchResultTy
1297   parseNamedBit(const char *Name, OperandVector &Operands,
1298                 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone);
1299   OperandMatchResultTy parseStringWithPrefix(StringRef Prefix,
1300                                              StringRef &Value,
1301                                              SMLoc &StringLoc);
1302 
1303   bool isModifier();
1304   bool isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1305   bool isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1306   bool isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1307   bool isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const;
1308   bool parseSP3NegModifier();
1309   OperandMatchResultTy parseImm(OperandVector &Operands, bool HasSP3AbsModifier = false);
1310   OperandMatchResultTy parseReg(OperandVector &Operands);
1311   OperandMatchResultTy parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod = false);
1312   OperandMatchResultTy parseRegOrImmWithFPInputMods(OperandVector &Operands, bool AllowImm = true);
1313   OperandMatchResultTy parseRegOrImmWithIntInputMods(OperandVector &Operands, bool AllowImm = true);
1314   OperandMatchResultTy parseRegWithFPInputMods(OperandVector &Operands);
1315   OperandMatchResultTy parseRegWithIntInputMods(OperandVector &Operands);
1316   OperandMatchResultTy parseVReg32OrOff(OperandVector &Operands);
1317   OperandMatchResultTy parseDfmtNfmt(int64_t &Format);
1318   OperandMatchResultTy parseUfmt(int64_t &Format);
1319   OperandMatchResultTy parseSymbolicSplitFormat(StringRef FormatStr, SMLoc Loc, int64_t &Format);
1320   OperandMatchResultTy parseSymbolicUnifiedFormat(StringRef FormatStr, SMLoc Loc, int64_t &Format);
1321   OperandMatchResultTy parseFORMAT(OperandVector &Operands);
1322   OperandMatchResultTy parseSymbolicOrNumericFormat(int64_t &Format);
1323   OperandMatchResultTy parseNumericFormat(int64_t &Format);
1324   bool tryParseFmt(const char *Pref, int64_t MaxVal, int64_t &Val);
1325   bool matchDfmtNfmt(int64_t &Dfmt, int64_t &Nfmt, StringRef FormatStr, SMLoc Loc);
1326 
1327   void cvtDSOffset01(MCInst &Inst, const OperandVector &Operands);
1328   void cvtDS(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, false); }
1329   void cvtDSGds(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, true); }
1330   void cvtExp(MCInst &Inst, const OperandVector &Operands);
1331 
1332   bool parseCnt(int64_t &IntVal);
1333   OperandMatchResultTy parseSWaitCntOps(OperandVector &Operands);
1334   OperandMatchResultTy parseHwreg(OperandVector &Operands);
1335 
1336 private:
1337   struct OperandInfoTy {
1338     SMLoc Loc;
1339     int64_t Id;
1340     bool IsSymbolic = false;
1341     bool IsDefined = false;
1342 
1343     OperandInfoTy(int64_t Id_) : Id(Id_) {}
1344   };
1345 
1346   bool parseSendMsgBody(OperandInfoTy &Msg, OperandInfoTy &Op, OperandInfoTy &Stream);
1347   bool validateSendMsg(const OperandInfoTy &Msg,
1348                        const OperandInfoTy &Op,
1349                        const OperandInfoTy &Stream);
1350 
1351   bool parseHwregBody(OperandInfoTy &HwReg,
1352                       OperandInfoTy &Offset,
1353                       OperandInfoTy &Width);
1354   bool validateHwreg(const OperandInfoTy &HwReg,
1355                      const OperandInfoTy &Offset,
1356                      const OperandInfoTy &Width);
1357 
1358   OperandMatchResultTy parseExpTgtImpl(StringRef Str, uint8_t &Val);
1359   SMLoc getFlatOffsetLoc(const OperandVector &Operands) const;
1360   SMLoc getSMEMOffsetLoc(const OperandVector &Operands) const;
1361 
1362   SMLoc getOperandLoc(std::function<bool(const AMDGPUOperand&)> Test,
1363                       const OperandVector &Operands) const;
1364   SMLoc getImmLoc(AMDGPUOperand::ImmTy Type, const OperandVector &Operands) const;
1365   SMLoc getRegLoc(unsigned Reg, const OperandVector &Operands) const;
1366   SMLoc getLitLoc(const OperandVector &Operands) const;
1367   SMLoc getConstLoc(const OperandVector &Operands) const;
1368 
1369   bool validateInstruction(const MCInst &Inst, const SMLoc &IDLoc, const OperandVector &Operands);
1370   bool validateFlatOffset(const MCInst &Inst, const OperandVector &Operands);
1371   bool validateSMEMOffset(const MCInst &Inst, const OperandVector &Operands);
1372   bool validateSOPLiteral(const MCInst &Inst) const;
1373   bool validateConstantBusLimitations(const MCInst &Inst, const OperandVector &Operands);
1374   bool validateEarlyClobberLimitations(const MCInst &Inst, const OperandVector &Operands);
1375   bool validateIntClampSupported(const MCInst &Inst);
1376   bool validateMIMGAtomicDMask(const MCInst &Inst);
1377   bool validateMIMGGatherDMask(const MCInst &Inst);
1378   bool validateMovrels(const MCInst &Inst, const OperandVector &Operands);
1379   bool validateMIMGDataSize(const MCInst &Inst);
1380   bool validateMIMGAddrSize(const MCInst &Inst);
1381   bool validateMIMGD16(const MCInst &Inst);
1382   bool validateMIMGDim(const MCInst &Inst);
1383   bool validateLdsDirect(const MCInst &Inst);
1384   bool validateOpSel(const MCInst &Inst);
1385   bool validateVccOperand(unsigned Reg) const;
1386   bool validateVOP3Literal(const MCInst &Inst, const OperandVector &Operands);
1387   bool validateMAIAccWrite(const MCInst &Inst, const OperandVector &Operands);
1388   bool validateDivScale(const MCInst &Inst);
1389   bool validateCoherencyBits(const MCInst &Inst, const OperandVector &Operands,
1390                              const SMLoc &IDLoc);
1391   unsigned getConstantBusLimit(unsigned Opcode) const;
1392   bool usesConstantBus(const MCInst &Inst, unsigned OpIdx);
1393   bool isInlineConstant(const MCInst &Inst, unsigned OpIdx) const;
1394   unsigned findImplicitSGPRReadInVOP(const MCInst &Inst) const;
1395 
1396   bool isSupportedMnemo(StringRef Mnemo,
1397                         const FeatureBitset &FBS);
1398   bool isSupportedMnemo(StringRef Mnemo,
1399                         const FeatureBitset &FBS,
1400                         ArrayRef<unsigned> Variants);
1401   bool checkUnsupportedInstruction(StringRef Name, const SMLoc &IDLoc);
1402 
1403   bool isId(const StringRef Id) const;
1404   bool isId(const AsmToken &Token, const StringRef Id) const;
1405   bool isToken(const AsmToken::TokenKind Kind) const;
1406   bool trySkipId(const StringRef Id);
1407   bool trySkipId(const StringRef Id, const AsmToken::TokenKind Kind);
1408   bool trySkipToken(const AsmToken::TokenKind Kind);
1409   bool skipToken(const AsmToken::TokenKind Kind, const StringRef ErrMsg);
1410   bool parseString(StringRef &Val, const StringRef ErrMsg = "expected a string");
1411   bool parseId(StringRef &Val, const StringRef ErrMsg = "");
1412 
1413   void peekTokens(MutableArrayRef<AsmToken> Tokens);
1414   AsmToken::TokenKind getTokenKind() const;
1415   bool parseExpr(int64_t &Imm, StringRef Expected = "");
1416   bool parseExpr(OperandVector &Operands);
1417   StringRef getTokenStr() const;
1418   AsmToken peekToken();
1419   AsmToken getToken() const;
1420   SMLoc getLoc() const;
1421   void lex();
1422 
1423 public:
1424   OperandMatchResultTy parseOptionalOperand(OperandVector &Operands);
1425   OperandMatchResultTy parseOptionalOpr(OperandVector &Operands);
1426 
1427   OperandMatchResultTy parseExpTgt(OperandVector &Operands);
1428   OperandMatchResultTy parseSendMsgOp(OperandVector &Operands);
1429   OperandMatchResultTy parseInterpSlot(OperandVector &Operands);
1430   OperandMatchResultTy parseInterpAttr(OperandVector &Operands);
1431   OperandMatchResultTy parseSOppBrTarget(OperandVector &Operands);
1432   OperandMatchResultTy parseBoolReg(OperandVector &Operands);
1433 
1434   bool parseSwizzleOperand(int64_t &Op,
1435                            const unsigned MinVal,
1436                            const unsigned MaxVal,
1437                            const StringRef ErrMsg,
1438                            SMLoc &Loc);
1439   bool parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
1440                             const unsigned MinVal,
1441                             const unsigned MaxVal,
1442                             const StringRef ErrMsg);
1443   OperandMatchResultTy parseSwizzleOp(OperandVector &Operands);
1444   bool parseSwizzleOffset(int64_t &Imm);
1445   bool parseSwizzleMacro(int64_t &Imm);
1446   bool parseSwizzleQuadPerm(int64_t &Imm);
1447   bool parseSwizzleBitmaskPerm(int64_t &Imm);
1448   bool parseSwizzleBroadcast(int64_t &Imm);
1449   bool parseSwizzleSwap(int64_t &Imm);
1450   bool parseSwizzleReverse(int64_t &Imm);
1451 
1452   OperandMatchResultTy parseGPRIdxMode(OperandVector &Operands);
1453   int64_t parseGPRIdxMacro();
1454 
1455   void cvtMubuf(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, false); }
1456   void cvtMubufAtomic(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true, false); }
1457   void cvtMubufAtomicReturn(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true, true); }
1458   void cvtMubufLds(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, false, true); }
1459   void cvtMtbuf(MCInst &Inst, const OperandVector &Operands);
1460 
1461   AMDGPUOperand::Ptr defaultDLC() const;
1462   AMDGPUOperand::Ptr defaultGLC() const;
1463   AMDGPUOperand::Ptr defaultGLC_1() const;
1464   AMDGPUOperand::Ptr defaultSLC() const;
1465 
1466   AMDGPUOperand::Ptr defaultSMRDOffset8() const;
1467   AMDGPUOperand::Ptr defaultSMEMOffset() const;
1468   AMDGPUOperand::Ptr defaultSMRDLiteralOffset() const;
1469   AMDGPUOperand::Ptr defaultFlatOffset() const;
1470 
1471   OperandMatchResultTy parseOModOperand(OperandVector &Operands);
1472 
1473   void cvtVOP3(MCInst &Inst, const OperandVector &Operands,
1474                OptionalImmIndexMap &OptionalIdx);
1475   void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands);
1476   void cvtVOP3(MCInst &Inst, const OperandVector &Operands);
1477   void cvtVOP3P(MCInst &Inst, const OperandVector &Operands);
1478 
1479   void cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands);
1480 
1481   void cvtMIMG(MCInst &Inst, const OperandVector &Operands,
1482                bool IsAtomic = false);
1483   void cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands);
1484   void cvtIntersectRay(MCInst &Inst, const OperandVector &Operands);
1485 
1486   OperandMatchResultTy parseDim(OperandVector &Operands);
1487   OperandMatchResultTy parseDPP8(OperandVector &Operands);
1488   OperandMatchResultTy parseDPPCtrl(OperandVector &Operands);
1489   bool isSupportedDPPCtrl(StringRef Ctrl, const OperandVector &Operands);
1490   int64_t parseDPPCtrlSel(StringRef Ctrl);
1491   int64_t parseDPPCtrlPerm();
1492   AMDGPUOperand::Ptr defaultRowMask() const;
1493   AMDGPUOperand::Ptr defaultBankMask() const;
1494   AMDGPUOperand::Ptr defaultBoundCtrl() const;
1495   AMDGPUOperand::Ptr defaultFI() const;
1496   void cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8 = false);
1497   void cvtDPP8(MCInst &Inst, const OperandVector &Operands) { cvtDPP(Inst, Operands, true); }
1498 
1499   OperandMatchResultTy parseSDWASel(OperandVector &Operands, StringRef Prefix,
1500                                     AMDGPUOperand::ImmTy Type);
1501   OperandMatchResultTy parseSDWADstUnused(OperandVector &Operands);
1502   void cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands);
1503   void cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands);
1504   void cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands);
1505   void cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands);
1506   void cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands);
1507   void cvtSDWA(MCInst &Inst, const OperandVector &Operands,
1508                uint64_t BasicInstType,
1509                bool SkipDstVcc = false,
1510                bool SkipSrcVcc = false);
1511 
1512   AMDGPUOperand::Ptr defaultBLGP() const;
1513   AMDGPUOperand::Ptr defaultCBSZ() const;
1514   AMDGPUOperand::Ptr defaultABID() const;
1515 
1516   OperandMatchResultTy parseEndpgmOp(OperandVector &Operands);
1517   AMDGPUOperand::Ptr defaultEndpgmImmOperands() const;
1518 };
1519 
1520 struct OptionalOperand {
1521   const char *Name;
1522   AMDGPUOperand::ImmTy Type;
1523   bool IsBit;
1524   bool (*ConvertResult)(int64_t&);
1525 };
1526 
1527 } // end anonymous namespace
1528 
1529 // May be called with an integer type of equivalent bitwidth.
1530 static const fltSemantics *getFltSemantics(unsigned Size) {
1531   switch (Size) {
1532   case 4:
1533     return &APFloat::IEEEsingle();
1534   case 8:
1535     return &APFloat::IEEEdouble();
1536   case 2:
1537     return &APFloat::IEEEhalf();
1538   default:
1539     llvm_unreachable("unsupported fp type");
1540   }
1541 }
1542 
1543 static const fltSemantics *getFltSemantics(MVT VT) {
1544   return getFltSemantics(VT.getSizeInBits() / 8);
1545 }
1546 
1547 static const fltSemantics *getOpFltSemantics(uint8_t OperandType) {
1548   switch (OperandType) {
1549   case AMDGPU::OPERAND_REG_IMM_INT32:
1550   case AMDGPU::OPERAND_REG_IMM_FP32:
1551   case AMDGPU::OPERAND_REG_INLINE_C_INT32:
1552   case AMDGPU::OPERAND_REG_INLINE_C_FP32:
1553   case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
1554   case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
1555     return &APFloat::IEEEsingle();
1556   case AMDGPU::OPERAND_REG_IMM_INT64:
1557   case AMDGPU::OPERAND_REG_IMM_FP64:
1558   case AMDGPU::OPERAND_REG_INLINE_C_INT64:
1559   case AMDGPU::OPERAND_REG_INLINE_C_FP64:
1560     return &APFloat::IEEEdouble();
1561   case AMDGPU::OPERAND_REG_IMM_INT16:
1562   case AMDGPU::OPERAND_REG_IMM_FP16:
1563   case AMDGPU::OPERAND_REG_INLINE_C_INT16:
1564   case AMDGPU::OPERAND_REG_INLINE_C_FP16:
1565   case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
1566   case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
1567   case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
1568   case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
1569   case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
1570   case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
1571   case AMDGPU::OPERAND_REG_IMM_V2INT16:
1572   case AMDGPU::OPERAND_REG_IMM_V2FP16:
1573     return &APFloat::IEEEhalf();
1574   default:
1575     llvm_unreachable("unsupported fp type");
1576   }
1577 }
1578 
1579 //===----------------------------------------------------------------------===//
1580 // Operand
1581 //===----------------------------------------------------------------------===//
1582 
1583 static bool canLosslesslyConvertToFPType(APFloat &FPLiteral, MVT VT) {
1584   bool Lost;
1585 
1586   // Convert the literal to the operand's floating-point semantics.
1587   APFloat::opStatus Status = FPLiteral.convert(*getFltSemantics(VT),
1588                                                APFloat::rmNearestTiesToEven,
1589                                                &Lost);
1590   // We allow precision loss but not overflow or underflow.
1591   if (Status != APFloat::opOK &&
1592       Lost &&
1593       ((Status & APFloat::opOverflow)  != 0 ||
1594        (Status & APFloat::opUnderflow) != 0)) {
1595     return false;
1596   }
1597 
1598   return true;
1599 }
1600 
1601 static bool isSafeTruncation(int64_t Val, unsigned Size) {
1602   return isUIntN(Size, Val) || isIntN(Size, Val);
1603 }
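// For illustration: isSafeTruncation(-1, 16) and isSafeTruncation(0xffff, 16)
// both hold (the value fits as either a signed or an unsigned 16-bit integer),
// while isSafeTruncation(0x10000, 16) does not.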
1604 
1605 static bool isInlineableLiteralOp16(int64_t Val, MVT VT, bool HasInv2Pi) {
1606   if (VT.getScalarType() == MVT::i16) {
1607     // FP immediate values are broken.
1608     return isInlinableIntLiteral(Val);
1609   }
1610 
1611   // f16/v2f16 operands work correctly for all values.
1612   return AMDGPU::isInlinableLiteral16(Val, HasInv2Pi);
1613 }
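// Informal examples of the distinction above: for an i16 operand only the
// integer inline range (-16..64) is accepted, e.g. "v_add_u16 v0, 64, v1"
// uses an inline constant while "v_add_u16 v0, 65, v1" needs a literal;
// f16 operands additionally accept fp inline constants such as 0.5, 1.0,
// 2.0, 4.0, their negatives, and 1/(2*pi) when HasInv2Pi is set.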
1614 
1615 bool AMDGPUOperand::isInlinableImm(MVT type) const {
1616 
1617   // This is a hack to enable named inline values like
1618   // shared_base with both 32-bit and 64-bit operands.
1619   // Note that these values are defined as
1620   // 32-bit operands only.
1621   if (isInlineValue()) {
1622     return true;
1623   }
1624 
1625   if (!isImmTy(ImmTyNone)) {
1626     // Only plain immediates are inlinable (e.g. "clamp" attribute is not)
1627     return false;
1628   }
1629   // TODO: We should avoid using host float here. It would be better to
1630   // check the float bit values which is what a few other places do.
1631   // We've had bot failures before due to weird NaN support on mips hosts.
1632 
1633   APInt Literal(64, Imm.Val);
1634 
1635   if (Imm.IsFPImm) { // We got fp literal token
1636     if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
1637       return AMDGPU::isInlinableLiteral64(Imm.Val,
1638                                           AsmParser->hasInv2PiInlineImm());
1639     }
1640 
1641     APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
1642     if (!canLosslesslyConvertToFPType(FPLiteral, type))
1643       return false;
1644 
1645     if (type.getScalarSizeInBits() == 16) {
1646       return isInlineableLiteralOp16(
1647         static_cast<int16_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
1648         type, AsmParser->hasInv2PiInlineImm());
1649     }
1650 
1651     // Check if single precision literal is inlinable
1652     return AMDGPU::isInlinableLiteral32(
1653       static_cast<int32_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
1654       AsmParser->hasInv2PiInlineImm());
1655   }
1656 
1657   // We got int literal token.
1658   if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
1659     return AMDGPU::isInlinableLiteral64(Imm.Val,
1660                                         AsmParser->hasInv2PiInlineImm());
1661   }
1662 
1663   if (!isSafeTruncation(Imm.Val, type.getScalarSizeInBits())) {
1664     return false;
1665   }
1666 
1667   if (type.getScalarSizeInBits() == 16) {
1668     return isInlineableLiteralOp16(
1669       static_cast<int16_t>(Literal.getLoBits(16).getSExtValue()),
1670       type, AsmParser->hasInv2PiInlineImm());
1671   }
1672 
1673   return AMDGPU::isInlinableLiteral32(
1674     static_cast<int32_t>(Literal.getLoBits(32).getZExtValue()),
1675     AsmParser->hasInv2PiInlineImm());
1676 }
1677 
1678 bool AMDGPUOperand::isLiteralImm(MVT type) const {
1679   // Check that this immediate can be added as a literal
1680   if (!isImmTy(ImmTyNone)) {
1681     return false;
1682   }
1683 
1684   if (!Imm.IsFPImm) {
1685     // We got int literal token.
1686 
1687     if (type == MVT::f64 && hasFPModifiers()) {
1688       // Cannot apply fp modifiers to int literals preserving the same semantics
1689       // for VOP1/2/C and VOP3 because of integer truncation. To avoid ambiguity,
1690       // disable these cases.
1691       return false;
1692     }
1693 
1694     unsigned Size = type.getSizeInBits();
1695     if (Size == 64)
1696       Size = 32;
1697 
1698     // FIXME: 64-bit operands can zero extend, sign extend, or pad zeroes for FP
1699     // types.
1700     return isSafeTruncation(Imm.Val, Size);
1701   }
1702 
1703   // We got fp literal token
1704   if (type == MVT::f64) { // Expected 64-bit fp operand
1705     // The low 32 bits of the literal will be set to zero, but we accept such literals
1706     return true;
1707   }
1708 
1709   if (type == MVT::i64) { // Expected 64-bit int operand
1710     // We don't allow fp literals in 64-bit integer instructions. It is
1711     // unclear how we should encode them.
1712     return false;
1713   }
1714 
1715   // We allow fp literals with f16x2 operands assuming that the specified
1716   // literal goes into the lower half and the upper half is zero. We also
1717   // require that the literal may be losslessly converted to f16.
1718   MVT ExpectedType = (type == MVT::v2f16)? MVT::f16 :
1719                      (type == MVT::v2i16)? MVT::i16 : type;
1720 
1721   APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
1722   return canLosslesslyConvertToFPType(FPLiteral, ExpectedType);
1723 }
1724 
1725 bool AMDGPUOperand::isRegClass(unsigned RCID) const {
1726   return isRegKind() && AsmParser->getMRI()->getRegClass(RCID).contains(getReg());
1727 }
1728 
1729 bool AMDGPUOperand::isSDWAOperand(MVT type) const {
1730   if (AsmParser->isVI())
1731     return isVReg32();
1732   else if (AsmParser->isGFX9Plus())
1733     return isRegClass(AMDGPU::VS_32RegClassID) || isInlinableImm(type);
1734   else
1735     return false;
1736 }
1737 
1738 bool AMDGPUOperand::isSDWAFP16Operand() const {
1739   return isSDWAOperand(MVT::f16);
1740 }
1741 
1742 bool AMDGPUOperand::isSDWAFP32Operand() const {
1743   return isSDWAOperand(MVT::f32);
1744 }
1745 
1746 bool AMDGPUOperand::isSDWAInt16Operand() const {
1747   return isSDWAOperand(MVT::i16);
1748 }
1749 
1750 bool AMDGPUOperand::isSDWAInt32Operand() const {
1751   return isSDWAOperand(MVT::i32);
1752 }
1753 
1754 bool AMDGPUOperand::isBoolReg() const {
1755   return (AsmParser->getFeatureBits()[AMDGPU::FeatureWavefrontSize64] && isSCSrcB64()) ||
1756          (AsmParser->getFeatureBits()[AMDGPU::FeatureWavefrontSize32] && isSCSrcB32());
1757 }
1758 
1759 uint64_t AMDGPUOperand::applyInputFPModifiers(uint64_t Val, unsigned Size) const
1760 {
1761   assert(isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
1762   assert(Size == 2 || Size == 4 || Size == 8);
1763 
1764   const uint64_t FpSignMask = (1ULL << (Size * 8 - 1));
1765 
1766   if (Imm.Mods.Abs) {
1767     Val &= ~FpSignMask;
1768   }
1769   if (Imm.Mods.Neg) {
1770     Val ^= FpSignMask;
1771   }
1772 
1773   return Val;
1774 }
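// For example, with Size == 4 the sign mask is bit 31: applying 'abs' to
// 0xBF800000 (-1.0f) yields 0x3F800000 (1.0f), and applying 'neg' to
// 0x3F800000 yields 0xBF800000.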
1775 
1776 void AMDGPUOperand::addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers) const {
1777   if (AMDGPU::isSISrcOperand(AsmParser->getMII()->get(Inst.getOpcode()),
1778                              Inst.getNumOperands())) {
1779     addLiteralImmOperand(Inst, Imm.Val,
1780                          ApplyModifiers &
1781                          isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
1782   } else {
1783     assert(!isImmTy(ImmTyNone) || !hasModifiers());
1784     Inst.addOperand(MCOperand::createImm(Imm.Val));
1785     setImmKindNone();
1786   }
1787 }
1788 
1789 void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const {
1790   const auto& InstDesc = AsmParser->getMII()->get(Inst.getOpcode());
1791   auto OpNum = Inst.getNumOperands();
1792   // Check that this operand accepts literals
1793   assert(AMDGPU::isSISrcOperand(InstDesc, OpNum));
1794 
1795   if (ApplyModifiers) {
1796     assert(AMDGPU::isSISrcFPOperand(InstDesc, OpNum));
1797     const unsigned Size = Imm.IsFPImm ? sizeof(double) : getOperandSize(InstDesc, OpNum);
1798     Val = applyInputFPModifiers(Val, Size);
1799   }
1800 
1801   APInt Literal(64, Val);
1802   uint8_t OpTy = InstDesc.OpInfo[OpNum].OperandType;
1803 
1804   if (Imm.IsFPImm) { // We got fp literal token
1805     switch (OpTy) {
1806     case AMDGPU::OPERAND_REG_IMM_INT64:
1807     case AMDGPU::OPERAND_REG_IMM_FP64:
1808     case AMDGPU::OPERAND_REG_INLINE_C_INT64:
1809     case AMDGPU::OPERAND_REG_INLINE_C_FP64:
1810       if (AMDGPU::isInlinableLiteral64(Literal.getZExtValue(),
1811                                        AsmParser->hasInv2PiInlineImm())) {
1812         Inst.addOperand(MCOperand::createImm(Literal.getZExtValue()));
1813         setImmKindConst();
1814         return;
1815       }
1816 
1817       // Non-inlineable
1818       if (AMDGPU::isSISrcFPOperand(InstDesc, OpNum)) { // Expected 64-bit fp operand
1819         // For fp operands we check if low 32 bits are zeros
1820         if (Literal.getLoBits(32) != 0) {
1821           const_cast<AMDGPUAsmParser *>(AsmParser)->Warning(Inst.getLoc(),
1822           "Can't encode literal as exact 64-bit floating-point operand. "
1823           "Low 32-bits will be set to zero");
1824         }
1825 
1826         Inst.addOperand(MCOperand::createImm(Literal.lshr(32).getZExtValue()));
1827         setImmKindLiteral();
1828         return;
1829       }
1830 
1831       // We don't allow fp literals in 64-bit integer instructions. It is
1832       // unclear how we should encode them. This case should be checked earlier
1833       // in predicate methods (isLiteralImm())
1834       llvm_unreachable("fp literal in 64-bit integer instruction.");
1835 
1836     case AMDGPU::OPERAND_REG_IMM_INT32:
1837     case AMDGPU::OPERAND_REG_IMM_FP32:
1838     case AMDGPU::OPERAND_REG_INLINE_C_INT32:
1839     case AMDGPU::OPERAND_REG_INLINE_C_FP32:
1840     case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
1841     case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
1842     case AMDGPU::OPERAND_REG_IMM_INT16:
1843     case AMDGPU::OPERAND_REG_IMM_FP16:
1844     case AMDGPU::OPERAND_REG_INLINE_C_INT16:
1845     case AMDGPU::OPERAND_REG_INLINE_C_FP16:
1846     case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
1847     case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
1848     case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
1849     case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
1850     case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
1851     case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
1852     case AMDGPU::OPERAND_REG_IMM_V2INT16:
1853     case AMDGPU::OPERAND_REG_IMM_V2FP16: {
1854       bool lost;
1855       APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
1856       // Convert the literal to the semantics of the operand type
1857       FPLiteral.convert(*getOpFltSemantics(OpTy),
1858                         APFloat::rmNearestTiesToEven, &lost);
1859       // We allow precision loss but not overflow or underflow. This should
1860       // have been checked earlier in isLiteralImm()
1861 
1862       uint64_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue();
1863       Inst.addOperand(MCOperand::createImm(ImmVal));
1864       setImmKindLiteral();
1865       return;
1866     }
1867     default:
1868       llvm_unreachable("invalid operand size");
1869     }
1870 
1871     return;
1872   }
1873 
1874   // We got int literal token.
1875   // Only sign extend inline immediates.
1876   switch (OpTy) {
1877   case AMDGPU::OPERAND_REG_IMM_INT32:
1878   case AMDGPU::OPERAND_REG_IMM_FP32:
1879   case AMDGPU::OPERAND_REG_INLINE_C_INT32:
1880   case AMDGPU::OPERAND_REG_INLINE_C_FP32:
1881   case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
1882   case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
1883   case AMDGPU::OPERAND_REG_IMM_V2INT16:
1884   case AMDGPU::OPERAND_REG_IMM_V2FP16:
1885     if (isSafeTruncation(Val, 32) &&
1886         AMDGPU::isInlinableLiteral32(static_cast<int32_t>(Val),
1887                                      AsmParser->hasInv2PiInlineImm())) {
1888       Inst.addOperand(MCOperand::createImm(Val));
1889       setImmKindConst();
1890       return;
1891     }
1892 
1893     Inst.addOperand(MCOperand::createImm(Val & 0xffffffff));
1894     setImmKindLiteral();
1895     return;
1896 
1897   case AMDGPU::OPERAND_REG_IMM_INT64:
1898   case AMDGPU::OPERAND_REG_IMM_FP64:
1899   case AMDGPU::OPERAND_REG_INLINE_C_INT64:
1900   case AMDGPU::OPERAND_REG_INLINE_C_FP64:
1901     if (AMDGPU::isInlinableLiteral64(Val, AsmParser->hasInv2PiInlineImm())) {
1902       Inst.addOperand(MCOperand::createImm(Val));
1903       setImmKindConst();
1904       return;
1905     }
1906 
1907     Inst.addOperand(MCOperand::createImm(Lo_32(Val)));
1908     setImmKindLiteral();
1909     return;
1910 
1911   case AMDGPU::OPERAND_REG_IMM_INT16:
1912   case AMDGPU::OPERAND_REG_IMM_FP16:
1913   case AMDGPU::OPERAND_REG_INLINE_C_INT16:
1914   case AMDGPU::OPERAND_REG_INLINE_C_FP16:
1915   case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
1916   case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
1917     if (isSafeTruncation(Val, 16) &&
1918         AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val),
1919                                      AsmParser->hasInv2PiInlineImm())) {
1920       Inst.addOperand(MCOperand::createImm(Val));
1921       setImmKindConst();
1922       return;
1923     }
1924 
1925     Inst.addOperand(MCOperand::createImm(Val & 0xffff));
1926     setImmKindLiteral();
1927     return;
1928 
1929   case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
1930   case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
1931   case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
1932   case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: {
1933     assert(isSafeTruncation(Val, 16));
1934     assert(AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val),
1935                                         AsmParser->hasInv2PiInlineImm()));
1936 
1937     Inst.addOperand(MCOperand::createImm(Val));
1938     return;
1939   }
1940   default:
1941     llvm_unreachable("invalid operand size");
1942   }
1943 }
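// Informal example for the 64-bit fp case above: the literal 1.25
// (0x3FF4000000000000) has zero low 32 bits and is encoded as the 32-bit
// literal 0x3FF40000, while a value such as 1.1 cannot be represented this
// way and triggers the truncation warning.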
1944 
1945 template <unsigned Bitwidth>
1946 void AMDGPUOperand::addKImmFPOperands(MCInst &Inst, unsigned N) const {
1947   APInt Literal(64, Imm.Val);
1948   setImmKindNone();
1949 
1950   if (!Imm.IsFPImm) {
1951     // We got int literal token.
1952     Inst.addOperand(MCOperand::createImm(Literal.getLoBits(Bitwidth).getZExtValue()));
1953     return;
1954   }
1955 
1956   bool Lost;
1957   APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
1958   FPLiteral.convert(*getFltSemantics(Bitwidth / 8),
1959                     APFloat::rmNearestTiesToEven, &Lost);
1960   Inst.addOperand(MCOperand::createImm(FPLiteral.bitcastToAPInt().getZExtValue()));
1961 }
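// Informally: for a 32-bit K immediate (e.g. the constant operand of
// v_madmk_f32) an fp token such as 0.5 is converted to its IEEE encoding
// 0x3F000000 here rather than being checked against the inline constant set.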
1962 
1963 void AMDGPUOperand::addRegOperands(MCInst &Inst, unsigned N) const {
1964   Inst.addOperand(MCOperand::createReg(AMDGPU::getMCReg(getReg(), AsmParser->getSTI())));
1965 }
1966 
1967 static bool isInlineValue(unsigned Reg) {
1968   switch (Reg) {
1969   case AMDGPU::SRC_SHARED_BASE:
1970   case AMDGPU::SRC_SHARED_LIMIT:
1971   case AMDGPU::SRC_PRIVATE_BASE:
1972   case AMDGPU::SRC_PRIVATE_LIMIT:
1973   case AMDGPU::SRC_POPS_EXITING_WAVE_ID:
1974     return true;
1975   case AMDGPU::SRC_VCCZ:
1976   case AMDGPU::SRC_EXECZ:
1977   case AMDGPU::SRC_SCC:
1978     return true;
1979   case AMDGPU::SGPR_NULL:
1980     return true;
1981   default:
1982     return false;
1983   }
1984 }
1985 
1986 bool AMDGPUOperand::isInlineValue() const {
1987   return isRegKind() && ::isInlineValue(getReg());
1988 }
1989 
1990 //===----------------------------------------------------------------------===//
1991 // AsmParser
1992 //===----------------------------------------------------------------------===//
1993 
1994 static int getRegClass(RegisterKind Is, unsigned RegWidth) {
1995   if (Is == IS_VGPR) {
1996     switch (RegWidth) {
1997       default: return -1;
1998       case 1: return AMDGPU::VGPR_32RegClassID;
1999       case 2: return AMDGPU::VReg_64RegClassID;
2000       case 3: return AMDGPU::VReg_96RegClassID;
2001       case 4: return AMDGPU::VReg_128RegClassID;
2002       case 5: return AMDGPU::VReg_160RegClassID;
2003       case 6: return AMDGPU::VReg_192RegClassID;
2004       case 8: return AMDGPU::VReg_256RegClassID;
2005       case 16: return AMDGPU::VReg_512RegClassID;
2006       case 32: return AMDGPU::VReg_1024RegClassID;
2007     }
2008   } else if (Is == IS_TTMP) {
2009     switch (RegWidth) {
2010       default: return -1;
2011       case 1: return AMDGPU::TTMP_32RegClassID;
2012       case 2: return AMDGPU::TTMP_64RegClassID;
2013       case 4: return AMDGPU::TTMP_128RegClassID;
2014       case 8: return AMDGPU::TTMP_256RegClassID;
2015       case 16: return AMDGPU::TTMP_512RegClassID;
2016     }
2017   } else if (Is == IS_SGPR) {
2018     switch (RegWidth) {
2019       default: return -1;
2020       case 1: return AMDGPU::SGPR_32RegClassID;
2021       case 2: return AMDGPU::SGPR_64RegClassID;
2022       case 3: return AMDGPU::SGPR_96RegClassID;
2023       case 4: return AMDGPU::SGPR_128RegClassID;
2024       case 5: return AMDGPU::SGPR_160RegClassID;
2025       case 6: return AMDGPU::SGPR_192RegClassID;
2026       case 8: return AMDGPU::SGPR_256RegClassID;
2027       case 16: return AMDGPU::SGPR_512RegClassID;
2028     }
2029   } else if (Is == IS_AGPR) {
2030     switch (RegWidth) {
2031       default: return -1;
2032       case 1: return AMDGPU::AGPR_32RegClassID;
2033       case 2: return AMDGPU::AReg_64RegClassID;
2034       case 3: return AMDGPU::AReg_96RegClassID;
2035       case 4: return AMDGPU::AReg_128RegClassID;
2036       case 5: return AMDGPU::AReg_160RegClassID;
2037       case 6: return AMDGPU::AReg_192RegClassID;
2038       case 8: return AMDGPU::AReg_256RegClassID;
2039       case 16: return AMDGPU::AReg_512RegClassID;
2040       case 32: return AMDGPU::AReg_1024RegClassID;
2041     }
2042   }
2043   return -1;
2044 }
2045 
2046 static unsigned getSpecialRegForName(StringRef RegName) {
2047   return StringSwitch<unsigned>(RegName)
2048     .Case("exec", AMDGPU::EXEC)
2049     .Case("vcc", AMDGPU::VCC)
2050     .Case("flat_scratch", AMDGPU::FLAT_SCR)
2051     .Case("xnack_mask", AMDGPU::XNACK_MASK)
2052     .Case("shared_base", AMDGPU::SRC_SHARED_BASE)
2053     .Case("src_shared_base", AMDGPU::SRC_SHARED_BASE)
2054     .Case("shared_limit", AMDGPU::SRC_SHARED_LIMIT)
2055     .Case("src_shared_limit", AMDGPU::SRC_SHARED_LIMIT)
2056     .Case("private_base", AMDGPU::SRC_PRIVATE_BASE)
2057     .Case("src_private_base", AMDGPU::SRC_PRIVATE_BASE)
2058     .Case("private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
2059     .Case("src_private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
2060     .Case("pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
2061     .Case("src_pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
2062     .Case("lds_direct", AMDGPU::LDS_DIRECT)
2063     .Case("src_lds_direct", AMDGPU::LDS_DIRECT)
2064     .Case("m0", AMDGPU::M0)
2065     .Case("vccz", AMDGPU::SRC_VCCZ)
2066     .Case("src_vccz", AMDGPU::SRC_VCCZ)
2067     .Case("execz", AMDGPU::SRC_EXECZ)
2068     .Case("src_execz", AMDGPU::SRC_EXECZ)
2069     .Case("scc", AMDGPU::SRC_SCC)
2070     .Case("src_scc", AMDGPU::SRC_SCC)
2071     .Case("tba", AMDGPU::TBA)
2072     .Case("tma", AMDGPU::TMA)
2073     .Case("flat_scratch_lo", AMDGPU::FLAT_SCR_LO)
2074     .Case("flat_scratch_hi", AMDGPU::FLAT_SCR_HI)
2075     .Case("xnack_mask_lo", AMDGPU::XNACK_MASK_LO)
2076     .Case("xnack_mask_hi", AMDGPU::XNACK_MASK_HI)
2077     .Case("vcc_lo", AMDGPU::VCC_LO)
2078     .Case("vcc_hi", AMDGPU::VCC_HI)
2079     .Case("exec_lo", AMDGPU::EXEC_LO)
2080     .Case("exec_hi", AMDGPU::EXEC_HI)
2081     .Case("tma_lo", AMDGPU::TMA_LO)
2082     .Case("tma_hi", AMDGPU::TMA_HI)
2083     .Case("tba_lo", AMDGPU::TBA_LO)
2084     .Case("tba_hi", AMDGPU::TBA_HI)
2085     .Case("pc", AMDGPU::PC_REG)
2086     .Case("null", AMDGPU::SGPR_NULL)
2087     .Default(AMDGPU::NoRegister);
2088 }
2089 
2090 bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
2091                                     SMLoc &EndLoc, bool RestoreOnFailure) {
2092   auto R = parseRegister();
2093   if (!R) return true;
2094   assert(R->isReg());
2095   RegNo = R->getReg();
2096   StartLoc = R->getStartLoc();
2097   EndLoc = R->getEndLoc();
2098   return false;
2099 }
2100 
2101 bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
2102                                     SMLoc &EndLoc) {
2103   return ParseRegister(RegNo, StartLoc, EndLoc, /*RestoreOnFailure=*/false);
2104 }
2105 
2106 OperandMatchResultTy AMDGPUAsmParser::tryParseRegister(unsigned &RegNo,
2107                                                        SMLoc &StartLoc,
2108                                                        SMLoc &EndLoc) {
2109   bool Result =
2110       ParseRegister(RegNo, StartLoc, EndLoc, /*RestoreOnFailure=*/true);
2111   bool PendingErrors = getParser().hasPendingError();
2112   getParser().clearPendingErrors();
2113   if (PendingErrors)
2114     return MatchOperand_ParseFail;
2115   if (Result)
2116     return MatchOperand_NoMatch;
2117   return MatchOperand_Success;
2118 }
2119 
2120 bool AMDGPUAsmParser::AddNextRegisterToList(unsigned &Reg, unsigned &RegWidth,
2121                                             RegisterKind RegKind, unsigned Reg1,
2122                                             SMLoc Loc) {
2123   switch (RegKind) {
2124   case IS_SPECIAL:
2125     if (Reg == AMDGPU::EXEC_LO && Reg1 == AMDGPU::EXEC_HI) {
2126       Reg = AMDGPU::EXEC;
2127       RegWidth = 2;
2128       return true;
2129     }
2130     if (Reg == AMDGPU::FLAT_SCR_LO && Reg1 == AMDGPU::FLAT_SCR_HI) {
2131       Reg = AMDGPU::FLAT_SCR;
2132       RegWidth = 2;
2133       return true;
2134     }
2135     if (Reg == AMDGPU::XNACK_MASK_LO && Reg1 == AMDGPU::XNACK_MASK_HI) {
2136       Reg = AMDGPU::XNACK_MASK;
2137       RegWidth = 2;
2138       return true;
2139     }
2140     if (Reg == AMDGPU::VCC_LO && Reg1 == AMDGPU::VCC_HI) {
2141       Reg = AMDGPU::VCC;
2142       RegWidth = 2;
2143       return true;
2144     }
2145     if (Reg == AMDGPU::TBA_LO && Reg1 == AMDGPU::TBA_HI) {
2146       Reg = AMDGPU::TBA;
2147       RegWidth = 2;
2148       return true;
2149     }
2150     if (Reg == AMDGPU::TMA_LO && Reg1 == AMDGPU::TMA_HI) {
2151       Reg = AMDGPU::TMA;
2152       RegWidth = 2;
2153       return true;
2154     }
2155     Error(Loc, "register does not fit in the list");
2156     return false;
2157   case IS_VGPR:
2158   case IS_SGPR:
2159   case IS_AGPR:
2160   case IS_TTMP:
2161     if (Reg1 != Reg + RegWidth) {
2162       Error(Loc, "registers in a list must have consecutive indices");
2163       return false;
2164     }
2165     RegWidth++;
2166     return true;
2167   default:
2168     llvm_unreachable("unexpected register kind");
2169   }
2170 }
2171 
2172 struct RegInfo {
2173   StringLiteral Name;
2174   RegisterKind Kind;
2175 };
2176 
2177 static constexpr RegInfo RegularRegisters[] = {
2178   {{"v"},    IS_VGPR},
2179   {{"s"},    IS_SGPR},
2180   {{"ttmp"}, IS_TTMP},
2181   {{"acc"},  IS_AGPR},
2182   {{"a"},    IS_AGPR},
2183 };
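// For illustration, these prefixes cover operands such as v0, v[4:7],
// s[0:1], ttmp[4:7], a5 and acc[0:1]; named registers like vcc or exec_lo
// are resolved separately by getSpecialRegForName().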
2184 
2185 static bool isRegularReg(RegisterKind Kind) {
2186   return Kind == IS_VGPR ||
2187          Kind == IS_SGPR ||
2188          Kind == IS_TTMP ||
2189          Kind == IS_AGPR;
2190 }
2191 
2192 static const RegInfo* getRegularRegInfo(StringRef Str) {
2193   for (const RegInfo &Reg : RegularRegisters)
2194     if (Str.startswith(Reg.Name))
2195       return &Reg;
2196   return nullptr;
2197 }
2198 
2199 static bool getRegNum(StringRef Str, unsigned& Num) {
2200   return !Str.getAsInteger(10, Num);
2201 }
2202 
2203 bool
2204 AMDGPUAsmParser::isRegister(const AsmToken &Token,
2205                             const AsmToken &NextToken) const {
2206 
2207   // A list of consecutive registers: [s0,s1,s2,s3]
2208   if (Token.is(AsmToken::LBrac))
2209     return true;
2210 
2211   if (!Token.is(AsmToken::Identifier))
2212     return false;
2213 
2214   // A single register like s0 or a range of registers like s[0:1]
2215 
2216   StringRef Str = Token.getString();
2217   const RegInfo *Reg = getRegularRegInfo(Str);
2218   if (Reg) {
2219     StringRef RegName = Reg->Name;
2220     StringRef RegSuffix = Str.substr(RegName.size());
2221     if (!RegSuffix.empty()) {
2222       unsigned Num;
2223       // A single register with an index: rXX
2224       if (getRegNum(RegSuffix, Num))
2225         return true;
2226     } else {
2227       // A range of registers: r[XX:YY].
2228       if (NextToken.is(AsmToken::LBrac))
2229         return true;
2230     }
2231   }
2232 
2233   return getSpecialRegForName(Str) != AMDGPU::NoRegister;
2234 }
2235 
2236 bool
2237 AMDGPUAsmParser::isRegister()
2238 {
2239   return isRegister(getToken(), peekToken());
2240 }
2241 
2242 unsigned
2243 AMDGPUAsmParser::getRegularReg(RegisterKind RegKind,
2244                                unsigned RegNum,
2245                                unsigned RegWidth,
2246                                SMLoc Loc) {
2247 
2248   assert(isRegularReg(RegKind));
2249 
2250   unsigned AlignSize = 1;
2251   if (RegKind == IS_SGPR || RegKind == IS_TTMP) {
2252     // SGPR and TTMP registers must be aligned.
2253     // Max required alignment is 4 dwords.
2254     AlignSize = std::min(RegWidth, 4u);
2255   }
2256 
2257   if (RegNum % AlignSize != 0) {
2258     Error(Loc, "invalid register alignment");
2259     return AMDGPU::NoRegister;
2260   }
2261 
2262   unsigned RegIdx = RegNum / AlignSize;
2263   int RCID = getRegClass(RegKind, RegWidth);
2264   if (RCID == -1) {
2265     Error(Loc, "invalid or unsupported register size");
2266     return AMDGPU::NoRegister;
2267   }
2268 
2269   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
2270   const MCRegisterClass RC = TRI->getRegClass(RCID);
2271   if (RegIdx >= RC.getNumRegs()) {
2272     Error(Loc, "register index is out of range");
2273     return AMDGPU::NoRegister;
2274   }
2275 
2276   return RC.getRegister(RegIdx);
2277 }
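// For example, s[2:3] maps to index 1 of the SGPR_64 class, while s[1:2] is
// rejected with "invalid register alignment" because 64-bit SGPR ranges must
// start at an even register index.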
2278 
2279 bool
2280 AMDGPUAsmParser::ParseRegRange(unsigned& Num, unsigned& Width) {
2281   int64_t RegLo, RegHi;
2282   if (!skipToken(AsmToken::LBrac, "missing register index"))
2283     return false;
2284 
2285   SMLoc FirstIdxLoc = getLoc();
2286   SMLoc SecondIdxLoc;
2287 
2288   if (!parseExpr(RegLo))
2289     return false;
2290 
2291   if (trySkipToken(AsmToken::Colon)) {
2292     SecondIdxLoc = getLoc();
2293     if (!parseExpr(RegHi))
2294       return false;
2295   } else {
2296     RegHi = RegLo;
2297   }
2298 
2299   if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
2300     return false;
2301 
2302   if (!isUInt<32>(RegLo)) {
2303     Error(FirstIdxLoc, "invalid register index");
2304     return false;
2305   }
2306 
2307   if (!isUInt<32>(RegHi)) {
2308     Error(SecondIdxLoc, "invalid register index");
2309     return false;
2310   }
2311 
2312   if (RegLo > RegHi) {
2313     Error(FirstIdxLoc, "first register index should not exceed second index");
2314     return false;
2315   }
2316 
2317   Num = static_cast<unsigned>(RegLo);
2318   Width = (RegHi - RegLo) + 1;
2319   return true;
2320 }
2321 
2322 unsigned AMDGPUAsmParser::ParseSpecialReg(RegisterKind &RegKind,
2323                                           unsigned &RegNum, unsigned &RegWidth,
2324                                           SmallVectorImpl<AsmToken> &Tokens) {
2325   assert(isToken(AsmToken::Identifier));
2326   unsigned Reg = getSpecialRegForName(getTokenStr());
2327   if (Reg) {
2328     RegNum = 0;
2329     RegWidth = 1;
2330     RegKind = IS_SPECIAL;
2331     Tokens.push_back(getToken());
2332     lex(); // skip register name
2333   }
2334   return Reg;
2335 }
2336 
2337 unsigned AMDGPUAsmParser::ParseRegularReg(RegisterKind &RegKind,
2338                                           unsigned &RegNum, unsigned &RegWidth,
2339                                           SmallVectorImpl<AsmToken> &Tokens) {
2340   assert(isToken(AsmToken::Identifier));
2341   StringRef RegName = getTokenStr();
2342   auto Loc = getLoc();
2343 
2344   const RegInfo *RI = getRegularRegInfo(RegName);
2345   if (!RI) {
2346     Error(Loc, "invalid register name");
2347     return AMDGPU::NoRegister;
2348   }
2349 
2350   Tokens.push_back(getToken());
2351   lex(); // skip register name
2352 
2353   RegKind = RI->Kind;
2354   StringRef RegSuffix = RegName.substr(RI->Name.size());
2355   if (!RegSuffix.empty()) {
2356     // Single 32-bit register: vXX.
2357     if (!getRegNum(RegSuffix, RegNum)) {
2358       Error(Loc, "invalid register index");
2359       return AMDGPU::NoRegister;
2360     }
2361     RegWidth = 1;
2362   } else {
2363     // Range of registers: v[XX:YY]. ":YY" is optional.
2364     if (!ParseRegRange(RegNum, RegWidth))
2365       return AMDGPU::NoRegister;
2366   }
2367 
2368   return getRegularReg(RegKind, RegNum, RegWidth, Loc);
2369 }
2370 
2371 unsigned AMDGPUAsmParser::ParseRegList(RegisterKind &RegKind, unsigned &RegNum,
2372                                        unsigned &RegWidth,
2373                                        SmallVectorImpl<AsmToken> &Tokens) {
2374   unsigned Reg = AMDGPU::NoRegister;
2375   auto ListLoc = getLoc();
2376 
2377   if (!skipToken(AsmToken::LBrac,
2378                  "expected a register or a list of registers")) {
2379     return AMDGPU::NoRegister;
2380   }
2381 
2382   // List of consecutive registers, e.g.: [s0,s1,s2,s3]
2383 
2384   auto Loc = getLoc();
2385   if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth))
2386     return AMDGPU::NoRegister;
2387   if (RegWidth != 1) {
2388     Error(Loc, "expected a single 32-bit register");
2389     return AMDGPU::NoRegister;
2390   }
2391 
2392   for (; trySkipToken(AsmToken::Comma); ) {
2393     RegisterKind NextRegKind;
2394     unsigned NextReg, NextRegNum, NextRegWidth;
2395     Loc = getLoc();
2396 
2397     if (!ParseAMDGPURegister(NextRegKind, NextReg,
2398                              NextRegNum, NextRegWidth,
2399                              Tokens)) {
2400       return AMDGPU::NoRegister;
2401     }
2402     if (NextRegWidth != 1) {
2403       Error(Loc, "expected a single 32-bit register");
2404       return AMDGPU::NoRegister;
2405     }
2406     if (NextRegKind != RegKind) {
2407       Error(Loc, "registers in a list must be of the same kind");
2408       return AMDGPU::NoRegister;
2409     }
2410     if (!AddNextRegisterToList(Reg, RegWidth, RegKind, NextReg, Loc))
2411       return AMDGPU::NoRegister;
2412   }
2413 
2414   if (!skipToken(AsmToken::RBrac,
2415                  "expected a comma or a closing square bracket")) {
2416     return AMDGPU::NoRegister;
2417   }
2418 
2419   if (isRegularReg(RegKind))
2420     Reg = getRegularReg(RegKind, RegNum, RegWidth, ListLoc);
2421 
2422   return Reg;
2423 }
2424 
2425 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
2426                                           unsigned &RegNum, unsigned &RegWidth,
2427                                           SmallVectorImpl<AsmToken> &Tokens) {
2428   auto Loc = getLoc();
2429   Reg = AMDGPU::NoRegister;
2430 
2431   if (isToken(AsmToken::Identifier)) {
2432     Reg = ParseSpecialReg(RegKind, RegNum, RegWidth, Tokens);
2433     if (Reg == AMDGPU::NoRegister)
2434       Reg = ParseRegularReg(RegKind, RegNum, RegWidth, Tokens);
2435   } else {
2436     Reg = ParseRegList(RegKind, RegNum, RegWidth, Tokens);
2437   }
2438 
2439   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
2440   if (Reg == AMDGPU::NoRegister) {
2441     assert(Parser.hasPendingError());
2442     return false;
2443   }
2444 
2445   if (!subtargetHasRegister(*TRI, Reg)) {
2446     if (Reg == AMDGPU::SGPR_NULL) {
2447       Error(Loc, "'null' operand is not supported on this GPU");
2448     } else {
2449       Error(Loc, "register not available on this GPU");
2450     }
2451     return false;
2452   }
2453 
2454   return true;
2455 }
2456 
2457 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
2458                                           unsigned &RegNum, unsigned &RegWidth,
2459                                           bool RestoreOnFailure /*=false*/) {
2460   Reg = AMDGPU::NoRegister;
2461 
2462   SmallVector<AsmToken, 1> Tokens;
2463   if (ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, Tokens)) {
2464     if (RestoreOnFailure) {
2465       while (!Tokens.empty()) {
2466         getLexer().UnLex(Tokens.pop_back_val());
2467       }
2468     }
2469     return true;
2470   }
2471   return false;
2472 }
2473 
2474 Optional<StringRef>
2475 AMDGPUAsmParser::getGprCountSymbolName(RegisterKind RegKind) {
2476   switch (RegKind) {
2477   case IS_VGPR:
2478     return StringRef(".amdgcn.next_free_vgpr");
2479   case IS_SGPR:
2480     return StringRef(".amdgcn.next_free_sgpr");
2481   default:
2482     return None;
2483   }
2484 }
2485 
2486 void AMDGPUAsmParser::initializeGprCountSymbol(RegisterKind RegKind) {
2487   auto SymbolName = getGprCountSymbolName(RegKind);
2488   assert(SymbolName && "initializing invalid register kind");
2489   MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);
2490   Sym->setVariableValue(MCConstantExpr::create(0, getContext()));
2491 }
2492 
2493 bool AMDGPUAsmParser::updateGprCountSymbols(RegisterKind RegKind,
2494                                             unsigned DwordRegIndex,
2495                                             unsigned RegWidth) {
2496   // Symbols are only defined for GCN targets
2497   if (AMDGPU::getIsaVersion(getSTI().getCPU()).Major < 6)
2498     return true;
2499 
2500   auto SymbolName = getGprCountSymbolName(RegKind);
2501   if (!SymbolName)
2502     return true;
2503   MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);
2504 
2505   int64_t NewMax = DwordRegIndex + RegWidth - 1;
2506   int64_t OldCount;
2507 
2508   if (!Sym->isVariable())
2509     return !Error(getLoc(),
2510                   ".amdgcn.next_free_{v,s}gpr symbols must be variable");
2511   if (!Sym->getVariableValue(false)->evaluateAsAbsolute(OldCount))
2512     return !Error(
2513         getLoc(),
2514         ".amdgcn.next_free_{v,s}gpr symbols must be absolute expressions");
2515 
2516   if (OldCount <= NewMax)
2517     Sym->setVariableValue(MCConstantExpr::create(NewMax + 1, getContext()));
2518 
2519   return true;
2520 }
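// For example, after parsing v[8:11] (DwordRegIndex = 8, RegWidth = 4),
// .amdgcn.next_free_vgpr is raised to 12 if its current value is below 12.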
2521 
2522 std::unique_ptr<AMDGPUOperand>
2523 AMDGPUAsmParser::parseRegister(bool RestoreOnFailure) {
2524   const auto &Tok = getToken();
2525   SMLoc StartLoc = Tok.getLoc();
2526   SMLoc EndLoc = Tok.getEndLoc();
2527   RegisterKind RegKind;
2528   unsigned Reg, RegNum, RegWidth;
2529 
2530   if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth)) {
2531     return nullptr;
2532   }
2533   if (isHsaAbiVersion3(&getSTI())) {
2534     if (!updateGprCountSymbols(RegKind, RegNum, RegWidth))
2535       return nullptr;
2536   } else
2537     KernelScope.usesRegister(RegKind, RegNum, RegWidth);
2538   return AMDGPUOperand::CreateReg(this, Reg, StartLoc, EndLoc);
2539 }
2540 
2541 OperandMatchResultTy
2542 AMDGPUAsmParser::parseImm(OperandVector &Operands, bool HasSP3AbsModifier) {
2543   // TODO: add syntactic sugar for 1/(2*PI)
2544 
2545   assert(!isRegister());
2546   assert(!isModifier());
2547 
2548   const auto& Tok = getToken();
2549   const auto& NextTok = peekToken();
2550   bool IsReal = Tok.is(AsmToken::Real);
2551   SMLoc S = getLoc();
2552   bool Negate = false;
2553 
2554   if (!IsReal && Tok.is(AsmToken::Minus) && NextTok.is(AsmToken::Real)) {
2555     lex();
2556     IsReal = true;
2557     Negate = true;
2558   }
2559 
2560   if (IsReal) {
2561     // Floating-point expressions are not supported.
2562     // Only floating-point literals with an
2563     // optional sign are allowed.
2564 
2565     StringRef Num = getTokenStr();
2566     lex();
2567 
2568     APFloat RealVal(APFloat::IEEEdouble());
2569     auto roundMode = APFloat::rmNearestTiesToEven;
2570     if (errorToBool(RealVal.convertFromString(Num, roundMode).takeError())) {
2571       return MatchOperand_ParseFail;
2572     }
2573     if (Negate)
2574       RealVal.changeSign();
2575 
2576     Operands.push_back(
2577       AMDGPUOperand::CreateImm(this, RealVal.bitcastToAPInt().getZExtValue(), S,
2578                                AMDGPUOperand::ImmTyNone, true));
2579 
2580     return MatchOperand_Success;
2581 
2582   } else {
2583     int64_t IntVal;
2584     const MCExpr *Expr;
2585     SMLoc S = getLoc();
2586 
2587     if (HasSP3AbsModifier) {
2588       // This is a workaround for handling expressions
2589       // as arguments of SP3 'abs' modifier, for example:
2590       //     |1.0|
2591       //     |-1|
2592       //     |1+x|
2593       // This syntax is not compatible with syntax of standard
2594       // MC expressions (due to the trailing '|').
2595       SMLoc EndLoc;
2596       if (getParser().parsePrimaryExpr(Expr, EndLoc, nullptr))
2597         return MatchOperand_ParseFail;
2598     } else {
2599       if (Parser.parseExpression(Expr))
2600         return MatchOperand_ParseFail;
2601     }
2602 
2603     if (Expr->evaluateAsAbsolute(IntVal)) {
2604       Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S));
2605     } else {
2606       Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S));
2607     }
2608 
2609     return MatchOperand_Success;
2610   }
2611 
2612   return MatchOperand_NoMatch;
2613 }
2614 
2615 OperandMatchResultTy
2616 AMDGPUAsmParser::parseReg(OperandVector &Operands) {
2617   if (!isRegister())
2618     return MatchOperand_NoMatch;
2619 
2620   if (auto R = parseRegister()) {
2621     assert(R->isReg());
2622     Operands.push_back(std::move(R));
2623     return MatchOperand_Success;
2624   }
2625   return MatchOperand_ParseFail;
2626 }
2627 
2628 OperandMatchResultTy
2629 AMDGPUAsmParser::parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod) {
2630   auto res = parseReg(Operands);
2631   if (res != MatchOperand_NoMatch) {
2632     return res;
2633   } else if (isModifier()) {
2634     return MatchOperand_NoMatch;
2635   } else {
2636     return parseImm(Operands, HasSP3AbsMod);
2637   }
2638 }
2639 
2640 bool
2641 AMDGPUAsmParser::isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
2642   if (Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::LParen)) {
2643     const auto &str = Token.getString();
2644     return str == "abs" || str == "neg" || str == "sext";
2645   }
2646   return false;
2647 }
2648 
2649 bool
2650 AMDGPUAsmParser::isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const {
2651   return Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::Colon);
2652 }
2653 
2654 bool
2655 AMDGPUAsmParser::isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
2656   return isNamedOperandModifier(Token, NextToken) || Token.is(AsmToken::Pipe);
2657 }
2658 
2659 bool
2660 AMDGPUAsmParser::isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
2661   return isRegister(Token, NextToken) || isOperandModifier(Token, NextToken);
2662 }
2663 
2664 // Check if this is an operand modifier or an opcode modifier
2665 // which may look like an expression but is not. We should
2666 // avoid parsing these modifiers as expressions. Currently
2667 // recognized sequences are:
2668 //   |...|
2669 //   abs(...)
2670 //   neg(...)
2671 //   sext(...)
2672 //   -reg
2673 //   -|...|
2674 //   -abs(...)
2675 //   name:...
2676 // Note that simple opcode modifiers like 'gds' may be parsed as
2677 // expressions; this is a special case. See getExpressionAsToken.
2678 //
2679 bool
2680 AMDGPUAsmParser::isModifier() {
2681 
2682   AsmToken Tok = getToken();
2683   AsmToken NextToken[2];
2684   peekTokens(NextToken);
2685 
2686   return isOperandModifier(Tok, NextToken[0]) ||
2687          (Tok.is(AsmToken::Minus) && isRegOrOperandModifier(NextToken[0], NextToken[1])) ||
2688          isOpcodeModifierWithVal(Tok, NextToken[0]);
2689 }
2690 
2691 // Check if the current token is an SP3 'neg' modifier.
2692 // Currently this modifier is allowed in the following context:
2693 //
2694 // 1. Before a register, e.g. "-v0", "-v[...]" or "-[v0,v1]".
2695 // 2. Before an 'abs' modifier: -abs(...)
2696 // 3. Before an SP3 'abs' modifier: -|...|
2697 //
2698 // In all other cases "-" is handled as a part
2699 // of an expression that follows the sign.
2700 //
2701 // Note: When "-" is followed by an integer literal,
2702 // it is interpreted as integer negation rather
2703 // than a floating-point NEG modifier applied to that literal.
2704 // Besides being counter-intuitive, such use of a floating-point
2705 // NEG modifier would result in different meanings
2706 // of integer literals used with VOP1/2/C and VOP3,
2707 // for example:
2708 //    v_exp_f32_e32 v5, -1 // VOP1: src0 = 0xFFFFFFFF
2709 //    v_exp_f32_e64 v5, -1 // VOP3: src0 = 0x80000001
2710 // Negative fp literals with a preceding "-" are
2711 // handled likewise for uniformity.
2712 //
2713 bool
2714 AMDGPUAsmParser::parseSP3NegModifier() {
2715 
2716   AsmToken NextToken[2];
2717   peekTokens(NextToken);
2718 
2719   if (isToken(AsmToken::Minus) &&
2720       (isRegister(NextToken[0], NextToken[1]) ||
2721        NextToken[0].is(AsmToken::Pipe) ||
2722        isId(NextToken[0], "abs"))) {
2723     lex();
2724     return true;
2725   }
2726 
2727   return false;
2728 }
2729 
2730 OperandMatchResultTy
2731 AMDGPUAsmParser::parseRegOrImmWithFPInputMods(OperandVector &Operands,
2732                                               bool AllowImm) {
2733   bool Neg, SP3Neg;
2734   bool Abs, SP3Abs;
2735   SMLoc Loc;
2736 
2737   // Disable ambiguous constructs like '--1' etc. Should use neg(-1) instead.
2738   if (isToken(AsmToken::Minus) && peekToken().is(AsmToken::Minus)) {
2739     Error(getLoc(), "invalid syntax, expected 'neg' modifier");
2740     return MatchOperand_ParseFail;
2741   }
2742 
2743   SP3Neg = parseSP3NegModifier();
2744 
2745   Loc = getLoc();
2746   Neg = trySkipId("neg");
2747   if (Neg && SP3Neg) {
2748     Error(Loc, "expected register or immediate");
2749     return MatchOperand_ParseFail;
2750   }
2751   if (Neg && !skipToken(AsmToken::LParen, "expected left paren after neg"))
2752     return MatchOperand_ParseFail;
2753 
2754   Abs = trySkipId("abs");
2755   if (Abs && !skipToken(AsmToken::LParen, "expected left paren after abs"))
2756     return MatchOperand_ParseFail;
2757 
2758   Loc = getLoc();
2759   SP3Abs = trySkipToken(AsmToken::Pipe);
2760   if (Abs && SP3Abs) {
2761     Error(Loc, "expected register or immediate");
2762     return MatchOperand_ParseFail;
2763   }
2764 
2765   OperandMatchResultTy Res;
2766   if (AllowImm) {
2767     Res = parseRegOrImm(Operands, SP3Abs);
2768   } else {
2769     Res = parseReg(Operands);
2770   }
2771   if (Res != MatchOperand_Success) {
2772     return (SP3Neg || Neg || SP3Abs || Abs)? MatchOperand_ParseFail : Res;
2773   }
2774 
2775   if (SP3Abs && !skipToken(AsmToken::Pipe, "expected vertical bar"))
2776     return MatchOperand_ParseFail;
2777   if (Abs && !skipToken(AsmToken::RParen, "expected closing parentheses"))
2778     return MatchOperand_ParseFail;
2779   if (Neg && !skipToken(AsmToken::RParen, "expected closing parentheses"))
2780     return MatchOperand_ParseFail;
2781 
2782   AMDGPUOperand::Modifiers Mods;
2783   Mods.Abs = Abs || SP3Abs;
2784   Mods.Neg = Neg || SP3Neg;
2785 
2786   if (Mods.hasFPModifiers()) {
2787     AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
2788     if (Op.isExpr()) {
2789       Error(Op.getStartLoc(), "expected an absolute expression");
2790       return MatchOperand_ParseFail;
2791     }
2792     Op.setModifiers(Mods);
2793   }
2794   return MatchOperand_Success;
2795 }
2796 
2797 OperandMatchResultTy
2798 AMDGPUAsmParser::parseRegOrImmWithIntInputMods(OperandVector &Operands,
2799                                                bool AllowImm) {
2800   bool Sext = trySkipId("sext");
2801   if (Sext && !skipToken(AsmToken::LParen, "expected left paren after sext"))
2802     return MatchOperand_ParseFail;
2803 
2804   OperandMatchResultTy Res;
2805   if (AllowImm) {
2806     Res = parseRegOrImm(Operands);
2807   } else {
2808     Res = parseReg(Operands);
2809   }
2810   if (Res != MatchOperand_Success) {
2811     return Sext? MatchOperand_ParseFail : Res;
2812   }
2813 
2814   if (Sext && !skipToken(AsmToken::RParen, "expected closing parentheses"))
2815     return MatchOperand_ParseFail;
2816 
2817   AMDGPUOperand::Modifiers Mods;
2818   Mods.Sext = Sext;
2819 
2820   if (Mods.hasIntModifiers()) {
2821     AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
2822     if (Op.isExpr()) {
2823       Error(Op.getStartLoc(), "expected an absolute expression");
2824       return MatchOperand_ParseFail;
2825     }
2826     Op.setModifiers(Mods);
2827   }
2828 
2829   return MatchOperand_Success;
2830 }
2831 
2832 OperandMatchResultTy
2833 AMDGPUAsmParser::parseRegWithFPInputMods(OperandVector &Operands) {
2834   return parseRegOrImmWithFPInputMods(Operands, false);
2835 }
2836 
2837 OperandMatchResultTy
2838 AMDGPUAsmParser::parseRegWithIntInputMods(OperandVector &Operands) {
2839   return parseRegOrImmWithIntInputMods(Operands, false);
2840 }
2841 
2842 OperandMatchResultTy AMDGPUAsmParser::parseVReg32OrOff(OperandVector &Operands) {
2843   auto Loc = getLoc();
2844   if (trySkipId("off")) {
2845     Operands.push_back(AMDGPUOperand::CreateImm(this, 0, Loc,
2846                                                 AMDGPUOperand::ImmTyOff, false));
2847     return MatchOperand_Success;
2848   }
2849 
2850   if (!isRegister())
2851     return MatchOperand_NoMatch;
2852 
2853   std::unique_ptr<AMDGPUOperand> Reg = parseRegister();
2854   if (Reg) {
2855     Operands.push_back(std::move(Reg));
2856     return MatchOperand_Success;
2857   }
2858 
2859   return MatchOperand_ParseFail;
2860 
2861 }
2862 
2863 unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) {
2864   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
2865 
2866   if ((getForcedEncodingSize() == 32 && (TSFlags & SIInstrFlags::VOP3)) ||
2867       (getForcedEncodingSize() == 64 && !(TSFlags & SIInstrFlags::VOP3)) ||
2868       (isForcedDPP() && !(TSFlags & SIInstrFlags::DPP)) ||
2869       (isForcedSDWA() && !(TSFlags & SIInstrFlags::SDWA)) )
2870     return Match_InvalidOperand;
2871 
2872   if ((TSFlags & SIInstrFlags::VOP3) &&
2873       (TSFlags & SIInstrFlags::VOPAsmPrefer32Bit) &&
2874       getForcedEncodingSize() != 64)
2875     return Match_PreferE32;
2876 
2877   if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
2878       Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) {
2879     // v_mac_f32/16 allow only dst_sel == DWORD.
2880     auto OpNum =
2881         AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::dst_sel);
2882     const auto &Op = Inst.getOperand(OpNum);
2883     if (!Op.isImm() || Op.getImm() != AMDGPU::SDWA::SdwaSel::DWORD) {
2884       return Match_InvalidOperand;
2885     }
2886   }
2887 
2888   return Match_Success;
2889 }
2890 
2891 static ArrayRef<unsigned> getAllVariants() {
2892   static const unsigned Variants[] = {
2893     AMDGPUAsmVariants::DEFAULT, AMDGPUAsmVariants::VOP3,
2894     AMDGPUAsmVariants::SDWA, AMDGPUAsmVariants::SDWA9, AMDGPUAsmVariants::DPP
2895   };
2896 
2897   return makeArrayRef(Variants);
2898 }
2899 
2900 // What asm variants we should check
2901 ArrayRef<unsigned> AMDGPUAsmParser::getMatchedVariants() const {
2902   if (getForcedEncodingSize() == 32) {
2903     static const unsigned Variants[] = {AMDGPUAsmVariants::DEFAULT};
2904     return makeArrayRef(Variants);
2905   }
2906 
2907   if (isForcedVOP3()) {
2908     static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3};
2909     return makeArrayRef(Variants);
2910   }
2911 
2912   if (isForcedSDWA()) {
2913     static const unsigned Variants[] = {AMDGPUAsmVariants::SDWA,
2914                                         AMDGPUAsmVariants::SDWA9};
2915     return makeArrayRef(Variants);
2916   }
2917 
2918   if (isForcedDPP()) {
2919     static const unsigned Variants[] = {AMDGPUAsmVariants::DPP};
2920     return makeArrayRef(Variants);
2921   }
2922 
2923   return getAllVariants();
2924 }
2925 
2926 StringRef AMDGPUAsmParser::getMatchedVariantName() const {
2927   if (getForcedEncodingSize() == 32)
2928     return "e32";
2929 
2930   if (isForcedVOP3())
2931     return "e64";
2932 
2933   if (isForcedSDWA())
2934     return "sdwa";
2935 
2936   if (isForcedDPP())
2937     return "dpp";
2938 
2939   return "";
2940 }
2941 
2942 unsigned AMDGPUAsmParser::findImplicitSGPRReadInVOP(const MCInst &Inst) const {
2943   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
2944   const unsigned Num = Desc.getNumImplicitUses();
2945   for (unsigned i = 0; i < Num; ++i) {
2946     unsigned Reg = Desc.ImplicitUses[i];
2947     switch (Reg) {
2948     case AMDGPU::FLAT_SCR:
2949     case AMDGPU::VCC:
2950     case AMDGPU::VCC_LO:
2951     case AMDGPU::VCC_HI:
2952     case AMDGPU::M0:
2953       return Reg;
2954     default:
2955       break;
2956     }
2957   }
2958   return AMDGPU::NoRegister;
2959 }
2960 
2961 // NB: This code is correct only when used to check constant
2962 // bus limitations because GFX7 supports no f16 inline constants.
2963 // Note that there are no cases when a GFX7 opcode violates
2964 // constant bus limitations due to the use of an f16 constant.
2965 bool AMDGPUAsmParser::isInlineConstant(const MCInst &Inst,
2966                                        unsigned OpIdx) const {
2967   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
2968 
2969   if (!AMDGPU::isSISrcOperand(Desc, OpIdx)) {
2970     return false;
2971   }
2972 
2973   const MCOperand &MO = Inst.getOperand(OpIdx);
2974 
2975   int64_t Val = MO.getImm();
2976   auto OpSize = AMDGPU::getOperandSize(Desc, OpIdx);
2977 
2978   switch (OpSize) { // expected operand size
2979   case 8:
2980     return AMDGPU::isInlinableLiteral64(Val, hasInv2PiInlineImm());
2981   case 4:
2982     return AMDGPU::isInlinableLiteral32(Val, hasInv2PiInlineImm());
2983   case 2: {
2984     const unsigned OperandType = Desc.OpInfo[OpIdx].OperandType;
2985     if (OperandType == AMDGPU::OPERAND_REG_IMM_INT16 ||
2986         OperandType == AMDGPU::OPERAND_REG_INLINE_C_INT16 ||
2987         OperandType == AMDGPU::OPERAND_REG_INLINE_AC_INT16)
2988       return AMDGPU::isInlinableIntLiteral(Val);
2989 
2990     if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2INT16 ||
2991         OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2INT16 ||
2992         OperandType == AMDGPU::OPERAND_REG_IMM_V2INT16)
2993       return AMDGPU::isInlinableIntLiteralV216(Val);
2994 
2995     if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2FP16 ||
2996         OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2FP16 ||
2997         OperandType == AMDGPU::OPERAND_REG_IMM_V2FP16)
2998       return AMDGPU::isInlinableLiteralV216(Val, hasInv2PiInlineImm());
2999 
3000     return AMDGPU::isInlinableLiteral16(Val, hasInv2PiInlineImm());
3001   }
3002   default:
3003     llvm_unreachable("invalid operand size");
3004   }
3005 }
3006 
3007 unsigned AMDGPUAsmParser::getConstantBusLimit(unsigned Opcode) const {
3008   if (!isGFX10Plus())
3009     return 1;
3010 
3011   switch (Opcode) {
3012   // 64-bit shift instructions can use only one scalar value input
3013   case AMDGPU::V_LSHLREV_B64_e64:
3014   case AMDGPU::V_LSHLREV_B64_gfx10:
3015   case AMDGPU::V_LSHRREV_B64_e64:
3016   case AMDGPU::V_LSHRREV_B64_gfx10:
3017   case AMDGPU::V_ASHRREV_I64_e64:
3018   case AMDGPU::V_ASHRREV_I64_gfx10:
3019   case AMDGPU::V_LSHL_B64_e64:
3020   case AMDGPU::V_LSHR_B64_e64:
3021   case AMDGPU::V_ASHR_I64_e64:
3022     return 1;
3023   default:
3024     return 2;
3025   }
3026 }
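// For illustration: on GFX10 a VOP3 opcode such as v_add_f32_e64 v0, s1, s2
// may read two different SGPRs, while the 64-bit shifts listed above and all
// pre-GFX10 opcodes are limited to a single scalar value (one SGPR or one
// literal).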
3027 
3028 bool AMDGPUAsmParser::usesConstantBus(const MCInst &Inst, unsigned OpIdx) {
3029   const MCOperand &MO = Inst.getOperand(OpIdx);
3030   if (MO.isImm()) {
3031     return !isInlineConstant(Inst, OpIdx);
3032   } else if (MO.isReg()) {
3033     auto Reg = MO.getReg();
3034     const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3035     auto PReg = mc2PseudoReg(Reg);
3036     return isSGPR(PReg, TRI) && PReg != SGPR_NULL;
3037   } else {
3038     return true;
3039   }
3040 }
3041 
3042 bool
3043 AMDGPUAsmParser::validateConstantBusLimitations(const MCInst &Inst,
3044                                                 const OperandVector &Operands) {
3045   const unsigned Opcode = Inst.getOpcode();
3046   const MCInstrDesc &Desc = MII.get(Opcode);
3047   unsigned LastSGPR = AMDGPU::NoRegister;
3048   unsigned ConstantBusUseCount = 0;
3049   unsigned NumLiterals = 0;
3050   unsigned LiteralSize;
3051 
3052   if (Desc.TSFlags &
3053       (SIInstrFlags::VOPC |
3054        SIInstrFlags::VOP1 | SIInstrFlags::VOP2 |
3055        SIInstrFlags::VOP3 | SIInstrFlags::VOP3P |
3056        SIInstrFlags::SDWA)) {
3057     // Check special imm operands (used by madmk, etc)
3058     if (AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm) != -1) {
3059       ++ConstantBusUseCount;
3060     }
3061 
3062     SmallDenseSet<unsigned> SGPRsUsed;
3063     unsigned SGPRUsed = findImplicitSGPRReadInVOP(Inst);
3064     if (SGPRUsed != AMDGPU::NoRegister) {
3065       SGPRsUsed.insert(SGPRUsed);
3066       ++ConstantBusUseCount;
3067     }
3068 
3069     const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
3070     const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
3071     const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
3072 
3073     const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx };
3074 
3075     for (int OpIdx : OpIndices) {
3076       if (OpIdx == -1) break;
3077 
3078       const MCOperand &MO = Inst.getOperand(OpIdx);
3079       if (usesConstantBus(Inst, OpIdx)) {
3080         if (MO.isReg()) {
3081           LastSGPR = mc2PseudoReg(MO.getReg());
3082           // Pairs of registers with partial intersections like these
3083           //   s0, s[0:1]
3084           //   flat_scratch_lo, flat_scratch
3085           //   flat_scratch_lo, flat_scratch_hi
3086           // are theoretically valid but they are disabled anyway.
3087           // Note that this code mimics SIInstrInfo::verifyInstruction
3088           if (!SGPRsUsed.count(LastSGPR)) {
3089             SGPRsUsed.insert(LastSGPR);
3090             ++ConstantBusUseCount;
3091           }
3092         } else { // Expression or a literal
3093 
3094           if (Desc.OpInfo[OpIdx].OperandType == MCOI::OPERAND_IMMEDIATE)
3095             continue; // special operand like VINTERP attr_chan
3096 
3097           // An instruction may use only one literal.
3098           // This has been validated on the previous step.
3099           // See validateVOP3Literal.
3100           // This literal may be used as more than one operand.
3101           // If all these operands are of the same size,
3102           // this literal counts as one scalar value.
3103           // Otherwise it counts as 2 scalar values.
3104           // See "GFX10 Shader Programming", section 3.6.2.3.
3105 
3106           unsigned Size = AMDGPU::getOperandSize(Desc, OpIdx);
3107           if (Size < 4) Size = 4;
3108 
3109           if (NumLiterals == 0) {
3110             NumLiterals = 1;
3111             LiteralSize = Size;
3112           } else if (LiteralSize != Size) {
3113             NumLiterals = 2;
3114           }
3115         }
3116       }
3117     }
3118   }
3119   ConstantBusUseCount += NumLiterals;
3120 
3121   if (ConstantBusUseCount <= getConstantBusLimit(Opcode))
3122     return true;
3123 
3124   SMLoc LitLoc = getLitLoc(Operands);
3125   SMLoc RegLoc = getRegLoc(LastSGPR, Operands);
3126   SMLoc Loc = (LitLoc.getPointer() < RegLoc.getPointer()) ? RegLoc : LitLoc;
3127   Error(Loc, "invalid operand (violates constant bus restrictions)");
3128   return false;
3129 }
3130 
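// For instructions whose vdst carries an earlyclobber constraint, the
// destination register must not overlap any source register.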
3131 bool
3132 AMDGPUAsmParser::validateEarlyClobberLimitations(const MCInst &Inst,
3133                                                  const OperandVector &Operands) {
3134   const unsigned Opcode = Inst.getOpcode();
3135   const MCInstrDesc &Desc = MII.get(Opcode);
3136 
3137   const int DstIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vdst);
3138   if (DstIdx == -1 ||
3139       Desc.getOperandConstraint(DstIdx, MCOI::EARLY_CLOBBER) == -1) {
3140     return true;
3141   }
3142 
3143   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3144 
3145   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
3146   const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
3147   const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
3148 
3149   assert(DstIdx != -1);
3150   const MCOperand &Dst = Inst.getOperand(DstIdx);
3151   assert(Dst.isReg());
3152   const unsigned DstReg = mc2PseudoReg(Dst.getReg());
3153 
3154   const int SrcIndices[] = { Src0Idx, Src1Idx, Src2Idx };
3155 
3156   for (int SrcIdx : SrcIndices) {
3157     if (SrcIdx == -1) break;
3158     const MCOperand &Src = Inst.getOperand(SrcIdx);
3159     if (Src.isReg()) {
3160       const unsigned SrcReg = mc2PseudoReg(Src.getReg());
3161       if (isRegIntersect(DstReg, SrcReg, TRI)) {
3162         Error(getRegLoc(SrcReg, Operands),
3163           "destination must be different than all sources");
3164         return false;
3165       }
3166     }
3167   }
3168 
3169   return true;
3170 }
3171 
3172 bool AMDGPUAsmParser::validateIntClampSupported(const MCInst &Inst) {
3173 
3174   const unsigned Opc = Inst.getOpcode();
3175   const MCInstrDesc &Desc = MII.get(Opc);
3176 
3177   if ((Desc.TSFlags & SIInstrFlags::IntClamp) != 0 && !hasIntClamp()) {
3178     int ClampIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp);
3179     assert(ClampIdx != -1);
3180     return Inst.getOperand(ClampIdx).getImm() == 0;
3181   }
3182 
3183   return true;
3184 }
3185 
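// The width of the vdata operand of an image instruction must match the
// number of channels enabled by dmask, plus one dword when tfe is set, halved
// when packed d16 is in effect (gather4 always returns four channels). For
// example, dmask:0x7 with neither tfe nor d16 requires a three-dword vdata.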
3186 bool AMDGPUAsmParser::validateMIMGDataSize(const MCInst &Inst) {
3187 
3188   const unsigned Opc = Inst.getOpcode();
3189   const MCInstrDesc &Desc = MII.get(Opc);
3190 
3191   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3192     return true;
3193 
3194   int VDataIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdata);
3195   int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
3196   int TFEIdx   = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::tfe);
3197 
3198   assert(VDataIdx != -1);
3199 
3200   if (DMaskIdx == -1 || TFEIdx == -1) // intersect_ray
3201     return true;
3202 
3203   unsigned VDataSize = AMDGPU::getRegOperandSize(getMRI(), Desc, VDataIdx);
3204   unsigned TFESize = Inst.getOperand(TFEIdx).getImm() ? 1 : 0;
3205   unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
3206   if (DMask == 0)
3207     DMask = 1;
3208 
3209   unsigned DataSize =
3210     (Desc.TSFlags & SIInstrFlags::Gather4) ? 4 : countPopulation(DMask);
3211   if (hasPackedD16()) {
3212     int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
3213     if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm())
3214       DataSize = (DataSize + 1) / 2;
3215   }
3216 
3217   return (VDataSize / 4) == DataSize + TFESize;
3218 }
3219 
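// On GFX10+, the number of address dwords (or NSA address operands) must
// match what the dim, gradient, coordinate and LOD/clamp/mip arguments
// require; in non-NSA form the required count is rounded up to 8 or 16 when
// it exceeds 4.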
3220 bool AMDGPUAsmParser::validateMIMGAddrSize(const MCInst &Inst) {
3221   const unsigned Opc = Inst.getOpcode();
3222   const MCInstrDesc &Desc = MII.get(Opc);
3223 
3224   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0 || !isGFX10Plus())
3225     return true;
3226 
3227   const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc);
3228 
3229   const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
3230       AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode);
3231   int VAddr0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr0);
3232   int SrsrcIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::srsrc);
3233   int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
3234 
3235   assert(VAddr0Idx != -1);
3236   assert(SrsrcIdx != -1);
3237   assert(SrsrcIdx > VAddr0Idx);
3238 
3239   if (DimIdx == -1)
3240     return true; // intersect_ray
3241 
3242   unsigned Dim = Inst.getOperand(DimIdx).getImm();
3243   const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim);
3244   bool IsNSA = SrsrcIdx - VAddr0Idx > 1;
3245   unsigned VAddrSize =
3246       IsNSA ? SrsrcIdx - VAddr0Idx
3247             : AMDGPU::getRegOperandSize(getMRI(), Desc, VAddr0Idx) / 4;
3248 
3249   unsigned AddrSize = BaseOpcode->NumExtraArgs +
3250                       (BaseOpcode->Gradients ? DimInfo->NumGradients : 0) +
3251                       (BaseOpcode->Coordinates ? DimInfo->NumCoords : 0) +
3252                       (BaseOpcode->LodOrClampOrMip ? 1 : 0);
3253   if (!IsNSA) {
3254     if (AddrSize > 8)
3255       AddrSize = 16;
3256     else if (AddrSize > 4)
3257       AddrSize = 8;
3258   }
3259 
3260   return VAddrSize == AddrSize;
3261 }
3262 
3263 bool AMDGPUAsmParser::validateMIMGAtomicDMask(const MCInst &Inst) {
3264 
3265   const unsigned Opc = Inst.getOpcode();
3266   const MCInstrDesc &Desc = MII.get(Opc);
3267 
3268   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3269     return true;
3270   if (!Desc.mayLoad() || !Desc.mayStore())
3271     return true; // Not atomic
3272 
3273   int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
3274   unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
3275 
3276   // This is an incomplete check because image_atomic_cmpswap
3277   // may only use 0x3 and 0xf while other atomic operations
3278   // may use 0x1 and 0x3. However these limitations are
3279   // verified when we check that dmask matches dst size.
3280   return DMask == 0x1 || DMask == 0x3 || DMask == 0xf;
3281 }
3282 
3283 bool AMDGPUAsmParser::validateMIMGGatherDMask(const MCInst &Inst) {
3284 
3285   const unsigned Opc = Inst.getOpcode();
3286   const MCInstrDesc &Desc = MII.get(Opc);
3287 
3288   if ((Desc.TSFlags & SIInstrFlags::Gather4) == 0)
3289     return true;
3290 
3291   int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
3292   unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
3293 
3294   // GATHER4 instructions use dmask in a different fashion compared to
3295   // other MIMG instructions. The only useful DMASK values are
3296   // 1=red, 2=green, 4=blue, 8=alpha. (e.g. 1 returns
3297   // (red,red,red,red) etc.) The ISA document doesn't mention
3298   // this.
3299   return DMask == 0x1 || DMask == 0x2 || DMask == 0x4 || DMask == 0x8;
3300 }
3301 
3302 static bool IsMovrelsSDWAOpcode(const unsigned Opcode)
3303 {
3304   switch (Opcode) {
3305   case AMDGPU::V_MOVRELS_B32_sdwa_gfx10:
3306   case AMDGPU::V_MOVRELSD_B32_sdwa_gfx10:
3307   case AMDGPU::V_MOVRELSD_2_B32_sdwa_gfx10:
3308     return true;
3309   default:
3310     return false;
3311   }
3312 }
3313 
3314 // movrels* opcodes should only allow VGPRs as src0.
3315 // This is specified in .td description for vop1/vop3,
3316 // but sdwa is handled differently. See isSDWAOperand.
3317 bool AMDGPUAsmParser::validateMovrels(const MCInst &Inst,
3318                                       const OperandVector &Operands) {
3319 
3320   const unsigned Opc = Inst.getOpcode();
3321   const MCInstrDesc &Desc = MII.get(Opc);
3322 
3323   if ((Desc.TSFlags & SIInstrFlags::SDWA) == 0 || !IsMovrelsSDWAOpcode(Opc))
3324     return true;
3325 
3326   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
3327   assert(Src0Idx != -1);
3328 
3329   SMLoc ErrLoc;
3330   const MCOperand &Src0 = Inst.getOperand(Src0Idx);
3331   if (Src0.isReg()) {
3332     auto Reg = mc2PseudoReg(Src0.getReg());
3333     const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3334     if (!isSGPR(Reg, TRI))
3335       return true;
3336     ErrLoc = getRegLoc(Reg, Operands);
3337   } else {
3338     ErrLoc = getConstLoc(Operands);
3339   }
3340 
3341   Error(ErrLoc, "source operand must be a VGPR");
3342   return false;
3343 }
3344 
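// v_accvgpr_write takes a VGPR or an inline constant as src0, but not an
// SGPR; e.g. something like "v_accvgpr_write_b32 a0, s0" should be rejected.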
3345 bool AMDGPUAsmParser::validateMAIAccWrite(const MCInst &Inst,
3346                                           const OperandVector &Operands) {
3347 
3348   const unsigned Opc = Inst.getOpcode();
3349 
3350   if (Opc != AMDGPU::V_ACCVGPR_WRITE_B32_vi)
3351     return true;
3352 
3353   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
3354   assert(Src0Idx != -1);
3355 
3356   const MCOperand &Src0 = Inst.getOperand(Src0Idx);
3357   if (!Src0.isReg())
3358     return true;
3359 
3360   auto Reg = mc2PseudoReg(Src0.getReg());
3361   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3362   if (isSGPR(Reg, TRI)) {
3363     Error(getRegLoc(Reg, Operands),
3364           "source operand must be either a VGPR or an inline constant");
3365     return false;
3366   }
3367 
3368   return true;
3369 }
3370 
3371 bool AMDGPUAsmParser::validateDivScale(const MCInst &Inst) {
3372   switch (Inst.getOpcode()) {
3373   default:
3374     return true;
3375   case V_DIV_SCALE_F32_gfx6_gfx7:
3376   case V_DIV_SCALE_F32_vi:
3377   case V_DIV_SCALE_F32_gfx10:
3378   case V_DIV_SCALE_F64_gfx6_gfx7:
3379   case V_DIV_SCALE_F64_vi:
3380   case V_DIV_SCALE_F64_gfx10:
3381     break;
3382   }
3383 
3384   // TODO: Check that src0 = src1 or src2.
3385 
3386   for (auto Name : {AMDGPU::OpName::src0_modifiers,
3387                     AMDGPU::OpName::src1_modifiers,
3388                     AMDGPU::OpName::src2_modifiers}) {
3389     if (Inst.getOperand(AMDGPU::getNamedOperandIdx(Inst.getOpcode(), Name))
3390             .getImm() &
3391         SISrcMods::ABS) {
3392       return false;
3393     }
3394   }
3395 
3396   return true;
3397 }
3398 
3399 bool AMDGPUAsmParser::validateMIMGD16(const MCInst &Inst) {
3400 
3401   const unsigned Opc = Inst.getOpcode();
3402   const MCInstrDesc &Desc = MII.get(Opc);
3403 
3404   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3405     return true;
3406 
3407   int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
3408   if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm()) {
3409     if (isCI() || isSI())
3410       return false;
3411   }
3412 
3413   return true;
3414 }
3415 
3416 bool AMDGPUAsmParser::validateMIMGDim(const MCInst &Inst) {
3417   const unsigned Opc = Inst.getOpcode();
3418   const MCInstrDesc &Desc = MII.get(Opc);
3419 
3420   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3421     return true;
3422 
3423   int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
3424   if (DimIdx < 0)
3425     return true;
3426 
3427   long Imm = Inst.getOperand(DimIdx).getImm();
3428   if (Imm < 0 || Imm >= 8)
3429     return false;
3430 
3431   return true;
3432 }
3433 
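// Opcodes which take their operands in reversed order (e.g. v_subrev_*
// computes src1 - src0, v_lshlrev_* shifts src1 by src0). lds_direct cannot
// be used as src0 of these encodings; see validateLdsDirect below.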
3434 static bool IsRevOpcode(const unsigned Opcode)
3435 {
3436   switch (Opcode) {
3437   case AMDGPU::V_SUBREV_F32_e32:
3438   case AMDGPU::V_SUBREV_F32_e64:
3439   case AMDGPU::V_SUBREV_F32_e32_gfx10:
3440   case AMDGPU::V_SUBREV_F32_e32_gfx6_gfx7:
3441   case AMDGPU::V_SUBREV_F32_e32_vi:
3442   case AMDGPU::V_SUBREV_F32_e64_gfx10:
3443   case AMDGPU::V_SUBREV_F32_e64_gfx6_gfx7:
3444   case AMDGPU::V_SUBREV_F32_e64_vi:
3445 
3446   case AMDGPU::V_SUBREV_CO_U32_e32:
3447   case AMDGPU::V_SUBREV_CO_U32_e64:
3448   case AMDGPU::V_SUBREV_I32_e32_gfx6_gfx7:
3449   case AMDGPU::V_SUBREV_I32_e64_gfx6_gfx7:
3450 
3451   case AMDGPU::V_SUBBREV_U32_e32:
3452   case AMDGPU::V_SUBBREV_U32_e64:
3453   case AMDGPU::V_SUBBREV_U32_e32_gfx6_gfx7:
3454   case AMDGPU::V_SUBBREV_U32_e32_vi:
3455   case AMDGPU::V_SUBBREV_U32_e64_gfx6_gfx7:
3456   case AMDGPU::V_SUBBREV_U32_e64_vi:
3457 
3458   case AMDGPU::V_SUBREV_U32_e32:
3459   case AMDGPU::V_SUBREV_U32_e64:
3460   case AMDGPU::V_SUBREV_U32_e32_gfx9:
3461   case AMDGPU::V_SUBREV_U32_e32_vi:
3462   case AMDGPU::V_SUBREV_U32_e64_gfx9:
3463   case AMDGPU::V_SUBREV_U32_e64_vi:
3464 
3465   case AMDGPU::V_SUBREV_F16_e32:
3466   case AMDGPU::V_SUBREV_F16_e64:
3467   case AMDGPU::V_SUBREV_F16_e32_gfx10:
3468   case AMDGPU::V_SUBREV_F16_e32_vi:
3469   case AMDGPU::V_SUBREV_F16_e64_gfx10:
3470   case AMDGPU::V_SUBREV_F16_e64_vi:
3471 
3472   case AMDGPU::V_SUBREV_U16_e32:
3473   case AMDGPU::V_SUBREV_U16_e64:
3474   case AMDGPU::V_SUBREV_U16_e32_vi:
3475   case AMDGPU::V_SUBREV_U16_e64_vi:
3476 
3477   case AMDGPU::V_SUBREV_CO_U32_e32_gfx9:
3478   case AMDGPU::V_SUBREV_CO_U32_e64_gfx10:
3479   case AMDGPU::V_SUBREV_CO_U32_e64_gfx9:
3480 
3481   case AMDGPU::V_SUBBREV_CO_U32_e32_gfx9:
3482   case AMDGPU::V_SUBBREV_CO_U32_e64_gfx9:
3483 
3484   case AMDGPU::V_SUBREV_NC_U32_e32_gfx10:
3485   case AMDGPU::V_SUBREV_NC_U32_e64_gfx10:
3486 
3487   case AMDGPU::V_SUBREV_CO_CI_U32_e32_gfx10:
3488   case AMDGPU::V_SUBREV_CO_CI_U32_e64_gfx10:
3489 
3490   case AMDGPU::V_LSHRREV_B32_e32:
3491   case AMDGPU::V_LSHRREV_B32_e64:
3492   case AMDGPU::V_LSHRREV_B32_e32_gfx6_gfx7:
3493   case AMDGPU::V_LSHRREV_B32_e64_gfx6_gfx7:
3494   case AMDGPU::V_LSHRREV_B32_e32_vi:
3495   case AMDGPU::V_LSHRREV_B32_e64_vi:
3496   case AMDGPU::V_LSHRREV_B32_e32_gfx10:
3497   case AMDGPU::V_LSHRREV_B32_e64_gfx10:
3498 
3499   case AMDGPU::V_ASHRREV_I32_e32:
3500   case AMDGPU::V_ASHRREV_I32_e64:
3501   case AMDGPU::V_ASHRREV_I32_e32_gfx10:
3502   case AMDGPU::V_ASHRREV_I32_e32_gfx6_gfx7:
3503   case AMDGPU::V_ASHRREV_I32_e32_vi:
3504   case AMDGPU::V_ASHRREV_I32_e64_gfx10:
3505   case AMDGPU::V_ASHRREV_I32_e64_gfx6_gfx7:
3506   case AMDGPU::V_ASHRREV_I32_e64_vi:
3507 
3508   case AMDGPU::V_LSHLREV_B32_e32:
3509   case AMDGPU::V_LSHLREV_B32_e64:
3510   case AMDGPU::V_LSHLREV_B32_e32_gfx10:
3511   case AMDGPU::V_LSHLREV_B32_e32_gfx6_gfx7:
3512   case AMDGPU::V_LSHLREV_B32_e32_vi:
3513   case AMDGPU::V_LSHLREV_B32_e64_gfx10:
3514   case AMDGPU::V_LSHLREV_B32_e64_gfx6_gfx7:
3515   case AMDGPU::V_LSHLREV_B32_e64_vi:
3516 
3517   case AMDGPU::V_LSHLREV_B16_e32:
3518   case AMDGPU::V_LSHLREV_B16_e64:
3519   case AMDGPU::V_LSHLREV_B16_e32_vi:
3520   case AMDGPU::V_LSHLREV_B16_e64_vi:
3521   case AMDGPU::V_LSHLREV_B16_gfx10:
3522 
3523   case AMDGPU::V_LSHRREV_B16_e32:
3524   case AMDGPU::V_LSHRREV_B16_e64:
3525   case AMDGPU::V_LSHRREV_B16_e32_vi:
3526   case AMDGPU::V_LSHRREV_B16_e64_vi:
3527   case AMDGPU::V_LSHRREV_B16_gfx10:
3528 
3529   case AMDGPU::V_ASHRREV_I16_e32:
3530   case AMDGPU::V_ASHRREV_I16_e64:
3531   case AMDGPU::V_ASHRREV_I16_e32_vi:
3532   case AMDGPU::V_ASHRREV_I16_e64_vi:
3533   case AMDGPU::V_ASHRREV_I16_gfx10:
3534 
3535   case AMDGPU::V_LSHLREV_B64_e64:
3536   case AMDGPU::V_LSHLREV_B64_gfx10:
3537   case AMDGPU::V_LSHLREV_B64_vi:
3538 
3539   case AMDGPU::V_LSHRREV_B64_e64:
3540   case AMDGPU::V_LSHRREV_B64_gfx10:
3541   case AMDGPU::V_LSHRREV_B64_vi:
3542 
3543   case AMDGPU::V_ASHRREV_I64_e64:
3544   case AMDGPU::V_ASHRREV_I64_gfx10:
3545   case AMDGPU::V_ASHRREV_I64_vi:
3546 
3547   case AMDGPU::V_PK_LSHLREV_B16:
3548   case AMDGPU::V_PK_LSHLREV_B16_gfx10:
3549   case AMDGPU::V_PK_LSHLREV_B16_vi:
3550 
3551   case AMDGPU::V_PK_LSHRREV_B16:
3552   case AMDGPU::V_PK_LSHRREV_B16_gfx10:
3553   case AMDGPU::V_PK_LSHRREV_B16_vi:
3554   case AMDGPU::V_PK_ASHRREV_I16:
3555   case AMDGPU::V_PK_ASHRREV_I16_gfx10:
3556   case AMDGPU::V_PK_ASHRREV_I16_vi:
3557     return true;
3558   default:
3559     return false;
3560   }
3561 }
3562 
3563 bool AMDGPUAsmParser::validateLdsDirect(const MCInst &Inst) {
3564 
3565   using namespace SIInstrFlags;
3566   const unsigned Opcode = Inst.getOpcode();
3567   const MCInstrDesc &Desc = MII.get(Opcode);
3568 
3569   // lds_direct register is defined so that it can be used
3570   // with 9-bit operands only. Ignore encodings which do not accept these.
3571   if ((Desc.TSFlags & (VOP1 | VOP2 | VOP3 | VOPC | VOP3P | SIInstrFlags::SDWA)) == 0)
3572     return true;
3573 
3574   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
3575   const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
3576   const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
3577 
3578   const int SrcIndices[] = { Src1Idx, Src2Idx };
3579 
3580   // lds_direct cannot be specified as either src1 or src2.
3581   for (int SrcIdx : SrcIndices) {
3582     if (SrcIdx == -1) break;
3583     const MCOperand &Src = Inst.getOperand(SrcIdx);
3584     if (Src.isReg() && Src.getReg() == LDS_DIRECT) {
3585       return false;
3586     }
3587   }
3588 
3589   if (Src0Idx == -1)
3590     return true;
3591 
3592   const MCOperand &Src = Inst.getOperand(Src0Idx);
3593   if (!Src.isReg() || Src.getReg() != LDS_DIRECT)
3594     return true;
3595 
3596   // lds_direct is specified as src0. Check additional limitations.
3597   return (Desc.TSFlags & SIInstrFlags::SDWA) == 0 && !IsRevOpcode(Opcode);
3598 }
3599 
3600 SMLoc AMDGPUAsmParser::getFlatOffsetLoc(const OperandVector &Operands) const {
3601   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
3602     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
3603     if (Op.isFlatOffset())
3604       return Op.getStartLoc();
3605   }
3606   return getLoc();
3607 }
3608 
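// FLAT offsets are target dependent: global and scratch segment instructions
// take a signed offset, plain FLAT instructions take an unsigned one, and
// targets without flat offsets accept only a zero offset.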
3609 bool AMDGPUAsmParser::validateFlatOffset(const MCInst &Inst,
3610                                          const OperandVector &Operands) {
3611   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
3612   if ((TSFlags & SIInstrFlags::FLAT) == 0)
3613     return true;
3614 
3615   auto Opcode = Inst.getOpcode();
3616   auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
3617   assert(OpNum != -1);
3618 
3619   const auto &Op = Inst.getOperand(OpNum);
3620   if (!hasFlatOffsets() && Op.getImm() != 0) {
3621     Error(getFlatOffsetLoc(Operands),
3622           "flat offset modifier is not supported on this GPU");
3623     return false;
3624   }
3625 
3626   // For FLAT segment the offset must be positive;
3627   // MSB is ignored and forced to zero.
3628   if (TSFlags & (SIInstrFlags::IsFlatGlobal | SIInstrFlags::IsFlatScratch)) {
3629     unsigned OffsetSize = AMDGPU::getNumFlatOffsetBits(getSTI(), true);
3630     if (!isIntN(OffsetSize, Op.getImm())) {
3631       Error(getFlatOffsetLoc(Operands),
3632             Twine("expected a ") + Twine(OffsetSize) + "-bit signed offset");
3633       return false;
3634     }
3635   } else {
3636     unsigned OffsetSize = AMDGPU::getNumFlatOffsetBits(getSTI(), false);
3637     if (!isUIntN(OffsetSize, Op.getImm())) {
3638       Error(getFlatOffsetLoc(Operands),
3639             Twine("expected a ") + Twine(OffsetSize) + "-bit unsigned offset");
3640       return false;
3641     }
3642   }
3643 
3644   return true;
3645 }
3646 
3647 SMLoc AMDGPUAsmParser::getSMEMOffsetLoc(const OperandVector &Operands) const {
3648   // Start with second operand because SMEM Offset cannot be dst or src0.
3649   for (unsigned i = 2, e = Operands.size(); i != e; ++i) {
3650     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
3651     if (Op.isSMEMOffset())
3652       return Op.getStartLoc();
3653   }
3654   return getLoc();
3655 }
3656 
3657 bool AMDGPUAsmParser::validateSMEMOffset(const MCInst &Inst,
3658                                          const OperandVector &Operands) {
3659   if (isCI() || isSI())
3660     return true;
3661 
3662   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
3663   if ((TSFlags & SIInstrFlags::SMRD) == 0)
3664     return true;
3665 
3666   auto Opcode = Inst.getOpcode();
3667   auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
3668   if (OpNum == -1)
3669     return true;
3670 
3671   const auto &Op = Inst.getOperand(OpNum);
3672   if (!Op.isImm())
3673     return true;
3674 
3675   uint64_t Offset = Op.getImm();
3676   bool IsBuffer = AMDGPU::getSMEMIsBuffer(Opcode);
3677   if (AMDGPU::isLegalSMRDEncodedUnsignedOffset(getSTI(), Offset) ||
3678       AMDGPU::isLegalSMRDEncodedSignedOffset(getSTI(), Offset, IsBuffer))
3679     return true;
3680 
3681   Error(getSMEMOffsetLoc(Operands),
3682         (isVI() || IsBuffer) ? "expected a 20-bit unsigned offset" :
3683                                "expected a 21-bit signed offset");
3684 
3685   return false;
3686 }
3687 
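// SOP2/SOPC instructions can encode at most one unique 32-bit literal across
// their two source operands (the same literal value may be repeated).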
3688 bool AMDGPUAsmParser::validateSOPLiteral(const MCInst &Inst) const {
3689   unsigned Opcode = Inst.getOpcode();
3690   const MCInstrDesc &Desc = MII.get(Opcode);
3691   if (!(Desc.TSFlags & (SIInstrFlags::SOP2 | SIInstrFlags::SOPC)))
3692     return true;
3693 
3694   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
3695   const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
3696 
3697   const int OpIndices[] = { Src0Idx, Src1Idx };
3698 
3699   unsigned NumExprs = 0;
3700   unsigned NumLiterals = 0;
3701   uint32_t LiteralValue;
3702 
3703   for (int OpIdx : OpIndices) {
3704     if (OpIdx == -1) break;
3705 
3706     const MCOperand &MO = Inst.getOperand(OpIdx);
3707     // Exclude special imm operands (like that used by s_set_gpr_idx_on)
3708     if (AMDGPU::isSISrcOperand(Desc, OpIdx)) {
3709       if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) {
3710         uint32_t Value = static_cast<uint32_t>(MO.getImm());
3711         if (NumLiterals == 0 || LiteralValue != Value) {
3712           LiteralValue = Value;
3713           ++NumLiterals;
3714         }
3715       } else if (MO.isExpr()) {
3716         ++NumExprs;
3717       }
3718     }
3719   }
3720 
3721   return NumLiterals + NumExprs <= 1;
3722 }
3723 
3724 bool AMDGPUAsmParser::validateOpSel(const MCInst &Inst) {
3725   const unsigned Opc = Inst.getOpcode();
3726   if (Opc == AMDGPU::V_PERMLANE16_B32_gfx10 ||
3727       Opc == AMDGPU::V_PERMLANEX16_B32_gfx10) {
3728     int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
3729     unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
3730 
3731     if (OpSel & ~3)
3732       return false;
3733   }
3734   return true;
3735 }
3736 
3737 // Check if VCC register matches wavefront size
3738 bool AMDGPUAsmParser::validateVccOperand(unsigned Reg) const {
3739   auto FB = getFeatureBits();
3740   return (FB[AMDGPU::FeatureWavefrontSize64] && Reg == AMDGPU::VCC) ||
3741     (FB[AMDGPU::FeatureWavefrontSize32] && Reg == AMDGPU::VCC_LO);
3742 }
3743 
3744 // A VOP3 literal is only allowed on GFX10+, and only one may be used.
3745 bool AMDGPUAsmParser::validateVOP3Literal(const MCInst &Inst,
3746                                           const OperandVector &Operands) {
3747   unsigned Opcode = Inst.getOpcode();
3748   const MCInstrDesc &Desc = MII.get(Opcode);
3749   if (!(Desc.TSFlags & (SIInstrFlags::VOP3 | SIInstrFlags::VOP3P)))
3750     return true;
3751 
3752   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
3753   const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
3754   const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
3755 
3756   const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx };
3757 
3758   unsigned NumExprs = 0;
3759   unsigned NumLiterals = 0;
3760   uint32_t LiteralValue;
3761 
3762   for (int OpIdx : OpIndices) {
3763     if (OpIdx == -1) break;
3764 
3765     const MCOperand &MO = Inst.getOperand(OpIdx);
3766     if (!MO.isImm() && !MO.isExpr())
3767       continue;
3768     if (!AMDGPU::isSISrcOperand(Desc, OpIdx))
3769       continue;
3770 
3771     if (OpIdx == Src2Idx && (Desc.TSFlags & SIInstrFlags::IsMAI) &&
3772         getFeatureBits()[AMDGPU::FeatureMFMAInlineLiteralBug]) {
3773       Error(getConstLoc(Operands),
3774             "inline constants are not allowed for this operand");
3775       return false;
3776     }
3777 
3778     if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) {
3779       uint32_t Value = static_cast<uint32_t>(MO.getImm());
3780       if (NumLiterals == 0 || LiteralValue != Value) {
3781         LiteralValue = Value;
3782         ++NumLiterals;
3783       }
3784     } else if (MO.isExpr()) {
3785       ++NumExprs;
3786     }
3787   }
3788   NumLiterals += NumExprs;
3789 
3790   if (!NumLiterals)
3791     return true;
3792 
3793   if (!getFeatureBits()[AMDGPU::FeatureVOP3Literal]) {
3794     Error(getLitLoc(Operands), "literal operands are not supported");
3795     return false;
3796   }
3797 
3798   if (NumLiterals > 1) {
3799     Error(getLitLoc(Operands), "only one literal operand is allowed");
3800     return false;
3801   }
3802 
3803   return true;
3804 }
3805 
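// Instructions declaring a GLC_1 operand (presumably atomics that return a
// value) require "glc" to be spelled out in the source; the default operand
// value of -1 indicates it was omitted.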
3806 bool AMDGPUAsmParser::validateCoherencyBits(const MCInst &Inst,
3807                                             const OperandVector &Operands,
3808                                             const SMLoc &IDLoc) {
3809   int GLCPos = AMDGPU::getNamedOperandIdx(Inst.getOpcode(),
3810                                           AMDGPU::OpName::glc1);
3811   if (GLCPos != -1) {
3812     // -1 is set by GLC_1 default operand. In all cases "glc" must be present
3813     // in the asm string, and the default value means it is not present.
3814     if (Inst.getOperand(GLCPos).getImm() == -1) {
3815       Error(IDLoc, "instruction must use glc");
3816       return false;
3817     }
3818   }
3819 
3820   return true;
3821 }
3822 
3823 bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst,
3824                                           const SMLoc &IDLoc,
3825                                           const OperandVector &Operands) {
3826   if (!validateLdsDirect(Inst)) {
3827     Error(getRegLoc(AMDGPU::LDS_DIRECT, Operands),
3828       "invalid use of lds_direct");
3829     return false;
3830   }
3831   if (!validateSOPLiteral(Inst)) {
3832     Error(getLitLoc(Operands),
3833       "only one literal operand is allowed");
3834     return false;
3835   }
3836   if (!validateVOP3Literal(Inst, Operands)) {
3837     return false;
3838   }
3839   if (!validateConstantBusLimitations(Inst, Operands)) {
3840     return false;
3841   }
3842   if (!validateEarlyClobberLimitations(Inst, Operands)) {
3843     return false;
3844   }
3845   if (!validateIntClampSupported(Inst)) {
3846     Error(getImmLoc(AMDGPUOperand::ImmTyClampSI, Operands),
3847       "integer clamping is not supported on this GPU");
3848     return false;
3849   }
3850   if (!validateOpSel(Inst)) {
3851     Error(getImmLoc(AMDGPUOperand::ImmTyOpSel, Operands),
3852       "invalid op_sel operand");
3853     return false;
3854   }
3855   // For MUBUF/MTBUF, d16 is part of the opcode, so there is nothing to validate.
3856   if (!validateMIMGD16(Inst)) {
3857     Error(getImmLoc(AMDGPUOperand::ImmTyD16, Operands),
3858       "d16 modifier is not supported on this GPU");
3859     return false;
3860   }
3861   if (!validateMIMGDim(Inst)) {
3862     Error(IDLoc, "dim modifier is required on this GPU");
3863     return false;
3864   }
3865   if (!validateMIMGDataSize(Inst)) {
3866     Error(IDLoc,
3867       "image data size does not match dmask and tfe");
3868     return false;
3869   }
3870   if (!validateMIMGAddrSize(Inst)) {
3871     Error(IDLoc,
3872       "image address size does not match dim and a16");
3873     return false;
3874   }
3875   if (!validateMIMGAtomicDMask(Inst)) {
3876     Error(getImmLoc(AMDGPUOperand::ImmTyDMask, Operands),
3877       "invalid atomic image dmask");
3878     return false;
3879   }
3880   if (!validateMIMGGatherDMask(Inst)) {
3881     Error(getImmLoc(AMDGPUOperand::ImmTyDMask, Operands),
3882       "invalid image_gather dmask: only one bit must be set");
3883     return false;
3884   }
3885   if (!validateMovrels(Inst, Operands)) {
3886     return false;
3887   }
3888   if (!validateFlatOffset(Inst, Operands)) {
3889     return false;
3890   }
3891   if (!validateSMEMOffset(Inst, Operands)) {
3892     return false;
3893   }
3894   if (!validateMAIAccWrite(Inst, Operands)) {
3895     return false;
3896   }
3897   if (!validateDivScale(Inst)) {
3898     Error(IDLoc, "ABS not allowed in VOP3B instructions");
3899     return false;
3900   }
3901   if (!validateCoherencyBits(Inst, Operands, IDLoc)) {
3902     return false;
3903   }
3904 
3905   return true;
3906 }
3907 
3908 static std::string AMDGPUMnemonicSpellCheck(StringRef S,
3909                                             const FeatureBitset &FBS,
3910                                             unsigned VariantID = 0);
3911 
3912 static bool AMDGPUCheckMnemonic(StringRef Mnemonic,
3913                                 const FeatureBitset &AvailableFeatures,
3914                                 unsigned VariantID);
3915 
3916 bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo,
3917                                        const FeatureBitset &FBS) {
3918   return isSupportedMnemo(Mnemo, FBS, getAllVariants());
3919 }
3920 
3921 bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo,
3922                                        const FeatureBitset &FBS,
3923                                        ArrayRef<unsigned> Variants) {
3924   for (auto Variant : Variants) {
3925     if (AMDGPUCheckMnemonic(Mnemo, FBS, Variant))
3926       return true;
3927   }
3928 
3929   return false;
3930 }
3931 
3932 bool AMDGPUAsmParser::checkUnsupportedInstruction(StringRef Mnemo,
3933                                                   const SMLoc &IDLoc) {
3934   FeatureBitset FBS = ComputeAvailableFeatures(getSTI().getFeatureBits());
3935 
3936   // Check if requested instruction variant is supported.
3937   if (isSupportedMnemo(Mnemo, FBS, getMatchedVariants()))
3938     return false;
3939 
3940   // This instruction is not supported.
3941   // Clear any other pending errors because they are no longer relevant.
3942   getParser().clearPendingErrors();
3943 
3944   // Requested instruction variant is not supported.
3945   // Check if any other variants are supported.
3946   StringRef VariantName = getMatchedVariantName();
3947   if (!VariantName.empty() && isSupportedMnemo(Mnemo, FBS)) {
3948     return Error(IDLoc,
3949                  Twine(VariantName,
3950                        " variant of this instruction is not supported"));
3951   }
3952 
3953   // Finally check if this instruction is supported on any other GPU.
3954   if (isSupportedMnemo(Mnemo, FeatureBitset().set())) {
3955     return Error(IDLoc, "instruction not supported on this GPU");
3956   }
3957 
3958   // Instruction not supported on any GPU. Probably a typo.
3959   std::string Suggestion = AMDGPUMnemonicSpellCheck(Mnemo, FBS);
3960   return Error(IDLoc, "invalid instruction" + Suggestion);
3961 }
3962 
3963 bool AMDGPUAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
3964                                               OperandVector &Operands,
3965                                               MCStreamer &Out,
3966                                               uint64_t &ErrorInfo,
3967                                               bool MatchingInlineAsm) {
3968   MCInst Inst;
3969   unsigned Result = Match_Success;
3970   for (auto Variant : getMatchedVariants()) {
3971     uint64_t EI;
3972     auto R = MatchInstructionImpl(Operands, Inst, EI, MatchingInlineAsm,
3973                                   Variant);
3974     // We order match statuses from least to most specific and keep the most
3975     // specific status as the result:
3976     // Match_MnemonicFail < Match_InvalidOperand < Match_MissingFeature < Match_PreferE32
3977     if ((R == Match_Success) ||
3978         (R == Match_PreferE32) ||
3979         (R == Match_MissingFeature && Result != Match_PreferE32) ||
3980         (R == Match_InvalidOperand && Result != Match_MissingFeature
3981                                    && Result != Match_PreferE32) ||
3982         (R == Match_MnemonicFail   && Result != Match_InvalidOperand
3983                                    && Result != Match_MissingFeature
3984                                    && Result != Match_PreferE32)) {
3985       Result = R;
3986       ErrorInfo = EI;
3987     }
3988     if (R == Match_Success)
3989       break;
3990   }
3991 
3992   if (Result == Match_Success) {
3993     if (!validateInstruction(Inst, IDLoc, Operands)) {
3994       return true;
3995     }
3996     Inst.setLoc(IDLoc);
3997     Out.emitInstruction(Inst, getSTI());
3998     return false;
3999   }
4000 
4001   StringRef Mnemo = ((AMDGPUOperand &)*Operands[0]).getToken();
4002   if (checkUnsupportedInstruction(Mnemo, IDLoc)) {
4003     return true;
4004   }
4005 
4006   switch (Result) {
4007   default: break;
4008   case Match_MissingFeature:
4009     // It has been verified that the specified instruction
4010     // mnemonic is valid. A match was found but it requires
4011     // features which are not supported on this GPU.
4012     return Error(IDLoc, "operands are not valid for this GPU or mode");
4013 
4014   case Match_InvalidOperand: {
4015     SMLoc ErrorLoc = IDLoc;
4016     if (ErrorInfo != ~0ULL) {
4017       if (ErrorInfo >= Operands.size()) {
4018         return Error(IDLoc, "too few operands for instruction");
4019       }
4020       ErrorLoc = ((AMDGPUOperand &)*Operands[ErrorInfo]).getStartLoc();
4021       if (ErrorLoc == SMLoc())
4022         ErrorLoc = IDLoc;
4023     }
4024     return Error(ErrorLoc, "invalid operand for instruction");
4025   }
4026 
4027   case Match_PreferE32:
4028     return Error(IDLoc, "internal error: instruction without _e64 suffix "
4029                         "should be encoded as e32");
4030   case Match_MnemonicFail:
4031     llvm_unreachable("Invalid instructions should have been handled already");
4032   }
4033   llvm_unreachable("Implement any new match types added!");
4034 }
4035 
4036 bool AMDGPUAsmParser::ParseAsAbsoluteExpression(uint32_t &Ret) {
4037   int64_t Tmp = -1;
4038   if (!isToken(AsmToken::Integer) && !isToken(AsmToken::Identifier)) {
4039     return true;
4040   }
4041   if (getParser().parseAbsoluteExpression(Tmp)) {
4042     return true;
4043   }
4044   Ret = static_cast<uint32_t>(Tmp);
4045   return false;
4046 }
4047 
4048 bool AMDGPUAsmParser::ParseDirectiveMajorMinor(uint32_t &Major,
4049                                                uint32_t &Minor) {
4050   if (ParseAsAbsoluteExpression(Major))
4051     return TokError("invalid major version");
4052 
4053   if (!trySkipToken(AsmToken::Comma))
4054     return TokError("minor version number required, comma expected");
4055 
4056   if (ParseAsAbsoluteExpression(Minor))
4057     return TokError("invalid minor version");
4058 
4059   return false;
4060 }
4061 
4062 bool AMDGPUAsmParser::ParseDirectiveAMDGCNTarget() {
4063   if (getSTI().getTargetTriple().getArch() != Triple::amdgcn)
4064     return TokError("directive only supported for amdgcn architecture");
4065 
4066   std::string Target;
4067 
4068   SMLoc TargetStart = getLoc();
4069   if (getParser().parseEscapedString(Target))
4070     return true;
4071   SMRange TargetRange = SMRange(TargetStart, getLoc());
4072 
4073   std::string ExpectedTarget;
4074   raw_string_ostream ExpectedTargetOS(ExpectedTarget);
4075   IsaInfo::streamIsaVersion(&getSTI(), ExpectedTargetOS);
4076 
4077   if (Target != ExpectedTargetOS.str())
4078     return Error(TargetRange.Start, "target must match options", TargetRange);
4079 
4080   getTargetStreamer().EmitDirectiveAMDGCNTarget(Target);
4081   return false;
4082 }
4083 
4084 bool AMDGPUAsmParser::OutOfRangeError(SMRange Range) {
4085   return Error(Range.Start, "value out of range", Range);
4086 }
4087 
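// Converts raw VGPR/SGPR counts into the granulated register-block encoding
// used by COMPUTE_PGM_RSRC1, adding the SGPRs reserved for VCC, flat_scratch
// and XNACK and honoring wave32 VGPR granularity.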
4088 bool AMDGPUAsmParser::calculateGPRBlocks(
4089     const FeatureBitset &Features, bool VCCUsed, bool FlatScrUsed,
4090     bool XNACKUsed, Optional<bool> EnableWavefrontSize32, unsigned NextFreeVGPR,
4091     SMRange VGPRRange, unsigned NextFreeSGPR, SMRange SGPRRange,
4092     unsigned &VGPRBlocks, unsigned &SGPRBlocks) {
4093   // TODO(scott.linder): These calculations are duplicated from
4094   // AMDGPUAsmPrinter::getSIProgramInfo and could be unified.
4095   IsaVersion Version = getIsaVersion(getSTI().getCPU());
4096 
4097   unsigned NumVGPRs = NextFreeVGPR;
4098   unsigned NumSGPRs = NextFreeSGPR;
4099 
4100   if (Version.Major >= 10)
4101     NumSGPRs = 0;
4102   else {
4103     unsigned MaxAddressableNumSGPRs =
4104         IsaInfo::getAddressableNumSGPRs(&getSTI());
4105 
4106     if (Version.Major >= 8 && !Features.test(FeatureSGPRInitBug) &&
4107         NumSGPRs > MaxAddressableNumSGPRs)
4108       return OutOfRangeError(SGPRRange);
4109 
4110     NumSGPRs +=
4111         IsaInfo::getNumExtraSGPRs(&getSTI(), VCCUsed, FlatScrUsed, XNACKUsed);
4112 
4113     if ((Version.Major <= 7 || Features.test(FeatureSGPRInitBug)) &&
4114         NumSGPRs > MaxAddressableNumSGPRs)
4115       return OutOfRangeError(SGPRRange);
4116 
4117     if (Features.test(FeatureSGPRInitBug))
4118       NumSGPRs = IsaInfo::FIXED_NUM_SGPRS_FOR_INIT_BUG;
4119   }
4120 
4121   VGPRBlocks =
4122       IsaInfo::getNumVGPRBlocks(&getSTI(), NumVGPRs, EnableWavefrontSize32);
4123   SGPRBlocks = IsaInfo::getNumSGPRBlocks(&getSTI(), NumSGPRs);
4124 
4125   return false;
4126 }
4127 
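// Parses a kernel descriptor block of the form
//   .amdhsa_kernel <name>
//     .amdhsa_next_free_vgpr <n>      // required
//     .amdhsa_next_free_sgpr <n>      // required
//     <other .amdhsa_ directives, each at most once>
//   .end_amdhsa_kernel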
4128 bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
4129   if (getSTI().getTargetTriple().getArch() != Triple::amdgcn)
4130     return TokError("directive only supported for amdgcn architecture");
4131 
4132   if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA)
4133     return TokError("directive only supported for amdhsa OS");
4134 
4135   StringRef KernelName;
4136   if (getParser().parseIdentifier(KernelName))
4137     return true;
4138 
4139   kernel_descriptor_t KD = getDefaultAmdhsaKernelDescriptor(&getSTI());
4140 
4141   StringSet<> Seen;
4142 
4143   IsaVersion IVersion = getIsaVersion(getSTI().getCPU());
4144 
4145   SMRange VGPRRange;
4146   uint64_t NextFreeVGPR = 0;
4147   SMRange SGPRRange;
4148   uint64_t NextFreeSGPR = 0;
4149   unsigned UserSGPRCount = 0;
4150   bool ReserveVCC = true;
4151   bool ReserveFlatScr = true;
4152   bool ReserveXNACK = hasXNACK();
4153   Optional<bool> EnableWavefrontSize32;
4154 
4155   while (true) {
4156     while (trySkipToken(AsmToken::EndOfStatement));
4157 
4158     StringRef ID;
4159     SMRange IDRange = getTok().getLocRange();
4160     if (!parseId(ID, "expected .amdhsa_ directive or .end_amdhsa_kernel"))
4161       return true;
4162 
4163     if (ID == ".end_amdhsa_kernel")
4164       break;
4165 
4166     if (Seen.find(ID) != Seen.end())
4167       return TokError(".amdhsa_ directives cannot be repeated");
4168     Seen.insert(ID);
4169 
4170     SMLoc ValStart = getLoc();
4171     int64_t IVal;
4172     if (getParser().parseAbsoluteExpression(IVal))
4173       return true;
4174     SMLoc ValEnd = getLoc();
4175     SMRange ValRange = SMRange(ValStart, ValEnd);
4176 
4177     if (IVal < 0)
4178       return OutOfRangeError(ValRange);
4179 
4180     uint64_t Val = IVal;
4181 
4182 #define PARSE_BITS_ENTRY(FIELD, ENTRY, VALUE, RANGE)                           \
4183   if (!isUInt<ENTRY##_WIDTH>(VALUE))                                           \
4184     return OutOfRangeError(RANGE);                                             \
4185   AMDHSA_BITS_SET(FIELD, ENTRY, VALUE);
4186 
4187     if (ID == ".amdhsa_group_segment_fixed_size") {
4188       if (!isUInt<sizeof(KD.group_segment_fixed_size) * CHAR_BIT>(Val))
4189         return OutOfRangeError(ValRange);
4190       KD.group_segment_fixed_size = Val;
4191     } else if (ID == ".amdhsa_private_segment_fixed_size") {
4192       if (!isUInt<sizeof(KD.private_segment_fixed_size) * CHAR_BIT>(Val))
4193         return OutOfRangeError(ValRange);
4194       KD.private_segment_fixed_size = Val;
4195     } else if (ID == ".amdhsa_user_sgpr_private_segment_buffer") {
4196       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4197                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER,
4198                        Val, ValRange);
4199       if (Val)
4200         UserSGPRCount += 4;
4201     } else if (ID == ".amdhsa_user_sgpr_dispatch_ptr") {
4202       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4203                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR, Val,
4204                        ValRange);
4205       if (Val)
4206         UserSGPRCount += 2;
4207     } else if (ID == ".amdhsa_user_sgpr_queue_ptr") {
4208       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4209                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR, Val,
4210                        ValRange);
4211       if (Val)
4212         UserSGPRCount += 2;
4213     } else if (ID == ".amdhsa_user_sgpr_kernarg_segment_ptr") {
4214       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4215                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR,
4216                        Val, ValRange);
4217       if (Val)
4218         UserSGPRCount += 2;
4219     } else if (ID == ".amdhsa_user_sgpr_dispatch_id") {
4220       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4221                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID, Val,
4222                        ValRange);
4223       if (Val)
4224         UserSGPRCount += 2;
4225     } else if (ID == ".amdhsa_user_sgpr_flat_scratch_init") {
4226       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4227                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT, Val,
4228                        ValRange);
4229       if (Val)
4230         UserSGPRCount += 2;
4231     } else if (ID == ".amdhsa_user_sgpr_private_segment_size") {
4232       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4233                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE,
4234                        Val, ValRange);
4235       if (Val)
4236         UserSGPRCount += 1;
4237     } else if (ID == ".amdhsa_wavefront_size32") {
4238       if (IVersion.Major < 10)
4239         return Error(IDRange.Start, "directive requires gfx10+", IDRange);
4240       EnableWavefrontSize32 = Val;
4241       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4242                        KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32,
4243                        Val, ValRange);
4244     } else if (ID == ".amdhsa_system_sgpr_private_segment_wavefront_offset") {
4245       PARSE_BITS_ENTRY(
4246           KD.compute_pgm_rsrc2,
4247           COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, Val,
4248           ValRange);
4249     } else if (ID == ".amdhsa_system_sgpr_workgroup_id_x") {
4250       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4251                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, Val,
4252                        ValRange);
4253     } else if (ID == ".amdhsa_system_sgpr_workgroup_id_y") {
4254       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4255                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y, Val,
4256                        ValRange);
4257     } else if (ID == ".amdhsa_system_sgpr_workgroup_id_z") {
4258       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4259                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z, Val,
4260                        ValRange);
4261     } else if (ID == ".amdhsa_system_sgpr_workgroup_info") {
4262       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4263                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO, Val,
4264                        ValRange);
4265     } else if (ID == ".amdhsa_system_vgpr_workitem_id") {
4266       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4267                        COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID, Val,
4268                        ValRange);
4269     } else if (ID == ".amdhsa_next_free_vgpr") {
4270       VGPRRange = ValRange;
4271       NextFreeVGPR = Val;
4272     } else if (ID == ".amdhsa_next_free_sgpr") {
4273       SGPRRange = ValRange;
4274       NextFreeSGPR = Val;
4275     } else if (ID == ".amdhsa_reserve_vcc") {
4276       if (!isUInt<1>(Val))
4277         return OutOfRangeError(ValRange);
4278       ReserveVCC = Val;
4279     } else if (ID == ".amdhsa_reserve_flat_scratch") {
4280       if (IVersion.Major < 7)
4281         return Error(IDRange.Start, "directive requires gfx7+", IDRange);
4282       if (!isUInt<1>(Val))
4283         return OutOfRangeError(ValRange);
4284       ReserveFlatScr = Val;
4285     } else if (ID == ".amdhsa_reserve_xnack_mask") {
4286       if (IVersion.Major < 8)
4287         return Error(IDRange.Start, "directive requires gfx8+", IDRange);
4288       if (!isUInt<1>(Val))
4289         return OutOfRangeError(ValRange);
4290       ReserveXNACK = Val;
4291     } else if (ID == ".amdhsa_float_round_mode_32") {
4292       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
4293                        COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32, Val, ValRange);
4294     } else if (ID == ".amdhsa_float_round_mode_16_64") {
4295       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
4296                        COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64, Val, ValRange);
4297     } else if (ID == ".amdhsa_float_denorm_mode_32") {
4298       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
4299                        COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32, Val, ValRange);
4300     } else if (ID == ".amdhsa_float_denorm_mode_16_64") {
4301       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
4302                        COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64, Val,
4303                        ValRange);
4304     } else if (ID == ".amdhsa_dx10_clamp") {
4305       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
4306                        COMPUTE_PGM_RSRC1_ENABLE_DX10_CLAMP, Val, ValRange);
4307     } else if (ID == ".amdhsa_ieee_mode") {
4308       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_ENABLE_IEEE_MODE,
4309                        Val, ValRange);
4310     } else if (ID == ".amdhsa_fp16_overflow") {
4311       if (IVersion.Major < 9)
4312         return Error(IDRange.Start, "directive requires gfx9+", IDRange);
4313       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FP16_OVFL, Val,
4314                        ValRange);
4315     } else if (ID == ".amdhsa_workgroup_processor_mode") {
4316       if (IVersion.Major < 10)
4317         return Error(IDRange.Start, "directive requires gfx10+", IDRange);
4318       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_WGP_MODE, Val,
4319                        ValRange);
4320     } else if (ID == ".amdhsa_memory_ordered") {
4321       if (IVersion.Major < 10)
4322         return Error(IDRange.Start, "directive requires gfx10+", IDRange);
4323       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_MEM_ORDERED, Val,
4324                        ValRange);
4325     } else if (ID == ".amdhsa_forward_progress") {
4326       if (IVersion.Major < 10)
4327         return Error(IDRange.Start, "directive requires gfx10+", IDRange);
4328       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FWD_PROGRESS, Val,
4329                        ValRange);
4330     } else if (ID == ".amdhsa_exception_fp_ieee_invalid_op") {
4331       PARSE_BITS_ENTRY(
4332           KD.compute_pgm_rsrc2,
4333           COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION, Val,
4334           ValRange);
4335     } else if (ID == ".amdhsa_exception_fp_denorm_src") {
4336       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4337                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE,
4338                        Val, ValRange);
4339     } else if (ID == ".amdhsa_exception_fp_ieee_div_zero") {
4340       PARSE_BITS_ENTRY(
4341           KD.compute_pgm_rsrc2,
4342           COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO, Val,
4343           ValRange);
4344     } else if (ID == ".amdhsa_exception_fp_ieee_overflow") {
4345       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4346                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW,
4347                        Val, ValRange);
4348     } else if (ID == ".amdhsa_exception_fp_ieee_underflow") {
4349       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4350                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW,
4351                        Val, ValRange);
4352     } else if (ID == ".amdhsa_exception_fp_ieee_inexact") {
4353       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4354                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT,
4355                        Val, ValRange);
4356     } else if (ID == ".amdhsa_exception_int_div_zero") {
4357       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4358                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO,
4359                        Val, ValRange);
4360     } else {
4361       return Error(IDRange.Start, "unknown .amdhsa_kernel directive", IDRange);
4362     }
4363 
4364 #undef PARSE_BITS_ENTRY
4365   }
4366 
4367   if (Seen.find(".amdhsa_next_free_vgpr") == Seen.end())
4368     return TokError(".amdhsa_next_free_vgpr directive is required");
4369 
4370   if (Seen.find(".amdhsa_next_free_sgpr") == Seen.end())
4371     return TokError(".amdhsa_next_free_sgpr directive is required");
4372 
4373   unsigned VGPRBlocks;
4374   unsigned SGPRBlocks;
4375   if (calculateGPRBlocks(getFeatureBits(), ReserveVCC, ReserveFlatScr,
4376                          ReserveXNACK, EnableWavefrontSize32, NextFreeVGPR,
4377                          VGPRRange, NextFreeSGPR, SGPRRange, VGPRBlocks,
4378                          SGPRBlocks))
4379     return true;
4380 
4381   if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_WIDTH>(
4382           VGPRBlocks))
4383     return OutOfRangeError(VGPRRange);
4384   AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
4385                   COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT, VGPRBlocks);
4386 
4387   if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_WIDTH>(
4388           SGPRBlocks))
4389     return OutOfRangeError(SGPRRange);
4390   AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
4391                   COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT,
4392                   SGPRBlocks);
4393 
4394   if (!isUInt<COMPUTE_PGM_RSRC2_USER_SGPR_COUNT_WIDTH>(UserSGPRCount))
4395     return TokError("too many user SGPRs enabled");
4396   AMDHSA_BITS_SET(KD.compute_pgm_rsrc2, COMPUTE_PGM_RSRC2_USER_SGPR_COUNT,
4397                   UserSGPRCount);
4398 
4399   getTargetStreamer().EmitAmdhsaKernelDescriptor(
4400       getSTI(), KernelName, KD, NextFreeVGPR, NextFreeSGPR, ReserveVCC,
4401       ReserveFlatScr, ReserveXNACK);
4402   return false;
4403 }
4404 
4405 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectVersion() {
4406   uint32_t Major;
4407   uint32_t Minor;
4408 
4409   if (ParseDirectiveMajorMinor(Major, Minor))
4410     return true;
4411 
4412   getTargetStreamer().EmitDirectiveHSACodeObjectVersion(Major, Minor);
4413   return false;
4414 }
4415 
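// With no arguments this directive emits the ISA version of the current
// target; otherwise it expects
//   <major>, <minor>, <stepping>, "<vendor>", "<arch>"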
4416 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectISA() {
4417   uint32_t Major;
4418   uint32_t Minor;
4419   uint32_t Stepping;
4420   StringRef VendorName;
4421   StringRef ArchName;
4422 
4423   // If this directive has no arguments, then use the ISA version for the
4424   // targeted GPU.
4425   if (isToken(AsmToken::EndOfStatement)) {
4426     AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
4427     getTargetStreamer().EmitDirectiveHSACodeObjectISA(ISA.Major, ISA.Minor,
4428                                                       ISA.Stepping,
4429                                                       "AMD", "AMDGPU");
4430     return false;
4431   }
4432 
4433   if (ParseDirectiveMajorMinor(Major, Minor))
4434     return true;
4435 
4436   if (!trySkipToken(AsmToken::Comma))
4437     return TokError("stepping version number required, comma expected");
4438 
4439   if (ParseAsAbsoluteExpression(Stepping))
4440     return TokError("invalid stepping version");
4441 
4442   if (!trySkipToken(AsmToken::Comma))
4443     return TokError("vendor name required, comma expected");
4444 
4445   if (!parseString(VendorName, "invalid vendor name"))
4446     return true;
4447 
4448   if (!trySkipToken(AsmToken::Comma))
4449     return TokError("arch name required, comma expected");
4450 
4451   if (!parseString(ArchName, "invalid arch name"))
4452     return true;
4453 
4454   getTargetStreamer().EmitDirectiveHSACodeObjectISA(Major, Minor, Stepping,
4455                                                     VendorName, ArchName);
4456   return false;
4457 }
4458 
4459 bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID,
4460                                                amd_kernel_code_t &Header) {
4461   // max_scratch_backing_memory_byte_size is deprecated. Ignore it while parsing
4462   // assembly for backwards compatibility.
4463   if (ID == "max_scratch_backing_memory_byte_size") {
4464     Parser.eatToEndOfStatement();
4465     return false;
4466   }
4467 
4468   SmallString<40> ErrStr;
4469   raw_svector_ostream Err(ErrStr);
4470   if (!parseAmdKernelCodeField(ID, getParser(), Header, Err)) {
4471     return TokError(Err.str());
4472   }
4473   Lex();
4474 
4475   if (ID == "enable_wavefront_size32") {
4476     if (Header.code_properties & AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32) {
4477       if (!isGFX10Plus())
4478         return TokError("enable_wavefront_size32=1 is only allowed on GFX10+");
4479       if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32])
4480         return TokError("enable_wavefront_size32=1 requires +WavefrontSize32");
4481     } else {
4482       if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64])
4483         return TokError("enable_wavefront_size32=0 requires +WavefrontSize64");
4484     }
4485   }
4486 
4487   if (ID == "wavefront_size") {
4488     if (Header.wavefront_size == 5) {
4489       if (!isGFX10Plus())
4490         return TokError("wavefront_size=5 is only allowed on GFX10+");
4491       if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32])
4492         return TokError("wavefront_size=5 requires +WavefrontSize32");
4493     } else if (Header.wavefront_size == 6) {
4494       if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64])
4495         return TokError("wavefront_size=6 requires +WavefrontSize64");
4496     }
4497   }
4498 
4499   if (ID == "enable_wgp_mode") {
4500     if (G_00B848_WGP_MODE(Header.compute_pgm_resource_registers) &&
4501         !isGFX10Plus())
4502       return TokError("enable_wgp_mode=1 is only allowed on GFX10+");
4503   }
4504 
4505   if (ID == "enable_mem_ordered") {
4506     if (G_00B848_MEM_ORDERED(Header.compute_pgm_resource_registers) &&
4507         !isGFX10Plus())
4508       return TokError("enable_mem_ordered=1 is only allowed on GFX10+");
4509   }
4510 
4511   if (ID == "enable_fwd_progress") {
4512     if (G_00B848_FWD_PROGRESS(Header.compute_pgm_resource_registers) &&
4513         !isGFX10Plus())
4514       return TokError("enable_fwd_progress=1 is only allowed on GFX10+");
4515   }
4516 
4517   return false;
4518 }
4519 
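// Parses an .amd_kernel_code_t ... .end_amd_kernel_code_t block on top of the
// target's default amd_kernel_code_t values; each statement presumably takes
// the form "<field> = <value>" (handled by parseAmdKernelCodeField).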
4520 bool AMDGPUAsmParser::ParseDirectiveAMDKernelCodeT() {
4521   amd_kernel_code_t Header;
4522   AMDGPU::initDefaultAMDKernelCodeT(Header, &getSTI());
4523 
4524   while (true) {
4525     // Lex EndOfStatement.  This is in a while loop, because lexing a comment
4526     // will set the current token to EndOfStatement.
4527     while(trySkipToken(AsmToken::EndOfStatement));
4528 
4529     StringRef ID;
4530     if (!parseId(ID, "expected value identifier or .end_amd_kernel_code_t"))
4531       return true;
4532 
4533     if (ID == ".end_amd_kernel_code_t")
4534       break;
4535 
4536     if (ParseAMDKernelCodeTValue(ID, Header))
4537       return true;
4538   }
4539 
4540   getTargetStreamer().EmitAMDKernelCodeT(Header);
4541 
4542   return false;
4543 }
4544 
4545 bool AMDGPUAsmParser::ParseDirectiveAMDGPUHsaKernel() {
4546   StringRef KernelName;
4547   if (!parseId(KernelName, "expected symbol name"))
4548     return true;
4549 
4550   getTargetStreamer().EmitAMDGPUSymbolType(KernelName,
4551                                            ELF::STT_AMDGPU_HSA_KERNEL);
4552 
4553   KernelScope.initialize(getContext());
4554   return false;
4555 }
4556 
4557 bool AMDGPUAsmParser::ParseDirectiveISAVersion() {
4558   if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) {
4559     return Error(getLoc(),
4560                  ".amd_amdgpu_isa directive is not available on non-amdgcn "
4561                  "architectures");
4562   }
4563 
4564   auto ISAVersionStringFromASM = getToken().getStringContents();
4565 
4566   std::string ISAVersionStringFromSTI;
4567   raw_string_ostream ISAVersionStreamFromSTI(ISAVersionStringFromSTI);
4568   IsaInfo::streamIsaVersion(&getSTI(), ISAVersionStreamFromSTI);
4569 
4570   if (ISAVersionStringFromASM != ISAVersionStreamFromSTI.str()) {
4571     return Error(getLoc(),
4572                  ".amd_amdgpu_isa directive does not match triple and/or mcpu "
4573                  "arguments specified through the command line");
4574   }
4575 
4576   getTargetStreamer().EmitISAVersion(ISAVersionStreamFromSTI.str());
4577   Lex();
4578 
4579   return false;
4580 }
4581 
4582 bool AMDGPUAsmParser::ParseDirectiveHSAMetadata() {
4583   const char *AssemblerDirectiveBegin;
4584   const char *AssemblerDirectiveEnd;
4585   std::tie(AssemblerDirectiveBegin, AssemblerDirectiveEnd) =
4586       isHsaAbiVersion3(&getSTI())
4587           ? std::make_tuple(HSAMD::V3::AssemblerDirectiveBegin,
4588                             HSAMD::V3::AssemblerDirectiveEnd)
4589           : std::make_tuple(HSAMD::AssemblerDirectiveBegin,
4590                             HSAMD::AssemblerDirectiveEnd);
4591 
4592   if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA) {
4593     return Error(getLoc(),
4594                  (Twine(AssemblerDirectiveBegin) + Twine(" directive is "
4595                  "not available on non-amdhsa OSes")).str());
4596   }
4597 
4598   std::string HSAMetadataString;
4599   if (ParseToEndDirective(AssemblerDirectiveBegin, AssemblerDirectiveEnd,
4600                           HSAMetadataString))
4601     return true;
4602 
4603   if (isHsaAbiVersion3(&getSTI())) {
4604     if (!getTargetStreamer().EmitHSAMetadataV3(HSAMetadataString))
4605       return Error(getLoc(), "invalid HSA metadata");
4606   } else {
4607     if (!getTargetStreamer().EmitHSAMetadataV2(HSAMetadataString))
4608       return Error(getLoc(), "invalid HSA metadata");
4609   }
4610 
4611   return false;
4612 }
4613 
4614 /// Common code to parse out a block of text (typically YAML) between start and
4615 /// end directives.
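/// Leading whitespace on each line is preserved and statements are re-joined
/// with the target's statement separator string before being handed back to
/// the caller.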
4616 bool AMDGPUAsmParser::ParseToEndDirective(const char *AssemblerDirectiveBegin,
4617                                           const char *AssemblerDirectiveEnd,
4618                                           std::string &CollectString) {
4619 
4620   raw_string_ostream CollectStream(CollectString);
4621 
4622   getLexer().setSkipSpace(false);
4623 
4624   bool FoundEnd = false;
4625   while (!isToken(AsmToken::Eof)) {
4626     while (isToken(AsmToken::Space)) {
4627       CollectStream << getTokenStr();
4628       Lex();
4629     }
4630 
4631     if (trySkipId(AssemblerDirectiveEnd)) {
4632       FoundEnd = true;
4633       break;
4634     }
4635 
4636     CollectStream << Parser.parseStringToEndOfStatement()
4637                   << getContext().getAsmInfo()->getSeparatorString();
4638 
4639     Parser.eatToEndOfStatement();
4640   }
4641 
4642   getLexer().setSkipSpace(true);
4643 
4644   if (isToken(AsmToken::Eof) && !FoundEnd) {
4645     return TokError(Twine("expected directive ") +
4646                     Twine(AssemblerDirectiveEnd) + Twine(" not found"));
4647   }
4648 
4649   CollectStream.flush();
4650   return false;
4651 }
4652 
4653 /// Parse the assembler directive for new MsgPack-format PAL metadata.
4654 bool AMDGPUAsmParser::ParseDirectivePALMetadataBegin() {
4655   std::string String;
4656   if (ParseToEndDirective(AMDGPU::PALMD::AssemblerDirectiveBegin,
4657                           AMDGPU::PALMD::AssemblerDirectiveEnd, String))
4658     return true;
4659 
4660   auto PALMetadata = getTargetStreamer().getPALMetadata();
4661   if (!PALMetadata->setFromString(String))
4662     return Error(getLoc(), "invalid PAL metadata");
4663   return false;
4664 }
4665 
4666 /// Parse the assembler directive for old linear-format PAL metadata.
4667 bool AMDGPUAsmParser::ParseDirectivePALMetadata() {
4668   if (getSTI().getTargetTriple().getOS() != Triple::AMDPAL) {
4669     return Error(getLoc(),
4670                  (Twine(PALMD::AssemblerDirective) + Twine(" directive is "
4671                  "not available on non-amdpal OSes")).str());
4672   }
4673 
4674   auto PALMetadata = getTargetStreamer().getPALMetadata();
4675   PALMetadata->setLegacy();
4676   for (;;) {
4677     uint32_t Key, Value;
4678     if (ParseAsAbsoluteExpression(Key)) {
4679       return TokError(Twine("invalid value in ") +
4680                       Twine(PALMD::AssemblerDirective));
4681     }
4682     if (!trySkipToken(AsmToken::Comma)) {
4683       return TokError(Twine("expected an even number of values in ") +
4684                       Twine(PALMD::AssemblerDirective));
4685     }
4686     if (ParseAsAbsoluteExpression(Value)) {
4687       return TokError(Twine("invalid value in ") +
4688                       Twine(PALMD::AssemblerDirective));
4689     }
4690     PALMetadata->setRegister(Key, Value);
4691     if (!trySkipToken(AsmToken::Comma))
4692       break;
4693   }
4694   return false;
4695 }
4696 
4697 /// ParseDirectiveAMDGPULDS
4698 ///  ::= .amdgpu_lds identifier ',' size_expression [',' align_expression]
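///
/// For example (illustrative): ".amdgpu_lds my_lds_var, 4096, 16" declares an
/// LDS symbol of 4096 bytes with 16-byte alignment; the alignment defaults to
/// 4 when omitted.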
4699 bool AMDGPUAsmParser::ParseDirectiveAMDGPULDS() {
4700   if (getParser().checkForValidSection())
4701     return true;
4702 
4703   StringRef Name;
4704   SMLoc NameLoc = getLoc();
4705   if (getParser().parseIdentifier(Name))
4706     return TokError("expected identifier in directive");
4707 
4708   MCSymbol *Symbol = getContext().getOrCreateSymbol(Name);
4709   if (parseToken(AsmToken::Comma, "expected ','"))
4710     return true;
4711 
4712   unsigned LocalMemorySize = AMDGPU::IsaInfo::getLocalMemorySize(&getSTI());
4713 
4714   int64_t Size;
4715   SMLoc SizeLoc = getLoc();
4716   if (getParser().parseAbsoluteExpression(Size))
4717     return true;
4718   if (Size < 0)
4719     return Error(SizeLoc, "size must be non-negative");
4720   if (Size > LocalMemorySize)
4721     return Error(SizeLoc, "size is too large");
4722 
4723   int64_t Alignment = 4;
4724   if (trySkipToken(AsmToken::Comma)) {
4725     SMLoc AlignLoc = getLoc();
4726     if (getParser().parseAbsoluteExpression(Alignment))
4727       return true;
4728     if (Alignment < 0 || !isPowerOf2_64(Alignment))
4729       return Error(AlignLoc, "alignment must be a power of two");
4730 
4731     // Alignment larger than the size of LDS is possible in theory, as long
4732     // as the linker manages to place the symbol at address 0, but we do want
4733     // to make sure the alignment fits nicely into a 32-bit integer.
4734     if (Alignment >= 1u << 31)
4735       return Error(AlignLoc, "alignment is too large");
4736   }
4737 
4738   if (parseToken(AsmToken::EndOfStatement,
4739                  "unexpected token in '.amdgpu_lds' directive"))
4740     return true;
4741 
4742   Symbol->redefineIfPossible();
4743   if (!Symbol->isUndefined())
4744     return Error(NameLoc, "invalid symbol redefinition");
4745 
4746   getTargetStreamer().emitAMDGPULDS(Symbol, Size, Align(Alignment));
4747   return false;
4748 }
4749 
4750 bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) {
4751   StringRef IDVal = DirectiveID.getString();
4752 
4753   if (isHsaAbiVersion3(&getSTI())) {
4754     if (IDVal == ".amdgcn_target")
4755       return ParseDirectiveAMDGCNTarget();
4756 
4757     if (IDVal == ".amdhsa_kernel")
4758       return ParseDirectiveAMDHSAKernel();
4759 
4760     // TODO: Restructure/combine with PAL metadata directive.
4761     if (IDVal == AMDGPU::HSAMD::V3::AssemblerDirectiveBegin)
4762       return ParseDirectiveHSAMetadata();
4763   } else {
4764     if (IDVal == ".hsa_code_object_version")
4765       return ParseDirectiveHSACodeObjectVersion();
4766 
4767     if (IDVal == ".hsa_code_object_isa")
4768       return ParseDirectiveHSACodeObjectISA();
4769 
4770     if (IDVal == ".amd_kernel_code_t")
4771       return ParseDirectiveAMDKernelCodeT();
4772 
4773     if (IDVal == ".amdgpu_hsa_kernel")
4774       return ParseDirectiveAMDGPUHsaKernel();
4775 
4776     if (IDVal == ".amd_amdgpu_isa")
4777       return ParseDirectiveISAVersion();
4778 
4779     if (IDVal == AMDGPU::HSAMD::AssemblerDirectiveBegin)
4780       return ParseDirectiveHSAMetadata();
4781   }
4782 
4783   if (IDVal == ".amdgpu_lds")
4784     return ParseDirectiveAMDGPULDS();
4785 
4786   if (IDVal == PALMD::AssemblerDirectiveBegin)
4787     return ParseDirectivePALMetadataBegin();
4788 
4789   if (IDVal == PALMD::AssemblerDirective)
4790     return ParseDirectivePALMetadata();
4791 
4792   return true;
4793 }
4794 
4795 bool AMDGPUAsmParser::subtargetHasRegister(const MCRegisterInfo &MRI,
4796                                            unsigned RegNo) const {
4797 
4798   for (MCRegAliasIterator R(AMDGPU::TTMP12_TTMP13_TTMP14_TTMP15, &MRI, true);
4799        R.isValid(); ++R) {
4800     if (*R == RegNo)
4801       return isGFX9Plus();
4802   }
4803 
4804   // GFX10 has 2 more SGPRs 104 and 105.
4805   for (MCRegAliasIterator R(AMDGPU::SGPR104_SGPR105, &MRI, true);
4806        R.isValid(); ++R) {
4807     if (*R == RegNo)
4808       return hasSGPR104_SGPR105();
4809   }
4810 
4811   switch (RegNo) {
4812   case AMDGPU::SRC_SHARED_BASE:
4813   case AMDGPU::SRC_SHARED_LIMIT:
4814   case AMDGPU::SRC_PRIVATE_BASE:
4815   case AMDGPU::SRC_PRIVATE_LIMIT:
4816   case AMDGPU::SRC_POPS_EXITING_WAVE_ID:
4817     return isGFX9Plus();
4818   case AMDGPU::TBA:
4819   case AMDGPU::TBA_LO:
4820   case AMDGPU::TBA_HI:
4821   case AMDGPU::TMA:
4822   case AMDGPU::TMA_LO:
4823   case AMDGPU::TMA_HI:
4824     return !isGFX9Plus();
4825   case AMDGPU::XNACK_MASK:
4826   case AMDGPU::XNACK_MASK_LO:
4827   case AMDGPU::XNACK_MASK_HI:
4828     return (isVI() || isGFX9()) && hasXNACK();
4829   case AMDGPU::SGPR_NULL:
4830     return isGFX10Plus();
4831   default:
4832     break;
4833   }
4834 
4835   if (isCI())
4836     return true;
4837 
4838   if (isSI() || isGFX10Plus()) {
4839     // No flat_scr on SI.
4840     // On GFX10 flat scratch is not a valid register operand and can only be
4841     // accessed with s_setreg/s_getreg.
4842     switch (RegNo) {
4843     case AMDGPU::FLAT_SCR:
4844     case AMDGPU::FLAT_SCR_LO:
4845     case AMDGPU::FLAT_SCR_HI:
4846       return false;
4847     default:
4848       return true;
4849     }
4850   }
4851 
4852   // VI only has 102 SGPRs, so make sure we aren't trying to use the 2 more that
4853   // SI/CI have.
4854   for (MCRegAliasIterator R(AMDGPU::SGPR102_SGPR103, &MRI, true);
4855        R.isValid(); ++R) {
4856     if (*R == RegNo)
4857       return hasSGPR102_SGPR103();
4858   }
4859 
4860   return true;
4861 }
4862 
4863 OperandMatchResultTy
4864 AMDGPUAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic,
4865                               OperandMode Mode) {
4866   // Try to parse with a custom parser
4867   OperandMatchResultTy ResTy = MatchOperandParserImpl(Operands, Mnemonic);
4868 
4869   // If we successfully parsed the operand or if there was an error parsing,
4870   // we are done.
4871   //
4872   // If we are parsing after we reach EndOfStatement then this means we
4873   // are appending default values to the Operands list. This is only done
4874   // by custom parsers, so we shouldn't continue on to the generic parsing.
4875   if (ResTy == MatchOperand_Success || ResTy == MatchOperand_ParseFail ||
4876       isToken(AsmToken::EndOfStatement))
4877     return ResTy;
4878 
4879   SMLoc RBraceLoc;
4880   SMLoc LBraceLoc = getLoc();
4881   if (Mode == OperandMode_NSA && trySkipToken(AsmToken::LBrac)) {
4882     unsigned Prefix = Operands.size();
4883 
4884     for (;;) {
4885       ResTy = parseReg(Operands);
4886       if (ResTy != MatchOperand_Success)
4887         return ResTy;
4888 
4889       RBraceLoc = getLoc();
4890       if (trySkipToken(AsmToken::RBrac))
4891         break;
4892 
4893       if (!trySkipToken(AsmToken::Comma))
4894         return MatchOperand_ParseFail;
4895     }
4896 
4897     if (Operands.size() - Prefix > 1) {
4898       Operands.insert(Operands.begin() + Prefix,
4899                       AMDGPUOperand::CreateToken(this, "[", LBraceLoc));
4900       Operands.push_back(AMDGPUOperand::CreateToken(this, "]", RBraceLoc));
4901     }
4902 
4903     return MatchOperand_Success;
4904   }
4905 
4906   return parseRegOrImm(Operands);
4907 }
4908 
4909 StringRef AMDGPUAsmParser::parseMnemonicSuffix(StringRef Name) {
4910   // Clear any forced encodings from the previous instruction.
4911   setForcedEncodingSize(0);
4912   setForcedDPP(false);
4913   setForcedSDWA(false);
4914 
4915   if (Name.endswith("_e64")) {
4916     setForcedEncodingSize(64);
4917     return Name.substr(0, Name.size() - 4);
4918   } else if (Name.endswith("_e32")) {
4919     setForcedEncodingSize(32);
4920     return Name.substr(0, Name.size() - 4);
4921   } else if (Name.endswith("_dpp")) {
4922     setForcedDPP(true);
4923     return Name.substr(0, Name.size() - 4);
4924   } else if (Name.endswith("_sdwa")) {
4925     setForcedSDWA(true);
4926     return Name.substr(0, Name.size() - 5);
4927   }
4928   return Name;
4929 }
4930 
4931 bool AMDGPUAsmParser::ParseInstruction(ParseInstructionInfo &Info,
4932                                        StringRef Name,
4933                                        SMLoc NameLoc, OperandVector &Operands) {
4934   // Add the instruction mnemonic
4935   Name = parseMnemonicSuffix(Name);
4936   Operands.push_back(AMDGPUOperand::CreateToken(this, Name, NameLoc));
4937 
4938   bool IsMIMG = Name.startswith("image_");
4939 
4940   while (!trySkipToken(AsmToken::EndOfStatement)) {
4941     OperandMode Mode = OperandMode_Default;
4942     if (IsMIMG && isGFX10Plus() && Operands.size() == 2)
4943       Mode = OperandMode_NSA;
4944     OperandMatchResultTy Res = parseOperand(Operands, Name, Mode);
4945 
4946     // Eat the comma or space if there is one.
4947     trySkipToken(AsmToken::Comma);
4948 
4949     if (Res != MatchOperand_Success) {
4950       checkUnsupportedInstruction(Name, NameLoc);
4951       if (!Parser.hasPendingError()) {
4952         // FIXME: use real operand location rather than the current location.
4953         StringRef Msg =
4954           (Res == MatchOperand_ParseFail) ? "failed parsing operand." :
4955                                             "not a valid operand.";
4956         Error(getLoc(), Msg);
4957       }
4958       while (!trySkipToken(AsmToken::EndOfStatement)) {
4959         lex();
4960       }
4961       return true;
4962     }
4963   }
4964 
4965   return false;
4966 }
4967 
4968 //===----------------------------------------------------------------------===//
4969 // Utility functions
4970 //===----------------------------------------------------------------------===//
4971 
4972 OperandMatchResultTy
4973 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, int64_t &IntVal) {
4974 
4975   if (!trySkipId(Prefix, AsmToken::Colon))
4976     return MatchOperand_NoMatch;
4977 
4978   return parseExpr(IntVal) ? MatchOperand_Success : MatchOperand_ParseFail;
4979 }
4980 
4981 OperandMatchResultTy
4982 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
4983                                     AMDGPUOperand::ImmTy ImmTy,
4984                                     bool (*ConvertResult)(int64_t&)) {
4985   SMLoc S = getLoc();
4986   int64_t Value = 0;
4987 
4988   OperandMatchResultTy Res = parseIntWithPrefix(Prefix, Value);
4989   if (Res != MatchOperand_Success)
4990     return Res;
4991 
4992   if (ConvertResult && !ConvertResult(Value)) {
4993     Error(S, "invalid " + StringRef(Prefix) + " value.");
4994   }
4995 
4996   Operands.push_back(AMDGPUOperand::CreateImm(this, Value, S, ImmTy));
4997   return MatchOperand_Success;
4998 }
4999 
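// Parses a prefixed array of 0/1 flags, e.g. (illustrative) "op_sel:[0,0,1,1]",
// and packs up to four elements into a single bitmask immediate.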
5000 OperandMatchResultTy
5001 AMDGPUAsmParser::parseOperandArrayWithPrefix(const char *Prefix,
5002                                              OperandVector &Operands,
5003                                              AMDGPUOperand::ImmTy ImmTy,
5004                                              bool (*ConvertResult)(int64_t&)) {
5005   SMLoc S = getLoc();
5006   if (!trySkipId(Prefix, AsmToken::Colon))
5007     return MatchOperand_NoMatch;
5008 
5009   if (!skipToken(AsmToken::LBrac, "expected a left square bracket"))
5010     return MatchOperand_ParseFail;
5011 
5012   unsigned Val = 0;
5013   const unsigned MaxSize = 4;
5014 
5015   // FIXME: How to verify the number of elements matches the number of src
5016   // operands?
5017   for (int I = 0; ; ++I) {
5018     int64_t Op;
5019     SMLoc Loc = getLoc();
5020     if (!parseExpr(Op))
5021       return MatchOperand_ParseFail;
5022 
5023     if (Op != 0 && Op != 1) {
5024       Error(Loc, "invalid " + StringRef(Prefix) + " value.");
5025       return MatchOperand_ParseFail;
5026     }
5027 
5028     Val |= (Op << I);
5029 
5030     if (trySkipToken(AsmToken::RBrac))
5031       break;
5032 
5033     if (I + 1 == MaxSize) {
5034       Error(getLoc(), "expected a closing square bracket");
5035       return MatchOperand_ParseFail;
5036     }
5037 
5038     if (!skipToken(AsmToken::Comma, "expected a comma"))
5039       return MatchOperand_ParseFail;
5040   }
5041 
5042   Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S, ImmTy));
5043   return MatchOperand_Success;
5044 }
5045 
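// Parses a named bit modifier, e.g. (illustrative) "glc"/"noglc": the bare name
// sets the bit, the "no"-prefixed form clears it, and a missing token leaves
// the default value of 0.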
5046 OperandMatchResultTy
5047 AMDGPUAsmParser::parseNamedBit(const char *Name, OperandVector &Operands,
5048                                AMDGPUOperand::ImmTy ImmTy) {
5049   int64_t Bit = 0;
5050   SMLoc S = getLoc();
5051 
5052   // If we are at the end of the statement, this is a default argument, so
5053   // use the default value.
5054   if (!isToken(AsmToken::EndOfStatement)) {
5055     switch(getTokenKind()) {
5056       case AsmToken::Identifier: {
5057         StringRef Tok = getTokenStr();
5058         if (Tok == Name) {
5059           if (Tok == "r128" && !hasMIMG_R128())
5060             Error(S, "r128 modifier is not supported on this GPU");
5061           if (Tok == "a16" && !isGFX9() && !hasGFX10A16())
5062             Error(S, "a16 modifier is not supported on this GPU");
5063           Bit = 1;
5064           Parser.Lex();
5065         } else if (Tok.startswith("no") && Tok.endswith(Name)) {
5066           Bit = 0;
5067           Parser.Lex();
5068         } else {
5069           return MatchOperand_NoMatch;
5070         }
5071         break;
5072       }
5073       default:
5074         return MatchOperand_NoMatch;
5075     }
5076   }
5077 
5078   if (!isGFX10Plus() && ImmTy == AMDGPUOperand::ImmTyDLC)
5079     return MatchOperand_ParseFail;
5080 
5081   if (isGFX9() && ImmTy == AMDGPUOperand::ImmTyA16)
5082     ImmTy = AMDGPUOperand::ImmTyR128A16;
5083 
5084   Operands.push_back(AMDGPUOperand::CreateImm(this, Bit, S, ImmTy));
5085   return MatchOperand_Success;
5086 }
5087 
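// If an operand of type ImmT was recorded in OptionalIdx, add it to the
// instruction; otherwise add the provided default immediate.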
5088 static void addOptionalImmOperand(
5089   MCInst& Inst, const OperandVector& Operands,
5090   AMDGPUAsmParser::OptionalImmIndexMap& OptionalIdx,
5091   AMDGPUOperand::ImmTy ImmT,
5092   int64_t Default = 0) {
5093   auto i = OptionalIdx.find(ImmT);
5094   if (i != OptionalIdx.end()) {
5095     unsigned Idx = i->second;
5096     ((AMDGPUOperand &)*Operands[Idx]).addImmOperands(Inst, 1);
5097   } else {
5098     Inst.addOperand(MCOperand::createImm(Default));
5099   }
5100 }
5101 
5102 OperandMatchResultTy
5103 AMDGPUAsmParser::parseStringWithPrefix(StringRef Prefix,
5104                                        StringRef &Value,
5105                                        SMLoc &StringLoc) {
5106   if (!trySkipId(Prefix, AsmToken::Colon))
5107     return MatchOperand_NoMatch;
5108 
5109   StringLoc = getLoc();
5110   return parseId(Value, "expected an identifier") ? MatchOperand_Success
5111                                                   : MatchOperand_ParseFail;
5112 }
5113 
5114 //===----------------------------------------------------------------------===//
5115 // MTBUF format
5116 //===----------------------------------------------------------------------===//
5117 
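// The buffer format may be given numerically ("dfmt:<n>"/"nfmt:<n>" or
// "format:<n>") or symbolically, e.g. (illustrative, per the AMDGPU assembler
// syntax) format:[BUF_DATA_FORMAT_32, BUF_NUM_FORMAT_FLOAT] on pre-GFX10
// targets, or format:[BUF_FMT_32_FLOAT] with the GFX10+ unified format.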
5118 bool AMDGPUAsmParser::tryParseFmt(const char *Pref,
5119                                   int64_t MaxVal,
5120                                   int64_t &Fmt) {
5121   int64_t Val;
5122   SMLoc Loc = getLoc();
5123 
5124   auto Res = parseIntWithPrefix(Pref, Val);
5125   if (Res == MatchOperand_ParseFail)
5126     return false;
5127   if (Res == MatchOperand_NoMatch)
5128     return true;
5129 
5130   if (Val < 0 || Val > MaxVal) {
5131     Error(Loc, Twine("out of range ", StringRef(Pref)));
5132     return false;
5133   }
5134 
5135   Fmt = Val;
5136   return true;
5137 }
5138 
5139 // dfmt and nfmt (in a tbuffer instruction) are parsed as one to allow their
5140 // values to live in a joint format operand in the MCInst encoding.
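// For example (illustrative): "dfmt:15, nfmt:2"; a missing field falls back to
// its default encoding.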
5141 OperandMatchResultTy
5142 AMDGPUAsmParser::parseDfmtNfmt(int64_t &Format) {
5143   using namespace llvm::AMDGPU::MTBUFFormat;
5144 
5145   int64_t Dfmt = DFMT_UNDEF;
5146   int64_t Nfmt = NFMT_UNDEF;
5147 
5148   // dfmt and nfmt can appear in either order, and each is optional.
5149   for (int I = 0; I < 2; ++I) {
5150     if (Dfmt == DFMT_UNDEF && !tryParseFmt("dfmt", DFMT_MAX, Dfmt))
5151       return MatchOperand_ParseFail;
5152 
5153     if (Nfmt == NFMT_UNDEF && !tryParseFmt("nfmt", NFMT_MAX, Nfmt)) {
5154       return MatchOperand_ParseFail;
5155     }
5156     // Skip the optional comma between dfmt and nfmt,
5157     // but guard against two commas following each other.
5158     if ((Dfmt == DFMT_UNDEF) != (Nfmt == NFMT_UNDEF) &&
5159         !peekToken().is(AsmToken::Comma)) {
5160       trySkipToken(AsmToken::Comma);
5161     }
5162   }
5163 
5164   if (Dfmt == DFMT_UNDEF && Nfmt == NFMT_UNDEF)
5165     return MatchOperand_NoMatch;
5166 
5167   Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt;
5168   Nfmt = (Nfmt == NFMT_UNDEF) ? NFMT_DEFAULT : Nfmt;
5169 
5170   Format = encodeDfmtNfmt(Dfmt, Nfmt);
5171   return MatchOperand_Success;
5172 }
5173 
5174 OperandMatchResultTy
5175 AMDGPUAsmParser::parseUfmt(int64_t &Format) {
5176   using namespace llvm::AMDGPU::MTBUFFormat;
5177 
5178   int64_t Fmt = UFMT_UNDEF;
5179 
5180   if (!tryParseFmt("format", UFMT_MAX, Fmt))
5181     return MatchOperand_ParseFail;
5182 
5183   if (Fmt == UFMT_UNDEF)
5184     return MatchOperand_NoMatch;
5185 
5186   Format = Fmt;
5187   return MatchOperand_Success;
5188 }
5189 
5190 bool AMDGPUAsmParser::matchDfmtNfmt(int64_t &Dfmt,
5191                                     int64_t &Nfmt,
5192                                     StringRef FormatStr,
5193                                     SMLoc Loc) {
5194   using namespace llvm::AMDGPU::MTBUFFormat;
5195   int64_t Format;
5196 
5197   Format = getDfmt(FormatStr);
5198   if (Format != DFMT_UNDEF) {
5199     Dfmt = Format;
5200     return true;
5201   }
5202 
5203   Format = getNfmt(FormatStr, getSTI());
5204   if (Format != NFMT_UNDEF) {
5205     Nfmt = Format;
5206     return true;
5207   }
5208 
5209   Error(Loc, "unsupported format");
5210   return false;
5211 }
5212 
5213 OperandMatchResultTy
5214 AMDGPUAsmParser::parseSymbolicSplitFormat(StringRef FormatStr,
5215                                           SMLoc FormatLoc,
5216                                           int64_t &Format) {
5217   using namespace llvm::AMDGPU::MTBUFFormat;
5218 
5219   int64_t Dfmt = DFMT_UNDEF;
5220   int64_t Nfmt = NFMT_UNDEF;
5221   if (!matchDfmtNfmt(Dfmt, Nfmt, FormatStr, FormatLoc))
5222     return MatchOperand_ParseFail;
5223 
5224   if (trySkipToken(AsmToken::Comma)) {
5225     StringRef Str;
5226     SMLoc Loc = getLoc();
5227     if (!parseId(Str, "expected a format string") ||
5228         !matchDfmtNfmt(Dfmt, Nfmt, Str, Loc)) {
5229       return MatchOperand_ParseFail;
5230     }
5231     if (Dfmt == DFMT_UNDEF) {
5232       Error(Loc, "duplicate numeric format");
5233       return MatchOperand_ParseFail;
5234     } else if (Nfmt == NFMT_UNDEF) {
5235       Error(Loc, "duplicate data format");
5236       return MatchOperand_ParseFail;
5237     }
5238   }
5239 
5240   Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt;
5241   Nfmt = (Nfmt == NFMT_UNDEF) ? NFMT_DEFAULT : Nfmt;
5242 
5243   if (isGFX10Plus()) {
5244     auto Ufmt = convertDfmtNfmt2Ufmt(Dfmt, Nfmt);
5245     if (Ufmt == UFMT_UNDEF) {
5246       Error(FormatLoc, "unsupported format");
5247       return MatchOperand_ParseFail;
5248     }
5249     Format = Ufmt;
5250   } else {
5251     Format = encodeDfmtNfmt(Dfmt, Nfmt);
5252   }
5253 
5254   return MatchOperand_Success;
5255 }
5256 
5257 OperandMatchResultTy
5258 AMDGPUAsmParser::parseSymbolicUnifiedFormat(StringRef FormatStr,
5259                                             SMLoc Loc,
5260                                             int64_t &Format) {
5261   using namespace llvm::AMDGPU::MTBUFFormat;
5262 
5263   auto Id = getUnifiedFormat(FormatStr);
5264   if (Id == UFMT_UNDEF)
5265     return MatchOperand_NoMatch;
5266 
5267   if (!isGFX10Plus()) {
5268     Error(Loc, "unified format is not supported on this GPU");
5269     return MatchOperand_ParseFail;
5270   }
5271 
5272   Format = Id;
5273   return MatchOperand_Success;
5274 }
5275 
5276 OperandMatchResultTy
5277 AMDGPUAsmParser::parseNumericFormat(int64_t &Format) {
5278   using namespace llvm::AMDGPU::MTBUFFormat;
5279   SMLoc Loc = getLoc();
5280 
5281   if (!parseExpr(Format))
5282     return MatchOperand_ParseFail;
5283   if (!isValidFormatEncoding(Format, getSTI())) {
5284     Error(Loc, "out of range format");
5285     return MatchOperand_ParseFail;
5286   }
5287 
5288   return MatchOperand_Success;
5289 }
5290 
5291 OperandMatchResultTy
5292 AMDGPUAsmParser::parseSymbolicOrNumericFormat(int64_t &Format) {
5293   using namespace llvm::AMDGPU::MTBUFFormat;
5294 
5295   if (!trySkipId("format", AsmToken::Colon))
5296     return MatchOperand_NoMatch;
5297 
5298   if (trySkipToken(AsmToken::LBrac)) {
5299     StringRef FormatStr;
5300     SMLoc Loc = getLoc();
5301     if (!parseId(FormatStr, "expected a format string"))
5302       return MatchOperand_ParseFail;
5303 
5304     auto Res = parseSymbolicUnifiedFormat(FormatStr, Loc, Format);
5305     if (Res == MatchOperand_NoMatch)
5306       Res = parseSymbolicSplitFormat(FormatStr, Loc, Format);
5307     if (Res != MatchOperand_Success)
5308       return Res;
5309 
5310     if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
5311       return MatchOperand_ParseFail;
5312 
5313     return MatchOperand_Success;
5314   }
5315 
5316   return parseNumericFormat(Format);
5317 }
5318 
5319 OperandMatchResultTy
5320 AMDGPUAsmParser::parseFORMAT(OperandVector &Operands) {
5321   using namespace llvm::AMDGPU::MTBUFFormat;
5322 
5323   int64_t Format = getDefaultFormatEncoding(getSTI());
5324   OperandMatchResultTy Res;
5325   SMLoc Loc = getLoc();
5326 
5327   // Parse legacy format syntax.
5328   Res = isGFX10Plus() ? parseUfmt(Format) : parseDfmtNfmt(Format);
5329   if (Res == MatchOperand_ParseFail)
5330     return Res;
5331 
5332   bool FormatFound = (Res == MatchOperand_Success);
5333 
5334   Operands.push_back(
5335     AMDGPUOperand::CreateImm(this, Format, Loc, AMDGPUOperand::ImmTyFORMAT));
5336 
5337   if (FormatFound)
5338     trySkipToken(AsmToken::Comma);
5339 
5340   if (isToken(AsmToken::EndOfStatement)) {
5341     // We are expecting an soffset operand,
5342     // but let the matcher handle the error.
5343     return MatchOperand_Success;
5344   }
5345 
5346   // Parse soffset.
5347   Res = parseRegOrImm(Operands);
5348   if (Res != MatchOperand_Success)
5349     return Res;
5350 
5351   trySkipToken(AsmToken::Comma);
5352 
5353   if (!FormatFound) {
5354     Res = parseSymbolicOrNumericFormat(Format);
5355     if (Res == MatchOperand_ParseFail)
5356       return Res;
5357     if (Res == MatchOperand_Success) {
5358       auto Size = Operands.size();
5359       AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands[Size - 2]);
5360       assert(Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyFORMAT);
5361       Op.setImm(Format);
5362     }
5363     return MatchOperand_Success;
5364   }
5365 
5366   if (isId("format") && peekToken().is(AsmToken::Colon)) {
5367     Error(getLoc(), "duplicate format");
5368     return MatchOperand_ParseFail;
5369   }
5370   return MatchOperand_Success;
5371 }
5372 
5373 //===----------------------------------------------------------------------===//
5374 // ds
5375 //===----------------------------------------------------------------------===//
5376 
5377 void AMDGPUAsmParser::cvtDSOffset01(MCInst &Inst,
5378                                     const OperandVector &Operands) {
5379   OptionalImmIndexMap OptionalIdx;
5380 
5381   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
5382     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
5383 
5384     // Add the register arguments
5385     if (Op.isReg()) {
5386       Op.addRegOperands(Inst, 1);
5387       continue;
5388     }
5389 
5390     // Handle optional arguments
5391     OptionalIdx[Op.getImmTy()] = i;
5392   }
5393 
5394   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset0);
5395   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset1);
5396   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS);
5397 
5398   Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0
5399 }
5400 
5401 void AMDGPUAsmParser::cvtDSImpl(MCInst &Inst, const OperandVector &Operands,
5402                                 bool IsGdsHardcoded) {
5403   OptionalImmIndexMap OptionalIdx;
5404 
5405   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
5406     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
5407 
5408     // Add the register arguments
5409     if (Op.isReg()) {
5410       Op.addRegOperands(Inst, 1);
5411       continue;
5412     }
5413 
5414     if (Op.isToken() && Op.getToken() == "gds") {
5415       IsGdsHardcoded = true;
5416       continue;
5417     }
5418 
5419     // Handle optional arguments
5420     OptionalIdx[Op.getImmTy()] = i;
5421   }
5422 
5423   AMDGPUOperand::ImmTy OffsetType =
5424     (Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_gfx10 ||
5425      Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_gfx6_gfx7 ||
5426      Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_vi) ? AMDGPUOperand::ImmTySwizzle :
5427                                                       AMDGPUOperand::ImmTyOffset;
5428 
5429   addOptionalImmOperand(Inst, Operands, OptionalIdx, OffsetType);
5430 
5431   if (!IsGdsHardcoded) {
5432     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS);
5433   }
5434   Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0
5435 }
5436 
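// Builds the exp operand list and its "en" mask: bit i is set for each source
// that is a real register rather than "off"; with compr, sources are tracked
// in pairs, so two bits are set per enabled source.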
5437 void AMDGPUAsmParser::cvtExp(MCInst &Inst, const OperandVector &Operands) {
5438   OptionalImmIndexMap OptionalIdx;
5439 
5440   unsigned OperandIdx[4];
5441   unsigned EnMask = 0;
5442   int SrcIdx = 0;
5443 
5444   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
5445     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
5446 
5447     // Add the register arguments
5448     if (Op.isReg()) {
5449       assert(SrcIdx < 4);
5450       OperandIdx[SrcIdx] = Inst.size();
5451       Op.addRegOperands(Inst, 1);
5452       ++SrcIdx;
5453       continue;
5454     }
5455 
5456     if (Op.isOff()) {
5457       assert(SrcIdx < 4);
5458       OperandIdx[SrcIdx] = Inst.size();
5459       Inst.addOperand(MCOperand::createReg(AMDGPU::NoRegister));
5460       ++SrcIdx;
5461       continue;
5462     }
5463 
5464     if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyExpTgt) {
5465       Op.addImmOperands(Inst, 1);
5466       continue;
5467     }
5468 
5469     if (Op.isToken() && Op.getToken() == "done")
5470       continue;
5471 
5472     // Handle optional arguments
5473     OptionalIdx[Op.getImmTy()] = i;
5474   }
5475 
5476   assert(SrcIdx == 4);
5477 
5478   bool Compr = false;
5479   if (OptionalIdx.find(AMDGPUOperand::ImmTyExpCompr) != OptionalIdx.end()) {
5480     Compr = true;
5481     Inst.getOperand(OperandIdx[1]) = Inst.getOperand(OperandIdx[2]);
5482     Inst.getOperand(OperandIdx[2]).setReg(AMDGPU::NoRegister);
5483     Inst.getOperand(OperandIdx[3]).setReg(AMDGPU::NoRegister);
5484   }
5485 
5486   for (auto i = 0; i < SrcIdx; ++i) {
5487     if (Inst.getOperand(OperandIdx[i]).getReg() != AMDGPU::NoRegister) {
5488       EnMask |= Compr? (0x3 << i * 2) : (0x1 << i);
5489     }
5490   }
5491 
5492   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpVM);
5493   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpCompr);
5494 
5495   Inst.addOperand(MCOperand::createImm(EnMask));
5496 }
5497 
5498 //===----------------------------------------------------------------------===//
5499 // s_waitcnt
5500 //===----------------------------------------------------------------------===//
5501 
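// The s_waitcnt operand is either a raw immediate or a list of counters such
// as (illustrative) "vmcnt(0) lgkmcnt(0)"; counters may be separated by '&' or
// ',' and a "_sat" suffix clamps an out-of-range value instead of erroring.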
5502 static bool
5503 encodeCnt(
5504   const AMDGPU::IsaVersion ISA,
5505   int64_t &IntVal,
5506   int64_t CntVal,
5507   bool Saturate,
5508   unsigned (*encode)(const IsaVersion &Version, unsigned, unsigned),
5509   unsigned (*decode)(const IsaVersion &Version, unsigned))
5510 {
5511   bool Failed = false;
5512 
5513   IntVal = encode(ISA, IntVal, CntVal);
5514   if (CntVal != decode(ISA, IntVal)) {
5515     if (Saturate) {
5516       IntVal = encode(ISA, IntVal, -1);
5517     } else {
5518       Failed = true;
5519     }
5520   }
5521   return Failed;
5522 }
5523 
5524 bool AMDGPUAsmParser::parseCnt(int64_t &IntVal) {
5525 
5526   SMLoc CntLoc = getLoc();
5527   StringRef CntName = getTokenStr();
5528 
5529   if (!skipToken(AsmToken::Identifier, "expected a counter name") ||
5530       !skipToken(AsmToken::LParen, "expected a left parenthesis"))
5531     return false;
5532 
5533   int64_t CntVal;
5534   SMLoc ValLoc = getLoc();
5535   if (!parseExpr(CntVal))
5536     return false;
5537 
5538   AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
5539 
5540   bool Failed = true;
5541   bool Sat = CntName.endswith("_sat");
5542 
5543   if (CntName == "vmcnt" || CntName == "vmcnt_sat") {
5544     Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeVmcnt, decodeVmcnt);
5545   } else if (CntName == "expcnt" || CntName == "expcnt_sat") {
5546     Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeExpcnt, decodeExpcnt);
5547   } else if (CntName == "lgkmcnt" || CntName == "lgkmcnt_sat") {
5548     Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeLgkmcnt, decodeLgkmcnt);
5549   } else {
5550     Error(CntLoc, "invalid counter name " + CntName);
5551     return false;
5552   }
5553 
5554   if (Failed) {
5555     Error(ValLoc, "too large value for " + CntName);
5556     return false;
5557   }
5558 
5559   if (!skipToken(AsmToken::RParen, "expected a closing parenthesis"))
5560     return false;
5561 
5562   if (trySkipToken(AsmToken::Amp) || trySkipToken(AsmToken::Comma)) {
5563     if (isToken(AsmToken::EndOfStatement)) {
5564       Error(getLoc(), "expected a counter name");
5565       return false;
5566     }
5567   }
5568 
5569   return true;
5570 }
5571 
5572 OperandMatchResultTy
5573 AMDGPUAsmParser::parseSWaitCntOps(OperandVector &Operands) {
5574   AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
5575   int64_t Waitcnt = getWaitcntBitMask(ISA);
5576   SMLoc S = getLoc();
5577 
5578   if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) {
5579     while (!isToken(AsmToken::EndOfStatement)) {
5580       if (!parseCnt(Waitcnt))
5581         return MatchOperand_ParseFail;
5582     }
5583   } else {
5584     if (!parseExpr(Waitcnt))
5585       return MatchOperand_ParseFail;
5586   }
5587 
5588   Operands.push_back(AMDGPUOperand::CreateImm(this, Waitcnt, S));
5589   return MatchOperand_Success;
5590 }
5591 
5592 bool
5593 AMDGPUOperand::isSWaitCnt() const {
5594   return isImm();
5595 }
5596 
5597 //===----------------------------------------------------------------------===//
5598 // hwreg
5599 //===----------------------------------------------------------------------===//
5600 
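// A hwreg operand is either a raw 16-bit immediate or a macro of the form
// hwreg(<reg>[, <offset>, <width>]), e.g. (illustrative)
// "hwreg(HW_REG_MODE, 0, 32)"; the register may be named or numeric.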
5601 bool
5602 AMDGPUAsmParser::parseHwregBody(OperandInfoTy &HwReg,
5603                                 OperandInfoTy &Offset,
5604                                 OperandInfoTy &Width) {
5605   using namespace llvm::AMDGPU::Hwreg;
5606 
5607   // The register may be specified by name or using a numeric code
5608   HwReg.Loc = getLoc();
5609   if (isToken(AsmToken::Identifier) &&
5610       (HwReg.Id = getHwregId(getTokenStr())) >= 0) {
5611     HwReg.IsSymbolic = true;
5612     lex(); // skip register name
5613   } else if (!parseExpr(HwReg.Id, "a register name")) {
5614     return false;
5615   }
5616 
5617   if (trySkipToken(AsmToken::RParen))
5618     return true;
5619 
5620   // parse optional params
5621   if (!skipToken(AsmToken::Comma, "expected a comma or a closing parenthesis"))
5622     return false;
5623 
5624   Offset.Loc = getLoc();
5625   if (!parseExpr(Offset.Id))
5626     return false;
5627 
5628   if (!skipToken(AsmToken::Comma, "expected a comma"))
5629     return false;
5630 
5631   Width.Loc = getLoc();
5632   return parseExpr(Width.Id) &&
5633          skipToken(AsmToken::RParen, "expected a closing parenthesis");
5634 }
5635 
5636 bool
5637 AMDGPUAsmParser::validateHwreg(const OperandInfoTy &HwReg,
5638                                const OperandInfoTy &Offset,
5639                                const OperandInfoTy &Width) {
5640 
5641   using namespace llvm::AMDGPU::Hwreg;
5642 
5643   if (HwReg.IsSymbolic && !isValidHwreg(HwReg.Id, getSTI())) {
5644     Error(HwReg.Loc,
5645           "specified hardware register is not supported on this GPU");
5646     return false;
5647   }
5648   if (!isValidHwreg(HwReg.Id)) {
5649     Error(HwReg.Loc,
5650           "invalid code of hardware register: only 6-bit values are legal");
5651     return false;
5652   }
5653   if (!isValidHwregOffset(Offset.Id)) {
5654     Error(Offset.Loc, "invalid bit offset: only 5-bit values are legal");
5655     return false;
5656   }
5657   if (!isValidHwregWidth(Width.Id)) {
5658     Error(Width.Loc,
5659           "invalid bitfield width: only values from 1 to 32 are legal");
5660     return false;
5661   }
5662   return true;
5663 }
5664 
5665 OperandMatchResultTy
5666 AMDGPUAsmParser::parseHwreg(OperandVector &Operands) {
5667   using namespace llvm::AMDGPU::Hwreg;
5668 
5669   int64_t ImmVal = 0;
5670   SMLoc Loc = getLoc();
5671 
5672   if (trySkipId("hwreg", AsmToken::LParen)) {
5673     OperandInfoTy HwReg(ID_UNKNOWN_);
5674     OperandInfoTy Offset(OFFSET_DEFAULT_);
5675     OperandInfoTy Width(WIDTH_DEFAULT_);
5676     if (parseHwregBody(HwReg, Offset, Width) &&
5677         validateHwreg(HwReg, Offset, Width)) {
5678       ImmVal = encodeHwreg(HwReg.Id, Offset.Id, Width.Id);
5679     } else {
5680       return MatchOperand_ParseFail;
5681     }
5682   } else if (parseExpr(ImmVal, "a hwreg macro")) {
5683     if (ImmVal < 0 || !isUInt<16>(ImmVal)) {
5684       Error(Loc, "invalid immediate: only 16-bit values are legal");
5685       return MatchOperand_ParseFail;
5686     }
5687   } else {
5688     return MatchOperand_ParseFail;
5689   }
5690 
5691   Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTyHwreg));
5692   return MatchOperand_Success;
5693 }
5694 
5695 bool AMDGPUOperand::isHwreg() const {
5696   return isImmTy(ImmTyHwreg);
5697 }
5698 
5699 //===----------------------------------------------------------------------===//
5700 // sendmsg
5701 //===----------------------------------------------------------------------===//
5702 
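// A sendmsg operand is either a raw 16-bit immediate or a macro of the form
// sendmsg(<msg>[, <op>[, <stream>]]), e.g. (illustrative)
// "sendmsg(MSG_GS, GS_OP_EMIT, 0)".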
5703 bool
5704 AMDGPUAsmParser::parseSendMsgBody(OperandInfoTy &Msg,
5705                                   OperandInfoTy &Op,
5706                                   OperandInfoTy &Stream) {
5707   using namespace llvm::AMDGPU::SendMsg;
5708 
5709   Msg.Loc = getLoc();
5710   if (isToken(AsmToken::Identifier) && (Msg.Id = getMsgId(getTokenStr())) >= 0) {
5711     Msg.IsSymbolic = true;
5712     lex(); // skip message name
5713   } else if (!parseExpr(Msg.Id, "a message name")) {
5714     return false;
5715   }
5716 
5717   if (trySkipToken(AsmToken::Comma)) {
5718     Op.IsDefined = true;
5719     Op.Loc = getLoc();
5720     if (isToken(AsmToken::Identifier) &&
5721         (Op.Id = getMsgOpId(Msg.Id, getTokenStr())) >= 0) {
5722       lex(); // skip operation name
5723     } else if (!parseExpr(Op.Id, "an operation name")) {
5724       return false;
5725     }
5726 
5727     if (trySkipToken(AsmToken::Comma)) {
5728       Stream.IsDefined = true;
5729       Stream.Loc = getLoc();
5730       if (!parseExpr(Stream.Id))
5731         return false;
5732     }
5733   }
5734 
5735   return skipToken(AsmToken::RParen, "expected a closing parenthesis");
5736 }
5737 
5738 bool
5739 AMDGPUAsmParser::validateSendMsg(const OperandInfoTy &Msg,
5740                                  const OperandInfoTy &Op,
5741                                  const OperandInfoTy &Stream) {
5742   using namespace llvm::AMDGPU::SendMsg;
5743 
5744   // Validation strictness depends on whether the message is specified
5745   // in a symbolic or in a numeric form. In the latter case
5746   // only the encoding possibility is checked.
5747   bool Strict = Msg.IsSymbolic;
5748 
5749   if (!isValidMsgId(Msg.Id, getSTI(), Strict)) {
5750     Error(Msg.Loc, "invalid message id");
5751     return false;
5752   }
5753   if (Strict && (msgRequiresOp(Msg.Id) != Op.IsDefined)) {
5754     if (Op.IsDefined) {
5755       Error(Op.Loc, "message does not support operations");
5756     } else {
5757       Error(Msg.Loc, "missing message operation");
5758     }
5759     return false;
5760   }
5761   if (!isValidMsgOp(Msg.Id, Op.Id, Strict)) {
5762     Error(Op.Loc, "invalid operation id");
5763     return false;
5764   }
5765   if (Strict && !msgSupportsStream(Msg.Id, Op.Id) && Stream.IsDefined) {
5766     Error(Stream.Loc, "message operation does not support streams");
5767     return false;
5768   }
5769   if (!isValidMsgStream(Msg.Id, Op.Id, Stream.Id, Strict)) {
5770     Error(Stream.Loc, "invalid message stream id");
5771     return false;
5772   }
5773   return true;
5774 }
5775 
5776 OperandMatchResultTy
5777 AMDGPUAsmParser::parseSendMsgOp(OperandVector &Operands) {
5778   using namespace llvm::AMDGPU::SendMsg;
5779 
5780   int64_t ImmVal = 0;
5781   SMLoc Loc = getLoc();
5782 
5783   if (trySkipId("sendmsg", AsmToken::LParen)) {
5784     OperandInfoTy Msg(ID_UNKNOWN_);
5785     OperandInfoTy Op(OP_NONE_);
5786     OperandInfoTy Stream(STREAM_ID_NONE_);
5787     if (parseSendMsgBody(Msg, Op, Stream) &&
5788         validateSendMsg(Msg, Op, Stream)) {
5789       ImmVal = encodeMsg(Msg.Id, Op.Id, Stream.Id);
5790     } else {
5791       return MatchOperand_ParseFail;
5792     }
5793   } else if (parseExpr(ImmVal, "a sendmsg macro")) {
5794     if (ImmVal < 0 || !isUInt<16>(ImmVal)) {
5795       Error(Loc, "invalid immediate: only 16-bit values are legal");
5796       return MatchOperand_ParseFail;
5797     }
5798   } else {
5799     return MatchOperand_ParseFail;
5800   }
5801 
5802   Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTySendMsg));
5803   return MatchOperand_Success;
5804 }
5805 
5806 bool AMDGPUOperand::isSendMsg() const {
5807   return isImmTy(ImmTySendMsg);
5808 }
5809 
5810 //===----------------------------------------------------------------------===//
5811 // v_interp
5812 //===----------------------------------------------------------------------===//
5813 
5814 OperandMatchResultTy AMDGPUAsmParser::parseInterpSlot(OperandVector &Operands) {
5815   StringRef Str;
5816   SMLoc S = getLoc();
5817 
5818   if (!parseId(Str))
5819     return MatchOperand_NoMatch;
5820 
5821   int Slot = StringSwitch<int>(Str)
5822     .Case("p10", 0)
5823     .Case("p20", 1)
5824     .Case("p0", 2)
5825     .Default(-1);
5826 
5827   if (Slot == -1) {
5828     Error(S, "invalid interpolation slot");
5829     return MatchOperand_ParseFail;
5830   }
5831 
5832   Operands.push_back(AMDGPUOperand::CreateImm(this, Slot, S,
5833                                               AMDGPUOperand::ImmTyInterpSlot));
5834   return MatchOperand_Success;
5835 }
5836 
5837 OperandMatchResultTy AMDGPUAsmParser::parseInterpAttr(OperandVector &Operands) {
5838   StringRef Str;
5839   SMLoc S = getLoc();
5840 
5841   if (!parseId(Str))
5842     return MatchOperand_NoMatch;
5843 
5844   if (!Str.startswith("attr")) {
5845     Error(S, "invalid interpolation attribute");
5846     return MatchOperand_ParseFail;
5847   }
5848 
5849   StringRef Chan = Str.take_back(2);
5850   int AttrChan = StringSwitch<int>(Chan)
5851     .Case(".x", 0)
5852     .Case(".y", 1)
5853     .Case(".z", 2)
5854     .Case(".w", 3)
5855     .Default(-1);
5856   if (AttrChan == -1) {
5857     Error(S, "invalid or missing interpolation attribute channel");
5858     return MatchOperand_ParseFail;
5859   }
5860 
5861   Str = Str.drop_back(2).drop_front(4);
5862 
5863   uint8_t Attr;
5864   if (Str.getAsInteger(10, Attr)) {
5865     Error(S, "invalid or missing interpolation attribute number");
5866     return MatchOperand_ParseFail;
5867   }
5868 
5869   if (Attr > 63) {
5870     Error(S, "out of bounds interpolation attribute number");
5871     return MatchOperand_ParseFail;
5872   }
5873 
5874   SMLoc SChan = SMLoc::getFromPointer(Chan.data());
5875 
5876   Operands.push_back(AMDGPUOperand::CreateImm(this, Attr, S,
5877                                               AMDGPUOperand::ImmTyInterpAttr));
5878   Operands.push_back(AMDGPUOperand::CreateImm(this, AttrChan, SChan,
5879                                               AMDGPUOperand::ImmTyAttrChan));
5880   return MatchOperand_Success;
5881 }
5882 
5883 //===----------------------------------------------------------------------===//
5884 // exp
5885 //===----------------------------------------------------------------------===//
5886 
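// Recognized exp targets (see the cases below): null, mrt0..mrt7, mrtz,
// pos0..pos3 (pos4 on GFX10+), prim (GFX10+ only) and param0..param31.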
5887 OperandMatchResultTy AMDGPUAsmParser::parseExpTgtImpl(StringRef Str,
5888                                                       uint8_t &Val) {
5889   if (Str == "null") {
5890     Val = Exp::ET_NULL;
5891     return MatchOperand_Success;
5892   }
5893 
5894   if (Str.startswith("mrt")) {
5895     Str = Str.drop_front(3);
5896     if (Str == "z") { // == mrtz
5897       Val = Exp::ET_MRTZ;
5898       return MatchOperand_Success;
5899     }
5900 
5901     if (Str.getAsInteger(10, Val))
5902       return MatchOperand_ParseFail;
5903 
5904     if (Val > Exp::ET_MRT7)
5905       return MatchOperand_ParseFail;
5906 
5907     return MatchOperand_Success;
5908   }
5909 
5910   if (Str.startswith("pos")) {
5911     Str = Str.drop_front(3);
5912     if (Str.getAsInteger(10, Val))
5913       return MatchOperand_ParseFail;
5914 
5915     if (Val > (isGFX10Plus() ? 4 : 3))
5916       return MatchOperand_ParseFail;
5917 
5918     Val += Exp::ET_POS0;
5919     return MatchOperand_Success;
5920   }
5921 
5922   if (isGFX10Plus() && Str == "prim") {
5923     Val = Exp::ET_PRIM;
5924     return MatchOperand_Success;
5925   }
5926 
5927   if (Str.startswith("param")) {
5928     Str = Str.drop_front(5);
5929     if (Str.getAsInteger(10, Val))
5930       return MatchOperand_ParseFail;
5931 
5932     if (Val >= 32)
5933       return MatchOperand_ParseFail;
5934 
5935     Val += Exp::ET_PARAM0;
5936     return MatchOperand_Success;
5937   }
5938 
5939   return MatchOperand_ParseFail;
5940 }
5941 
5942 OperandMatchResultTy AMDGPUAsmParser::parseExpTgt(OperandVector &Operands) {
5943   StringRef Str;
5944   SMLoc S = getLoc();
5945 
5946   if (!parseId(Str))
5947     return MatchOperand_NoMatch;
5948 
5949   uint8_t Val;
5950   auto Res = parseExpTgtImpl(Str, Val);
5951   if (Res != MatchOperand_Success) {
5952     Error(S, "invalid exp target");
5953     return Res;
5954   }
5955 
5956   Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S,
5957                                               AMDGPUOperand::ImmTyExpTgt));
5958   return MatchOperand_Success;
5959 }
5960 
5961 //===----------------------------------------------------------------------===//
5962 // parser helpers
5963 //===----------------------------------------------------------------------===//
5964 
5965 bool
5966 AMDGPUAsmParser::isId(const AsmToken &Token, const StringRef Id) const {
5967   return Token.is(AsmToken::Identifier) && Token.getString() == Id;
5968 }
5969 
5970 bool
5971 AMDGPUAsmParser::isId(const StringRef Id) const {
5972   return isId(getToken(), Id);
5973 }
5974 
5975 bool
5976 AMDGPUAsmParser::isToken(const AsmToken::TokenKind Kind) const {
5977   return getTokenKind() == Kind;
5978 }
5979 
5980 bool
5981 AMDGPUAsmParser::trySkipId(const StringRef Id) {
5982   if (isId(Id)) {
5983     lex();
5984     return true;
5985   }
5986   return false;
5987 }
5988 
5989 bool
5990 AMDGPUAsmParser::trySkipId(const StringRef Id, const AsmToken::TokenKind Kind) {
5991   if (isId(Id) && peekToken().is(Kind)) {
5992     lex();
5993     lex();
5994     return true;
5995   }
5996   return false;
5997 }
5998 
5999 bool
6000 AMDGPUAsmParser::trySkipToken(const AsmToken::TokenKind Kind) {
6001   if (isToken(Kind)) {
6002     lex();
6003     return true;
6004   }
6005   return false;
6006 }
6007 
6008 bool
6009 AMDGPUAsmParser::skipToken(const AsmToken::TokenKind Kind,
6010                            const StringRef ErrMsg) {
6011   if (!trySkipToken(Kind)) {
6012     Error(getLoc(), ErrMsg);
6013     return false;
6014   }
6015   return true;
6016 }
6017 
6018 bool
6019 AMDGPUAsmParser::parseExpr(int64_t &Imm, StringRef Expected) {
6020   SMLoc S = getLoc();
6021 
6022   const MCExpr *Expr;
6023   if (Parser.parseExpression(Expr))
6024     return false;
6025 
6026   if (Expr->evaluateAsAbsolute(Imm))
6027     return true;
6028 
6029   if (Expected.empty()) {
6030     Error(S, "expected absolute expression");
6031   } else {
6032     Error(S, Twine("expected ", Expected) +
6033              Twine(" or an absolute expression"));
6034   }
6035   return false;
6036 }
6037 
6038 bool
6039 AMDGPUAsmParser::parseExpr(OperandVector &Operands) {
6040   SMLoc S = getLoc();
6041 
6042   const MCExpr *Expr;
6043   if (Parser.parseExpression(Expr))
6044     return false;
6045 
6046   int64_t IntVal;
6047   if (Expr->evaluateAsAbsolute(IntVal)) {
6048     Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S));
6049   } else {
6050     Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S));
6051   }
6052   return true;
6053 }
6054 
6055 bool
6056 AMDGPUAsmParser::parseString(StringRef &Val, const StringRef ErrMsg) {
6057   if (isToken(AsmToken::String)) {
6058     Val = getToken().getStringContents();
6059     lex();
6060     return true;
6061   } else {
6062     Error(getLoc(), ErrMsg);
6063     return false;
6064   }
6065 }
6066 
6067 bool
6068 AMDGPUAsmParser::parseId(StringRef &Val, const StringRef ErrMsg) {
6069   if (isToken(AsmToken::Identifier)) {
6070     Val = getTokenStr();
6071     lex();
6072     return true;
6073   } else {
6074     if (!ErrMsg.empty())
6075       Error(getLoc(), ErrMsg);
6076     return false;
6077   }
6078 }
6079 
6080 AsmToken
6081 AMDGPUAsmParser::getToken() const {
6082   return Parser.getTok();
6083 }
6084 
6085 AsmToken
6086 AMDGPUAsmParser::peekToken() {
6087   return isToken(AsmToken::EndOfStatement) ? getToken() : getLexer().peekTok();
6088 }
6089 
6090 void
6091 AMDGPUAsmParser::peekTokens(MutableArrayRef<AsmToken> Tokens) {
6092   auto TokCount = getLexer().peekTokens(Tokens);
6093 
6094   for (auto Idx = TokCount; Idx < Tokens.size(); ++Idx)
6095     Tokens[Idx] = AsmToken(AsmToken::Error, "");
6096 }
6097 
6098 AsmToken::TokenKind
6099 AMDGPUAsmParser::getTokenKind() const {
6100   return getLexer().getKind();
6101 }
6102 
6103 SMLoc
6104 AMDGPUAsmParser::getLoc() const {
6105   return getToken().getLoc();
6106 }
6107 
6108 StringRef
6109 AMDGPUAsmParser::getTokenStr() const {
6110   return getToken().getString();
6111 }
6112 
6113 void
6114 AMDGPUAsmParser::lex() {
6115   Parser.Lex();
6116 }
6117 
6118 SMLoc
6119 AMDGPUAsmParser::getOperandLoc(std::function<bool(const AMDGPUOperand&)> Test,
6120                                const OperandVector &Operands) const {
6121   for (unsigned i = Operands.size() - 1; i > 0; --i) {
6122     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
6123     if (Test(Op))
6124       return Op.getStartLoc();
6125   }
6126   return ((AMDGPUOperand &)*Operands[0]).getStartLoc();
6127 }
6128 
6129 SMLoc
6130 AMDGPUAsmParser::getImmLoc(AMDGPUOperand::ImmTy Type,
6131                            const OperandVector &Operands) const {
6132   auto Test = [=](const AMDGPUOperand& Op) { return Op.isImmTy(Type); };
6133   return getOperandLoc(Test, Operands);
6134 }
6135 
6136 SMLoc
6137 AMDGPUAsmParser::getRegLoc(unsigned Reg,
6138                            const OperandVector &Operands) const {
6139   auto Test = [=](const AMDGPUOperand& Op) {
6140     return Op.isRegKind() && Op.getReg() == Reg;
6141   };
6142   return getOperandLoc(Test, Operands);
6143 }
6144 
6145 SMLoc
6146 AMDGPUAsmParser::getLitLoc(const OperandVector &Operands) const {
6147   auto Test = [](const AMDGPUOperand& Op) {
6148     return Op.IsImmKindLiteral() || Op.isExpr();
6149   };
6150   return getOperandLoc(Test, Operands);
6151 }
6152 
6153 SMLoc
6154 AMDGPUAsmParser::getConstLoc(const OperandVector &Operands) const {
6155   auto Test = [](const AMDGPUOperand& Op) {
6156     return Op.isImmKindConst();
6157   };
6158   return getOperandLoc(Test, Operands);
6159 }
6160 
6161 //===----------------------------------------------------------------------===//
6162 // swizzle
6163 //===----------------------------------------------------------------------===//
6164 
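// The swizzle operand is either a raw 16-bit offset or one of the macros
// handled below, e.g. (illustrative): swizzle(QUAD_PERM, 0, 1, 2, 3),
// swizzle(BITMASK_PERM, "01pi0"), swizzle(BROADCAST, 2, 0), swizzle(SWAP, 8)
// or swizzle(REVERSE, 4).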
6165 LLVM_READNONE
6166 static unsigned
6167 encodeBitmaskPerm(const unsigned AndMask,
6168                   const unsigned OrMask,
6169                   const unsigned XorMask) {
6170   using namespace llvm::AMDGPU::Swizzle;
6171 
6172   return BITMASK_PERM_ENC |
6173          (AndMask << BITMASK_AND_SHIFT) |
6174          (OrMask  << BITMASK_OR_SHIFT)  |
6175          (XorMask << BITMASK_XOR_SHIFT);
6176 }
6177 
6178 bool
6179 AMDGPUAsmParser::parseSwizzleOperand(int64_t &Op,
6180                                      const unsigned MinVal,
6181                                      const unsigned MaxVal,
6182                                      const StringRef ErrMsg,
6183                                      SMLoc &Loc) {
6184   if (!skipToken(AsmToken::Comma, "expected a comma")) {
6185     return false;
6186   }
6187   Loc = getLoc();
6188   if (!parseExpr(Op)) {
6189     return false;
6190   }
6191   if (Op < MinVal || Op > MaxVal) {
6192     Error(Loc, ErrMsg);
6193     return false;
6194   }
6195 
6196   return true;
6197 }
6198 
6199 bool
6200 AMDGPUAsmParser::parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
6201                                       const unsigned MinVal,
6202                                       const unsigned MaxVal,
6203                                       const StringRef ErrMsg) {
6204   SMLoc Loc;
6205   for (unsigned i = 0; i < OpNum; ++i) {
6206     if (!parseSwizzleOperand(Op[i], MinVal, MaxVal, ErrMsg, Loc))
6207       return false;
6208   }
6209 
6210   return true;
6211 }
6212 
6213 bool
6214 AMDGPUAsmParser::parseSwizzleQuadPerm(int64_t &Imm) {
6215   using namespace llvm::AMDGPU::Swizzle;
6216 
6217   int64_t Lane[LANE_NUM];
6218   if (parseSwizzleOperands(LANE_NUM, Lane, 0, LANE_MAX,
6219                            "expected a 2-bit lane id")) {
6220     Imm = QUAD_PERM_ENC;
6221     for (unsigned I = 0; I < LANE_NUM; ++I) {
6222       Imm |= Lane[I] << (LANE_SHIFT * I);
6223     }
6224     return true;
6225   }
6226   return false;
6227 }
6228 
6229 bool
6230 AMDGPUAsmParser::parseSwizzleBroadcast(int64_t &Imm) {
6231   using namespace llvm::AMDGPU::Swizzle;
6232 
6233   SMLoc Loc;
6234   int64_t GroupSize;
6235   int64_t LaneIdx;
6236 
6237   if (!parseSwizzleOperand(GroupSize,
6238                            2, 32,
6239                            "group size must be in the interval [2,32]",
6240                            Loc)) {
6241     return false;
6242   }
6243   if (!isPowerOf2_64(GroupSize)) {
6244     Error(Loc, "group size must be a power of two");
6245     return false;
6246   }
6247   if (parseSwizzleOperand(LaneIdx,
6248                           0, GroupSize - 1,
6249                           "lane id must be in the interval [0,group size - 1]",
6250                           Loc)) {
6251     Imm = encodeBitmaskPerm(BITMASK_MAX - GroupSize + 1, LaneIdx, 0);
6252     return true;
6253   }
6254   return false;
6255 }
6256 
6257 bool
6258 AMDGPUAsmParser::parseSwizzleReverse(int64_t &Imm) {
6259   using namespace llvm::AMDGPU::Swizzle;
6260 
6261   SMLoc Loc;
6262   int64_t GroupSize;
6263 
6264   if (!parseSwizzleOperand(GroupSize,
6265                            2, 32,
6266                            "group size must be in the interval [2,32]",
6267                            Loc)) {
6268     return false;
6269   }
6270   if (!isPowerOf2_64(GroupSize)) {
6271     Error(Loc, "group size must be a power of two");
6272     return false;
6273   }
6274 
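       // XOR with (GroupSize - 1) reverses the lane order within each group.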
6275   Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize - 1);
6276   return true;
6277 }
6278 
6279 bool
6280 AMDGPUAsmParser::parseSwizzleSwap(int64_t &Imm) {
6281   using namespace llvm::AMDGPU::Swizzle;
6282 
6283   SMLoc Loc;
6284   int64_t GroupSize;
6285 
6286   if (!parseSwizzleOperand(GroupSize,
6287                            1, 16,
6288                            "group size must be in the interval [1,16]",
6289                            Loc)) {
6290     return false;
6291   }
6292   if (!isPowerOf2_64(GroupSize)) {
6293     Error(Loc, "group size must be a power of two");
6294     return false;
6295   }
6296 
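       // XOR with GroupSize swaps each pair of adjacent groups of GroupSize lanes.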
6297   Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize);
6298   return true;
6299 }
6300 
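     // Parses the 5-character BITMASK_PERM control string. Each character
     // controls one bit of the lane id, most significant bit first:
     //   '0' - force the bit to 0, '1' - force it to 1,
     //   'p' - preserve the bit,   'i' - invert it.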
6301 bool
6302 AMDGPUAsmParser::parseSwizzleBitmaskPerm(int64_t &Imm) {
6303   using namespace llvm::AMDGPU::Swizzle;
6304 
6305   if (!skipToken(AsmToken::Comma, "expected a comma")) {
6306     return false;
6307   }
6308 
6309   StringRef Ctl;
6310   SMLoc StrLoc = getLoc();
6311   if (!parseString(Ctl)) {
6312     return false;
6313   }
6314   if (Ctl.size() != BITMASK_WIDTH) {
6315     Error(StrLoc, "expected a 5-character mask");
6316     return false;
6317   }
6318 
6319   unsigned AndMask = 0;
6320   unsigned OrMask = 0;
6321   unsigned XorMask = 0;
6322 
6323   for (size_t i = 0; i < Ctl.size(); ++i) {
6324     unsigned Mask = 1 << (BITMASK_WIDTH - 1 - i);
6325     switch(Ctl[i]) {
6326     default:
6327       Error(StrLoc, "invalid mask");
6328       return false;
6329     case '0':
6330       break;
6331     case '1':
6332       OrMask |= Mask;
6333       break;
6334     case 'p':
6335       AndMask |= Mask;
6336       break;
6337     case 'i':
6338       AndMask |= Mask;
6339       XorMask |= Mask;
6340       break;
6341     }
6342   }
6343 
6344   Imm = encodeBitmaskPerm(AndMask, OrMask, XorMask);
6345   return true;
6346 }
6347 
6348 bool
6349 AMDGPUAsmParser::parseSwizzleOffset(int64_t &Imm) {
6350 
6351   SMLoc OffsetLoc = getLoc();
6352 
6353   if (!parseExpr(Imm, "a swizzle macro")) {
6354     return false;
6355   }
6356   if (!isUInt<16>(Imm)) {
6357     Error(OffsetLoc, "expected a 16-bit offset");
6358     return false;
6359   }
6360   return true;
6361 }
6362 
6363 bool
6364 AMDGPUAsmParser::parseSwizzleMacro(int64_t &Imm) {
6365   using namespace llvm::AMDGPU::Swizzle;
6366 
6367   if (skipToken(AsmToken::LParen, "expected a left parenthesis")) {
6368 
6369     SMLoc ModeLoc = getLoc();
6370     bool Ok = false;
6371 
6372     if (trySkipId(IdSymbolic[ID_QUAD_PERM])) {
6373       Ok = parseSwizzleQuadPerm(Imm);
6374     } else if (trySkipId(IdSymbolic[ID_BITMASK_PERM])) {
6375       Ok = parseSwizzleBitmaskPerm(Imm);
6376     } else if (trySkipId(IdSymbolic[ID_BROADCAST])) {
6377       Ok = parseSwizzleBroadcast(Imm);
6378     } else if (trySkipId(IdSymbolic[ID_SWAP])) {
6379       Ok = parseSwizzleSwap(Imm);
6380     } else if (trySkipId(IdSymbolic[ID_REVERSE])) {
6381       Ok = parseSwizzleReverse(Imm);
6382     } else {
6383       Error(ModeLoc, "expected a swizzle mode");
6384     }
6385 
6386     return Ok && skipToken(AsmToken::RParen, "expected a closing parenthesis");
6387   }
6388 
6389   return false;
6390 }
6391 
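     // Parses the swizzle operand of ds_swizzle_b32. Two forms are accepted:
     // a raw 16-bit value such as "offset:0xffff", or a symbolic macro such as
     // "offset:swizzle(SWAP, 2)"; both produce an ImmTySwizzle immediate.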
6392 OperandMatchResultTy
6393 AMDGPUAsmParser::parseSwizzleOp(OperandVector &Operands) {
6394   SMLoc S = getLoc();
6395   int64_t Imm = 0;
6396 
6397   if (trySkipId("offset")) {
6398 
6399     bool Ok = false;
6400     if (skipToken(AsmToken::Colon, "expected a colon")) {
6401       if (trySkipId("swizzle")) {
6402         Ok = parseSwizzleMacro(Imm);
6403       } else {
6404         Ok = parseSwizzleOffset(Imm);
6405       }
6406     }
6407 
6408     Operands.push_back(AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTySwizzle));
6409 
6410     return Ok? MatchOperand_Success : MatchOperand_ParseFail;
6411   } else {
6412     // Swizzle "offset" operand is optional.
6413     // If it is omitted, try parsing other optional operands.
6414     return parseOptionalOpr(Operands);
6415   }
6416 }
6417 
6418 bool
6419 AMDGPUOperand::isSwizzle() const {
6420   return isImmTy(ImmTySwizzle);
6421 }
6422 
6423 //===----------------------------------------------------------------------===//
6424 // VGPR Index Mode
6425 //===----------------------------------------------------------------------===//
6426 
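     // Parses the body of the gpr_idx(...) macro, e.g. a form like
     // "gpr_idx(SRC0, DST)". Mode names come from VGPRIndexMode::IdSymbolic
     // and are OR'ed into a bitmask; an empty list, "gpr_idx()", yields OFF.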
6427 int64_t AMDGPUAsmParser::parseGPRIdxMacro() {
6428 
6429   using namespace llvm::AMDGPU::VGPRIndexMode;
6430 
6431   if (trySkipToken(AsmToken::RParen)) {
6432     return OFF;
6433   }
6434 
6435   int64_t Imm = 0;
6436 
6437   while (true) {
6438     unsigned Mode = 0;
6439     SMLoc S = getLoc();
6440 
6441     for (unsigned ModeId = ID_MIN; ModeId <= ID_MAX; ++ModeId) {
6442       if (trySkipId(IdSymbolic[ModeId])) {
6443         Mode = 1 << ModeId;
6444         break;
6445       }
6446     }
6447 
6448     if (Mode == 0) {
6449       Error(S, (Imm == 0)?
6450                "expected a VGPR index mode or a closing parenthesis" :
6451                "expected a VGPR index mode");
6452       return UNDEF;
6453     }
6454 
6455     if (Imm & Mode) {
6456       Error(S, "duplicate VGPR index mode");
6457       return UNDEF;
6458     }
6459     Imm |= Mode;
6460 
6461     if (trySkipToken(AsmToken::RParen))
6462       break;
6463     if (!skipToken(AsmToken::Comma,
6464                    "expected a comma or a closing parenthesis"))
6465       return UNDEF;
6466   }
6467 
6468   return Imm;
6469 }
6470 
6471 OperandMatchResultTy
6472 AMDGPUAsmParser::parseGPRIdxMode(OperandVector &Operands) {
6473 
6474   using namespace llvm::AMDGPU::VGPRIndexMode;
6475 
6476   int64_t Imm = 0;
6477   SMLoc S = getLoc();
6478 
6479   if (trySkipId("gpr_idx", AsmToken::LParen)) {
6480     Imm = parseGPRIdxMacro();
6481     if (Imm == UNDEF)
6482       return MatchOperand_ParseFail;
6483   } else {
6484     if (getParser().parseAbsoluteExpression(Imm))
6485       return MatchOperand_ParseFail;
6486     if (Imm < 0 || !isUInt<4>(Imm)) {
6487       Error(S, "invalid immediate: only 4-bit values are legal");
6488       return MatchOperand_ParseFail;
6489     }
6490   }
6491 
6492   Operands.push_back(
6493       AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyGprIdxMode));
6494   return MatchOperand_Success;
6495 }
6496 
6497 bool AMDGPUOperand::isGPRIdxMode() const {
6498   return isImmTy(ImmTyGprIdxMode);
6499 }
6500 
6501 //===----------------------------------------------------------------------===//
6502 // sopp branch targets
6503 //===----------------------------------------------------------------------===//
6504 
6505 OperandMatchResultTy
6506 AMDGPUAsmParser::parseSOppBrTarget(OperandVector &Operands) {
6507 
6508   // Make sure we are not parsing something
6509   // that looks like a label or an expression but is not.
6510   // This will improve error messages.
6511   if (isRegister() || isModifier())
6512     return MatchOperand_NoMatch;
6513 
6514   if (!parseExpr(Operands))
6515     return MatchOperand_ParseFail;
6516 
6517   AMDGPUOperand &Opr = ((AMDGPUOperand &)*Operands[Operands.size() - 1]);
6518   assert(Opr.isImm() || Opr.isExpr());
6519   SMLoc Loc = Opr.getStartLoc();
6520 
6521   // Currently we do not support arbitrary expressions as branch targets.
6522   // Only labels and absolute expressions are accepted.
6523   if (Opr.isExpr() && !Opr.isSymbolRefExpr()) {
6524     Error(Loc, "expected an absolute expression or a label");
6525   } else if (Opr.isImm() && !Opr.isS16Imm()) {
6526     Error(Loc, "expected a 16-bit signed jump offset");
6527   }
6528 
6529   return MatchOperand_Success;
6530 }
6531 
6532 //===----------------------------------------------------------------------===//
6533 // Boolean holding registers
6534 //===----------------------------------------------------------------------===//
6535 
6536 OperandMatchResultTy
6537 AMDGPUAsmParser::parseBoolReg(OperandVector &Operands) {
6538   return parseReg(Operands);
6539 }
6540 
6541 //===----------------------------------------------------------------------===//
6542 // mubuf
6543 //===----------------------------------------------------------------------===//
6544 
6545 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultDLC() const {
6546   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDLC);
6547 }
6548 
6549 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultGLC() const {
6550   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyGLC);
6551 }
6552 
6553 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultGLC_1() const {
6554   return AMDGPUOperand::CreateImm(this, -1, SMLoc(), AMDGPUOperand::ImmTyGLC);
6555 }
6556 
6557 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSLC() const {
6558   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTySLC);
6559 }
6560 
6561 void AMDGPUAsmParser::cvtMubufImpl(MCInst &Inst,
6562                                const OperandVector &Operands,
6563                                bool IsAtomic,
6564                                bool IsAtomicReturn,
6565                                bool IsLds) {
6566   bool IsLdsOpcode = IsLds;
6567   bool HasLdsModifier = false;
6568   OptionalImmIndexMap OptionalIdx;
6569   assert(IsAtomicReturn ? IsAtomic : true);
6570   unsigned FirstOperandIdx = 1;
6571 
6572   for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) {
6573     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
6574 
6575     // Add the register arguments
6576     if (Op.isReg()) {
6577       Op.addRegOperands(Inst, 1);
6578       // Insert a tied src for atomic return dst.
6579       // This cannot be postponed because subsequent calls to
6580       // addImmOperands rely on the correct number of MC operands.
6581       if (IsAtomicReturn && i == FirstOperandIdx)
6582         Op.addRegOperands(Inst, 1);
6583       continue;
6584     }
6585 
6586     // Handle the case where soffset is an immediate
6587     if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
6588       Op.addImmOperands(Inst, 1);
6589       continue;
6590     }
6591 
6592     HasLdsModifier |= Op.isLDS();
6593 
6594     // Handle tokens like 'offen' which are sometimes hard-coded into the
6595     // asm string.  There are no MCInst operands for these.
6596     if (Op.isToken()) {
6597       continue;
6598     }
6599     assert(Op.isImm());
6600 
6601     // Handle optional arguments
6602     OptionalIdx[Op.getImmTy()] = i;
6603   }
6604 
6605   // This is a workaround for an LLVM quirk which may result in an
6606   // incorrect instruction selection. Lds and non-lds versions of
6607   // MUBUF instructions are identical except that lds versions have a
6608   // mandatory 'lds' modifier. However, this modifier follows the
6609   // optional modifiers, and the LLVM asm matcher regards this 'lds'
6610   // modifier as an optional one. As a result, an lds version of an
6611   // opcode may be selected even if it has no 'lds' modifier.
6612   if (IsLdsOpcode && !HasLdsModifier) {
6613     int NoLdsOpcode = AMDGPU::getMUBUFNoLdsInst(Inst.getOpcode());
6614     if (NoLdsOpcode != -1) { // Got lds version - correct it.
6615       Inst.setOpcode(NoLdsOpcode);
6616       IsLdsOpcode = false;
6617     }
6618   }
6619 
6620   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset);
6621   if (!IsAtomic || IsAtomicReturn) {
6622     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC,
6623                           IsAtomicReturn ? -1 : 0);
6624   }
6625   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC);
6626 
6627   if (!IsLdsOpcode) { // tfe is not legal with lds opcodes
6628     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
6629   }
6630 
6631   if (isGFX10Plus())
6632     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDLC);
6633 }
6634 
6635 void AMDGPUAsmParser::cvtMtbuf(MCInst &Inst, const OperandVector &Operands) {
6636   OptionalImmIndexMap OptionalIdx;
6637 
6638   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
6639     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
6640 
6641     // Add the register arguments
6642     if (Op.isReg()) {
6643       Op.addRegOperands(Inst, 1);
6644       continue;
6645     }
6646 
6647     // Handle the case where soffset is an immediate
6648     if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
6649       Op.addImmOperands(Inst, 1);
6650       continue;
6651     }
6652 
6653     // Handle tokens like 'offen' which are sometimes hard-coded into the
6654     // asm string.  There are no MCInst operands for these.
6655     if (Op.isToken()) {
6656       continue;
6657     }
6658     assert(Op.isImm());
6659 
6660     // Handle optional arguments
6661     OptionalIdx[Op.getImmTy()] = i;
6662   }
6663 
6664   addOptionalImmOperand(Inst, Operands, OptionalIdx,
6665                         AMDGPUOperand::ImmTyOffset);
6666   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyFORMAT);
6667   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC);
6668   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC);
6669   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
6670 
6671   if (isGFX10Plus())
6672     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDLC);
6673 }
6674 
6675 //===----------------------------------------------------------------------===//
6676 // mimg
6677 //===----------------------------------------------------------------------===//
6678 
6679 void AMDGPUAsmParser::cvtMIMG(MCInst &Inst, const OperandVector &Operands,
6680                               bool IsAtomic) {
6681   unsigned I = 1;
6682   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
6683   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
6684     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
6685   }
6686 
6687   if (IsAtomic) {
6688     // Add src, same as dst
6689     assert(Desc.getNumDefs() == 1);
6690     ((AMDGPUOperand &)*Operands[I - 1]).addRegOperands(Inst, 1);
6691   }
6692 
6693   OptionalImmIndexMap OptionalIdx;
6694 
6695   for (unsigned E = Operands.size(); I != E; ++I) {
6696     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
6697 
6698     // Add the register arguments
6699     if (Op.isReg()) {
6700       Op.addRegOperands(Inst, 1);
6701     } else if (Op.isImmModifier()) {
6702       OptionalIdx[Op.getImmTy()] = I;
6703     } else if (!Op.isToken()) {
6704       llvm_unreachable("unexpected operand type");
6705     }
6706   }
6707 
6708   bool IsGFX10Plus = isGFX10Plus();
6709 
6710   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDMask);
6711   if (IsGFX10Plus)
6712     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDim, -1);
6713   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyUNorm);
6714   if (IsGFX10Plus)
6715     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDLC);
6716   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC);
6717   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC);
6718   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyR128A16);
6719   if (IsGFX10Plus)
6720     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyA16);
6721   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
6722   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyLWE);
6723   if (!IsGFX10Plus)
6724     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDA);
6725   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyD16);
6726 }
6727 
6728 void AMDGPUAsmParser::cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands) {
6729   cvtMIMG(Inst, Operands, true);
6730 }
6731 
6732 void AMDGPUAsmParser::cvtIntersectRay(MCInst &Inst,
6733                                       const OperandVector &Operands) {
6734   for (unsigned I = 1; I < Operands.size(); ++I) {
6735     auto &Operand = (AMDGPUOperand &)*Operands[I];
6736     if (Operand.isReg())
6737       Operand.addRegOperands(Inst, 1);
6738   }
6739 
6740   Inst.addOperand(MCOperand::createImm(1)); // a16
6741 }
6742 
6743 //===----------------------------------------------------------------------===//
6744 // smrd
6745 //===----------------------------------------------------------------------===//
6746 
6747 bool AMDGPUOperand::isSMRDOffset8() const {
6748   return isImm() && isUInt<8>(getImm());
6749 }
6750 
6751 bool AMDGPUOperand::isSMEMOffset() const {
6752   return isImm(); // Offset range is checked later by validator.
6753 }
6754 
6755 bool AMDGPUOperand::isSMRDLiteralOffset() const {
6756   // 32-bit literals are only supported on CI and we only want to use them
6757   // when the offset is wider than 8 bits.
6758   return isImm() && !isUInt<8>(getImm()) && isUInt<32>(getImm());
6759 }
6760 
6761 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDOffset8() const {
6762   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
6763 }
6764 
6765 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMEMOffset() const {
6766   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
6767 }
6768 
6769 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDLiteralOffset() const {
6770   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
6771 }
6772 
6773 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultFlatOffset() const {
6774   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
6775 }
6776 
6777 //===----------------------------------------------------------------------===//
6778 // vop3
6779 //===----------------------------------------------------------------------===//
6780 
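     // The VOP3 output modifier (omod) field encodes 0 = none, 1 = *2, 2 = *4
     // and 3 = /2. The two converters below map the values written in asm
     // ("mul:1|2|4" and "div:1|2") onto that encoding.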
6781 static bool ConvertOmodMul(int64_t &Mul) {
6782   if (Mul != 1 && Mul != 2 && Mul != 4)
6783     return false;
6784 
6785   Mul >>= 1;
6786   return true;
6787 }
6788 
6789 static bool ConvertOmodDiv(int64_t &Div) {
6790   if (Div == 1) {
6791     Div = 0;
6792     return true;
6793   }
6794 
6795   if (Div == 2) {
6796     Div = 3;
6797     return true;
6798   }
6799 
6800   return false;
6801 }
6802 
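     // Quirk of the DPP bound_ctrl operand: the asm form "bound_ctrl:0"
     // historically enables bound control, i.e. field value 1, while an
     // explicit -1 maps to 0; any other value is rejected.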
6803 static bool ConvertBoundCtrl(int64_t &BoundCtrl) {
6804   if (BoundCtrl == 0) {
6805     BoundCtrl = 1;
6806     return true;
6807   }
6808 
6809   if (BoundCtrl == -1) {
6810     BoundCtrl = 0;
6811     return true;
6812   }
6813 
6814   return false;
6815 }
6816 
6817 // Note: the order in this table matches the order of operands in AsmString.
6818 static const OptionalOperand AMDGPUOptionalOperandTable[] = {
6819   {"offen",   AMDGPUOperand::ImmTyOffen, true, nullptr},
6820   {"idxen",   AMDGPUOperand::ImmTyIdxen, true, nullptr},
6821   {"addr64",  AMDGPUOperand::ImmTyAddr64, true, nullptr},
6822   {"offset0", AMDGPUOperand::ImmTyOffset0, false, nullptr},
6823   {"offset1", AMDGPUOperand::ImmTyOffset1, false, nullptr},
6824   {"gds",     AMDGPUOperand::ImmTyGDS, true, nullptr},
6825   {"lds",     AMDGPUOperand::ImmTyLDS, true, nullptr},
6826   {"offset",  AMDGPUOperand::ImmTyOffset, false, nullptr},
6827   {"inst_offset", AMDGPUOperand::ImmTyInstOffset, false, nullptr},
6828   {"dlc",     AMDGPUOperand::ImmTyDLC, true, nullptr},
6829   {"glc",     AMDGPUOperand::ImmTyGLC, true, nullptr},
6830   {"slc",     AMDGPUOperand::ImmTySLC, true, nullptr},
6831   {"swz",     AMDGPUOperand::ImmTySWZ, true, nullptr},
6832   {"tfe",     AMDGPUOperand::ImmTyTFE, true, nullptr},
6833   {"d16",     AMDGPUOperand::ImmTyD16, true, nullptr},
6834   {"high",    AMDGPUOperand::ImmTyHigh, true, nullptr},
6835   {"clamp",   AMDGPUOperand::ImmTyClampSI, true, nullptr},
6836   {"omod",    AMDGPUOperand::ImmTyOModSI, false, ConvertOmodMul},
6837   {"unorm",   AMDGPUOperand::ImmTyUNorm, true, nullptr},
6838   {"da",      AMDGPUOperand::ImmTyDA,    true, nullptr},
6839   {"r128",    AMDGPUOperand::ImmTyR128A16,  true, nullptr},
6840   {"a16",     AMDGPUOperand::ImmTyA16,  true, nullptr},
6841   {"lwe",     AMDGPUOperand::ImmTyLWE,   true, nullptr},
6842   {"d16",     AMDGPUOperand::ImmTyD16,   true, nullptr},
6843   {"dmask",   AMDGPUOperand::ImmTyDMask, false, nullptr},
6844   {"dim",     AMDGPUOperand::ImmTyDim,   false, nullptr},
6845   {"row_mask",   AMDGPUOperand::ImmTyDppRowMask, false, nullptr},
6846   {"bank_mask",  AMDGPUOperand::ImmTyDppBankMask, false, nullptr},
6847   {"bound_ctrl", AMDGPUOperand::ImmTyDppBoundCtrl, false, ConvertBoundCtrl},
6848   {"fi",         AMDGPUOperand::ImmTyDppFi, false, nullptr},
6849   {"dst_sel",    AMDGPUOperand::ImmTySdwaDstSel, false, nullptr},
6850   {"src0_sel",   AMDGPUOperand::ImmTySdwaSrc0Sel, false, nullptr},
6851   {"src1_sel",   AMDGPUOperand::ImmTySdwaSrc1Sel, false, nullptr},
6852   {"dst_unused", AMDGPUOperand::ImmTySdwaDstUnused, false, nullptr},
6853   {"compr", AMDGPUOperand::ImmTyExpCompr, true, nullptr },
6854   {"vm", AMDGPUOperand::ImmTyExpVM, true, nullptr},
6855   {"op_sel", AMDGPUOperand::ImmTyOpSel, false, nullptr},
6856   {"op_sel_hi", AMDGPUOperand::ImmTyOpSelHi, false, nullptr},
6857   {"neg_lo", AMDGPUOperand::ImmTyNegLo, false, nullptr},
6858   {"neg_hi", AMDGPUOperand::ImmTyNegHi, false, nullptr},
6859   {"blgp", AMDGPUOperand::ImmTyBLGP, false, nullptr},
6860   {"cbsz", AMDGPUOperand::ImmTyCBSZ, false, nullptr},
6861   {"abid", AMDGPUOperand::ImmTyABID, false, nullptr}
6862 };
6863 
6864 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOperand(OperandVector &Operands) {
6865 
6866   OperandMatchResultTy res = parseOptionalOpr(Operands);
6867 
6868   // This is a hack to enable hardcoded mandatory operands which follow
6869   // optional operands.
6870   //
6871   // The current design assumes that all operands after the first optional
6872   // operand are also optional. However, the implementation of some
6873   // instructions violates this rule (e.g. flat/global atomics have
6874   // hardcoded 'glc' operands).
6875   //
6876   // To alleviate this problem, we (implicitly) parse extra operands to make
6877   // sure the autogenerated parser of custom operands never hits them.
6878 
6879   for (unsigned i = 0; i < MAX_OPR_LOOKAHEAD; ++i) {
6880     if (res != MatchOperand_Success ||
6881         isToken(AsmToken::EndOfStatement))
6882       break;
6883 
6884     trySkipToken(AsmToken::Comma);
6885     res = parseOptionalOpr(Operands);
6886   }
6887 
6888   return res;
6889 }
6890 
6891 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOpr(OperandVector &Operands) {
6892   OperandMatchResultTy res;
6893   for (const OptionalOperand &Op : AMDGPUOptionalOperandTable) {
6894     // try to parse any optional operand here
6895     if (Op.IsBit) {
6896       res = parseNamedBit(Op.Name, Operands, Op.Type);
6897     } else if (Op.Type == AMDGPUOperand::ImmTyOModSI) {
6898       res = parseOModOperand(Operands);
6899     } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstSel ||
6900                Op.Type == AMDGPUOperand::ImmTySdwaSrc0Sel ||
6901                Op.Type == AMDGPUOperand::ImmTySdwaSrc1Sel) {
6902       res = parseSDWASel(Operands, Op.Name, Op.Type);
6903     } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstUnused) {
6904       res = parseSDWADstUnused(Operands);
6905     } else if (Op.Type == AMDGPUOperand::ImmTyOpSel ||
6906                Op.Type == AMDGPUOperand::ImmTyOpSelHi ||
6907                Op.Type == AMDGPUOperand::ImmTyNegLo ||
6908                Op.Type == AMDGPUOperand::ImmTyNegHi) {
6909       res = parseOperandArrayWithPrefix(Op.Name, Operands, Op.Type,
6910                                         Op.ConvertResult);
6911     } else if (Op.Type == AMDGPUOperand::ImmTyDim) {
6912       res = parseDim(Operands);
6913     } else {
6914       res = parseIntWithPrefix(Op.Name, Operands, Op.Type, Op.ConvertResult);
6915     }
6916     if (res != MatchOperand_NoMatch) {
6917       return res;
6918     }
6919   }
6920   return MatchOperand_NoMatch;
6921 }
6922 
6923 OperandMatchResultTy AMDGPUAsmParser::parseOModOperand(OperandVector &Operands) {
6924   StringRef Name = getTokenStr();
6925   if (Name == "mul") {
6926     return parseIntWithPrefix("mul", Operands,
6927                               AMDGPUOperand::ImmTyOModSI, ConvertOmodMul);
6928   }
6929 
6930   if (Name == "div") {
6931     return parseIntWithPrefix("div", Operands,
6932                               AMDGPUOperand::ImmTyOModSI, ConvertOmodDiv);
6933   }
6934 
6935   return MatchOperand_NoMatch;
6936 }
6937 
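     // For VOP3 opcodes with op_sel, the dst selection is the op_sel bit just
     // past the last source operand; the encoding keeps it in the DST_OP_SEL
     // bit of src0_modifiers, so it is copied there below.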
6938 void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands) {
6939   cvtVOP3P(Inst, Operands);
6940 
6941   int Opc = Inst.getOpcode();
6942 
6943   int SrcNum;
6944   const int Ops[] = { AMDGPU::OpName::src0,
6945                       AMDGPU::OpName::src1,
6946                       AMDGPU::OpName::src2 };
6947   for (SrcNum = 0;
6948        SrcNum < 3 && AMDGPU::getNamedOperandIdx(Opc, Ops[SrcNum]) != -1;
6949        ++SrcNum);
6950   assert(SrcNum > 0);
6951 
6952   int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
6953   unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
6954 
6955   if ((OpSel & (1 << SrcNum)) != 0) {
6956     int ModIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers);
6957     uint32_t ModVal = Inst.getOperand(ModIdx).getImm();
6958     Inst.getOperand(ModIdx).setImm(ModVal | SISrcMods::DST_OP_SEL);
6959   }
6960 }
6961 
6962 static bool isRegOrImmWithInputMods(const MCInstrDesc &Desc, unsigned OpNum) {
6963       // 1. This operand is an input modifier
6964   return Desc.OpInfo[OpNum].OperandType == AMDGPU::OPERAND_INPUT_MODS
6965       // 2. This is not the last operand
6966       && Desc.NumOperands > (OpNum + 1)
6967       // 3. The next operand has a register class
6968       && Desc.OpInfo[OpNum + 1].RegClass != -1
6969       // 4. The next register is not tied to any other operand
6970       && Desc.getOperandConstraint(OpNum + 1, MCOI::OperandConstraint::TIED_TO) == -1;
6971 }
6972 
6973 void AMDGPUAsmParser::cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands)
6974 {
6975   OptionalImmIndexMap OptionalIdx;
6976   unsigned Opc = Inst.getOpcode();
6977 
6978   unsigned I = 1;
6979   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
6980   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
6981     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
6982   }
6983 
6984   for (unsigned E = Operands.size(); I != E; ++I) {
6985     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
6986     if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
6987       Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
6988     } else if (Op.isInterpSlot() ||
6989                Op.isInterpAttr() ||
6990                Op.isAttrChan()) {
6991       Inst.addOperand(MCOperand::createImm(Op.getImm()));
6992     } else if (Op.isImmModifier()) {
6993       OptionalIdx[Op.getImmTy()] = I;
6994     } else {
6995       llvm_unreachable("unhandled operand type");
6996     }
6997   }
6998 
6999   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::high) != -1) {
7000     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyHigh);
7001   }
7002 
7003   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) {
7004     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI);
7005   }
7006 
7007   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) {
7008     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI);
7009   }
7010 }
7011 
7012 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands,
7013                               OptionalImmIndexMap &OptionalIdx) {
7014   unsigned Opc = Inst.getOpcode();
7015 
7016   unsigned I = 1;
7017   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
7018   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
7019     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
7020   }
7021 
7022   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers) != -1) {
7023     // This instruction has src modifiers
7024     for (unsigned E = Operands.size(); I != E; ++I) {
7025       AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
7026       if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
7027         Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
7028       } else if (Op.isImmModifier()) {
7029         OptionalIdx[Op.getImmTy()] = I;
7030       } else if (Op.isRegOrImm()) {
7031         Op.addRegOrImmOperands(Inst, 1);
7032       } else {
7033         llvm_unreachable("unhandled operand type");
7034       }
7035     }
7036   } else {
7037     // No src modifiers
7038     for (unsigned E = Operands.size(); I != E; ++I) {
7039       AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
7040       if (Op.isMod()) {
7041         OptionalIdx[Op.getImmTy()] = I;
7042       } else {
7043         Op.addRegOrImmOperands(Inst, 1);
7044       }
7045     }
7046   }
7047 
7048   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) {
7049     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI);
7050   }
7051 
7052   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) {
7053     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI);
7054   }
7055 
7056   // Special case v_mac_{f16, f32} and v_fmac_{f16, f32} (gfx906/gfx10+):
7057   // they have a src2 register operand that is tied to the dst operand.
7058   // We don't allow modifiers for this operand in the assembler, so
7059   // src2_modifiers should be 0.
7060   if (Opc == AMDGPU::V_MAC_F32_e64_gfx6_gfx7 ||
7061       Opc == AMDGPU::V_MAC_F32_e64_gfx10 ||
7062       Opc == AMDGPU::V_MAC_F32_e64_vi ||
7063       Opc == AMDGPU::V_MAC_LEGACY_F32_e64_gfx6_gfx7 ||
7064       Opc == AMDGPU::V_MAC_LEGACY_F32_e64_gfx10 ||
7065       Opc == AMDGPU::V_MAC_F16_e64_vi ||
7066       Opc == AMDGPU::V_FMAC_F32_e64_gfx10 ||
7067       Opc == AMDGPU::V_FMAC_F32_e64_vi ||
7068       Opc == AMDGPU::V_FMAC_LEGACY_F32_e64_gfx10 ||
7069       Opc == AMDGPU::V_FMAC_F16_e64_gfx10) {
7070     auto it = Inst.begin();
7071     std::advance(it, AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2_modifiers));
7072     it = Inst.insert(it, MCOperand::createImm(0)); // no modifiers for src2
7073     ++it;
7074     // Copy the operand to ensure it's not invalidated when Inst grows.
7075     Inst.insert(it, MCOperand(Inst.getOperand(0))); // src2 = dst
7076   }
7077 }
7078 
7079 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands) {
7080   OptionalImmIndexMap OptionalIdx;
7081   cvtVOP3(Inst, Operands, OptionalIdx);
7082 }
7083 
7084 void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst,
7085                                const OperandVector &Operands) {
7086   OptionalImmIndexMap OptIdx;
7087   const int Opc = Inst.getOpcode();
7088   const MCInstrDesc &Desc = MII.get(Opc);
7089 
7090   const bool IsPacked = (Desc.TSFlags & SIInstrFlags::IsPacked) != 0;
7091 
7092   cvtVOP3(Inst, Operands, OptIdx);
7093 
7094   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst_in) != -1) {
7095     assert(!IsPacked);
7096     Inst.addOperand(Inst.getOperand(0));
7097   }
7098 
7099   // FIXME: This is messy. Parse the modifiers as if this were a normal VOP3
7100   // instruction, and then figure out where to actually put the modifiers.
7101 
7102   addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSel);
7103 
7104   int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi);
7105   if (OpSelHiIdx != -1) {
7106     int DefaultVal = IsPacked ? -1 : 0;
7107     addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSelHi,
7108                           DefaultVal);
7109   }
7110 
7111   int NegLoIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_lo);
7112   if (NegLoIdx != -1) {
7113     assert(IsPacked);
7114     addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegLo);
7115     addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegHi);
7116   }
7117 
7118   const int Ops[] = { AMDGPU::OpName::src0,
7119                       AMDGPU::OpName::src1,
7120                       AMDGPU::OpName::src2 };
7121   const int ModOps[] = { AMDGPU::OpName::src0_modifiers,
7122                          AMDGPU::OpName::src1_modifiers,
7123                          AMDGPU::OpName::src2_modifiers };
7124 
7125   int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
7126 
7127   unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
7128   unsigned OpSelHi = 0;
7129   unsigned NegLo = 0;
7130   unsigned NegHi = 0;
7131 
7132   if (OpSelHiIdx != -1) {
7133     OpSelHi = Inst.getOperand(OpSelHiIdx).getImm();
7134   }
7135 
7136   if (NegLoIdx != -1) {
7137     int NegHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_hi);
7138     NegLo = Inst.getOperand(NegLoIdx).getImm();
7139     NegHi = Inst.getOperand(NegHiIdx).getImm();
7140   }
7141 
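       // Fold the per-source bits of op_sel/op_sel_hi/neg_lo/neg_hi into the
       // corresponding src*_modifiers operands, which is where the encoder
       // expects them.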
7142   for (int J = 0; J < 3; ++J) {
7143     int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]);
7144     if (OpIdx == -1)
7145       break;
7146 
7147     uint32_t ModVal = 0;
7148 
7149     if ((OpSel & (1 << J)) != 0)
7150       ModVal |= SISrcMods::OP_SEL_0;
7151 
7152     if ((OpSelHi & (1 << J)) != 0)
7153       ModVal |= SISrcMods::OP_SEL_1;
7154 
7155     if ((NegLo & (1 << J)) != 0)
7156       ModVal |= SISrcMods::NEG;
7157 
7158     if ((NegHi & (1 << J)) != 0)
7159       ModVal |= SISrcMods::NEG_HI;
7160 
7161     int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]);
7162 
7163     Inst.getOperand(ModIdx).setImm(Inst.getOperand(ModIdx).getImm() | ModVal);
7164   }
7165 }
7166 
7167 //===----------------------------------------------------------------------===//
7168 // dpp
7169 //===----------------------------------------------------------------------===//
7170 
7171 bool AMDGPUOperand::isDPP8() const {
7172   return isImmTy(ImmTyDPP8);
7173 }
7174 
7175 bool AMDGPUOperand::isDPPCtrl() const {
7176   using namespace AMDGPU::DPP;
7177 
7178   bool result = isImm() && getImmTy() == ImmTyDppCtrl && isUInt<9>(getImm());
7179   if (result) {
7180     int64_t Imm = getImm();
7181     return (Imm >= DppCtrl::QUAD_PERM_FIRST && Imm <= DppCtrl::QUAD_PERM_LAST) ||
7182            (Imm >= DppCtrl::ROW_SHL_FIRST && Imm <= DppCtrl::ROW_SHL_LAST) ||
7183            (Imm >= DppCtrl::ROW_SHR_FIRST && Imm <= DppCtrl::ROW_SHR_LAST) ||
7184            (Imm >= DppCtrl::ROW_ROR_FIRST && Imm <= DppCtrl::ROW_ROR_LAST) ||
7185            (Imm == DppCtrl::WAVE_SHL1) ||
7186            (Imm == DppCtrl::WAVE_ROL1) ||
7187            (Imm == DppCtrl::WAVE_SHR1) ||
7188            (Imm == DppCtrl::WAVE_ROR1) ||
7189            (Imm == DppCtrl::ROW_MIRROR) ||
7190            (Imm == DppCtrl::ROW_HALF_MIRROR) ||
7191            (Imm == DppCtrl::BCAST15) ||
7192            (Imm == DppCtrl::BCAST31) ||
7193            (Imm >= DppCtrl::ROW_SHARE_FIRST && Imm <= DppCtrl::ROW_SHARE_LAST) ||
7194            (Imm >= DppCtrl::ROW_XMASK_FIRST && Imm <= DppCtrl::ROW_XMASK_LAST);
7195   }
7196   return false;
7197 }
7198 
7199 //===----------------------------------------------------------------------===//
7200 // mAI
7201 //===----------------------------------------------------------------------===//
7202 
7203 bool AMDGPUOperand::isBLGP() const {
7204   return isImm() && getImmTy() == ImmTyBLGP && isUInt<3>(getImm());
7205 }
7206 
7207 bool AMDGPUOperand::isCBSZ() const {
7208   return isImm() && getImmTy() == ImmTyCBSZ && isUInt<3>(getImm());
7209 }
7210 
7211 bool AMDGPUOperand::isABID() const {
7212   return isImm() && getImmTy() == ImmTyABID && isUInt<4>(getImm());
7213 }
7214 
7215 bool AMDGPUOperand::isS16Imm() const {
7216   return isImm() && (isInt<16>(getImm()) || isUInt<16>(getImm()));
7217 }
7218 
7219 bool AMDGPUOperand::isU16Imm() const {
7220   return isImm() && isUInt<16>(getImm());
7221 }
7222 
7223 OperandMatchResultTy AMDGPUAsmParser::parseDim(OperandVector &Operands) {
7224   if (!isGFX10Plus())
7225     return MatchOperand_NoMatch;
7226 
7227   SMLoc S = getLoc();
7228 
7229   if (!trySkipId("dim", AsmToken::Colon))
7230     return MatchOperand_NoMatch;
7231 
7232   // We want to allow "dim:1D" etc., but the initial 1 is tokenized as an
7233   // integer.
7234   std::string Token;
7235   if (isToken(AsmToken::Integer)) {
7236     SMLoc Loc = getToken().getEndLoc();
7237     Token = std::string(getTokenStr());
7238     lex();
7239     if (getLoc() != Loc)
7240       return MatchOperand_ParseFail;
7241   }
7242   if (!isToken(AsmToken::Identifier))
7243     return MatchOperand_ParseFail;
7244   Token += getTokenStr();
7245 
7246   StringRef DimId = Token;
7247   if (DimId.startswith("SQ_RSRC_IMG_"))
7248     DimId = DimId.substr(12);
7249 
7250   const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByAsmSuffix(DimId);
7251   if (!DimInfo)
7252     return MatchOperand_ParseFail;
7253 
7254   lex();
7255 
7256   Operands.push_back(AMDGPUOperand::CreateImm(this, DimInfo->Encoding, S,
7257                                               AMDGPUOperand::ImmTyDim));
7258   return MatchOperand_Success;
7259 }
7260 
7261 OperandMatchResultTy AMDGPUAsmParser::parseDPP8(OperandVector &Operands) {
7262   SMLoc S = getLoc();
7263 
7264   if (!isGFX10Plus() || !trySkipId("dpp8", AsmToken::Colon))
7265     return MatchOperand_NoMatch;
7266 
7267   // dpp8:[%d,%d,%d,%d,%d,%d,%d,%d]
7268 
7269   int64_t Sels[8];
7270 
7271   if (!skipToken(AsmToken::LBrac, "expected an opening square bracket"))
7272     return MatchOperand_ParseFail;
7273 
7274   for (size_t i = 0; i < 8; ++i) {
7275     if (i > 0 && !skipToken(AsmToken::Comma, "expected a comma"))
7276       return MatchOperand_ParseFail;
7277 
7278     SMLoc Loc = getLoc();
7279     if (getParser().parseAbsoluteExpression(Sels[i]))
7280       return MatchOperand_ParseFail;
7281     if (0 > Sels[i] || 7 < Sels[i]) {
7282       Error(Loc, "expected a 3-bit value");
7283       return MatchOperand_ParseFail;
7284     }
7285   }
7286 
7287   if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
7288     return MatchOperand_ParseFail;
7289 
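       // Pack the eight 3-bit lane selectors into a single immediate.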
7290   unsigned DPP8 = 0;
7291   for (size_t i = 0; i < 8; ++i)
7292     DPP8 |= (Sels[i] << (i * 3));
7293 
7294   Operands.push_back(AMDGPUOperand::CreateImm(this, DPP8, S, AMDGPUOperand::ImmTyDPP8));
7295   return MatchOperand_Success;
7296 }
7297 
7298 bool
7299 AMDGPUAsmParser::isSupportedDPPCtrl(StringRef Ctrl,
7300                                     const OperandVector &Operands) {
7301   if (Ctrl == "row_share" ||
7302       Ctrl == "row_xmask")
7303     return isGFX10Plus();
7304 
7305   if (Ctrl == "wave_shl" ||
7306       Ctrl == "wave_shr" ||
7307       Ctrl == "wave_rol" ||
7308       Ctrl == "wave_ror" ||
7309       Ctrl == "row_bcast")
7310     return isVI() || isGFX9();
7311 
7312   return Ctrl == "row_mirror" ||
7313          Ctrl == "row_half_mirror" ||
7314          Ctrl == "quad_perm" ||
7315          Ctrl == "row_shl" ||
7316          Ctrl == "row_shr" ||
7317          Ctrl == "row_ror";
7318 }
7319 
7320 int64_t
7321 AMDGPUAsmParser::parseDPPCtrlPerm() {
7322   // quad_perm:[%d,%d,%d,%d]
7323 
7324   if (!skipToken(AsmToken::LBrac, "expected an opening square bracket"))
7325     return -1;
7326 
7327   int64_t Val = 0;
7328   for (int i = 0; i < 4; ++i) {
7329     if (i > 0 && !skipToken(AsmToken::Comma, "expected a comma"))
7330       return -1;
7331 
7332     int64_t Temp;
7333     SMLoc Loc = getLoc();
7334     if (getParser().parseAbsoluteExpression(Temp))
7335       return -1;
7336     if (Temp < 0 || Temp > 3) {
7337       Error(Loc, "expected a 2-bit value");
7338       return -1;
7339     }
7340 
7341     Val += (Temp << i * 2);
7342   }
7343 
7344   if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
7345     return -1;
7346 
7347   return Val;
7348 }
7349 
7350 int64_t
7351 AMDGPUAsmParser::parseDPPCtrlSel(StringRef Ctrl) {
7352   using namespace AMDGPU::DPP;
7353 
7354   // sel:%d
7355 
7356   int64_t Val;
7357   SMLoc Loc = getLoc();
7358 
7359   if (getParser().parseAbsoluteExpression(Val))
7360     return -1;
7361 
7362   struct DppCtrlCheck {
7363     int64_t Ctrl;
7364     int Lo;
7365     int Hi;
7366   };
7367 
7368   DppCtrlCheck Check = StringSwitch<DppCtrlCheck>(Ctrl)
7369     .Case("wave_shl",  {DppCtrl::WAVE_SHL1,       1,  1})
7370     .Case("wave_rol",  {DppCtrl::WAVE_ROL1,       1,  1})
7371     .Case("wave_shr",  {DppCtrl::WAVE_SHR1,       1,  1})
7372     .Case("wave_ror",  {DppCtrl::WAVE_ROR1,       1,  1})
7373     .Case("row_shl",   {DppCtrl::ROW_SHL0,        1, 15})
7374     .Case("row_shr",   {DppCtrl::ROW_SHR0,        1, 15})
7375     .Case("row_ror",   {DppCtrl::ROW_ROR0,        1, 15})
7376     .Case("row_share", {DppCtrl::ROW_SHARE_FIRST, 0, 15})
7377     .Case("row_xmask", {DppCtrl::ROW_XMASK_FIRST, 0, 15})
7378     .Default({-1, 0, 0});
7379 
7380   bool Valid;
7381   if (Check.Ctrl == -1) {
7382     Valid = (Ctrl == "row_bcast" && (Val == 15 || Val == 31));
7383     Val = (Val == 15)? DppCtrl::BCAST15 : DppCtrl::BCAST31;
7384   } else {
7385     Valid = Check.Lo <= Val && Val <= Check.Hi;
7386     Val = (Check.Lo == Check.Hi) ? Check.Ctrl : (Check.Ctrl | Val);
7387   }
7388 
7389   if (!Valid) {
7390     Error(Loc, Twine("invalid ", Ctrl) + Twine(" value"));
7391     return -1;
7392   }
7393 
7394   return Val;
7395 }
7396 
7397 OperandMatchResultTy
7398 AMDGPUAsmParser::parseDPPCtrl(OperandVector &Operands) {
7399   using namespace AMDGPU::DPP;
7400 
7401   if (!isToken(AsmToken::Identifier) ||
7402       !isSupportedDPPCtrl(getTokenStr(), Operands))
7403     return MatchOperand_NoMatch;
7404 
7405   SMLoc S = getLoc();
7406   int64_t Val = -1;
7407   StringRef Ctrl;
7408 
7409   parseId(Ctrl);
7410 
7411   if (Ctrl == "row_mirror") {
7412     Val = DppCtrl::ROW_MIRROR;
7413   } else if (Ctrl == "row_half_mirror") {
7414     Val = DppCtrl::ROW_HALF_MIRROR;
7415   } else {
7416     if (skipToken(AsmToken::Colon, "expected a colon")) {
7417       if (Ctrl == "quad_perm") {
7418         Val = parseDPPCtrlPerm();
7419       } else {
7420         Val = parseDPPCtrlSel(Ctrl);
7421       }
7422     }
7423   }
7424 
7425   if (Val == -1)
7426     return MatchOperand_ParseFail;
7427 
7428   Operands.push_back(
7429     AMDGPUOperand::CreateImm(this, Val, S, AMDGPUOperand::ImmTyDppCtrl));
7430   return MatchOperand_Success;
7431 }
7432 
7433 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultRowMask() const {
7434   return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppRowMask);
7435 }
7436 
7437 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultEndpgmImmOperands() const {
7438   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyEndpgm);
7439 }
7440 
7441 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBankMask() const {
7442   return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppBankMask);
7443 }
7444 
7445 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBoundCtrl() const {
7446   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppBoundCtrl);
7447 }
7448 
7449 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultFI() const {
7450   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppFi);
7451 }
7452 
7453 void AMDGPUAsmParser::cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8) {
7454   OptionalImmIndexMap OptionalIdx;
7455 
7456   unsigned I = 1;
7457   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
7458   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
7459     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
7460   }
7461 
7462   int Fi = 0;
7463   for (unsigned E = Operands.size(); I != E; ++I) {
7464     auto TiedTo = Desc.getOperandConstraint(Inst.getNumOperands(),
7465                                             MCOI::TIED_TO);
7466     if (TiedTo != -1) {
7467       assert((unsigned)TiedTo < Inst.getNumOperands());
7468       // handle tied old or src2 for MAC instructions
7469       Inst.addOperand(Inst.getOperand(TiedTo));
7470     }
7471     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
7472     // Add the register arguments
7473     if (Op.isReg() && validateVccOperand(Op.getReg())) {
7474       // VOP2b (v_add_u32, v_sub_u32, ...) dpp uses the "vcc" token.
7475       // Skip it.
7476       continue;
7477     }
7478 
7479     if (IsDPP8) {
7480       if (Op.isDPP8()) {
7481         Op.addImmOperands(Inst, 1);
7482       } else if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
7483         Op.addRegWithFPInputModsOperands(Inst, 2);
7484       } else if (Op.isFI()) {
7485         Fi = Op.getImm();
7486       } else if (Op.isReg()) {
7487         Op.addRegOperands(Inst, 1);
7488       } else {
7489         llvm_unreachable("Invalid operand type");
7490       }
7491     } else {
7492       if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
7493         Op.addRegWithFPInputModsOperands(Inst, 2);
7494       } else if (Op.isDPPCtrl()) {
7495         Op.addImmOperands(Inst, 1);
7496       } else if (Op.isImm()) {
7497         // Handle optional arguments
7498         OptionalIdx[Op.getImmTy()] = I;
7499       } else {
7500         llvm_unreachable("Invalid operand type");
7501       }
7502     }
7503   }
7504 
7505   if (IsDPP8) {
7506     using namespace llvm::AMDGPU::DPP;
7507     Inst.addOperand(MCOperand::createImm(Fi? DPP8_FI_1 : DPP8_FI_0));
7508   } else {
7509     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppRowMask, 0xf);
7510     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBankMask, 0xf);
7511     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBoundCtrl);
7512     if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::fi) != -1) {
7513       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppFi);
7514     }
7515   }
7516 }
7517 
7518 //===----------------------------------------------------------------------===//
7519 // sdwa
7520 //===----------------------------------------------------------------------===//
7521 
7522 OperandMatchResultTy
7523 AMDGPUAsmParser::parseSDWASel(OperandVector &Operands, StringRef Prefix,
7524                               AMDGPUOperand::ImmTy Type) {
7525   using namespace llvm::AMDGPU::SDWA;
7526 
7527   SMLoc S = getLoc();
7528   StringRef Value;
7529   OperandMatchResultTy res;
7530 
7531   SMLoc StringLoc;
7532   res = parseStringWithPrefix(Prefix, Value, StringLoc);
7533   if (res != MatchOperand_Success) {
7534     return res;
7535   }
7536 
7537   int64_t Int;
7538   Int = StringSwitch<int64_t>(Value)
7539         .Case("BYTE_0", SdwaSel::BYTE_0)
7540         .Case("BYTE_1", SdwaSel::BYTE_1)
7541         .Case("BYTE_2", SdwaSel::BYTE_2)
7542         .Case("BYTE_3", SdwaSel::BYTE_3)
7543         .Case("WORD_0", SdwaSel::WORD_0)
7544         .Case("WORD_1", SdwaSel::WORD_1)
7545         .Case("DWORD", SdwaSel::DWORD)
7546         .Default(0xffffffff);
7547 
7548   if (Int == 0xffffffff) {
7549     Error(StringLoc, "invalid " + Twine(Prefix) + " value");
7550     return MatchOperand_ParseFail;
7551   }
7552 
7553   Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, Type));
7554   return MatchOperand_Success;
7555 }
7556 
7557 OperandMatchResultTy
7558 AMDGPUAsmParser::parseSDWADstUnused(OperandVector &Operands) {
7559   using namespace llvm::AMDGPU::SDWA;
7560 
7561   SMLoc S = getLoc();
7562   StringRef Value;
7563   OperandMatchResultTy res;
7564 
7565   SMLoc StringLoc;
7566   res = parseStringWithPrefix("dst_unused", Value, StringLoc);
7567   if (res != MatchOperand_Success) {
7568     return res;
7569   }
7570 
7571   int64_t Int;
7572   Int = StringSwitch<int64_t>(Value)
7573         .Case("UNUSED_PAD", DstUnused::UNUSED_PAD)
7574         .Case("UNUSED_SEXT", DstUnused::UNUSED_SEXT)
7575         .Case("UNUSED_PRESERVE", DstUnused::UNUSED_PRESERVE)
7576         .Default(0xffffffff);
7577 
7578   if (Int == 0xffffffff) {
7579     Error(StringLoc, "invalid dst_unused value");
7580     return MatchOperand_ParseFail;
7581   }
7582 
7583   Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, AMDGPUOperand::ImmTySdwaDstUnused));
7584   return MatchOperand_Success;
7585 }
7586 
7587 void AMDGPUAsmParser::cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands) {
7588   cvtSDWA(Inst, Operands, SIInstrFlags::VOP1);
7589 }
7590 
7591 void AMDGPUAsmParser::cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands) {
7592   cvtSDWA(Inst, Operands, SIInstrFlags::VOP2);
7593 }
7594 
7595 void AMDGPUAsmParser::cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands) {
7596   cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, true, true);
7597 }
7598 
7599 void AMDGPUAsmParser::cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands) {
7600   cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, false, true);
7601 }
7602 
7603 void AMDGPUAsmParser::cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands) {
7604   cvtSDWA(Inst, Operands, SIInstrFlags::VOPC, isVI());
7605 }
7606 
7607 void AMDGPUAsmParser::cvtSDWA(MCInst &Inst, const OperandVector &Operands,
7608                               uint64_t BasicInstType,
7609                               bool SkipDstVcc,
7610                               bool SkipSrcVcc) {
7611   using namespace llvm::AMDGPU::SDWA;
7612 
7613   OptionalImmIndexMap OptionalIdx;
7614   bool SkipVcc = SkipDstVcc || SkipSrcVcc;
7615   bool SkippedVcc = false;
7616 
7617   unsigned I = 1;
7618   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
7619   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
7620     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
7621   }
7622 
7623   for (unsigned E = Operands.size(); I != E; ++I) {
7624     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
7625     if (SkipVcc && !SkippedVcc && Op.isReg() &&
7626         (Op.getReg() == AMDGPU::VCC || Op.getReg() == AMDGPU::VCC_LO)) {
7627       // VOP2b (v_add_u32, v_sub_u32, ...) sdwa uses the "vcc" token as dst.
7628       // Skip it if it is the 2nd (e.g. v_add_i32_sdwa v1, vcc, v2, v3)
7629       // or the 4th (v_addc_u32_sdwa v1, vcc, v2, v3, vcc) operand.
7630       // Skip VCC only if we didn't skip it on previous iteration.
7631       // Note that src0 and src1 occupy 2 slots each because of modifiers.
7632       if (BasicInstType == SIInstrFlags::VOP2 &&
7633           ((SkipDstVcc && Inst.getNumOperands() == 1) ||
7634            (SkipSrcVcc && Inst.getNumOperands() == 5))) {
7635         SkippedVcc = true;
7636         continue;
7637       } else if (BasicInstType == SIInstrFlags::VOPC &&
7638                  Inst.getNumOperands() == 0) {
7639         SkippedVcc = true;
7640         continue;
7641       }
7642     }
7643     if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
7644       Op.addRegOrImmWithInputModsOperands(Inst, 2);
7645     } else if (Op.isImm()) {
7646       // Handle optional arguments
7647       OptionalIdx[Op.getImmTy()] = I;
7648     } else {
7649       llvm_unreachable("Invalid operand type");
7650     }
7651     SkippedVcc = false;
7652   }
7653 
7654   if (Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx10 &&
7655       Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx9 &&
7656       Inst.getOpcode() != AMDGPU::V_NOP_sdwa_vi) {
7657     // v_nop_sdwa_vi/gfx9/gfx10 has no optional sdwa arguments
7658     switch (BasicInstType) {
7659     case SIInstrFlags::VOP1:
7660       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
7661       if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) {
7662         addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
7663       }
7664       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD);
7665       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE);
7666       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
7667       break;
7668 
7669     case SIInstrFlags::VOP2:
7670       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
7671       if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) {
7672         addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
7673       }
7674       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD);
7675       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE);
7676       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
7677       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD);
7678       break;
7679 
7680     case SIInstrFlags::VOPC:
7681       if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::clamp) != -1)
7682         addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
7683       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
7684       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD);
7685       break;
7686 
7687     default:
7688       llvm_unreachable("Invalid instruction type. Only VOP1, VOP2 and VOPC allowed");
7689     }
7690   }
7691 
7692   // Special case v_mac_{f16, f32}:
7693   // it has a src2 register operand that is tied to the dst operand.
7694   if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
7695       Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi)  {
7696     auto it = Inst.begin();
7697     std::advance(
7698       it, AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::src2));
7699     Inst.insert(it, Inst.getOperand(0)); // src2 = dst
7700   }
7701 }
7702 
7703 //===----------------------------------------------------------------------===//
7704 // mAI
7705 //===----------------------------------------------------------------------===//
7706 
7707 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBLGP() const {
7708   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyBLGP);
7709 }
7710 
7711 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultCBSZ() const {
7712   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyCBSZ);
7713 }
7714 
7715 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultABID() const {
7716   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyABID);
7717 }
7718 
7719 /// Force static initialization.
7720 extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUAsmParser() {
7721   RegisterMCAsmParser<AMDGPUAsmParser> A(getTheAMDGPUTarget());
7722   RegisterMCAsmParser<AMDGPUAsmParser> B(getTheGCNTarget());
7723 }
7724 
7725 #define GET_REGISTER_MATCHER
7726 #define GET_MATCHER_IMPLEMENTATION
7727 #define GET_MNEMONIC_SPELL_CHECKER
7728 #define GET_MNEMONIC_CHECKER
7729 #include "AMDGPUGenAsmMatcher.inc"
7730 
7731 // This function should be defined after the auto-generated include so that
7732 // we have the MatchClassKind enum defined.
7733 unsigned AMDGPUAsmParser::validateTargetOperandClass(MCParsedAsmOperand &Op,
7734                                                      unsigned Kind) {
7735   // Tokens like "glc" would be parsed as immediate operands in ParseOperand().
7736   // But MatchInstructionImpl() expects to meet a token and fails to validate
7737   // the operand. This method checks if we were given an immediate operand but
7738   // are expected to provide the corresponding token.
7739   AMDGPUOperand &Operand = (AMDGPUOperand&)Op;
7740   switch (Kind) {
7741   case MCK_addr64:
7742     return Operand.isAddr64() ? Match_Success : Match_InvalidOperand;
7743   case MCK_gds:
7744     return Operand.isGDS() ? Match_Success : Match_InvalidOperand;
7745   case MCK_lds:
7746     return Operand.isLDS() ? Match_Success : Match_InvalidOperand;
7747   case MCK_glc:
7748     return Operand.isGLC() ? Match_Success : Match_InvalidOperand;
7749   case MCK_idxen:
7750     return Operand.isIdxen() ? Match_Success : Match_InvalidOperand;
7751   case MCK_offen:
7752     return Operand.isOffen() ? Match_Success : Match_InvalidOperand;
7753   case MCK_SSrcB32:
7754     // When operands have expression values, they will return true for isToken,
7755     // because it is not possible to distinguish between a token and an
7756     // expression at parse time. MatchInstructionImpl() always tries to match an
7757     // operand as a token when isToken returns true, and when the name of the
7758     // expression is not a valid token the match fails, so we need to handle
7759     // that case here.
7760     return Operand.isSSrcB32() ? Match_Success : Match_InvalidOperand;
7761   case MCK_SSrcF32:
7762     return Operand.isSSrcF32() ? Match_Success : Match_InvalidOperand;
7763   case MCK_SoppBrTarget:
7764     return Operand.isSoppBrTarget() ? Match_Success : Match_InvalidOperand;
7765   case MCK_VReg32OrOff:
7766     return Operand.isVReg32OrOff() ? Match_Success : Match_InvalidOperand;
7767   case MCK_InterpSlot:
7768     return Operand.isInterpSlot() ? Match_Success : Match_InvalidOperand;
7769   case MCK_Attr:
7770     return Operand.isInterpAttr() ? Match_Success : Match_InvalidOperand;
7771   case MCK_AttrChan:
7772     return Operand.isAttrChan() ? Match_Success : Match_InvalidOperand;
7773   case MCK_ImmSMEMOffset:
7774     return Operand.isSMEMOffset() ? Match_Success : Match_InvalidOperand;
7775   case MCK_SReg_64:
7776   case MCK_SReg_64_XEXEC:
7777     // Null is defined as a 32-bit register but
7778     // it should also be enabled with 64-bit operands.
7779     // The following code enables it for SReg_64 operands
7780     // used as source and destination. Remaining source
7781     // operands are handled in isInlinableImm.
7782     return Operand.isNull() ? Match_Success : Match_InvalidOperand;
7783   default:
7784     return Match_InvalidOperand;
7785   }
7786 }
7787 
7788 //===----------------------------------------------------------------------===//
7789 // endpgm
7790 //===----------------------------------------------------------------------===//
7791 
7792 OperandMatchResultTy AMDGPUAsmParser::parseEndpgmOp(OperandVector &Operands) {
7793   SMLoc S = getLoc();
7794   int64_t Imm = 0;
7795 
7796   if (!parseExpr(Imm)) {
7797     // The operand is optional, if not present default to 0
7798     Imm = 0;
7799   }
7800 
7801   if (!isUInt<16>(Imm)) {
7802     Error(S, "expected a 16-bit value");
7803     return MatchOperand_ParseFail;
7804   }
7805 
7806   Operands.push_back(
7807       AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyEndpgm));
7808   return MatchOperand_Success;
7809 }
7810 
7811 bool AMDGPUOperand::isEndpgm() const { return isImmTy(ImmTyEndpgm); }
7812