1 //===- AMDGPUAsmParser.cpp - Parse SI asm to MCInst instructions ----------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "AMDKernelCodeT.h"
10 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
11 #include "MCTargetDesc/AMDGPUTargetStreamer.h"
12 #include "SIDefines.h"
13 #include "SIInstrInfo.h"
14 #include "TargetInfo/AMDGPUTargetInfo.h"
15 #include "Utils/AMDGPUAsmUtils.h"
16 #include "Utils/AMDGPUBaseInfo.h"
17 #include "Utils/AMDKernelCodeTUtils.h"
18 #include "llvm/ADT/APFloat.h"
19 #include "llvm/ADT/SmallBitVector.h"
20 #include "llvm/ADT/StringSet.h"
21 #include "llvm/ADT/Twine.h"
22 #include "llvm/MC/MCAsmInfo.h"
23 #include "llvm/MC/MCContext.h"
24 #include "llvm/MC/MCExpr.h"
25 #include "llvm/MC/MCInst.h"
26 #include "llvm/MC/MCParser/MCAsmParser.h"
27 #include "llvm/MC/MCParser/MCParsedAsmOperand.h"
28 #include "llvm/MC/MCParser/MCTargetAsmParser.h"
29 #include "llvm/MC/MCSymbol.h"
30 #include "llvm/Support/AMDGPUMetadata.h"
31 #include "llvm/Support/AMDHSAKernelDescriptor.h"
32 #include "llvm/Support/Casting.h"
33 #include "llvm/Support/MachineValueType.h"
34 #include "llvm/Support/TargetParser.h"
35 #include "llvm/Support/TargetRegistry.h"
36 
37 using namespace llvm;
38 using namespace llvm::AMDGPU;
39 using namespace llvm::amdhsa;
40 
41 namespace {
42 
43 class AMDGPUAsmParser;
44 
45 enum RegisterKind { IS_UNKNOWN, IS_VGPR, IS_SGPR, IS_AGPR, IS_TTMP, IS_SPECIAL };
46 
47 //===----------------------------------------------------------------------===//
48 // Operand
49 //===----------------------------------------------------------------------===//
50 
51 class AMDGPUOperand : public MCParsedAsmOperand {
52   enum KindTy {
53     Token,
54     Immediate,
55     Register,
56     Expression
57   } Kind;
58 
59   SMLoc StartLoc, EndLoc;
60   const AMDGPUAsmParser *AsmParser;
61 
62 public:
63   AMDGPUOperand(KindTy Kind_, const AMDGPUAsmParser *AsmParser_)
64     : MCParsedAsmOperand(), Kind(Kind_), AsmParser(AsmParser_) {}
65 
66   using Ptr = std::unique_ptr<AMDGPUOperand>;
67 
68   struct Modifiers {
69     bool Abs = false;
70     bool Neg = false;
71     bool Sext = false;
72 
73     bool hasFPModifiers() const { return Abs || Neg; }
74     bool hasIntModifiers() const { return Sext; }
75     bool hasModifiers() const { return hasFPModifiers() || hasIntModifiers(); }
76 
77     int64_t getFPModifiersOperand() const {
78       int64_t Operand = 0;
79       Operand |= Abs ? SISrcMods::ABS : 0u;
80       Operand |= Neg ? SISrcMods::NEG : 0u;
81       return Operand;
82     }
83 
84     int64_t getIntModifiersOperand() const {
85       int64_t Operand = 0;
86       Operand |= Sext ? SISrcMods::SEXT : 0u;
87       return Operand;
88     }
89 
90     int64_t getModifiersOperand() const {
91       assert(!(hasFPModifiers() && hasIntModifiers())
92            && "fp and int modifiers should not be used simultaneously");
93       if (hasFPModifiers()) {
94         return getFPModifiersOperand();
95       } else if (hasIntModifiers()) {
96         return getIntModifiersOperand();
97       } else {
98         return 0;
99       }
100     }
101 
102     friend raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods);
103   };
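
  // Illustrative sketch (not tied to a particular instruction): a source
  // operand written as "-|v1|" sets both Neg and Abs, so getModifiersOperand()
  // yields SISrcMods::NEG | SISrcMods::ABS for the corresponding *_modifiers
  // operand, while "sext(v1)" sets only Sext and yields SISrcMods::SEXT.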
104 
105   enum ImmTy {
106     ImmTyNone,
107     ImmTyGDS,
108     ImmTyLDS,
109     ImmTyOffen,
110     ImmTyIdxen,
111     ImmTyAddr64,
112     ImmTyOffset,
113     ImmTyInstOffset,
114     ImmTyOffset0,
115     ImmTyOffset1,
116     ImmTyDLC,
117     ImmTyGLC,
118     ImmTySLC,
119     ImmTySWZ,
120     ImmTyTFE,
121     ImmTyD16,
122     ImmTyClampSI,
123     ImmTyOModSI,
124     ImmTyDPP8,
125     ImmTyDppCtrl,
126     ImmTyDppRowMask,
127     ImmTyDppBankMask,
128     ImmTyDppBoundCtrl,
129     ImmTyDppFi,
130     ImmTySdwaDstSel,
131     ImmTySdwaSrc0Sel,
132     ImmTySdwaSrc1Sel,
133     ImmTySdwaDstUnused,
134     ImmTyDMask,
135     ImmTyDim,
136     ImmTyUNorm,
137     ImmTyDA,
138     ImmTyR128A16,
139     ImmTyA16,
140     ImmTyLWE,
141     ImmTyExpTgt,
142     ImmTyExpCompr,
143     ImmTyExpVM,
144     ImmTyFORMAT,
145     ImmTyHwreg,
146     ImmTyOff,
147     ImmTySendMsg,
148     ImmTyInterpSlot,
149     ImmTyInterpAttr,
150     ImmTyAttrChan,
151     ImmTyOpSel,
152     ImmTyOpSelHi,
153     ImmTyNegLo,
154     ImmTyNegHi,
155     ImmTySwizzle,
156     ImmTyGprIdxMode,
157     ImmTyHigh,
158     ImmTyBLGP,
159     ImmTyCBSZ,
160     ImmTyABID,
161     ImmTyEndpgm,
162   };
163 
164   enum ImmKindTy {
165     ImmKindTyNone,
166     ImmKindTyLiteral,
167     ImmKindTyConst,
168   };
169 
170 private:
171   struct TokOp {
172     const char *Data;
173     unsigned Length;
174   };
175 
176   struct ImmOp {
177     int64_t Val;
178     ImmTy Type;
179     bool IsFPImm;
180     mutable ImmKindTy Kind;
181     Modifiers Mods;
182   };
183 
184   struct RegOp {
185     unsigned RegNo;
186     Modifiers Mods;
187   };
188 
189   union {
190     TokOp Tok;
191     ImmOp Imm;
192     RegOp Reg;
193     const MCExpr *Expr;
194   };
195 
196 public:
197   bool isToken() const override {
198     if (Kind == Token)
199       return true;
200 
    // When parsing operands, we can't always tell if something was meant to be
    // a token, like 'gds', or an expression that references a global variable.
    // In this case, we assume the string is an expression, and if we need to
    // interpret it as a token, then we treat the symbol name as the token.
205     return isSymbolRefExpr();
206   }
207 
208   bool isSymbolRefExpr() const {
209     return isExpr() && Expr && isa<MCSymbolRefExpr>(Expr);
210   }
211 
212   bool isImm() const override {
213     return Kind == Immediate;
214   }
215 
216   void setImmKindNone() const {
217     assert(isImm());
218     Imm.Kind = ImmKindTyNone;
219   }
220 
221   void setImmKindLiteral() const {
222     assert(isImm());
223     Imm.Kind = ImmKindTyLiteral;
224   }
225 
226   void setImmKindConst() const {
227     assert(isImm());
228     Imm.Kind = ImmKindTyConst;
229   }
230 
231   bool IsImmKindLiteral() const {
232     return isImm() && Imm.Kind == ImmKindTyLiteral;
233   }
234 
235   bool isImmKindConst() const {
236     return isImm() && Imm.Kind == ImmKindTyConst;
237   }
238 
239   bool isInlinableImm(MVT type) const;
240   bool isLiteralImm(MVT type) const;
241 
242   bool isRegKind() const {
243     return Kind == Register;
244   }
245 
246   bool isReg() const override {
247     return isRegKind() && !hasModifiers();
248   }
249 
250   bool isRegOrImmWithInputMods(unsigned RCID, MVT type) const {
251     return isRegClass(RCID) || isInlinableImm(type) || isLiteralImm(type);
252   }
253 
254   bool isRegOrImmWithInt16InputMods() const {
255     return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i16);
256   }
257 
258   bool isRegOrImmWithInt32InputMods() const {
259     return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i32);
260   }
261 
262   bool isRegOrImmWithInt64InputMods() const {
263     return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::i64);
264   }
265 
266   bool isRegOrImmWithFP16InputMods() const {
267     return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f16);
268   }
269 
270   bool isRegOrImmWithFP32InputMods() const {
271     return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f32);
272   }
273 
274   bool isRegOrImmWithFP64InputMods() const {
275     return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::f64);
276   }
277 
278   bool isVReg() const {
279     return isRegClass(AMDGPU::VGPR_32RegClassID) ||
280            isRegClass(AMDGPU::VReg_64RegClassID) ||
281            isRegClass(AMDGPU::VReg_96RegClassID) ||
282            isRegClass(AMDGPU::VReg_128RegClassID) ||
283            isRegClass(AMDGPU::VReg_160RegClassID) ||
284            isRegClass(AMDGPU::VReg_192RegClassID) ||
285            isRegClass(AMDGPU::VReg_256RegClassID) ||
286            isRegClass(AMDGPU::VReg_512RegClassID) ||
287            isRegClass(AMDGPU::VReg_1024RegClassID);
288   }
289 
290   bool isVReg32() const {
291     return isRegClass(AMDGPU::VGPR_32RegClassID);
292   }
293 
294   bool isVReg32OrOff() const {
295     return isOff() || isVReg32();
296   }
297 
298   bool isNull() const {
299     return isRegKind() && getReg() == AMDGPU::SGPR_NULL;
300   }
301 
302   bool isSDWAOperand(MVT type) const;
303   bool isSDWAFP16Operand() const;
304   bool isSDWAFP32Operand() const;
305   bool isSDWAInt16Operand() const;
306   bool isSDWAInt32Operand() const;
307 
308   bool isImmTy(ImmTy ImmT) const {
309     return isImm() && Imm.Type == ImmT;
310   }
311 
312   bool isImmModifier() const {
313     return isImm() && Imm.Type != ImmTyNone;
314   }
315 
316   bool isClampSI() const { return isImmTy(ImmTyClampSI); }
317   bool isOModSI() const { return isImmTy(ImmTyOModSI); }
318   bool isDMask() const { return isImmTy(ImmTyDMask); }
319   bool isDim() const { return isImmTy(ImmTyDim); }
320   bool isUNorm() const { return isImmTy(ImmTyUNorm); }
321   bool isDA() const { return isImmTy(ImmTyDA); }
322   bool isR128A16() const { return isImmTy(ImmTyR128A16); }
323   bool isGFX10A16() const { return isImmTy(ImmTyA16); }
324   bool isLWE() const { return isImmTy(ImmTyLWE); }
325   bool isOff() const { return isImmTy(ImmTyOff); }
326   bool isExpTgt() const { return isImmTy(ImmTyExpTgt); }
327   bool isExpVM() const { return isImmTy(ImmTyExpVM); }
328   bool isExpCompr() const { return isImmTy(ImmTyExpCompr); }
329   bool isOffen() const { return isImmTy(ImmTyOffen); }
330   bool isIdxen() const { return isImmTy(ImmTyIdxen); }
331   bool isAddr64() const { return isImmTy(ImmTyAddr64); }
332   bool isOffset() const { return isImmTy(ImmTyOffset) && isUInt<16>(getImm()); }
333   bool isOffset0() const { return isImmTy(ImmTyOffset0) && isUInt<8>(getImm()); }
334   bool isOffset1() const { return isImmTy(ImmTyOffset1) && isUInt<8>(getImm()); }
335 
336   bool isFlatOffset() const { return isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset); }
337   bool isGDS() const { return isImmTy(ImmTyGDS); }
338   bool isLDS() const { return isImmTy(ImmTyLDS); }
339   bool isDLC() const { return isImmTy(ImmTyDLC); }
340   bool isGLC() const { return isImmTy(ImmTyGLC); }
  // "GLC_1" is the MatchClass of the GLC_1 operand: a GLC operand whose value
  // is both defaulted and forced to 1.
343   bool isGLC_1() const { return isImmTy(ImmTyGLC); }
344   bool isSLC() const { return isImmTy(ImmTySLC); }
345   bool isSWZ() const { return isImmTy(ImmTySWZ); }
346   bool isTFE() const { return isImmTy(ImmTyTFE); }
347   bool isD16() const { return isImmTy(ImmTyD16); }
348   bool isFORMAT() const { return isImmTy(ImmTyFORMAT) && isUInt<7>(getImm()); }
349   bool isBankMask() const { return isImmTy(ImmTyDppBankMask); }
350   bool isRowMask() const { return isImmTy(ImmTyDppRowMask); }
351   bool isBoundCtrl() const { return isImmTy(ImmTyDppBoundCtrl); }
352   bool isFI() const { return isImmTy(ImmTyDppFi); }
353   bool isSDWADstSel() const { return isImmTy(ImmTySdwaDstSel); }
354   bool isSDWASrc0Sel() const { return isImmTy(ImmTySdwaSrc0Sel); }
355   bool isSDWASrc1Sel() const { return isImmTy(ImmTySdwaSrc1Sel); }
356   bool isSDWADstUnused() const { return isImmTy(ImmTySdwaDstUnused); }
357   bool isInterpSlot() const { return isImmTy(ImmTyInterpSlot); }
358   bool isInterpAttr() const { return isImmTy(ImmTyInterpAttr); }
359   bool isAttrChan() const { return isImmTy(ImmTyAttrChan); }
360   bool isOpSel() const { return isImmTy(ImmTyOpSel); }
361   bool isOpSelHi() const { return isImmTy(ImmTyOpSelHi); }
362   bool isNegLo() const { return isImmTy(ImmTyNegLo); }
363   bool isNegHi() const { return isImmTy(ImmTyNegHi); }
364   bool isHigh() const { return isImmTy(ImmTyHigh); }
365 
366   bool isMod() const {
367     return isClampSI() || isOModSI();
368   }
369 
370   bool isRegOrImm() const {
371     return isReg() || isImm();
372   }
373 
374   bool isRegClass(unsigned RCID) const;
375 
376   bool isInlineValue() const;
377 
378   bool isRegOrInlineNoMods(unsigned RCID, MVT type) const {
379     return (isRegClass(RCID) || isInlinableImm(type)) && !hasModifiers();
380   }
381 
382   bool isSCSrcB16() const {
383     return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i16);
384   }
385 
386   bool isSCSrcV2B16() const {
387     return isSCSrcB16();
388   }
389 
390   bool isSCSrcB32() const {
391     return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i32);
392   }
393 
394   bool isSCSrcB64() const {
395     return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::i64);
396   }
397 
398   bool isBoolReg() const;
399 
400   bool isSCSrcF16() const {
401     return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f16);
402   }
403 
404   bool isSCSrcV2F16() const {
405     return isSCSrcF16();
406   }
407 
408   bool isSCSrcF32() const {
409     return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f32);
410   }
411 
412   bool isSCSrcF64() const {
413     return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::f64);
414   }
415 
416   bool isSSrcB32() const {
417     return isSCSrcB32() || isLiteralImm(MVT::i32) || isExpr();
418   }
419 
420   bool isSSrcB16() const {
421     return isSCSrcB16() || isLiteralImm(MVT::i16);
422   }
423 
424   bool isSSrcV2B16() const {
425     llvm_unreachable("cannot happen");
426     return isSSrcB16();
427   }
428 
429   bool isSSrcB64() const {
430     // TODO: Find out how SALU supports extension of 32-bit literals to 64 bits.
431     // See isVSrc64().
432     return isSCSrcB64() || isLiteralImm(MVT::i64);
433   }
434 
435   bool isSSrcF32() const {
436     return isSCSrcB32() || isLiteralImm(MVT::f32) || isExpr();
437   }
438 
439   bool isSSrcF64() const {
440     return isSCSrcB64() || isLiteralImm(MVT::f64);
441   }
442 
443   bool isSSrcF16() const {
444     return isSCSrcB16() || isLiteralImm(MVT::f16);
445   }
446 
447   bool isSSrcV2F16() const {
448     llvm_unreachable("cannot happen");
449     return isSSrcF16();
450   }
451 
452   bool isSSrcOrLdsB32() const {
453     return isRegOrInlineNoMods(AMDGPU::SRegOrLds_32RegClassID, MVT::i32) ||
454            isLiteralImm(MVT::i32) || isExpr();
455   }
456 
457   bool isVCSrcB32() const {
458     return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i32);
459   }
460 
461   bool isVCSrcB64() const {
462     return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::i64);
463   }
464 
465   bool isVCSrcB16() const {
466     return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i16);
467   }
468 
469   bool isVCSrcV2B16() const {
470     return isVCSrcB16();
471   }
472 
473   bool isVCSrcF32() const {
474     return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f32);
475   }
476 
477   bool isVCSrcF64() const {
478     return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::f64);
479   }
480 
481   bool isVCSrcF16() const {
482     return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f16);
483   }
484 
485   bool isVCSrcV2F16() const {
486     return isVCSrcF16();
487   }
488 
489   bool isVSrcB32() const {
490     return isVCSrcF32() || isLiteralImm(MVT::i32) || isExpr();
491   }
492 
493   bool isVSrcB64() const {
494     return isVCSrcF64() || isLiteralImm(MVT::i64);
495   }
496 
497   bool isVSrcB16() const {
498     return isVCSrcB16() || isLiteralImm(MVT::i16);
499   }
500 
501   bool isVSrcV2B16() const {
502     return isVSrcB16() || isLiteralImm(MVT::v2i16);
503   }
504 
505   bool isVSrcF32() const {
506     return isVCSrcF32() || isLiteralImm(MVT::f32) || isExpr();
507   }
508 
509   bool isVSrcF64() const {
510     return isVCSrcF64() || isLiteralImm(MVT::f64);
511   }
512 
513   bool isVSrcF16() const {
514     return isVCSrcF16() || isLiteralImm(MVT::f16);
515   }
516 
517   bool isVSrcV2F16() const {
518     return isVSrcF16() || isLiteralImm(MVT::v2f16);
519   }
520 
521   bool isVISrcB32() const {
522     return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i32);
523   }
524 
525   bool isVISrcB16() const {
526     return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i16);
527   }
528 
529   bool isVISrcV2B16() const {
530     return isVISrcB16();
531   }
532 
533   bool isVISrcF32() const {
534     return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f32);
535   }
536 
537   bool isVISrcF16() const {
538     return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f16);
539   }
540 
541   bool isVISrcV2F16() const {
542     return isVISrcF16() || isVISrcB32();
543   }
544 
545   bool isAISrcB32() const {
546     return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i32);
547   }
548 
549   bool isAISrcB16() const {
550     return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i16);
551   }
552 
553   bool isAISrcV2B16() const {
554     return isAISrcB16();
555   }
556 
557   bool isAISrcF32() const {
558     return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f32);
559   }
560 
561   bool isAISrcF16() const {
562     return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f16);
563   }
564 
565   bool isAISrcV2F16() const {
566     return isAISrcF16() || isAISrcB32();
567   }
568 
569   bool isAISrc_128B32() const {
570     return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i32);
571   }
572 
573   bool isAISrc_128B16() const {
574     return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i16);
575   }
576 
577   bool isAISrc_128V2B16() const {
578     return isAISrc_128B16();
579   }
580 
581   bool isAISrc_128F32() const {
582     return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f32);
583   }
584 
585   bool isAISrc_128F16() const {
586     return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f16);
587   }
588 
589   bool isAISrc_128V2F16() const {
590     return isAISrc_128F16() || isAISrc_128B32();
591   }
592 
593   bool isAISrc_512B32() const {
594     return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i32);
595   }
596 
597   bool isAISrc_512B16() const {
598     return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i16);
599   }
600 
601   bool isAISrc_512V2B16() const {
602     return isAISrc_512B16();
603   }
604 
605   bool isAISrc_512F32() const {
606     return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f32);
607   }
608 
609   bool isAISrc_512F16() const {
610     return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f16);
611   }
612 
613   bool isAISrc_512V2F16() const {
614     return isAISrc_512F16() || isAISrc_512B32();
615   }
616 
617   bool isAISrc_1024B32() const {
618     return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i32);
619   }
620 
621   bool isAISrc_1024B16() const {
622     return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i16);
623   }
624 
625   bool isAISrc_1024V2B16() const {
626     return isAISrc_1024B16();
627   }
628 
629   bool isAISrc_1024F32() const {
630     return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f32);
631   }
632 
633   bool isAISrc_1024F16() const {
634     return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f16);
635   }
636 
637   bool isAISrc_1024V2F16() const {
638     return isAISrc_1024F16() || isAISrc_1024B32();
639   }
640 
641   bool isKImmFP32() const {
642     return isLiteralImm(MVT::f32);
643   }
644 
645   bool isKImmFP16() const {
646     return isLiteralImm(MVT::f16);
647   }
648 
649   bool isMem() const override {
650     return false;
651   }
652 
653   bool isExpr() const {
654     return Kind == Expression;
655   }
656 
657   bool isSoppBrTarget() const {
658     return isExpr() || isImm();
659   }
660 
661   bool isSWaitCnt() const;
662   bool isHwreg() const;
663   bool isSendMsg() const;
664   bool isSwizzle() const;
665   bool isSMRDOffset8() const;
666   bool isSMEMOffset() const;
667   bool isSMRDLiteralOffset() const;
668   bool isDPP8() const;
669   bool isDPPCtrl() const;
670   bool isBLGP() const;
671   bool isCBSZ() const;
672   bool isABID() const;
673   bool isGPRIdxMode() const;
674   bool isS16Imm() const;
675   bool isU16Imm() const;
676   bool isEndpgm() const;
677 
678   StringRef getExpressionAsToken() const {
679     assert(isExpr());
680     const MCSymbolRefExpr *S = cast<MCSymbolRefExpr>(Expr);
681     return S->getSymbol().getName();
682   }
683 
684   StringRef getToken() const {
685     assert(isToken());
686 
687     if (Kind == Expression)
688       return getExpressionAsToken();
689 
690     return StringRef(Tok.Data, Tok.Length);
691   }
692 
693   int64_t getImm() const {
694     assert(isImm());
695     return Imm.Val;
696   }
697 
698   void setImm(int64_t Val) {
699     assert(isImm());
700     Imm.Val = Val;
701   }
702 
703   ImmTy getImmTy() const {
704     assert(isImm());
705     return Imm.Type;
706   }
707 
708   unsigned getReg() const override {
709     assert(isRegKind());
710     return Reg.RegNo;
711   }
712 
713   SMLoc getStartLoc() const override {
714     return StartLoc;
715   }
716 
717   SMLoc getEndLoc() const override {
718     return EndLoc;
719   }
720 
721   SMRange getLocRange() const {
722     return SMRange(StartLoc, EndLoc);
723   }
724 
725   Modifiers getModifiers() const {
726     assert(isRegKind() || isImmTy(ImmTyNone));
727     return isRegKind() ? Reg.Mods : Imm.Mods;
728   }
729 
730   void setModifiers(Modifiers Mods) {
731     assert(isRegKind() || isImmTy(ImmTyNone));
732     if (isRegKind())
733       Reg.Mods = Mods;
734     else
735       Imm.Mods = Mods;
736   }
737 
738   bool hasModifiers() const {
739     return getModifiers().hasModifiers();
740   }
741 
742   bool hasFPModifiers() const {
743     return getModifiers().hasFPModifiers();
744   }
745 
746   bool hasIntModifiers() const {
747     return getModifiers().hasIntModifiers();
748   }
749 
750   uint64_t applyInputFPModifiers(uint64_t Val, unsigned Size) const;
751 
752   void addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers = true) const;
753 
754   void addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const;
755 
756   template <unsigned Bitwidth>
757   void addKImmFPOperands(MCInst &Inst, unsigned N) const;
758 
759   void addKImmFP16Operands(MCInst &Inst, unsigned N) const {
760     addKImmFPOperands<16>(Inst, N);
761   }
762 
763   void addKImmFP32Operands(MCInst &Inst, unsigned N) const {
764     addKImmFPOperands<32>(Inst, N);
765   }
766 
767   void addRegOperands(MCInst &Inst, unsigned N) const;
768 
769   void addBoolRegOperands(MCInst &Inst, unsigned N) const {
770     addRegOperands(Inst, N);
771   }
772 
773   void addRegOrImmOperands(MCInst &Inst, unsigned N) const {
774     if (isRegKind())
775       addRegOperands(Inst, N);
776     else if (isExpr())
777       Inst.addOperand(MCOperand::createExpr(Expr));
778     else
779       addImmOperands(Inst, N);
780   }
781 
782   void addRegOrImmWithInputModsOperands(MCInst &Inst, unsigned N) const {
783     Modifiers Mods = getModifiers();
784     Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
785     if (isRegKind()) {
786       addRegOperands(Inst, N);
787     } else {
788       addImmOperands(Inst, N, false);
789     }
790   }
791 
792   void addRegOrImmWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
793     assert(!hasIntModifiers());
794     addRegOrImmWithInputModsOperands(Inst, N);
795   }
796 
797   void addRegOrImmWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
798     assert(!hasFPModifiers());
799     addRegOrImmWithInputModsOperands(Inst, N);
800   }
801 
802   void addRegWithInputModsOperands(MCInst &Inst, unsigned N) const {
803     Modifiers Mods = getModifiers();
804     Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
805     assert(isRegKind());
806     addRegOperands(Inst, N);
807   }
808 
809   void addRegWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
810     assert(!hasIntModifiers());
811     addRegWithInputModsOperands(Inst, N);
812   }
813 
814   void addRegWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
815     assert(!hasFPModifiers());
816     addRegWithInputModsOperands(Inst, N);
817   }
818 
819   void addSoppBrTargetOperands(MCInst &Inst, unsigned N) const {
820     if (isImm())
821       addImmOperands(Inst, N);
822     else {
823       assert(isExpr());
824       Inst.addOperand(MCOperand::createExpr(Expr));
825     }
826   }
827 
828   static void printImmTy(raw_ostream& OS, ImmTy Type) {
829     switch (Type) {
830     case ImmTyNone: OS << "None"; break;
831     case ImmTyGDS: OS << "GDS"; break;
832     case ImmTyLDS: OS << "LDS"; break;
833     case ImmTyOffen: OS << "Offen"; break;
834     case ImmTyIdxen: OS << "Idxen"; break;
835     case ImmTyAddr64: OS << "Addr64"; break;
836     case ImmTyOffset: OS << "Offset"; break;
837     case ImmTyInstOffset: OS << "InstOffset"; break;
838     case ImmTyOffset0: OS << "Offset0"; break;
839     case ImmTyOffset1: OS << "Offset1"; break;
840     case ImmTyDLC: OS << "DLC"; break;
841     case ImmTyGLC: OS << "GLC"; break;
842     case ImmTySLC: OS << "SLC"; break;
843     case ImmTySWZ: OS << "SWZ"; break;
844     case ImmTyTFE: OS << "TFE"; break;
845     case ImmTyD16: OS << "D16"; break;
846     case ImmTyFORMAT: OS << "FORMAT"; break;
847     case ImmTyClampSI: OS << "ClampSI"; break;
848     case ImmTyOModSI: OS << "OModSI"; break;
849     case ImmTyDPP8: OS << "DPP8"; break;
850     case ImmTyDppCtrl: OS << "DppCtrl"; break;
851     case ImmTyDppRowMask: OS << "DppRowMask"; break;
852     case ImmTyDppBankMask: OS << "DppBankMask"; break;
853     case ImmTyDppBoundCtrl: OS << "DppBoundCtrl"; break;
854     case ImmTyDppFi: OS << "FI"; break;
855     case ImmTySdwaDstSel: OS << "SdwaDstSel"; break;
856     case ImmTySdwaSrc0Sel: OS << "SdwaSrc0Sel"; break;
857     case ImmTySdwaSrc1Sel: OS << "SdwaSrc1Sel"; break;
858     case ImmTySdwaDstUnused: OS << "SdwaDstUnused"; break;
859     case ImmTyDMask: OS << "DMask"; break;
860     case ImmTyDim: OS << "Dim"; break;
861     case ImmTyUNorm: OS << "UNorm"; break;
862     case ImmTyDA: OS << "DA"; break;
863     case ImmTyR128A16: OS << "R128A16"; break;
864     case ImmTyA16: OS << "A16"; break;
865     case ImmTyLWE: OS << "LWE"; break;
866     case ImmTyOff: OS << "Off"; break;
867     case ImmTyExpTgt: OS << "ExpTgt"; break;
868     case ImmTyExpCompr: OS << "ExpCompr"; break;
869     case ImmTyExpVM: OS << "ExpVM"; break;
870     case ImmTyHwreg: OS << "Hwreg"; break;
871     case ImmTySendMsg: OS << "SendMsg"; break;
872     case ImmTyInterpSlot: OS << "InterpSlot"; break;
873     case ImmTyInterpAttr: OS << "InterpAttr"; break;
874     case ImmTyAttrChan: OS << "AttrChan"; break;
875     case ImmTyOpSel: OS << "OpSel"; break;
876     case ImmTyOpSelHi: OS << "OpSelHi"; break;
877     case ImmTyNegLo: OS << "NegLo"; break;
878     case ImmTyNegHi: OS << "NegHi"; break;
879     case ImmTySwizzle: OS << "Swizzle"; break;
880     case ImmTyGprIdxMode: OS << "GprIdxMode"; break;
881     case ImmTyHigh: OS << "High"; break;
882     case ImmTyBLGP: OS << "BLGP"; break;
883     case ImmTyCBSZ: OS << "CBSZ"; break;
884     case ImmTyABID: OS << "ABID"; break;
885     case ImmTyEndpgm: OS << "Endpgm"; break;
886     }
887   }
888 
889   void print(raw_ostream &OS) const override {
890     switch (Kind) {
891     case Register:
892       OS << "<register " << getReg() << " mods: " << Reg.Mods << '>';
893       break;
894     case Immediate:
895       OS << '<' << getImm();
896       if (getImmTy() != ImmTyNone) {
897         OS << " type: "; printImmTy(OS, getImmTy());
898       }
899       OS << " mods: " << Imm.Mods << '>';
900       break;
901     case Token:
902       OS << '\'' << getToken() << '\'';
903       break;
904     case Expression:
905       OS << "<expr " << *Expr << '>';
906       break;
907     }
908   }
909 
910   static AMDGPUOperand::Ptr CreateImm(const AMDGPUAsmParser *AsmParser,
911                                       int64_t Val, SMLoc Loc,
912                                       ImmTy Type = ImmTyNone,
913                                       bool IsFPImm = false) {
914     auto Op = std::make_unique<AMDGPUOperand>(Immediate, AsmParser);
915     Op->Imm.Val = Val;
916     Op->Imm.IsFPImm = IsFPImm;
917     Op->Imm.Kind = ImmKindTyNone;
918     Op->Imm.Type = Type;
919     Op->Imm.Mods = Modifiers();
920     Op->StartLoc = Loc;
921     Op->EndLoc = Loc;
922     return Op;
923   }
924 
925   static AMDGPUOperand::Ptr CreateToken(const AMDGPUAsmParser *AsmParser,
926                                         StringRef Str, SMLoc Loc,
927                                         bool HasExplicitEncodingSize = true) {
928     auto Res = std::make_unique<AMDGPUOperand>(Token, AsmParser);
929     Res->Tok.Data = Str.data();
930     Res->Tok.Length = Str.size();
931     Res->StartLoc = Loc;
932     Res->EndLoc = Loc;
933     return Res;
934   }
935 
936   static AMDGPUOperand::Ptr CreateReg(const AMDGPUAsmParser *AsmParser,
937                                       unsigned RegNo, SMLoc S,
938                                       SMLoc E) {
939     auto Op = std::make_unique<AMDGPUOperand>(Register, AsmParser);
940     Op->Reg.RegNo = RegNo;
941     Op->Reg.Mods = Modifiers();
942     Op->StartLoc = S;
943     Op->EndLoc = E;
944     return Op;
945   }
946 
947   static AMDGPUOperand::Ptr CreateExpr(const AMDGPUAsmParser *AsmParser,
948                                        const class MCExpr *Expr, SMLoc S) {
949     auto Op = std::make_unique<AMDGPUOperand>(Expression, AsmParser);
950     Op->Expr = Expr;
951     Op->StartLoc = S;
952     Op->EndLoc = S;
953     return Op;
954   }
955 };
956 
957 raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods) {
  OS << "abs:" << Mods.Abs << " neg:" << Mods.Neg << " sext:" << Mods.Sext;
959   return OS;
960 }
961 
962 //===----------------------------------------------------------------------===//
963 // AsmParser
964 //===----------------------------------------------------------------------===//
965 
// Holds info related to the current kernel, e.g. the count of SGPRs used.
// Kernel scope begins at the .amdgpu_hsa_kernel directive and ends at the next
// .amdgpu_hsa_kernel directive or at EOF.
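// Illustrative example (hypothetical kernel): after
//   .amdgpu_hsa_kernel foo
//   s_mov_b32 s5, 0
//   v_mov_b32_e32 v3, 0
// the symbols .kernel.sgpr_count and .kernel.vgpr_count would be set to 6 and
// 4 respectively, i.e. the highest register index referenced plus one.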
969 class KernelScopeInfo {
970   int SgprIndexUnusedMin = -1;
971   int VgprIndexUnusedMin = -1;
972   MCContext *Ctx = nullptr;
973 
974   void usesSgprAt(int i) {
975     if (i >= SgprIndexUnusedMin) {
976       SgprIndexUnusedMin = ++i;
977       if (Ctx) {
978         MCSymbol * const Sym = Ctx->getOrCreateSymbol(Twine(".kernel.sgpr_count"));
979         Sym->setVariableValue(MCConstantExpr::create(SgprIndexUnusedMin, *Ctx));
980       }
981     }
982   }
983 
984   void usesVgprAt(int i) {
985     if (i >= VgprIndexUnusedMin) {
986       VgprIndexUnusedMin = ++i;
987       if (Ctx) {
988         MCSymbol * const Sym = Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count"));
989         Sym->setVariableValue(MCConstantExpr::create(VgprIndexUnusedMin, *Ctx));
990       }
991     }
992   }
993 
994 public:
995   KernelScopeInfo() = default;
996 
997   void initialize(MCContext &Context) {
998     Ctx = &Context;
999     usesSgprAt(SgprIndexUnusedMin = -1);
1000     usesVgprAt(VgprIndexUnusedMin = -1);
1001   }
1002 
1003   void usesRegister(RegisterKind RegKind, unsigned DwordRegIndex, unsigned RegWidth) {
1004     switch (RegKind) {
1005       case IS_SGPR: usesSgprAt(DwordRegIndex + RegWidth - 1); break;
1006       case IS_AGPR: // fall through
1007       case IS_VGPR: usesVgprAt(DwordRegIndex + RegWidth - 1); break;
1008       default: break;
1009     }
1010   }
1011 };
1012 
1013 class AMDGPUAsmParser : public MCTargetAsmParser {
1014   MCAsmParser &Parser;
1015 
1016   // Number of extra operands parsed after the first optional operand.
1017   // This may be necessary to skip hardcoded mandatory operands.
1018   static const unsigned MAX_OPR_LOOKAHEAD = 8;
1019 
1020   unsigned ForcedEncodingSize = 0;
1021   bool ForcedDPP = false;
1022   bool ForcedSDWA = false;
1023   KernelScopeInfo KernelScope;
1024 
1025   /// @name Auto-generated Match Functions
1026   /// {
1027 
1028 #define GET_ASSEMBLER_HEADER
1029 #include "AMDGPUGenAsmMatcher.inc"
1030 
1031   /// }
1032 
1033 private:
1034   bool ParseAsAbsoluteExpression(uint32_t &Ret);
1035   bool OutOfRangeError(SMRange Range);
  /// Calculate VGPR/SGPR blocks required for the given target, reserved
  /// registers, and user-specified NextFreeXGPR values.
1038   ///
1039   /// \param Features [in] Target features, used for bug corrections.
1040   /// \param VCCUsed [in] Whether VCC special SGPR is reserved.
1041   /// \param FlatScrUsed [in] Whether FLAT_SCRATCH special SGPR is reserved.
1042   /// \param XNACKUsed [in] Whether XNACK_MASK special SGPR is reserved.
1043   /// \param EnableWavefrontSize32 [in] Value of ENABLE_WAVEFRONT_SIZE32 kernel
1044   /// descriptor field, if valid.
1045   /// \param NextFreeVGPR [in] Max VGPR number referenced, plus one.
1046   /// \param VGPRRange [in] Token range, used for VGPR diagnostics.
1047   /// \param NextFreeSGPR [in] Max SGPR number referenced, plus one.
1048   /// \param SGPRRange [in] Token range, used for SGPR diagnostics.
1049   /// \param VGPRBlocks [out] Result VGPR block count.
1050   /// \param SGPRBlocks [out] Result SGPR block count.
1051   bool calculateGPRBlocks(const FeatureBitset &Features, bool VCCUsed,
1052                           bool FlatScrUsed, bool XNACKUsed,
1053                           Optional<bool> EnableWavefrontSize32, unsigned NextFreeVGPR,
1054                           SMRange VGPRRange, unsigned NextFreeSGPR,
1055                           SMRange SGPRRange, unsigned &VGPRBlocks,
1056                           unsigned &SGPRBlocks);
1057   bool ParseDirectiveAMDGCNTarget();
1058   bool ParseDirectiveAMDHSAKernel();
1059   bool ParseDirectiveMajorMinor(uint32_t &Major, uint32_t &Minor);
1060   bool ParseDirectiveHSACodeObjectVersion();
1061   bool ParseDirectiveHSACodeObjectISA();
1062   bool ParseAMDKernelCodeTValue(StringRef ID, amd_kernel_code_t &Header);
1063   bool ParseDirectiveAMDKernelCodeT();
1064   bool subtargetHasRegister(const MCRegisterInfo &MRI, unsigned RegNo) const;
1065   bool ParseDirectiveAMDGPUHsaKernel();
1066 
1067   bool ParseDirectiveISAVersion();
1068   bool ParseDirectiveHSAMetadata();
1069   bool ParseDirectivePALMetadataBegin();
1070   bool ParseDirectivePALMetadata();
1071   bool ParseDirectiveAMDGPULDS();
1072 
1073   /// Common code to parse out a block of text (typically YAML) between start and
1074   /// end directives.
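  /// As an illustration (directive names are from the HSA metadata support and
  /// given here only as an example), ParseDirectiveHSAMetadata() uses this to
  /// gather the text between .amd_amdgpu_hsa_metadata and
  /// .end_amd_amdgpu_hsa_metadata into a single string before handing it to
  /// the metadata streamer.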
1075   bool ParseToEndDirective(const char *AssemblerDirectiveBegin,
1076                            const char *AssemblerDirectiveEnd,
1077                            std::string &CollectString);
1078 
1079   bool AddNextRegisterToList(unsigned& Reg, unsigned& RegWidth,
1080                              RegisterKind RegKind, unsigned Reg1, SMLoc Loc);
1081   bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
1082                            unsigned &RegNum, unsigned &RegWidth,
1083                            bool RestoreOnFailure = false);
1084   bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
1085                            unsigned &RegNum, unsigned &RegWidth,
1086                            SmallVectorImpl<AsmToken> &Tokens);
1087   unsigned ParseRegularReg(RegisterKind &RegKind, unsigned &RegNum,
1088                            unsigned &RegWidth,
1089                            SmallVectorImpl<AsmToken> &Tokens);
1090   unsigned ParseSpecialReg(RegisterKind &RegKind, unsigned &RegNum,
1091                            unsigned &RegWidth,
1092                            SmallVectorImpl<AsmToken> &Tokens);
1093   unsigned ParseRegList(RegisterKind &RegKind, unsigned &RegNum,
1094                         unsigned &RegWidth, SmallVectorImpl<AsmToken> &Tokens);
1095   bool ParseRegRange(unsigned& Num, unsigned& Width);
1096   unsigned getRegularReg(RegisterKind RegKind,
1097                          unsigned RegNum,
1098                          unsigned RegWidth,
1099                          SMLoc Loc);
1100 
1101   bool isRegister();
1102   bool isRegister(const AsmToken &Token, const AsmToken &NextToken) const;
1103   Optional<StringRef> getGprCountSymbolName(RegisterKind RegKind);
1104   void initializeGprCountSymbol(RegisterKind RegKind);
1105   bool updateGprCountSymbols(RegisterKind RegKind, unsigned DwordRegIndex,
1106                              unsigned RegWidth);
1107   void cvtMubufImpl(MCInst &Inst, const OperandVector &Operands,
1108                     bool IsAtomic, bool IsAtomicReturn, bool IsLds = false);
1109   void cvtDSImpl(MCInst &Inst, const OperandVector &Operands,
1110                  bool IsGdsHardcoded);
1111 
1112 public:
1113   enum AMDGPUMatchResultTy {
1114     Match_PreferE32 = FIRST_TARGET_MATCH_RESULT_TY
1115   };
1116   enum OperandMode {
1117     OperandMode_Default,
1118     OperandMode_NSA,
1119   };
1120 
1121   using OptionalImmIndexMap = std::map<AMDGPUOperand::ImmTy, unsigned>;
1122 
1123   AMDGPUAsmParser(const MCSubtargetInfo &STI, MCAsmParser &_Parser,
1124                const MCInstrInfo &MII,
1125                const MCTargetOptions &Options)
1126       : MCTargetAsmParser(Options, STI, MII), Parser(_Parser) {
1127     MCAsmParserExtension::Initialize(Parser);
1128 
1129     if (getFeatureBits().none()) {
1130       // Set default features.
1131       copySTI().ToggleFeature("southern-islands");
1132     }
1133 
1134     setAvailableFeatures(ComputeAvailableFeatures(getFeatureBits()));
1135 
1136     {
      // TODO: make these pre-defined variables read-only.
      // Currently there is no suitable machinery in core llvm-mc for this.
      // MCSymbol::isRedefinable is intended for another purpose, and
      // AsmParser::parseDirectiveSet() cannot be specialized for a specific target.
1141       AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
1142       MCContext &Ctx = getContext();
1143       if (ISA.Major >= 6 && isHsaAbiVersion3(&getSTI())) {
1144         MCSymbol *Sym =
1145             Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_number"));
1146         Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
1147         Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_minor"));
1148         Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
1149         Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_stepping"));
1150         Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
1151       } else {
1152         MCSymbol *Sym =
1153             Ctx.getOrCreateSymbol(Twine(".option.machine_version_major"));
1154         Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
1155         Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_minor"));
1156         Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
1157         Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_stepping"));
1158         Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
1159       }
1160       if (ISA.Major >= 6 && isHsaAbiVersion3(&getSTI())) {
1161         initializeGprCountSymbol(IS_VGPR);
1162         initializeGprCountSymbol(IS_SGPR);
1163       } else
1164         KernelScope.initialize(getContext());
1165     }
1166   }
1167 
1168   bool hasXNACK() const {
1169     return AMDGPU::hasXNACK(getSTI());
1170   }
1171 
1172   bool hasMIMG_R128() const {
1173     return AMDGPU::hasMIMG_R128(getSTI());
1174   }
1175 
1176   bool hasPackedD16() const {
1177     return AMDGPU::hasPackedD16(getSTI());
1178   }
1179 
1180   bool hasGFX10A16() const {
1181     return AMDGPU::hasGFX10A16(getSTI());
1182   }
1183 
1184   bool isSI() const {
1185     return AMDGPU::isSI(getSTI());
1186   }
1187 
1188   bool isCI() const {
1189     return AMDGPU::isCI(getSTI());
1190   }
1191 
1192   bool isVI() const {
1193     return AMDGPU::isVI(getSTI());
1194   }
1195 
1196   bool isGFX9() const {
1197     return AMDGPU::isGFX9(getSTI());
1198   }
1199 
1200   bool isGFX9Plus() const {
1201     return AMDGPU::isGFX9Plus(getSTI());
1202   }
1203 
1204   bool isGFX10() const {
1205     return AMDGPU::isGFX10(getSTI());
1206   }
1207 
1208   bool isGFX10Plus() const { return AMDGPU::isGFX10Plus(getSTI()); }
1209 
1210   bool isGFX10_BEncoding() const {
1211     return AMDGPU::isGFX10_BEncoding(getSTI());
1212   }
1213 
1214   bool hasInv2PiInlineImm() const {
1215     return getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm];
1216   }
1217 
1218   bool hasFlatOffsets() const {
1219     return getFeatureBits()[AMDGPU::FeatureFlatInstOffsets];
1220   }
1221 
1222   bool hasSGPR102_SGPR103() const {
1223     return !isVI() && !isGFX9();
1224   }
1225 
1226   bool hasSGPR104_SGPR105() const { return isGFX10Plus(); }
1227 
1228   bool hasIntClamp() const {
1229     return getFeatureBits()[AMDGPU::FeatureIntClamp];
1230   }
1231 
1232   AMDGPUTargetStreamer &getTargetStreamer() {
1233     MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer();
1234     return static_cast<AMDGPUTargetStreamer &>(TS);
1235   }
1236 
1237   const MCRegisterInfo *getMRI() const {
1238     // We need this const_cast because for some reason getContext() is not const
1239     // in MCAsmParser.
1240     return const_cast<AMDGPUAsmParser*>(this)->getContext().getRegisterInfo();
1241   }
1242 
1243   const MCInstrInfo *getMII() const {
1244     return &MII;
1245   }
1246 
1247   const FeatureBitset &getFeatureBits() const {
1248     return getSTI().getFeatureBits();
1249   }
1250 
1251   void setForcedEncodingSize(unsigned Size) { ForcedEncodingSize = Size; }
1252   void setForcedDPP(bool ForceDPP_) { ForcedDPP = ForceDPP_; }
1253   void setForcedSDWA(bool ForceSDWA_) { ForcedSDWA = ForceSDWA_; }
1254 
1255   unsigned getForcedEncodingSize() const { return ForcedEncodingSize; }
1256   bool isForcedVOP3() const { return ForcedEncodingSize == 64; }
1257   bool isForcedDPP() const { return ForcedDPP; }
1258   bool isForcedSDWA() const { return ForcedSDWA; }
1259   ArrayRef<unsigned> getMatchedVariants() const;
1260   StringRef getMatchedVariantName() const;
1261 
1262   std::unique_ptr<AMDGPUOperand> parseRegister(bool RestoreOnFailure = false);
1263   bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc,
1264                      bool RestoreOnFailure);
1265   bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override;
1266   OperandMatchResultTy tryParseRegister(unsigned &RegNo, SMLoc &StartLoc,
1267                                         SMLoc &EndLoc) override;
1268   unsigned checkTargetMatchPredicate(MCInst &Inst) override;
1269   unsigned validateTargetOperandClass(MCParsedAsmOperand &Op,
1270                                       unsigned Kind) override;
1271   bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
1272                                OperandVector &Operands, MCStreamer &Out,
1273                                uint64_t &ErrorInfo,
1274                                bool MatchingInlineAsm) override;
1275   bool ParseDirective(AsmToken DirectiveID) override;
1276   OperandMatchResultTy parseOperand(OperandVector &Operands, StringRef Mnemonic,
1277                                     OperandMode Mode = OperandMode_Default);
1278   StringRef parseMnemonicSuffix(StringRef Name);
1279   bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
1280                         SMLoc NameLoc, OperandVector &Operands) override;
1281   //bool ProcessInstruction(MCInst &Inst);
1282 
1283   OperandMatchResultTy parseIntWithPrefix(const char *Prefix, int64_t &Int);
1284 
1285   OperandMatchResultTy
1286   parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
1287                      AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
1288                      bool (*ConvertResult)(int64_t &) = nullptr);
1289 
1290   OperandMatchResultTy
1291   parseOperandArrayWithPrefix(const char *Prefix,
1292                               OperandVector &Operands,
1293                               AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
1294                               bool (*ConvertResult)(int64_t&) = nullptr);
1295 
1296   OperandMatchResultTy
1297   parseNamedBit(const char *Name, OperandVector &Operands,
1298                 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone);
1299   OperandMatchResultTy parseStringWithPrefix(StringRef Prefix,
1300                                              StringRef &Value);
1301 
1302   bool isModifier();
1303   bool isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1304   bool isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1305   bool isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1306   bool isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const;
1307   bool parseSP3NegModifier();
1308   OperandMatchResultTy parseImm(OperandVector &Operands, bool HasSP3AbsModifier = false);
1309   OperandMatchResultTy parseReg(OperandVector &Operands);
1310   OperandMatchResultTy parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod = false);
1311   OperandMatchResultTy parseRegOrImmWithFPInputMods(OperandVector &Operands, bool AllowImm = true);
1312   OperandMatchResultTy parseRegOrImmWithIntInputMods(OperandVector &Operands, bool AllowImm = true);
1313   OperandMatchResultTy parseRegWithFPInputMods(OperandVector &Operands);
1314   OperandMatchResultTy parseRegWithIntInputMods(OperandVector &Operands);
1315   OperandMatchResultTy parseVReg32OrOff(OperandVector &Operands);
1316   OperandMatchResultTy parseDfmtNfmt(int64_t &Format);
1317   OperandMatchResultTy parseUfmt(int64_t &Format);
1318   OperandMatchResultTy parseSymbolicSplitFormat(StringRef FormatStr, SMLoc Loc, int64_t &Format);
1319   OperandMatchResultTy parseSymbolicUnifiedFormat(StringRef FormatStr, SMLoc Loc, int64_t &Format);
1320   OperandMatchResultTy parseFORMAT(OperandVector &Operands);
1321   OperandMatchResultTy parseSymbolicOrNumericFormat(int64_t &Format);
1322   OperandMatchResultTy parseNumericFormat(int64_t &Format);
1323   bool tryParseFmt(const char *Pref, int64_t MaxVal, int64_t &Val);
1324   bool matchDfmtNfmt(int64_t &Dfmt, int64_t &Nfmt, StringRef FormatStr, SMLoc Loc);
1325 
1326   void cvtDSOffset01(MCInst &Inst, const OperandVector &Operands);
1327   void cvtDS(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, false); }
1328   void cvtDSGds(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, true); }
1329   void cvtExp(MCInst &Inst, const OperandVector &Operands);
1330 
1331   bool parseCnt(int64_t &IntVal);
1332   OperandMatchResultTy parseSWaitCntOps(OperandVector &Operands);
1333   OperandMatchResultTy parseHwreg(OperandVector &Operands);
1334 
1335 private:
1336   struct OperandInfoTy {
1337     SMLoc Loc;
1338     int64_t Id;
1339     bool IsSymbolic = false;
1340     bool IsDefined = false;
1341 
1342     OperandInfoTy(int64_t Id_) : Id(Id_) {}
1343   };
1344 
1345   bool parseSendMsgBody(OperandInfoTy &Msg, OperandInfoTy &Op, OperandInfoTy &Stream);
1346   bool validateSendMsg(const OperandInfoTy &Msg,
1347                        const OperandInfoTy &Op,
1348                        const OperandInfoTy &Stream);
1349 
1350   bool parseHwregBody(OperandInfoTy &HwReg,
1351                       OperandInfoTy &Offset,
1352                       OperandInfoTy &Width);
1353   bool validateHwreg(const OperandInfoTy &HwReg,
1354                      const OperandInfoTy &Offset,
1355                      const OperandInfoTy &Width);
1356 
1357   OperandMatchResultTy parseExpTgtImpl(StringRef Str, uint8_t &Val);
1358   SMLoc getFlatOffsetLoc(const OperandVector &Operands) const;
1359   SMLoc getSMEMOffsetLoc(const OperandVector &Operands) const;
1360 
1361   SMLoc getOperandLoc(std::function<bool(const AMDGPUOperand&)> Test,
1362                       const OperandVector &Operands) const;
1363   SMLoc getImmLoc(AMDGPUOperand::ImmTy Type, const OperandVector &Operands) const;
1364   SMLoc getRegLoc(unsigned Reg, const OperandVector &Operands) const;
1365   SMLoc getLitLoc(const OperandVector &Operands) const;
1366   SMLoc getConstLoc(const OperandVector &Operands) const;
1367 
1368   bool validateInstruction(const MCInst &Inst, const SMLoc &IDLoc, const OperandVector &Operands);
1369   bool validateFlatOffset(const MCInst &Inst, const OperandVector &Operands);
1370   bool validateSMEMOffset(const MCInst &Inst, const OperandVector &Operands);
1371   bool validateSOPLiteral(const MCInst &Inst) const;
1372   bool validateConstantBusLimitations(const MCInst &Inst, const OperandVector &Operands);
1373   bool validateEarlyClobberLimitations(const MCInst &Inst, const OperandVector &Operands);
1374   bool validateIntClampSupported(const MCInst &Inst);
1375   bool validateMIMGAtomicDMask(const MCInst &Inst);
1376   bool validateMIMGGatherDMask(const MCInst &Inst);
1377   bool validateMovrels(const MCInst &Inst, const OperandVector &Operands);
1378   bool validateMIMGDataSize(const MCInst &Inst);
1379   bool validateMIMGAddrSize(const MCInst &Inst);
1380   bool validateMIMGD16(const MCInst &Inst);
1381   bool validateMIMGDim(const MCInst &Inst);
1382   bool validateLdsDirect(const MCInst &Inst);
1383   bool validateOpSel(const MCInst &Inst);
1384   bool validateVccOperand(unsigned Reg) const;
1385   bool validateVOP3Literal(const MCInst &Inst, const OperandVector &Operands);
1386   bool validateMAIAccWrite(const MCInst &Inst, const OperandVector &Operands);
1387   bool validateDivScale(const MCInst &Inst);
1388   bool validateCoherencyBits(const MCInst &Inst, const OperandVector &Operands,
1389                              const SMLoc &IDLoc);
1390   unsigned getConstantBusLimit(unsigned Opcode) const;
1391   bool usesConstantBus(const MCInst &Inst, unsigned OpIdx);
1392   bool isInlineConstant(const MCInst &Inst, unsigned OpIdx) const;
1393   unsigned findImplicitSGPRReadInVOP(const MCInst &Inst) const;
1394 
1395   bool isSupportedMnemo(StringRef Mnemo,
1396                         const FeatureBitset &FBS);
1397   bool isSupportedMnemo(StringRef Mnemo,
1398                         const FeatureBitset &FBS,
1399                         ArrayRef<unsigned> Variants);
1400   bool checkUnsupportedInstruction(StringRef Name, const SMLoc &IDLoc);
1401 
1402   bool isId(const StringRef Id) const;
1403   bool isId(const AsmToken &Token, const StringRef Id) const;
1404   bool isToken(const AsmToken::TokenKind Kind) const;
1405   bool trySkipId(const StringRef Id);
1406   bool trySkipId(const StringRef Id, const AsmToken::TokenKind Kind);
1407   bool trySkipToken(const AsmToken::TokenKind Kind);
1408   bool skipToken(const AsmToken::TokenKind Kind, const StringRef ErrMsg);
1409   bool parseString(StringRef &Val, const StringRef ErrMsg = "expected a string");
1410   bool parseId(StringRef &Val, const StringRef ErrMsg = "");
1411 
1412   void peekTokens(MutableArrayRef<AsmToken> Tokens);
1413   AsmToken::TokenKind getTokenKind() const;
1414   bool parseExpr(int64_t &Imm, StringRef Expected = "");
1415   bool parseExpr(OperandVector &Operands);
1416   StringRef getTokenStr() const;
1417   AsmToken peekToken();
1418   AsmToken getToken() const;
1419   SMLoc getLoc() const;
1420   void lex();
1421 
1422 public:
1423   OperandMatchResultTy parseOptionalOperand(OperandVector &Operands);
1424   OperandMatchResultTy parseOptionalOpr(OperandVector &Operands);
1425 
1426   OperandMatchResultTy parseExpTgt(OperandVector &Operands);
1427   OperandMatchResultTy parseSendMsgOp(OperandVector &Operands);
1428   OperandMatchResultTy parseInterpSlot(OperandVector &Operands);
1429   OperandMatchResultTy parseInterpAttr(OperandVector &Operands);
1430   OperandMatchResultTy parseSOppBrTarget(OperandVector &Operands);
1431   OperandMatchResultTy parseBoolReg(OperandVector &Operands);
1432 
1433   bool parseSwizzleOperand(int64_t &Op,
1434                            const unsigned MinVal,
1435                            const unsigned MaxVal,
1436                            const StringRef ErrMsg,
1437                            SMLoc &Loc);
1438   bool parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
1439                             const unsigned MinVal,
1440                             const unsigned MaxVal,
1441                             const StringRef ErrMsg);
1442   OperandMatchResultTy parseSwizzleOp(OperandVector &Operands);
1443   bool parseSwizzleOffset(int64_t &Imm);
1444   bool parseSwizzleMacro(int64_t &Imm);
1445   bool parseSwizzleQuadPerm(int64_t &Imm);
1446   bool parseSwizzleBitmaskPerm(int64_t &Imm);
1447   bool parseSwizzleBroadcast(int64_t &Imm);
1448   bool parseSwizzleSwap(int64_t &Imm);
1449   bool parseSwizzleReverse(int64_t &Imm);
1450 
1451   OperandMatchResultTy parseGPRIdxMode(OperandVector &Operands);
1452   int64_t parseGPRIdxMacro();
1453 
1454   void cvtMubuf(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, false); }
1455   void cvtMubufAtomic(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true, false); }
1456   void cvtMubufAtomicReturn(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true, true); }
1457   void cvtMubufLds(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, false, true); }
1458   void cvtMtbuf(MCInst &Inst, const OperandVector &Operands);
1459 
1460   AMDGPUOperand::Ptr defaultDLC() const;
1461   AMDGPUOperand::Ptr defaultGLC() const;
1462   AMDGPUOperand::Ptr defaultGLC_1() const;
1463   AMDGPUOperand::Ptr defaultSLC() const;
1464 
1465   AMDGPUOperand::Ptr defaultSMRDOffset8() const;
1466   AMDGPUOperand::Ptr defaultSMEMOffset() const;
1467   AMDGPUOperand::Ptr defaultSMRDLiteralOffset() const;
1468   AMDGPUOperand::Ptr defaultFlatOffset() const;
1469 
1470   OperandMatchResultTy parseOModOperand(OperandVector &Operands);
1471 
1472   void cvtVOP3(MCInst &Inst, const OperandVector &Operands,
1473                OptionalImmIndexMap &OptionalIdx);
1474   void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands);
1475   void cvtVOP3(MCInst &Inst, const OperandVector &Operands);
1476   void cvtVOP3P(MCInst &Inst, const OperandVector &Operands);
1477 
1478   void cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands);
1479 
1480   void cvtMIMG(MCInst &Inst, const OperandVector &Operands,
1481                bool IsAtomic = false);
1482   void cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands);
1483   void cvtIntersectRay(MCInst &Inst, const OperandVector &Operands);
1484 
1485   OperandMatchResultTy parseDim(OperandVector &Operands);
1486   OperandMatchResultTy parseDPP8(OperandVector &Operands);
1487   OperandMatchResultTy parseDPPCtrl(OperandVector &Operands);
1488   bool isSupportedDPPCtrl(StringRef Ctrl, const OperandVector &Operands);
1489   int64_t parseDPPCtrlSel(StringRef Ctrl);
1490   int64_t parseDPPCtrlPerm();
1491   AMDGPUOperand::Ptr defaultRowMask() const;
1492   AMDGPUOperand::Ptr defaultBankMask() const;
1493   AMDGPUOperand::Ptr defaultBoundCtrl() const;
1494   AMDGPUOperand::Ptr defaultFI() const;
1495   void cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8 = false);
1496   void cvtDPP8(MCInst &Inst, const OperandVector &Operands) { cvtDPP(Inst, Operands, true); }
1497 
1498   OperandMatchResultTy parseSDWASel(OperandVector &Operands, StringRef Prefix,
1499                                     AMDGPUOperand::ImmTy Type);
1500   OperandMatchResultTy parseSDWADstUnused(OperandVector &Operands);
1501   void cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands);
1502   void cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands);
1503   void cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands);
1504   void cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands);
1505   void cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands);
1506   void cvtSDWA(MCInst &Inst, const OperandVector &Operands,
1507                uint64_t BasicInstType,
1508                bool SkipDstVcc = false,
1509                bool SkipSrcVcc = false);
1510 
1511   AMDGPUOperand::Ptr defaultBLGP() const;
1512   AMDGPUOperand::Ptr defaultCBSZ() const;
1513   AMDGPUOperand::Ptr defaultABID() const;
1514 
1515   OperandMatchResultTy parseEndpgmOp(OperandVector &Operands);
1516   AMDGPUOperand::Ptr defaultEndpgmImmOperands() const;
1517 };
1518 
1519 struct OptionalOperand {
1520   const char *Name;
1521   AMDGPUOperand::ImmTy Type;
1522   bool IsBit;
1523   bool (*ConvertResult)(int64_t&);
1524 };
1525 
1526 } // end anonymous namespace
1527 
// May be called with an integer type of equivalent bit width.
1529 static const fltSemantics *getFltSemantics(unsigned Size) {
1530   switch (Size) {
1531   case 4:
1532     return &APFloat::IEEEsingle();
1533   case 8:
1534     return &APFloat::IEEEdouble();
1535   case 2:
1536     return &APFloat::IEEEhalf();
1537   default:
1538     llvm_unreachable("unsupported fp type");
1539   }
1540 }
1541 
1542 static const fltSemantics *getFltSemantics(MVT VT) {
1543   return getFltSemantics(VT.getSizeInBits() / 8);
1544 }
1545 
1546 static const fltSemantics *getOpFltSemantics(uint8_t OperandType) {
1547   switch (OperandType) {
1548   case AMDGPU::OPERAND_REG_IMM_INT32:
1549   case AMDGPU::OPERAND_REG_IMM_FP32:
1550   case AMDGPU::OPERAND_REG_INLINE_C_INT32:
1551   case AMDGPU::OPERAND_REG_INLINE_C_FP32:
1552   case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
1553   case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
1554     return &APFloat::IEEEsingle();
1555   case AMDGPU::OPERAND_REG_IMM_INT64:
1556   case AMDGPU::OPERAND_REG_IMM_FP64:
1557   case AMDGPU::OPERAND_REG_INLINE_C_INT64:
1558   case AMDGPU::OPERAND_REG_INLINE_C_FP64:
1559     return &APFloat::IEEEdouble();
1560   case AMDGPU::OPERAND_REG_IMM_INT16:
1561   case AMDGPU::OPERAND_REG_IMM_FP16:
1562   case AMDGPU::OPERAND_REG_INLINE_C_INT16:
1563   case AMDGPU::OPERAND_REG_INLINE_C_FP16:
1564   case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
1565   case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
1566   case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
1567   case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
1568   case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
1569   case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
1570   case AMDGPU::OPERAND_REG_IMM_V2INT16:
1571   case AMDGPU::OPERAND_REG_IMM_V2FP16:
1572     return &APFloat::IEEEhalf();
1573   default:
1574     llvm_unreachable("unsupported fp type");
1575   }
1576 }
1577 
1578 //===----------------------------------------------------------------------===//
1579 // Operand
1580 //===----------------------------------------------------------------------===//
1581 
1582 static bool canLosslesslyConvertToFPType(APFloat &FPLiteral, MVT VT) {
1583   bool Lost;
1584 
  // Convert the literal to the floating-point type matching VT.
1586   APFloat::opStatus Status = FPLiteral.convert(*getFltSemantics(VT),
1587                                                APFloat::rmNearestTiesToEven,
1588                                                &Lost);
  // We allow precision loss but not overflow or underflow.
1590   if (Status != APFloat::opOK &&
1591       Lost &&
1592       ((Status & APFloat::opOverflow)  != 0 ||
1593        (Status & APFloat::opUnderflow) != 0)) {
1594     return false;
1595   }
1596 
1597   return true;
1598 }
1599 
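// Check that Val fits in Size bits as either an unsigned or a signed value,
// i.e. no information is lost by truncation. For example, with Size == 16
// both 0xFFFF and -1 are safe, while 0x1FFFF is not.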
1600 static bool isSafeTruncation(int64_t Val, unsigned Size) {
1601   return isUIntN(Size, Val) || isIntN(Size, Val);
1602 }
1603 
1604 static bool isInlineableLiteralOp16(int64_t Val, MVT VT, bool HasInv2Pi) {
1605   if (VT.getScalarType() == MVT::i16) {
    // Handling of FP inline immediates is broken for i16 operands, so only
    // integer inline values are accepted here.
1607     return isInlinableIntLiteral(Val);
1608   }
1609 
1610   // f16/v2f16 operands work correctly for all values.
1611   return AMDGPU::isInlinableLiteral16(Val, HasInv2Pi);
1612 }
1613 
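// Check whether this parsed immediate can be encoded as a hardware inline
// constant for an operand of the given type, as opposed to requiring a
// 32-bit literal dword in the instruction stream.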
1614 bool AMDGPUOperand::isInlinableImm(MVT type) const {
1615 
1616   // This is a hack to enable named inline values like
1617   // shared_base with both 32-bit and 64-bit operands.
1618   // Note that these values are defined as
1619   // 32-bit operands only.
1620   if (isInlineValue()) {
1621     return true;
1622   }
1623 
1624   if (!isImmTy(ImmTyNone)) {
1625     // Only plain immediates are inlinable (e.g. "clamp" attribute is not)
1626     return false;
1627   }
  // TODO: We should avoid using host floats here. It would be better to
  // check the float bit values, which is what a few other places do.
  // We've had bot failures before due to weird NaN support on MIPS hosts.
1631 
1632   APInt Literal(64, Imm.Val);
1633 
1634   if (Imm.IsFPImm) { // We got fp literal token
1635     if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
1636       return AMDGPU::isInlinableLiteral64(Imm.Val,
1637                                           AsmParser->hasInv2PiInlineImm());
1638     }
1639 
1640     APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
1641     if (!canLosslesslyConvertToFPType(FPLiteral, type))
1642       return false;
1643 
1644     if (type.getScalarSizeInBits() == 16) {
1645       return isInlineableLiteralOp16(
1646         static_cast<int16_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
1647         type, AsmParser->hasInv2PiInlineImm());
1648     }
1649 
1650     // Check if single precision literal is inlinable
1651     return AMDGPU::isInlinableLiteral32(
1652       static_cast<int32_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
1653       AsmParser->hasInv2PiInlineImm());
1654   }
1655 
1656   // We got int literal token.
1657   if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
1658     return AMDGPU::isInlinableLiteral64(Imm.Val,
1659                                         AsmParser->hasInv2PiInlineImm());
1660   }
1661 
1662   if (!isSafeTruncation(Imm.Val, type.getScalarSizeInBits())) {
1663     return false;
1664   }
1665 
1666   if (type.getScalarSizeInBits() == 16) {
1667     return isInlineableLiteralOp16(
1668       static_cast<int16_t>(Literal.getLoBits(16).getSExtValue()),
1669       type, AsmParser->hasInv2PiInlineImm());
1670   }
1671 
1672   return AMDGPU::isInlinableLiteral32(
1673     static_cast<int32_t>(Literal.getLoBits(32).getZExtValue()),
1674     AsmParser->hasInv2PiInlineImm());
1675 }
1676 
1677 bool AMDGPUOperand::isLiteralImm(MVT type) const {
1678   // Check that this immediate can be added as literal
1679   if (!isImmTy(ImmTyNone)) {
1680     return false;
1681   }
1682 
1683   if (!Imm.IsFPImm) {
1684     // We got int literal token.
1685 
1686     if (type == MVT::f64 && hasFPModifiers()) {
1687       // Cannot apply fp modifiers to int literals preserving the same semantics
1688       // for VOP1/2/C and VOP3 because of integer truncation. To avoid ambiguity,
1689       // disable these cases.
1690       return false;
1691     }
1692 
1693     unsigned Size = type.getSizeInBits();
1694     if (Size == 64)
1695       Size = 32;
1696 
1697     // FIXME: 64-bit operands can zero extend, sign extend, or pad zeroes for FP
1698     // types.
1699     return isSafeTruncation(Imm.Val, Size);
1700   }
1701 
1702   // We got fp literal token
1703   if (type == MVT::f64) { // Expected 64-bit fp operand
    // The low 32 bits of such a literal will be set to zero, but we accept it anyway.
1705     return true;
1706   }
1707 
1708   if (type == MVT::i64) { // Expected 64-bit int operand
1709     // We don't allow fp literals in 64-bit integer instructions. It is
1710     // unclear how we should encode them.
1711     return false;
1712   }
1713 
1714   // We allow fp literals with f16x2 operands assuming that the specified
1715   // literal goes into the lower half and the upper half is zero. We also
  // require that the literal may be losslessly converted to f16.
1717   MVT ExpectedType = (type == MVT::v2f16)? MVT::f16 :
1718                      (type == MVT::v2i16)? MVT::i16 : type;
1719 
1720   APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
1721   return canLosslesslyConvertToFPType(FPLiteral, ExpectedType);
1722 }
1723 
1724 bool AMDGPUOperand::isRegClass(unsigned RCID) const {
1725   return isRegKind() && AsmParser->getMRI()->getRegClass(RCID).contains(getReg());
1726 }
1727 
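// An SDWA source operand must be a 32-bit VGPR on VI. On GFX9 and later,
// a 32-bit SGPR or an inlinable immediate of the given type is also accepted.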
1728 bool AMDGPUOperand::isSDWAOperand(MVT type) const {
1729   if (AsmParser->isVI())
1730     return isVReg32();
1731   else if (AsmParser->isGFX9Plus())
1732     return isRegClass(AMDGPU::VS_32RegClassID) || isInlinableImm(type);
1733   else
1734     return false;
1735 }
1736 
1737 bool AMDGPUOperand::isSDWAFP16Operand() const {
1738   return isSDWAOperand(MVT::f16);
1739 }
1740 
1741 bool AMDGPUOperand::isSDWAFP32Operand() const {
1742   return isSDWAOperand(MVT::f32);
1743 }
1744 
1745 bool AMDGPUOperand::isSDWAInt16Operand() const {
1746   return isSDWAOperand(MVT::i16);
1747 }
1748 
1749 bool AMDGPUOperand::isSDWAInt32Operand() const {
1750   return isSDWAOperand(MVT::i32);
1751 }
1752 
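// A "boolean" operand must match the wavefront size: a 64-bit scalar source
// (e.g. vcc) in wave64 mode, or a 32-bit scalar source (e.g. vcc_lo) in
// wave32 mode.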
1753 bool AMDGPUOperand::isBoolReg() const {
1754   return (AsmParser->getFeatureBits()[AMDGPU::FeatureWavefrontSize64] && isSCSrcB64()) ||
1755          (AsmParser->getFeatureBits()[AMDGPU::FeatureWavefrontSize32] && isSCSrcB32());
1756 }
1757 
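// Apply parsed 'abs'/'neg' modifiers directly to the bit pattern of an FP
// literal: 'abs' clears the sign bit and 'neg' toggles it. For example, with
// Size == 4, 'neg' applied to 1.0 (0x3F800000) yields -1.0 (0xBF800000).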
1758 uint64_t AMDGPUOperand::applyInputFPModifiers(uint64_t Val, unsigned Size) const
1759 {
1760   assert(isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
1761   assert(Size == 2 || Size == 4 || Size == 8);
1762 
1763   const uint64_t FpSignMask = (1ULL << (Size * 8 - 1));
1764 
1765   if (Imm.Mods.Abs) {
1766     Val &= ~FpSignMask;
1767   }
1768   if (Imm.Mods.Neg) {
1769     Val ^= FpSignMask;
1770   }
1771 
1772   return Val;
1773 }
1774 
1775 void AMDGPUOperand::addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers) const {
1776   if (AMDGPU::isSISrcOperand(AsmParser->getMII()->get(Inst.getOpcode()),
1777                              Inst.getNumOperands())) {
    addLiteralImmOperand(Inst, Imm.Val,
                         ApplyModifiers &&
                         isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
1781   } else {
1782     assert(!isImmTy(ImmTyNone) || !hasModifiers());
1783     Inst.addOperand(MCOperand::createImm(Imm.Val));
1784     setImmKindNone();
1785   }
1786 }
1787 
1788 void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const {
1789   const auto& InstDesc = AsmParser->getMII()->get(Inst.getOpcode());
1790   auto OpNum = Inst.getNumOperands();
1791   // Check that this operand accepts literals
1792   assert(AMDGPU::isSISrcOperand(InstDesc, OpNum));
1793 
1794   if (ApplyModifiers) {
1795     assert(AMDGPU::isSISrcFPOperand(InstDesc, OpNum));
1796     const unsigned Size = Imm.IsFPImm ? sizeof(double) : getOperandSize(InstDesc, OpNum);
1797     Val = applyInputFPModifiers(Val, Size);
1798   }
1799 
1800   APInt Literal(64, Val);
1801   uint8_t OpTy = InstDesc.OpInfo[OpNum].OperandType;
1802 
1803   if (Imm.IsFPImm) { // We got fp literal token
1804     switch (OpTy) {
1805     case AMDGPU::OPERAND_REG_IMM_INT64:
1806     case AMDGPU::OPERAND_REG_IMM_FP64:
1807     case AMDGPU::OPERAND_REG_INLINE_C_INT64:
1808     case AMDGPU::OPERAND_REG_INLINE_C_FP64:
1809       if (AMDGPU::isInlinableLiteral64(Literal.getZExtValue(),
1810                                        AsmParser->hasInv2PiInlineImm())) {
1811         Inst.addOperand(MCOperand::createImm(Literal.getZExtValue()));
1812         setImmKindConst();
1813         return;
1814       }
1815 
1816       // Non-inlineable
1817       if (AMDGPU::isSISrcFPOperand(InstDesc, OpNum)) { // Expected 64-bit fp operand
1818         // For fp operands we check if low 32 bits are zeros
1819         if (Literal.getLoBits(32) != 0) {
1820           const_cast<AMDGPUAsmParser *>(AsmParser)->Warning(Inst.getLoc(),
1821           "Can't encode literal as exact 64-bit floating-point operand. "
1822           "Low 32-bits will be set to zero");
1823         }
1824 
1825         Inst.addOperand(MCOperand::createImm(Literal.lshr(32).getZExtValue()));
1826         setImmKindLiteral();
1827         return;
1828       }
1829 
1830       // We don't allow fp literals in 64-bit integer instructions. It is
1831       // unclear how we should encode them. This case should be checked earlier
1832       // in predicate methods (isLiteralImm())
1833       llvm_unreachable("fp literal in 64-bit integer instruction.");
1834 
1835     case AMDGPU::OPERAND_REG_IMM_INT32:
1836     case AMDGPU::OPERAND_REG_IMM_FP32:
1837     case AMDGPU::OPERAND_REG_INLINE_C_INT32:
1838     case AMDGPU::OPERAND_REG_INLINE_C_FP32:
1839     case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
1840     case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
1841     case AMDGPU::OPERAND_REG_IMM_INT16:
1842     case AMDGPU::OPERAND_REG_IMM_FP16:
1843     case AMDGPU::OPERAND_REG_INLINE_C_INT16:
1844     case AMDGPU::OPERAND_REG_INLINE_C_FP16:
1845     case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
1846     case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
1847     case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
1848     case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
1849     case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
1850     case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
1851     case AMDGPU::OPERAND_REG_IMM_V2INT16:
1852     case AMDGPU::OPERAND_REG_IMM_V2FP16: {
1853       bool lost;
1854       APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
      // Convert the literal to the floating-point format of the operand.
1856       FPLiteral.convert(*getOpFltSemantics(OpTy),
1857                         APFloat::rmNearestTiesToEven, &lost);
      // We allow precision loss but not overflow or underflow. This should
      // have been checked earlier by isLiteralImm().
1860 
1861       uint64_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue();
1862       Inst.addOperand(MCOperand::createImm(ImmVal));
1863       setImmKindLiteral();
1864       return;
1865     }
1866     default:
1867       llvm_unreachable("invalid operand size");
1868     }
1869 
1870     return;
1871   }
1872 
1873   // We got int literal token.
1874   // Only sign extend inline immediates.
1875   switch (OpTy) {
1876   case AMDGPU::OPERAND_REG_IMM_INT32:
1877   case AMDGPU::OPERAND_REG_IMM_FP32:
1878   case AMDGPU::OPERAND_REG_INLINE_C_INT32:
1879   case AMDGPU::OPERAND_REG_INLINE_C_FP32:
1880   case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
1881   case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
1882   case AMDGPU::OPERAND_REG_IMM_V2INT16:
1883   case AMDGPU::OPERAND_REG_IMM_V2FP16:
1884     if (isSafeTruncation(Val, 32) &&
1885         AMDGPU::isInlinableLiteral32(static_cast<int32_t>(Val),
1886                                      AsmParser->hasInv2PiInlineImm())) {
1887       Inst.addOperand(MCOperand::createImm(Val));
1888       setImmKindConst();
1889       return;
1890     }
1891 
1892     Inst.addOperand(MCOperand::createImm(Val & 0xffffffff));
1893     setImmKindLiteral();
1894     return;
1895 
1896   case AMDGPU::OPERAND_REG_IMM_INT64:
1897   case AMDGPU::OPERAND_REG_IMM_FP64:
1898   case AMDGPU::OPERAND_REG_INLINE_C_INT64:
1899   case AMDGPU::OPERAND_REG_INLINE_C_FP64:
1900     if (AMDGPU::isInlinableLiteral64(Val, AsmParser->hasInv2PiInlineImm())) {
1901       Inst.addOperand(MCOperand::createImm(Val));
1902       setImmKindConst();
1903       return;
1904     }
1905 
1906     Inst.addOperand(MCOperand::createImm(Lo_32(Val)));
1907     setImmKindLiteral();
1908     return;
1909 
1910   case AMDGPU::OPERAND_REG_IMM_INT16:
1911   case AMDGPU::OPERAND_REG_IMM_FP16:
1912   case AMDGPU::OPERAND_REG_INLINE_C_INT16:
1913   case AMDGPU::OPERAND_REG_INLINE_C_FP16:
1914   case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
1915   case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
1916     if (isSafeTruncation(Val, 16) &&
1917         AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val),
1918                                      AsmParser->hasInv2PiInlineImm())) {
1919       Inst.addOperand(MCOperand::createImm(Val));
1920       setImmKindConst();
1921       return;
1922     }
1923 
1924     Inst.addOperand(MCOperand::createImm(Val & 0xffff));
1925     setImmKindLiteral();
1926     return;
1927 
1928   case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
1929   case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
1930   case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
1931   case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: {
1932     assert(isSafeTruncation(Val, 16));
1933     assert(AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val),
1934                                         AsmParser->hasInv2PiInlineImm()));
1935 
1936     Inst.addOperand(MCOperand::createImm(Val));
1937     return;
1938   }
1939   default:
1940     llvm_unreachable("invalid operand size");
1941   }
1942 }
1943 
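// Add a KImm floating-point operand (used, e.g., by the immediate of
// v_madmk_f32/v_madak_f32). An integer literal contributes its low Bitwidth
// bits verbatim; an FP literal is first converted from double to the
// Bitwidth-sized format and its bit pattern is encoded.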
1944 template <unsigned Bitwidth>
1945 void AMDGPUOperand::addKImmFPOperands(MCInst &Inst, unsigned N) const {
1946   APInt Literal(64, Imm.Val);
1947   setImmKindNone();
1948 
1949   if (!Imm.IsFPImm) {
1950     // We got int literal token.
1951     Inst.addOperand(MCOperand::createImm(Literal.getLoBits(Bitwidth).getZExtValue()));
1952     return;
1953   }
1954 
1955   bool Lost;
1956   APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
1957   FPLiteral.convert(*getFltSemantics(Bitwidth / 8),
1958                     APFloat::rmNearestTiesToEven, &Lost);
1959   Inst.addOperand(MCOperand::createImm(FPLiteral.bitcastToAPInt().getZExtValue()));
1960 }
1961 
1962 void AMDGPUOperand::addRegOperands(MCInst &Inst, unsigned N) const {
1963   Inst.addOperand(MCOperand::createReg(AMDGPU::getMCReg(getReg(), AsmParser->getSTI())));
1964 }
1965 
1966 static bool isInlineValue(unsigned Reg) {
1967   switch (Reg) {
1968   case AMDGPU::SRC_SHARED_BASE:
1969   case AMDGPU::SRC_SHARED_LIMIT:
1970   case AMDGPU::SRC_PRIVATE_BASE:
1971   case AMDGPU::SRC_PRIVATE_LIMIT:
1972   case AMDGPU::SRC_POPS_EXITING_WAVE_ID:
1973     return true;
1974   case AMDGPU::SRC_VCCZ:
1975   case AMDGPU::SRC_EXECZ:
1976   case AMDGPU::SRC_SCC:
1977     return true;
1978   case AMDGPU::SGPR_NULL:
1979     return true;
1980   default:
1981     return false;
1982   }
1983 }
1984 
1985 bool AMDGPUOperand::isInlineValue() const {
1986   return isRegKind() && ::isInlineValue(getReg());
1987 }
1988 
1989 //===----------------------------------------------------------------------===//
1990 // AsmParser
1991 //===----------------------------------------------------------------------===//
1992 
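// Map a register kind and a width in 32-bit registers to the corresponding
// register class, e.g. (IS_VGPR, 4) -> VReg_128 and (IS_SGPR, 2) -> SGPR_64.
// Returns -1 if no class exists for the requested width.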
1993 static int getRegClass(RegisterKind Is, unsigned RegWidth) {
1994   if (Is == IS_VGPR) {
1995     switch (RegWidth) {
1996       default: return -1;
1997       case 1: return AMDGPU::VGPR_32RegClassID;
1998       case 2: return AMDGPU::VReg_64RegClassID;
1999       case 3: return AMDGPU::VReg_96RegClassID;
2000       case 4: return AMDGPU::VReg_128RegClassID;
2001       case 5: return AMDGPU::VReg_160RegClassID;
2002       case 6: return AMDGPU::VReg_192RegClassID;
2003       case 8: return AMDGPU::VReg_256RegClassID;
2004       case 16: return AMDGPU::VReg_512RegClassID;
2005       case 32: return AMDGPU::VReg_1024RegClassID;
2006     }
2007   } else if (Is == IS_TTMP) {
2008     switch (RegWidth) {
2009       default: return -1;
2010       case 1: return AMDGPU::TTMP_32RegClassID;
2011       case 2: return AMDGPU::TTMP_64RegClassID;
2012       case 4: return AMDGPU::TTMP_128RegClassID;
2013       case 8: return AMDGPU::TTMP_256RegClassID;
2014       case 16: return AMDGPU::TTMP_512RegClassID;
2015     }
2016   } else if (Is == IS_SGPR) {
2017     switch (RegWidth) {
2018       default: return -1;
2019       case 1: return AMDGPU::SGPR_32RegClassID;
2020       case 2: return AMDGPU::SGPR_64RegClassID;
2021       case 3: return AMDGPU::SGPR_96RegClassID;
2022       case 4: return AMDGPU::SGPR_128RegClassID;
2023       case 5: return AMDGPU::SGPR_160RegClassID;
2024       case 6: return AMDGPU::SGPR_192RegClassID;
2025       case 8: return AMDGPU::SGPR_256RegClassID;
2026       case 16: return AMDGPU::SGPR_512RegClassID;
2027     }
2028   } else if (Is == IS_AGPR) {
2029     switch (RegWidth) {
2030       default: return -1;
2031       case 1: return AMDGPU::AGPR_32RegClassID;
2032       case 2: return AMDGPU::AReg_64RegClassID;
2033       case 3: return AMDGPU::AReg_96RegClassID;
2034       case 4: return AMDGPU::AReg_128RegClassID;
2035       case 5: return AMDGPU::AReg_160RegClassID;
2036       case 6: return AMDGPU::AReg_192RegClassID;
2037       case 8: return AMDGPU::AReg_256RegClassID;
2038       case 16: return AMDGPU::AReg_512RegClassID;
2039       case 32: return AMDGPU::AReg_1024RegClassID;
2040     }
2041   }
2042   return -1;
2043 }
2044 
2045 static unsigned getSpecialRegForName(StringRef RegName) {
2046   return StringSwitch<unsigned>(RegName)
2047     .Case("exec", AMDGPU::EXEC)
2048     .Case("vcc", AMDGPU::VCC)
2049     .Case("flat_scratch", AMDGPU::FLAT_SCR)
2050     .Case("xnack_mask", AMDGPU::XNACK_MASK)
2051     .Case("shared_base", AMDGPU::SRC_SHARED_BASE)
2052     .Case("src_shared_base", AMDGPU::SRC_SHARED_BASE)
2053     .Case("shared_limit", AMDGPU::SRC_SHARED_LIMIT)
2054     .Case("src_shared_limit", AMDGPU::SRC_SHARED_LIMIT)
2055     .Case("private_base", AMDGPU::SRC_PRIVATE_BASE)
2056     .Case("src_private_base", AMDGPU::SRC_PRIVATE_BASE)
2057     .Case("private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
2058     .Case("src_private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
2059     .Case("pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
2060     .Case("src_pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
2061     .Case("lds_direct", AMDGPU::LDS_DIRECT)
2062     .Case("src_lds_direct", AMDGPU::LDS_DIRECT)
2063     .Case("m0", AMDGPU::M0)
2064     .Case("vccz", AMDGPU::SRC_VCCZ)
2065     .Case("src_vccz", AMDGPU::SRC_VCCZ)
2066     .Case("execz", AMDGPU::SRC_EXECZ)
2067     .Case("src_execz", AMDGPU::SRC_EXECZ)
2068     .Case("scc", AMDGPU::SRC_SCC)
2069     .Case("src_scc", AMDGPU::SRC_SCC)
2070     .Case("tba", AMDGPU::TBA)
2071     .Case("tma", AMDGPU::TMA)
2072     .Case("flat_scratch_lo", AMDGPU::FLAT_SCR_LO)
2073     .Case("flat_scratch_hi", AMDGPU::FLAT_SCR_HI)
2074     .Case("xnack_mask_lo", AMDGPU::XNACK_MASK_LO)
2075     .Case("xnack_mask_hi", AMDGPU::XNACK_MASK_HI)
2076     .Case("vcc_lo", AMDGPU::VCC_LO)
2077     .Case("vcc_hi", AMDGPU::VCC_HI)
2078     .Case("exec_lo", AMDGPU::EXEC_LO)
2079     .Case("exec_hi", AMDGPU::EXEC_HI)
2080     .Case("tma_lo", AMDGPU::TMA_LO)
2081     .Case("tma_hi", AMDGPU::TMA_HI)
2082     .Case("tba_lo", AMDGPU::TBA_LO)
2083     .Case("tba_hi", AMDGPU::TBA_HI)
2084     .Case("pc", AMDGPU::PC_REG)
2085     .Case("null", AMDGPU::SGPR_NULL)
2086     .Default(AMDGPU::NoRegister);
2087 }
2088 
2089 bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
2090                                     SMLoc &EndLoc, bool RestoreOnFailure) {
2091   auto R = parseRegister();
2092   if (!R) return true;
2093   assert(R->isReg());
2094   RegNo = R->getReg();
2095   StartLoc = R->getStartLoc();
2096   EndLoc = R->getEndLoc();
2097   return false;
2098 }
2099 
2100 bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
2101                                     SMLoc &EndLoc) {
2102   return ParseRegister(RegNo, StartLoc, EndLoc, /*RestoreOnFailure=*/false);
2103 }
2104 
2105 OperandMatchResultTy AMDGPUAsmParser::tryParseRegister(unsigned &RegNo,
2106                                                        SMLoc &StartLoc,
2107                                                        SMLoc &EndLoc) {
2108   bool Result =
2109       ParseRegister(RegNo, StartLoc, EndLoc, /*RestoreOnFailure=*/true);
2110   bool PendingErrors = getParser().hasPendingError();
2111   getParser().clearPendingErrors();
2112   if (PendingErrors)
2113     return MatchOperand_ParseFail;
2114   if (Result)
2115     return MatchOperand_NoMatch;
2116   return MatchOperand_Success;
2117 }
2118 
2119 bool AMDGPUAsmParser::AddNextRegisterToList(unsigned &Reg, unsigned &RegWidth,
2120                                             RegisterKind RegKind, unsigned Reg1,
2121                                             SMLoc Loc) {
2122   switch (RegKind) {
2123   case IS_SPECIAL:
2124     if (Reg == AMDGPU::EXEC_LO && Reg1 == AMDGPU::EXEC_HI) {
2125       Reg = AMDGPU::EXEC;
2126       RegWidth = 2;
2127       return true;
2128     }
2129     if (Reg == AMDGPU::FLAT_SCR_LO && Reg1 == AMDGPU::FLAT_SCR_HI) {
2130       Reg = AMDGPU::FLAT_SCR;
2131       RegWidth = 2;
2132       return true;
2133     }
2134     if (Reg == AMDGPU::XNACK_MASK_LO && Reg1 == AMDGPU::XNACK_MASK_HI) {
2135       Reg = AMDGPU::XNACK_MASK;
2136       RegWidth = 2;
2137       return true;
2138     }
2139     if (Reg == AMDGPU::VCC_LO && Reg1 == AMDGPU::VCC_HI) {
2140       Reg = AMDGPU::VCC;
2141       RegWidth = 2;
2142       return true;
2143     }
2144     if (Reg == AMDGPU::TBA_LO && Reg1 == AMDGPU::TBA_HI) {
2145       Reg = AMDGPU::TBA;
2146       RegWidth = 2;
2147       return true;
2148     }
2149     if (Reg == AMDGPU::TMA_LO && Reg1 == AMDGPU::TMA_HI) {
2150       Reg = AMDGPU::TMA;
2151       RegWidth = 2;
2152       return true;
2153     }
2154     Error(Loc, "register does not fit in the list");
2155     return false;
2156   case IS_VGPR:
2157   case IS_SGPR:
2158   case IS_AGPR:
2159   case IS_TTMP:
2160     if (Reg1 != Reg + RegWidth) {
2161       Error(Loc, "registers in a list must have consecutive indices");
2162       return false;
2163     }
2164     RegWidth++;
2165     return true;
2166   default:
2167     llvm_unreachable("unexpected register kind");
2168   }
2169 }
2170 
2171 struct RegInfo {
2172   StringLiteral Name;
2173   RegisterKind Kind;
2174 };
2175 
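// Prefix table for regular (numbered) registers. Entries are matched in
// order with startswith(), so "acc" must precede "a"; otherwise "acc0" would
// match the "a" entry and its remaining suffix "cc0" would not parse as a
// register index.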
2176 static constexpr RegInfo RegularRegisters[] = {
2177   {{"v"},    IS_VGPR},
2178   {{"s"},    IS_SGPR},
2179   {{"ttmp"}, IS_TTMP},
2180   {{"acc"},  IS_AGPR},
2181   {{"a"},    IS_AGPR},
2182 };
2183 
2184 static bool isRegularReg(RegisterKind Kind) {
2185   return Kind == IS_VGPR ||
2186          Kind == IS_SGPR ||
2187          Kind == IS_TTMP ||
2188          Kind == IS_AGPR;
2189 }
2190 
2191 static const RegInfo* getRegularRegInfo(StringRef Str) {
2192   for (const RegInfo &Reg : RegularRegisters)
2193     if (Str.startswith(Reg.Name))
2194       return &Reg;
2195   return nullptr;
2196 }
2197 
2198 static bool getRegNum(StringRef Str, unsigned& Num) {
2199   return !Str.getAsInteger(10, Num);
2200 }
2201 
2202 bool
2203 AMDGPUAsmParser::isRegister(const AsmToken &Token,
2204                             const AsmToken &NextToken) const {
2205 
2206   // A list of consecutive registers: [s0,s1,s2,s3]
2207   if (Token.is(AsmToken::LBrac))
2208     return true;
2209 
2210   if (!Token.is(AsmToken::Identifier))
2211     return false;
2212 
2213   // A single register like s0 or a range of registers like s[0:1]
2214 
2215   StringRef Str = Token.getString();
2216   const RegInfo *Reg = getRegularRegInfo(Str);
2217   if (Reg) {
2218     StringRef RegName = Reg->Name;
2219     StringRef RegSuffix = Str.substr(RegName.size());
2220     if (!RegSuffix.empty()) {
2221       unsigned Num;
2222       // A single register with an index: rXX
2223       if (getRegNum(RegSuffix, Num))
2224         return true;
2225     } else {
2226       // A range of registers: r[XX:YY].
2227       if (NextToken.is(AsmToken::LBrac))
2228         return true;
2229     }
2230   }
2231 
2232   return getSpecialRegForName(Str) != AMDGPU::NoRegister;
2233 }
2234 
2235 bool
2236 AMDGPUAsmParser::isRegister()
2237 {
2238   return isRegister(getToken(), peekToken());
2239 }
2240 
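// Compute the MC register for a regular register operand. RegNum is the
// index of the first 32-bit register and RegWidth the number of consecutive
// registers; e.g. s[4:7] (RegNum = 4, RegWidth = 4) is aligned to 4 dwords
// and selects index 1 of the SGPR_128 register class.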
2241 unsigned
2242 AMDGPUAsmParser::getRegularReg(RegisterKind RegKind,
2243                                unsigned RegNum,
2244                                unsigned RegWidth,
2245                                SMLoc Loc) {
2246 
2247   assert(isRegularReg(RegKind));
2248 
2249   unsigned AlignSize = 1;
2250   if (RegKind == IS_SGPR || RegKind == IS_TTMP) {
2251     // SGPR and TTMP registers must be aligned.
2252     // Max required alignment is 4 dwords.
2253     AlignSize = std::min(RegWidth, 4u);
2254   }
2255 
2256   if (RegNum % AlignSize != 0) {
2257     Error(Loc, "invalid register alignment");
2258     return AMDGPU::NoRegister;
2259   }
2260 
2261   unsigned RegIdx = RegNum / AlignSize;
2262   int RCID = getRegClass(RegKind, RegWidth);
2263   if (RCID == -1) {
2264     Error(Loc, "invalid or unsupported register size");
2265     return AMDGPU::NoRegister;
2266   }
2267 
2268   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
2269   const MCRegisterClass RC = TRI->getRegClass(RCID);
2270   if (RegIdx >= RC.getNumRegs()) {
2271     Error(Loc, "register index is out of range");
2272     return AMDGPU::NoRegister;
2273   }
2274 
2275   return RC.getRegister(RegIdx);
2276 }
2277 
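// Parse a register index range in square brackets, either "[XX]" or
// "[XX:YY]". On success Num holds the first index and Width the number of
// 32-bit registers, e.g. "[0:3]" yields Num = 0 and Width = 4.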
2278 bool
2279 AMDGPUAsmParser::ParseRegRange(unsigned& Num, unsigned& Width) {
2280   int64_t RegLo, RegHi;
2281   if (!skipToken(AsmToken::LBrac, "missing register index"))
2282     return false;
2283 
2284   SMLoc FirstIdxLoc = getLoc();
2285   SMLoc SecondIdxLoc;
2286 
2287   if (!parseExpr(RegLo))
2288     return false;
2289 
2290   if (trySkipToken(AsmToken::Colon)) {
2291     SecondIdxLoc = getLoc();
2292     if (!parseExpr(RegHi))
2293       return false;
2294   } else {
2295     RegHi = RegLo;
2296   }
2297 
2298   if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
2299     return false;
2300 
2301   if (!isUInt<32>(RegLo)) {
2302     Error(FirstIdxLoc, "invalid register index");
2303     return false;
2304   }
2305 
2306   if (!isUInt<32>(RegHi)) {
2307     Error(SecondIdxLoc, "invalid register index");
2308     return false;
2309   }
2310 
2311   if (RegLo > RegHi) {
2312     Error(FirstIdxLoc, "first register index should not exceed second index");
2313     return false;
2314   }
2315 
2316   Num = static_cast<unsigned>(RegLo);
2317   Width = (RegHi - RegLo) + 1;
2318   return true;
2319 }
2320 
2321 unsigned AMDGPUAsmParser::ParseSpecialReg(RegisterKind &RegKind,
2322                                           unsigned &RegNum, unsigned &RegWidth,
2323                                           SmallVectorImpl<AsmToken> &Tokens) {
2324   assert(isToken(AsmToken::Identifier));
2325   unsigned Reg = getSpecialRegForName(getTokenStr());
2326   if (Reg) {
2327     RegNum = 0;
2328     RegWidth = 1;
2329     RegKind = IS_SPECIAL;
2330     Tokens.push_back(getToken());
2331     lex(); // skip register name
2332   }
2333   return Reg;
2334 }
2335 
2336 unsigned AMDGPUAsmParser::ParseRegularReg(RegisterKind &RegKind,
2337                                           unsigned &RegNum, unsigned &RegWidth,
2338                                           SmallVectorImpl<AsmToken> &Tokens) {
2339   assert(isToken(AsmToken::Identifier));
2340   StringRef RegName = getTokenStr();
2341   auto Loc = getLoc();
2342 
2343   const RegInfo *RI = getRegularRegInfo(RegName);
2344   if (!RI) {
2345     Error(Loc, "invalid register name");
2346     return AMDGPU::NoRegister;
2347   }
2348 
2349   Tokens.push_back(getToken());
2350   lex(); // skip register name
2351 
2352   RegKind = RI->Kind;
2353   StringRef RegSuffix = RegName.substr(RI->Name.size());
2354   if (!RegSuffix.empty()) {
2355     // Single 32-bit register: vXX.
2356     if (!getRegNum(RegSuffix, RegNum)) {
2357       Error(Loc, "invalid register index");
2358       return AMDGPU::NoRegister;
2359     }
2360     RegWidth = 1;
2361   } else {
2362     // Range of registers: v[XX:YY]. ":YY" is optional.
2363     if (!ParseRegRange(RegNum, RegWidth))
2364       return AMDGPU::NoRegister;
2365   }
2366 
2367   return getRegularReg(RegKind, RegNum, RegWidth, Loc);
2368 }
2369 
2370 unsigned AMDGPUAsmParser::ParseRegList(RegisterKind &RegKind, unsigned &RegNum,
2371                                        unsigned &RegWidth,
2372                                        SmallVectorImpl<AsmToken> &Tokens) {
2373   unsigned Reg = AMDGPU::NoRegister;
2374   auto ListLoc = getLoc();
2375 
2376   if (!skipToken(AsmToken::LBrac,
2377                  "expected a register or a list of registers")) {
2378     return AMDGPU::NoRegister;
2379   }
2380 
2381   // List of consecutive registers, e.g.: [s0,s1,s2,s3]
2382 
2383   auto Loc = getLoc();
2384   if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth))
2385     return AMDGPU::NoRegister;
2386   if (RegWidth != 1) {
2387     Error(Loc, "expected a single 32-bit register");
2388     return AMDGPU::NoRegister;
2389   }
2390 
2391   for (; trySkipToken(AsmToken::Comma); ) {
2392     RegisterKind NextRegKind;
2393     unsigned NextReg, NextRegNum, NextRegWidth;
2394     Loc = getLoc();
2395 
2396     if (!ParseAMDGPURegister(NextRegKind, NextReg,
2397                              NextRegNum, NextRegWidth,
2398                              Tokens)) {
2399       return AMDGPU::NoRegister;
2400     }
2401     if (NextRegWidth != 1) {
2402       Error(Loc, "expected a single 32-bit register");
2403       return AMDGPU::NoRegister;
2404     }
2405     if (NextRegKind != RegKind) {
2406       Error(Loc, "registers in a list must be of the same kind");
2407       return AMDGPU::NoRegister;
2408     }
2409     if (!AddNextRegisterToList(Reg, RegWidth, RegKind, NextReg, Loc))
2410       return AMDGPU::NoRegister;
2411   }
2412 
2413   if (!skipToken(AsmToken::RBrac,
2414                  "expected a comma or a closing square bracket")) {
2415     return AMDGPU::NoRegister;
2416   }
2417 
2418   if (isRegularReg(RegKind))
2419     Reg = getRegularReg(RegKind, RegNum, RegWidth, ListLoc);
2420 
2421   return Reg;
2422 }
2423 
2424 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
2425                                           unsigned &RegNum, unsigned &RegWidth,
2426                                           SmallVectorImpl<AsmToken> &Tokens) {
2427   auto Loc = getLoc();
2428   Reg = AMDGPU::NoRegister;
2429 
2430   if (isToken(AsmToken::Identifier)) {
2431     Reg = ParseSpecialReg(RegKind, RegNum, RegWidth, Tokens);
2432     if (Reg == AMDGPU::NoRegister)
2433       Reg = ParseRegularReg(RegKind, RegNum, RegWidth, Tokens);
2434   } else {
2435     Reg = ParseRegList(RegKind, RegNum, RegWidth, Tokens);
2436   }
2437 
2438   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
2439   if (Reg == AMDGPU::NoRegister) {
2440     assert(Parser.hasPendingError());
2441     return false;
2442   }
2443 
2444   if (!subtargetHasRegister(*TRI, Reg)) {
2445     if (Reg == AMDGPU::SGPR_NULL) {
2446       Error(Loc, "'null' operand is not supported on this GPU");
2447     } else {
2448       Error(Loc, "register not available on this GPU");
2449     }
2450     return false;
2451   }
2452 
2453   return true;
2454 }
2455 
2456 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
2457                                           unsigned &RegNum, unsigned &RegWidth,
2458                                           bool RestoreOnFailure /*=false*/) {
2459   Reg = AMDGPU::NoRegister;
2460 
2461   SmallVector<AsmToken, 1> Tokens;
2462   if (ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, Tokens)) {
2463     if (RestoreOnFailure) {
2464       while (!Tokens.empty()) {
2465         getLexer().UnLex(Tokens.pop_back_val());
2466       }
2467     }
2468     return true;
2469   }
2470   return false;
2471 }
2472 
2473 Optional<StringRef>
2474 AMDGPUAsmParser::getGprCountSymbolName(RegisterKind RegKind) {
2475   switch (RegKind) {
2476   case IS_VGPR:
2477     return StringRef(".amdgcn.next_free_vgpr");
2478   case IS_SGPR:
2479     return StringRef(".amdgcn.next_free_sgpr");
2480   default:
2481     return None;
2482   }
2483 }
2484 
2485 void AMDGPUAsmParser::initializeGprCountSymbol(RegisterKind RegKind) {
2486   auto SymbolName = getGprCountSymbolName(RegKind);
2487   assert(SymbolName && "initializing invalid register kind");
2488   MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);
2489   Sym->setVariableValue(MCConstantExpr::create(0, getContext()));
2490 }
2491 
2492 bool AMDGPUAsmParser::updateGprCountSymbols(RegisterKind RegKind,
2493                                             unsigned DwordRegIndex,
2494                                             unsigned RegWidth) {
2495   // Symbols are only defined for GCN targets
2496   if (AMDGPU::getIsaVersion(getSTI().getCPU()).Major < 6)
2497     return true;
2498 
2499   auto SymbolName = getGprCountSymbolName(RegKind);
2500   if (!SymbolName)
2501     return true;
2502   MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);
2503 
2504   int64_t NewMax = DwordRegIndex + RegWidth - 1;
2505   int64_t OldCount;
2506 
2507   if (!Sym->isVariable())
2508     return !Error(getLoc(),
2509                   ".amdgcn.next_free_{v,s}gpr symbols must be variable");
2510   if (!Sym->getVariableValue(false)->evaluateAsAbsolute(OldCount))
2511     return !Error(
2512         getLoc(),
2513         ".amdgcn.next_free_{v,s}gpr symbols must be absolute expressions");
2514 
2515   if (OldCount <= NewMax)
2516     Sym->setVariableValue(MCConstantExpr::create(NewMax + 1, getContext()));
2517 
2518   return true;
2519 }
2520 
2521 std::unique_ptr<AMDGPUOperand>
2522 AMDGPUAsmParser::parseRegister(bool RestoreOnFailure) {
2523   const auto &Tok = getToken();
2524   SMLoc StartLoc = Tok.getLoc();
2525   SMLoc EndLoc = Tok.getEndLoc();
2526   RegisterKind RegKind;
2527   unsigned Reg, RegNum, RegWidth;
2528 
2529   if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth)) {
2530     return nullptr;
2531   }
2532   if (isHsaAbiVersion3(&getSTI())) {
2533     if (!updateGprCountSymbols(RegKind, RegNum, RegWidth))
2534       return nullptr;
2535   } else
2536     KernelScope.usesRegister(RegKind, RegNum, RegWidth);
2537   return AMDGPUOperand::CreateReg(this, Reg, StartLoc, EndLoc);
2538 }
2539 
2540 OperandMatchResultTy
2541 AMDGPUAsmParser::parseImm(OperandVector &Operands, bool HasSP3AbsModifier) {
2542   // TODO: add syntactic sugar for 1/(2*PI)
2543 
2544   assert(!isRegister());
2545   assert(!isModifier());
2546 
2547   const auto& Tok = getToken();
2548   const auto& NextTok = peekToken();
2549   bool IsReal = Tok.is(AsmToken::Real);
2550   SMLoc S = getLoc();
2551   bool Negate = false;
2552 
2553   if (!IsReal && Tok.is(AsmToken::Minus) && NextTok.is(AsmToken::Real)) {
2554     lex();
2555     IsReal = true;
2556     Negate = true;
2557   }
2558 
2559   if (IsReal) {
2560     // Floating-point expressions are not supported.
2561     // Can only allow floating-point literals with an
2562     // optional sign.
2563 
2564     StringRef Num = getTokenStr();
2565     lex();
2566 
2567     APFloat RealVal(APFloat::IEEEdouble());
2568     auto roundMode = APFloat::rmNearestTiesToEven;
2569     if (errorToBool(RealVal.convertFromString(Num, roundMode).takeError())) {
2570       return MatchOperand_ParseFail;
2571     }
2572     if (Negate)
2573       RealVal.changeSign();
2574 
2575     Operands.push_back(
2576       AMDGPUOperand::CreateImm(this, RealVal.bitcastToAPInt().getZExtValue(), S,
2577                                AMDGPUOperand::ImmTyNone, true));
2578 
2579     return MatchOperand_Success;
2580 
2581   } else {
2582     int64_t IntVal;
2583     const MCExpr *Expr;
2584     SMLoc S = getLoc();
2585 
2586     if (HasSP3AbsModifier) {
2587       // This is a workaround for handling expressions
2588       // as arguments of SP3 'abs' modifier, for example:
2589       //     |1.0|
2590       //     |-1|
2591       //     |1+x|
2592       // This syntax is not compatible with syntax of standard
2593       // MC expressions (due to the trailing '|').
2594       SMLoc EndLoc;
2595       if (getParser().parsePrimaryExpr(Expr, EndLoc, nullptr))
2596         return MatchOperand_ParseFail;
2597     } else {
2598       if (Parser.parseExpression(Expr))
2599         return MatchOperand_ParseFail;
2600     }
2601 
2602     if (Expr->evaluateAsAbsolute(IntVal)) {
2603       Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S));
2604     } else {
2605       Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S));
2606     }
2607 
2608     return MatchOperand_Success;
2609   }
2610 
2611   return MatchOperand_NoMatch;
2612 }
2613 
2614 OperandMatchResultTy
2615 AMDGPUAsmParser::parseReg(OperandVector &Operands) {
2616   if (!isRegister())
2617     return MatchOperand_NoMatch;
2618 
2619   if (auto R = parseRegister()) {
2620     assert(R->isReg());
2621     Operands.push_back(std::move(R));
2622     return MatchOperand_Success;
2623   }
2624   return MatchOperand_ParseFail;
2625 }
2626 
2627 OperandMatchResultTy
2628 AMDGPUAsmParser::parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod) {
2629   auto res = parseReg(Operands);
2630   if (res != MatchOperand_NoMatch) {
2631     return res;
2632   } else if (isModifier()) {
2633     return MatchOperand_NoMatch;
2634   } else {
2635     return parseImm(Operands, HasSP3AbsMod);
2636   }
2637 }
2638 
2639 bool
2640 AMDGPUAsmParser::isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
2641   if (Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::LParen)) {
2642     const auto &str = Token.getString();
2643     return str == "abs" || str == "neg" || str == "sext";
2644   }
2645   return false;
2646 }
2647 
2648 bool
2649 AMDGPUAsmParser::isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const {
2650   return Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::Colon);
2651 }
2652 
2653 bool
2654 AMDGPUAsmParser::isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
2655   return isNamedOperandModifier(Token, NextToken) || Token.is(AsmToken::Pipe);
2656 }
2657 
2658 bool
2659 AMDGPUAsmParser::isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
2660   return isRegister(Token, NextToken) || isOperandModifier(Token, NextToken);
2661 }
2662 
2663 // Check if this is an operand modifier or an opcode modifier
// which may look like an expression but is not. We should
2665 // avoid parsing these modifiers as expressions. Currently
2666 // recognized sequences are:
2667 //   |...|
2668 //   abs(...)
2669 //   neg(...)
2670 //   sext(...)
2671 //   -reg
2672 //   -|...|
2673 //   -abs(...)
2674 //   name:...
2675 // Note that simple opcode modifiers like 'gds' may be parsed as
2676 // expressions; this is a special case. See getExpressionAsToken.
2677 //
2678 bool
2679 AMDGPUAsmParser::isModifier() {
2680 
2681   AsmToken Tok = getToken();
2682   AsmToken NextToken[2];
2683   peekTokens(NextToken);
2684 
2685   return isOperandModifier(Tok, NextToken[0]) ||
2686          (Tok.is(AsmToken::Minus) && isRegOrOperandModifier(NextToken[0], NextToken[1])) ||
2687          isOpcodeModifierWithVal(Tok, NextToken[0]);
2688 }
2689 
2690 // Check if the current token is an SP3 'neg' modifier.
2691 // Currently this modifier is allowed in the following context:
2692 //
2693 // 1. Before a register, e.g. "-v0", "-v[...]" or "-[v0,v1]".
2694 // 2. Before an 'abs' modifier: -abs(...)
2695 // 3. Before an SP3 'abs' modifier: -|...|
2696 //
2697 // In all other cases "-" is handled as a part
2698 // of an expression that follows the sign.
2699 //
// Note: When "-" is followed by an integer literal,
// this is interpreted as integer negation rather
// than a floating-point NEG modifier applied to the literal.
// Besides being counter-intuitive, such use of a floating-point
// NEG modifier would have resulted in different meanings
// of integer literals used with VOP1/2/C and VOP3,
// for example:
//    v_exp_f32_e32 v5, -1 // VOP1: src0 = 0xFFFFFFFF
//    v_exp_f32_e64 v5, -1 // VOP3: src0 = 0x80000001
// Negative fp literals with a preceding "-" are
// handled likewise for uniformity.
2711 //
2712 bool
2713 AMDGPUAsmParser::parseSP3NegModifier() {
2714 
2715   AsmToken NextToken[2];
2716   peekTokens(NextToken);
2717 
2718   if (isToken(AsmToken::Minus) &&
2719       (isRegister(NextToken[0], NextToken[1]) ||
2720        NextToken[0].is(AsmToken::Pipe) ||
2721        isId(NextToken[0], "abs"))) {
2722     lex();
2723     return true;
2724   }
2725 
2726   return false;
2727 }
2728 
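// Parse an operand together with optional floating-point input modifiers.
// Both the named syntax, neg(...) and abs(...), and the SP3 syntax, -... and
// |...|, are accepted, e.g. "-v0", "|v1|", "-|v2|" or "neg(abs(v3))".
// Mixing the two spellings of the same modifier, e.g. "abs(|v0|)", is
// rejected.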
2729 OperandMatchResultTy
2730 AMDGPUAsmParser::parseRegOrImmWithFPInputMods(OperandVector &Operands,
2731                                               bool AllowImm) {
2732   bool Neg, SP3Neg;
2733   bool Abs, SP3Abs;
2734   SMLoc Loc;
2735 
2736   // Disable ambiguous constructs like '--1' etc. Should use neg(-1) instead.
2737   if (isToken(AsmToken::Minus) && peekToken().is(AsmToken::Minus)) {
2738     Error(getLoc(), "invalid syntax, expected 'neg' modifier");
2739     return MatchOperand_ParseFail;
2740   }
2741 
2742   SP3Neg = parseSP3NegModifier();
2743 
2744   Loc = getLoc();
2745   Neg = trySkipId("neg");
2746   if (Neg && SP3Neg) {
2747     Error(Loc, "expected register or immediate");
2748     return MatchOperand_ParseFail;
2749   }
2750   if (Neg && !skipToken(AsmToken::LParen, "expected left paren after neg"))
2751     return MatchOperand_ParseFail;
2752 
2753   Abs = trySkipId("abs");
2754   if (Abs && !skipToken(AsmToken::LParen, "expected left paren after abs"))
2755     return MatchOperand_ParseFail;
2756 
2757   Loc = getLoc();
2758   SP3Abs = trySkipToken(AsmToken::Pipe);
2759   if (Abs && SP3Abs) {
2760     Error(Loc, "expected register or immediate");
2761     return MatchOperand_ParseFail;
2762   }
2763 
2764   OperandMatchResultTy Res;
2765   if (AllowImm) {
2766     Res = parseRegOrImm(Operands, SP3Abs);
2767   } else {
2768     Res = parseReg(Operands);
2769   }
2770   if (Res != MatchOperand_Success) {
2771     return (SP3Neg || Neg || SP3Abs || Abs)? MatchOperand_ParseFail : Res;
2772   }
2773 
2774   if (SP3Abs && !skipToken(AsmToken::Pipe, "expected vertical bar"))
2775     return MatchOperand_ParseFail;
2776   if (Abs && !skipToken(AsmToken::RParen, "expected closing parentheses"))
2777     return MatchOperand_ParseFail;
2778   if (Neg && !skipToken(AsmToken::RParen, "expected closing parentheses"))
2779     return MatchOperand_ParseFail;
2780 
2781   AMDGPUOperand::Modifiers Mods;
2782   Mods.Abs = Abs || SP3Abs;
2783   Mods.Neg = Neg || SP3Neg;
2784 
2785   if (Mods.hasFPModifiers()) {
2786     AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
2787     if (Op.isExpr()) {
2788       Error(Op.getStartLoc(), "expected an absolute expression");
2789       return MatchOperand_ParseFail;
2790     }
2791     Op.setModifiers(Mods);
2792   }
2793   return MatchOperand_Success;
2794 }
2795 
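// Parse an operand together with the optional integer 'sext' modifier,
// e.g. "sext(v0)".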
2796 OperandMatchResultTy
2797 AMDGPUAsmParser::parseRegOrImmWithIntInputMods(OperandVector &Operands,
2798                                                bool AllowImm) {
2799   bool Sext = trySkipId("sext");
2800   if (Sext && !skipToken(AsmToken::LParen, "expected left paren after sext"))
2801     return MatchOperand_ParseFail;
2802 
2803   OperandMatchResultTy Res;
2804   if (AllowImm) {
2805     Res = parseRegOrImm(Operands);
2806   } else {
2807     Res = parseReg(Operands);
2808   }
2809   if (Res != MatchOperand_Success) {
2810     return Sext? MatchOperand_ParseFail : Res;
2811   }
2812 
2813   if (Sext && !skipToken(AsmToken::RParen, "expected closing parentheses"))
2814     return MatchOperand_ParseFail;
2815 
2816   AMDGPUOperand::Modifiers Mods;
2817   Mods.Sext = Sext;
2818 
2819   if (Mods.hasIntModifiers()) {
2820     AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
2821     if (Op.isExpr()) {
2822       Error(Op.getStartLoc(), "expected an absolute expression");
2823       return MatchOperand_ParseFail;
2824     }
2825     Op.setModifiers(Mods);
2826   }
2827 
2828   return MatchOperand_Success;
2829 }
2830 
2831 OperandMatchResultTy
2832 AMDGPUAsmParser::parseRegWithFPInputMods(OperandVector &Operands) {
2833   return parseRegOrImmWithFPInputMods(Operands, false);
2834 }
2835 
2836 OperandMatchResultTy
2837 AMDGPUAsmParser::parseRegWithIntInputMods(OperandVector &Operands) {
2838   return parseRegOrImmWithIntInputMods(Operands, false);
2839 }
2840 
2841 OperandMatchResultTy AMDGPUAsmParser::parseVReg32OrOff(OperandVector &Operands) {
2842   auto Loc = getLoc();
2843   if (trySkipId("off")) {
2844     Operands.push_back(AMDGPUOperand::CreateImm(this, 0, Loc,
2845                                                 AMDGPUOperand::ImmTyOff, false));
2846     return MatchOperand_Success;
2847   }
2848 
2849   if (!isRegister())
2850     return MatchOperand_NoMatch;
2851 
2852   std::unique_ptr<AMDGPUOperand> Reg = parseRegister();
2853   if (Reg) {
2854     Operands.push_back(std::move(Reg));
2855     return MatchOperand_Success;
2856   }
2857 
2858   return MatchOperand_ParseFail;
2860 }
2861 
2862 unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) {
2863   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
2864 
2865   if ((getForcedEncodingSize() == 32 && (TSFlags & SIInstrFlags::VOP3)) ||
2866       (getForcedEncodingSize() == 64 && !(TSFlags & SIInstrFlags::VOP3)) ||
2867       (isForcedDPP() && !(TSFlags & SIInstrFlags::DPP)) ||
2868       (isForcedSDWA() && !(TSFlags & SIInstrFlags::SDWA)) )
2869     return Match_InvalidOperand;
2870 
2871   if ((TSFlags & SIInstrFlags::VOP3) &&
2872       (TSFlags & SIInstrFlags::VOPAsmPrefer32Bit) &&
2873       getForcedEncodingSize() != 64)
2874     return Match_PreferE32;
2875 
2876   if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
2877       Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) {
    // v_mac_f32/16 allow only dst_sel == DWORD.
2879     auto OpNum =
2880         AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::dst_sel);
2881     const auto &Op = Inst.getOperand(OpNum);
2882     if (!Op.isImm() || Op.getImm() != AMDGPU::SDWA::SdwaSel::DWORD) {
2883       return Match_InvalidOperand;
2884     }
2885   }
2886 
2887   return Match_Success;
2888 }
2889 
2890 static ArrayRef<unsigned> getAllVariants() {
2891   static const unsigned Variants[] = {
2892     AMDGPUAsmVariants::DEFAULT, AMDGPUAsmVariants::VOP3,
2893     AMDGPUAsmVariants::SDWA, AMDGPUAsmVariants::SDWA9, AMDGPUAsmVariants::DPP
2894   };
2895 
2896   return makeArrayRef(Variants);
2897 }
2898 
2899 // What asm variants we should check
2900 ArrayRef<unsigned> AMDGPUAsmParser::getMatchedVariants() const {
2901   if (getForcedEncodingSize() == 32) {
2902     static const unsigned Variants[] = {AMDGPUAsmVariants::DEFAULT};
2903     return makeArrayRef(Variants);
2904   }
2905 
2906   if (isForcedVOP3()) {
2907     static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3};
2908     return makeArrayRef(Variants);
2909   }
2910 
2911   if (isForcedSDWA()) {
2912     static const unsigned Variants[] = {AMDGPUAsmVariants::SDWA,
2913                                         AMDGPUAsmVariants::SDWA9};
2914     return makeArrayRef(Variants);
2915   }
2916 
2917   if (isForcedDPP()) {
2918     static const unsigned Variants[] = {AMDGPUAsmVariants::DPP};
2919     return makeArrayRef(Variants);
2920   }
2921 
2922   return getAllVariants();
2923 }
2924 
2925 StringRef AMDGPUAsmParser::getMatchedVariantName() const {
2926   if (getForcedEncodingSize() == 32)
2927     return "e32";
2928 
2929   if (isForcedVOP3())
2930     return "e64";
2931 
2932   if (isForcedSDWA())
2933     return "sdwa";
2934 
2935   if (isForcedDPP())
2936     return "dpp";
2937 
2938   return "";
2939 }
2940 
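// Return the first implicitly read SGPR (FLAT_SCR, VCC, VCC_LO, VCC_HI or
// M0) of a VOP instruction, or NoRegister if there is none. Such an implicit
// read counts toward the constant bus limit checked below.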
2941 unsigned AMDGPUAsmParser::findImplicitSGPRReadInVOP(const MCInst &Inst) const {
2942   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
2943   const unsigned Num = Desc.getNumImplicitUses();
2944   for (unsigned i = 0; i < Num; ++i) {
2945     unsigned Reg = Desc.ImplicitUses[i];
2946     switch (Reg) {
2947     case AMDGPU::FLAT_SCR:
2948     case AMDGPU::VCC:
2949     case AMDGPU::VCC_LO:
2950     case AMDGPU::VCC_HI:
2951     case AMDGPU::M0:
2952       return Reg;
2953     default:
2954       break;
2955     }
2956   }
2957   return AMDGPU::NoRegister;
2958 }
2959 
2960 // NB: This code is correct only when used to check constant
// bus limitations because GFX7 supports no f16 inline constants.
2962 // Note that there are no cases when a GFX7 opcode violates
2963 // constant bus limitations due to the use of an f16 constant.
2964 bool AMDGPUAsmParser::isInlineConstant(const MCInst &Inst,
2965                                        unsigned OpIdx) const {
2966   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
2967 
2968   if (!AMDGPU::isSISrcOperand(Desc, OpIdx)) {
2969     return false;
2970   }
2971 
2972   const MCOperand &MO = Inst.getOperand(OpIdx);
2973 
2974   int64_t Val = MO.getImm();
2975   auto OpSize = AMDGPU::getOperandSize(Desc, OpIdx);
2976 
2977   switch (OpSize) { // expected operand size
2978   case 8:
2979     return AMDGPU::isInlinableLiteral64(Val, hasInv2PiInlineImm());
2980   case 4:
2981     return AMDGPU::isInlinableLiteral32(Val, hasInv2PiInlineImm());
2982   case 2: {
2983     const unsigned OperandType = Desc.OpInfo[OpIdx].OperandType;
2984     if (OperandType == AMDGPU::OPERAND_REG_IMM_INT16 ||
2985         OperandType == AMDGPU::OPERAND_REG_INLINE_C_INT16 ||
2986         OperandType == AMDGPU::OPERAND_REG_INLINE_AC_INT16)
2987       return AMDGPU::isInlinableIntLiteral(Val);
2988 
2989     if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2INT16 ||
2990         OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2INT16 ||
2991         OperandType == AMDGPU::OPERAND_REG_IMM_V2INT16)
2992       return AMDGPU::isInlinableIntLiteralV216(Val);
2993 
2994     if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2FP16 ||
2995         OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2FP16 ||
2996         OperandType == AMDGPU::OPERAND_REG_IMM_V2FP16)
2997       return AMDGPU::isInlinableLiteralV216(Val, hasInv2PiInlineImm());
2998 
2999     return AMDGPU::isInlinableLiteral16(Val, hasInv2PiInlineImm());
3000   }
3001   default:
3002     llvm_unreachable("invalid operand size");
3003   }
3004 }
3005 
3006 unsigned AMDGPUAsmParser::getConstantBusLimit(unsigned Opcode) const {
3007   if (!isGFX10Plus())
3008     return 1;
3009 
3010   switch (Opcode) {
3011   // 64-bit shift instructions can use only one scalar value input
3012   case AMDGPU::V_LSHLREV_B64_e64:
3013   case AMDGPU::V_LSHLREV_B64_gfx10:
3014   case AMDGPU::V_LSHRREV_B64_e64:
3015   case AMDGPU::V_LSHRREV_B64_gfx10:
3016   case AMDGPU::V_ASHRREV_I64_e64:
3017   case AMDGPU::V_ASHRREV_I64_gfx10:
3018   case AMDGPU::V_LSHL_B64_e64:
3019   case AMDGPU::V_LSHR_B64_e64:
3020   case AMDGPU::V_ASHR_I64_e64:
3021     return 1;
3022   default:
3023     return 2;
3024   }
3025 }
3026 
3027 bool AMDGPUAsmParser::usesConstantBus(const MCInst &Inst, unsigned OpIdx) {
3028   const MCOperand &MO = Inst.getOperand(OpIdx);
3029   if (MO.isImm()) {
3030     return !isInlineConstant(Inst, OpIdx);
3031   } else if (MO.isReg()) {
3032     auto Reg = MO.getReg();
3033     const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3034     auto PReg = mc2PseudoReg(Reg);
3035     return isSGPR(PReg, TRI) && PReg != SGPR_NULL;
3036   } else {
3037     return true;
3038   }
3039 }
3040 
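// Validate the constant bus limitations of a VALU instruction: at most one
// scalar value (SGPR or literal) may be read on pre-GFX10 targets, and at
// most two on GFX10+ (see getConstantBusLimit). For example,
// "v_add_f32_e64 v0, s1, s2" reads two different SGPRs and is therefore
// rejected on pre-GFX10 targets, while "v_add_f32_e64 v0, s1, s1" is
// accepted because a repeated SGPR counts only once.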
3041 bool
3042 AMDGPUAsmParser::validateConstantBusLimitations(const MCInst &Inst,
3043                                                 const OperandVector &Operands) {
3044   const unsigned Opcode = Inst.getOpcode();
3045   const MCInstrDesc &Desc = MII.get(Opcode);
3046   unsigned LastSGPR = AMDGPU::NoRegister;
3047   unsigned ConstantBusUseCount = 0;
3048   unsigned NumLiterals = 0;
3049   unsigned LiteralSize;
3050 
3051   if (Desc.TSFlags &
3052       (SIInstrFlags::VOPC |
3053        SIInstrFlags::VOP1 | SIInstrFlags::VOP2 |
3054        SIInstrFlags::VOP3 | SIInstrFlags::VOP3P |
3055        SIInstrFlags::SDWA)) {
3056     // Check special imm operands (used by madmk, etc)
3057     if (AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm) != -1) {
3058       ++ConstantBusUseCount;
3059     }
3060 
3061     SmallDenseSet<unsigned> SGPRsUsed;
3062     unsigned SGPRUsed = findImplicitSGPRReadInVOP(Inst);
3063     if (SGPRUsed != AMDGPU::NoRegister) {
3064       SGPRsUsed.insert(SGPRUsed);
3065       ++ConstantBusUseCount;
3066     }
3067 
3068     const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
3069     const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
3070     const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
3071 
3072     const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx };
3073 
3074     for (int OpIdx : OpIndices) {
3075       if (OpIdx == -1) break;
3076 
3077       const MCOperand &MO = Inst.getOperand(OpIdx);
3078       if (usesConstantBus(Inst, OpIdx)) {
3079         if (MO.isReg()) {
3080           LastSGPR = mc2PseudoReg(MO.getReg());
          // Pairs of registers with a partial intersection like these
3082           //   s0, s[0:1]
3083           //   flat_scratch_lo, flat_scratch
3084           //   flat_scratch_lo, flat_scratch_hi
3085           // are theoretically valid but they are disabled anyway.
3086           // Note that this code mimics SIInstrInfo::verifyInstruction
3087           if (!SGPRsUsed.count(LastSGPR)) {
3088             SGPRsUsed.insert(LastSGPR);
3089             ++ConstantBusUseCount;
3090           }
3091         } else { // Expression or a literal
3092 
3093           if (Desc.OpInfo[OpIdx].OperandType == MCOI::OPERAND_IMMEDIATE)
3094             continue; // special operand like VINTERP attr_chan
3095 
3096           // An instruction may use only one literal.
3097           // This has been validated in the previous step.
3098           // See validateVOP3Literal.
3099           // This literal may be used as more than one operand.
3100           // If all these operands are of the same size,
3101           // this literal counts as one scalar value.
3102           // Otherwise it counts as 2 scalar values.
3103           // See "GFX10 Shader Programming", section 3.6.2.3.
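          // For example, a literal shared by two 32-bit source operands
          // counts once, while a literal used by both a 32-bit and a
          // 64-bit operand counts as two scalar values.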
3104 
3105           unsigned Size = AMDGPU::getOperandSize(Desc, OpIdx);
3106           if (Size < 4) Size = 4;
3107 
3108           if (NumLiterals == 0) {
3109             NumLiterals = 1;
3110             LiteralSize = Size;
3111           } else if (LiteralSize != Size) {
3112             NumLiterals = 2;
3113           }
3114         }
3115       }
3116     }
3117   }
3118   ConstantBusUseCount += NumLiterals;
3119 
3120   if (ConstantBusUseCount <= getConstantBusLimit(Opcode))
3121     return true;
3122 
3123   SMLoc LitLoc = getLitLoc(Operands);
3124   SMLoc RegLoc = getRegLoc(LastSGPR, Operands);
3125   SMLoc Loc = (LitLoc.getPointer() < RegLoc.getPointer()) ? RegLoc : LitLoc;
3126   Error(Loc, "invalid operand (violates constant bus restrictions)");
3127   return false;
3128 }
3129 
3130 bool
3131 AMDGPUAsmParser::validateEarlyClobberLimitations(const MCInst &Inst,
3132                                                  const OperandVector &Operands) {
3133   const unsigned Opcode = Inst.getOpcode();
3134   const MCInstrDesc &Desc = MII.get(Opcode);
3135 
3136   const int DstIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vdst);
3137   if (DstIdx == -1 ||
3138       Desc.getOperandConstraint(DstIdx, MCOI::EARLY_CLOBBER) == -1) {
3139     return true;
3140   }
3141 
3142   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3143 
3144   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
3145   const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
3146   const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
3147 
3148   assert(DstIdx != -1);
3149   const MCOperand &Dst = Inst.getOperand(DstIdx);
3150   assert(Dst.isReg());
3151   const unsigned DstReg = mc2PseudoReg(Dst.getReg());
3152 
3153   const int SrcIndices[] = { Src0Idx, Src1Idx, Src2Idx };
3154 
3155   for (int SrcIdx : SrcIndices) {
3156     if (SrcIdx == -1) break;
3157     const MCOperand &Src = Inst.getOperand(SrcIdx);
3158     if (Src.isReg()) {
3159       const unsigned SrcReg = mc2PseudoReg(Src.getReg());
3160       if (isRegIntersect(DstReg, SrcReg, TRI)) {
3161         Error(getRegLoc(SrcReg, Operands),
3162           "destination must be different than all sources");
3163         return false;
3164       }
3165     }
3166   }
3167 
3168   return true;
3169 }
3170 
3171 bool AMDGPUAsmParser::validateIntClampSupported(const MCInst &Inst) {
3172 
3173   const unsigned Opc = Inst.getOpcode();
3174   const MCInstrDesc &Desc = MII.get(Opc);
3175 
3176   if ((Desc.TSFlags & SIInstrFlags::IntClamp) != 0 && !hasIntClamp()) {
3177     int ClampIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp);
3178     assert(ClampIdx != -1);
3179     return Inst.getOperand(ClampIdx).getImm() == 0;
3180   }
3181 
3182   return true;
3183 }
3184 
3185 bool AMDGPUAsmParser::validateMIMGDataSize(const MCInst &Inst) {
3186 
3187   const unsigned Opc = Inst.getOpcode();
3188   const MCInstrDesc &Desc = MII.get(Opc);
3189 
3190   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3191     return true;
3192 
3193   int VDataIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdata);
3194   int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
3195   int TFEIdx   = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::tfe);
3196 
3197   assert(VDataIdx != -1);
3198 
3199   if (DMaskIdx == -1 || TFEIdx == -1) // intersect_ray
3200     return true;
3201 
3202   unsigned VDataSize = AMDGPU::getRegOperandSize(getMRI(), Desc, VDataIdx);
3203   unsigned TFESize = Inst.getOperand(TFEIdx).getImm() ? 1 : 0;
3204   unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
3205   if (DMask == 0)
3206     DMask = 1;
3207 
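  // The expected vdata size is one dword per enabled dmask component
  // (always 4 for gather4), plus one extra dword when tfe is set;
  // packed d16 halves the component count, rounded up.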
3208   unsigned DataSize =
3209     (Desc.TSFlags & SIInstrFlags::Gather4) ? 4 : countPopulation(DMask);
3210   if (hasPackedD16()) {
3211     int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
3212     if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm())
3213       DataSize = (DataSize + 1) / 2;
3214   }
3215 
3216   return (VDataSize / 4) == DataSize + TFESize;
3217 }
3218 
3219 bool AMDGPUAsmParser::validateMIMGAddrSize(const MCInst &Inst) {
3220   const unsigned Opc = Inst.getOpcode();
3221   const MCInstrDesc &Desc = MII.get(Opc);
3222 
3223   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0 || !isGFX10Plus())
3224     return true;
3225 
3226   const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc);
3227 
3228   const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
3229       AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode);
3230   int VAddr0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr0);
3231   int SrsrcIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::srsrc);
3232   int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
3233 
3234   assert(VAddr0Idx != -1);
3235   assert(SrsrcIdx != -1);
3236   assert(SrsrcIdx > VAddr0Idx);
3237 
3238   if (DimIdx == -1)
3239     return true; // intersect_ray
3240 
3241   unsigned Dim = Inst.getOperand(DimIdx).getImm();
3242   const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim);
3243   bool IsNSA = SrsrcIdx - VAddr0Idx > 1;
3244   unsigned VAddrSize =
3245       IsNSA ? SrsrcIdx - VAddr0Idx
3246             : AMDGPU::getRegOperandSize(getMRI(), Desc, VAddr0Idx) / 4;
3247 
3248   unsigned AddrSize = BaseOpcode->NumExtraArgs +
3249                       (BaseOpcode->Gradients ? DimInfo->NumGradients : 0) +
3250                       (BaseOpcode->Coordinates ? DimInfo->NumCoords : 0) +
3251                       (BaseOpcode->LodOrClampOrMip ? 1 : 0);
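  // Non-NSA encodings pack all address components into a single register
  // tuple, so expected sizes above 4 dwords are rounded up to the next
  // supported tuple size (8 or 16 dwords).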
3252   if (!IsNSA) {
3253     if (AddrSize > 8)
3254       AddrSize = 16;
3255     else if (AddrSize > 4)
3256       AddrSize = 8;
3257   }
3258 
3259   return VAddrSize == AddrSize;
3260 }
3261 
3262 bool AMDGPUAsmParser::validateMIMGAtomicDMask(const MCInst &Inst) {
3263 
3264   const unsigned Opc = Inst.getOpcode();
3265   const MCInstrDesc &Desc = MII.get(Opc);
3266 
3267   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3268     return true;
3269   if (!Desc.mayLoad() || !Desc.mayStore())
3270     return true; // Not atomic
3271 
3272   int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
3273   unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
3274 
3275   // This is an incomplete check because image_atomic_cmpswap
3276   // may only use 0x3 and 0xf while other atomic operations
3277   // may use 0x1 and 0x3. However these limitations are
3278   // verified when we check that dmask matches dst size.
3279   return DMask == 0x1 || DMask == 0x3 || DMask == 0xf;
3280 }
3281 
3282 bool AMDGPUAsmParser::validateMIMGGatherDMask(const MCInst &Inst) {
3283 
3284   const unsigned Opc = Inst.getOpcode();
3285   const MCInstrDesc &Desc = MII.get(Opc);
3286 
3287   if ((Desc.TSFlags & SIInstrFlags::Gather4) == 0)
3288     return true;
3289 
3290   int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
3291   unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
3292 
3293   // GATHER4 instructions use dmask in a different fashion compared to
3294   // other MIMG instructions. The only useful DMASK values are
3295   // 1=red, 2=green, 4=blue, 8=alpha. (e.g. 1 returns
3296   // (red,red,red,red) etc.) The ISA document doesn't mention
3297   // this.
3298   return DMask == 0x1 || DMask == 0x2 || DMask == 0x4 || DMask == 0x8;
3299 }
3300 
3301 static bool IsMovrelsSDWAOpcode(const unsigned Opcode)
3302 {
3303   switch (Opcode) {
3304   case AMDGPU::V_MOVRELS_B32_sdwa_gfx10:
3305   case AMDGPU::V_MOVRELSD_B32_sdwa_gfx10:
3306   case AMDGPU::V_MOVRELSD_2_B32_sdwa_gfx10:
3307     return true;
3308   default:
3309     return false;
3310   }
3311 }
3312 
3313 // movrels* opcodes should only allow VGPRs as src0.
3314 // This is specified in .td description for vop1/vop3,
3315 // but sdwa is handled differently. See isSDWAOperand.
3316 bool AMDGPUAsmParser::validateMovrels(const MCInst &Inst,
3317                                       const OperandVector &Operands) {
3318 
3319   const unsigned Opc = Inst.getOpcode();
3320   const MCInstrDesc &Desc = MII.get(Opc);
3321 
3322   if ((Desc.TSFlags & SIInstrFlags::SDWA) == 0 || !IsMovrelsSDWAOpcode(Opc))
3323     return true;
3324 
3325   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
3326   assert(Src0Idx != -1);
3327 
3328   SMLoc ErrLoc;
3329   const MCOperand &Src0 = Inst.getOperand(Src0Idx);
3330   if (Src0.isReg()) {
3331     auto Reg = mc2PseudoReg(Src0.getReg());
3332     const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3333     if (!isSGPR(Reg, TRI))
3334       return true;
3335     ErrLoc = getRegLoc(Reg, Operands);
3336   } else {
3337     ErrLoc = getConstLoc(Operands);
3338   }
3339 
3340   Error(ErrLoc, "source operand must be a VGPR");
3341   return false;
3342 }
3343 
3344 bool AMDGPUAsmParser::validateMAIAccWrite(const MCInst &Inst,
3345                                           const OperandVector &Operands) {
3346 
3347   const unsigned Opc = Inst.getOpcode();
3348 
3349   if (Opc != AMDGPU::V_ACCVGPR_WRITE_B32_vi)
3350     return true;
3351 
3352   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
3353   assert(Src0Idx != -1);
3354 
3355   const MCOperand &Src0 = Inst.getOperand(Src0Idx);
3356   if (!Src0.isReg())
3357     return true;
3358 
3359   auto Reg = mc2PseudoReg(Src0.getReg());
3360   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3361   if (isSGPR(Reg, TRI)) {
3362     Error(getRegLoc(Reg, Operands),
3363           "source operand must be either a VGPR or an inline constant");
3364     return false;
3365   }
3366 
3367   return true;
3368 }
3369 
3370 bool AMDGPUAsmParser::validateDivScale(const MCInst &Inst) {
3371   switch (Inst.getOpcode()) {
3372   default:
3373     return true;
3374   case V_DIV_SCALE_F32_gfx6_gfx7:
3375   case V_DIV_SCALE_F32_vi:
3376   case V_DIV_SCALE_F32_gfx10:
3377   case V_DIV_SCALE_F64_gfx6_gfx7:
3378   case V_DIV_SCALE_F64_vi:
3379   case V_DIV_SCALE_F64_gfx10:
3380     break;
3381   }
3382 
3383   // TODO: Check that src0 = src1 or src2.
3384 
3385   for (auto Name : {AMDGPU::OpName::src0_modifiers,
3386                     AMDGPU::OpName::src1_modifiers,
3387                     AMDGPU::OpName::src2_modifiers}) {
3388     if (Inst.getOperand(AMDGPU::getNamedOperandIdx(Inst.getOpcode(), Name))
3389             .getImm() &
3390         SISrcMods::ABS) {
3391       return false;
3392     }
3393   }
3394 
3395   return true;
3396 }
3397 
3398 bool AMDGPUAsmParser::validateMIMGD16(const MCInst &Inst) {
3399 
3400   const unsigned Opc = Inst.getOpcode();
3401   const MCInstrDesc &Desc = MII.get(Opc);
3402 
3403   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3404     return true;
3405 
3406   int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
3407   if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm()) {
3408     if (isCI() || isSI())
3409       return false;
3410   }
3411 
3412   return true;
3413 }
3414 
3415 bool AMDGPUAsmParser::validateMIMGDim(const MCInst &Inst) {
3416   const unsigned Opc = Inst.getOpcode();
3417   const MCInstrDesc &Desc = MII.get(Opc);
3418 
3419   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3420     return true;
3421 
3422   int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
3423   if (DimIdx < 0)
3424     return true;
3425 
3426   long Imm = Inst.getOperand(DimIdx).getImm();
3427   if (Imm < 0 || Imm >= 8)
3428     return false;
3429 
3430   return true;
3431 }
3432 
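// "rev" opcodes take their first two source operands in swapped order
// (e.g. v_subrev_f32 computes src1 - src0); validateLdsDirect uses this
// list to reject lds_direct as src0 of such instructions.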
3433 static bool IsRevOpcode(const unsigned Opcode)
3434 {
3435   switch (Opcode) {
3436   case AMDGPU::V_SUBREV_F32_e32:
3437   case AMDGPU::V_SUBREV_F32_e64:
3438   case AMDGPU::V_SUBREV_F32_e32_gfx10:
3439   case AMDGPU::V_SUBREV_F32_e32_gfx6_gfx7:
3440   case AMDGPU::V_SUBREV_F32_e32_vi:
3441   case AMDGPU::V_SUBREV_F32_e64_gfx10:
3442   case AMDGPU::V_SUBREV_F32_e64_gfx6_gfx7:
3443   case AMDGPU::V_SUBREV_F32_e64_vi:
3444 
3445   case AMDGPU::V_SUBREV_CO_U32_e32:
3446   case AMDGPU::V_SUBREV_CO_U32_e64:
3447   case AMDGPU::V_SUBREV_I32_e32_gfx6_gfx7:
3448   case AMDGPU::V_SUBREV_I32_e64_gfx6_gfx7:
3449 
3450   case AMDGPU::V_SUBBREV_U32_e32:
3451   case AMDGPU::V_SUBBREV_U32_e64:
3452   case AMDGPU::V_SUBBREV_U32_e32_gfx6_gfx7:
3453   case AMDGPU::V_SUBBREV_U32_e32_vi:
3454   case AMDGPU::V_SUBBREV_U32_e64_gfx6_gfx7:
3455   case AMDGPU::V_SUBBREV_U32_e64_vi:
3456 
3457   case AMDGPU::V_SUBREV_U32_e32:
3458   case AMDGPU::V_SUBREV_U32_e64:
3459   case AMDGPU::V_SUBREV_U32_e32_gfx9:
3460   case AMDGPU::V_SUBREV_U32_e32_vi:
3461   case AMDGPU::V_SUBREV_U32_e64_gfx9:
3462   case AMDGPU::V_SUBREV_U32_e64_vi:
3463 
3464   case AMDGPU::V_SUBREV_F16_e32:
3465   case AMDGPU::V_SUBREV_F16_e64:
3466   case AMDGPU::V_SUBREV_F16_e32_gfx10:
3467   case AMDGPU::V_SUBREV_F16_e32_vi:
3468   case AMDGPU::V_SUBREV_F16_e64_gfx10:
3469   case AMDGPU::V_SUBREV_F16_e64_vi:
3470 
3471   case AMDGPU::V_SUBREV_U16_e32:
3472   case AMDGPU::V_SUBREV_U16_e64:
3473   case AMDGPU::V_SUBREV_U16_e32_vi:
3474   case AMDGPU::V_SUBREV_U16_e64_vi:
3475 
3476   case AMDGPU::V_SUBREV_CO_U32_e32_gfx9:
3477   case AMDGPU::V_SUBREV_CO_U32_e64_gfx10:
3478   case AMDGPU::V_SUBREV_CO_U32_e64_gfx9:
3479 
3480   case AMDGPU::V_SUBBREV_CO_U32_e32_gfx9:
3481   case AMDGPU::V_SUBBREV_CO_U32_e64_gfx9:
3482 
3483   case AMDGPU::V_SUBREV_NC_U32_e32_gfx10:
3484   case AMDGPU::V_SUBREV_NC_U32_e64_gfx10:
3485 
3486   case AMDGPU::V_SUBREV_CO_CI_U32_e32_gfx10:
3487   case AMDGPU::V_SUBREV_CO_CI_U32_e64_gfx10:
3488 
3489   case AMDGPU::V_LSHRREV_B32_e32:
3490   case AMDGPU::V_LSHRREV_B32_e64:
3491   case AMDGPU::V_LSHRREV_B32_e32_gfx6_gfx7:
3492   case AMDGPU::V_LSHRREV_B32_e64_gfx6_gfx7:
3493   case AMDGPU::V_LSHRREV_B32_e32_vi:
3494   case AMDGPU::V_LSHRREV_B32_e64_vi:
3495   case AMDGPU::V_LSHRREV_B32_e32_gfx10:
3496   case AMDGPU::V_LSHRREV_B32_e64_gfx10:
3497 
3498   case AMDGPU::V_ASHRREV_I32_e32:
3499   case AMDGPU::V_ASHRREV_I32_e64:
3500   case AMDGPU::V_ASHRREV_I32_e32_gfx10:
3501   case AMDGPU::V_ASHRREV_I32_e32_gfx6_gfx7:
3502   case AMDGPU::V_ASHRREV_I32_e32_vi:
3503   case AMDGPU::V_ASHRREV_I32_e64_gfx10:
3504   case AMDGPU::V_ASHRREV_I32_e64_gfx6_gfx7:
3505   case AMDGPU::V_ASHRREV_I32_e64_vi:
3506 
3507   case AMDGPU::V_LSHLREV_B32_e32:
3508   case AMDGPU::V_LSHLREV_B32_e64:
3509   case AMDGPU::V_LSHLREV_B32_e32_gfx10:
3510   case AMDGPU::V_LSHLREV_B32_e32_gfx6_gfx7:
3511   case AMDGPU::V_LSHLREV_B32_e32_vi:
3512   case AMDGPU::V_LSHLREV_B32_e64_gfx10:
3513   case AMDGPU::V_LSHLREV_B32_e64_gfx6_gfx7:
3514   case AMDGPU::V_LSHLREV_B32_e64_vi:
3515 
3516   case AMDGPU::V_LSHLREV_B16_e32:
3517   case AMDGPU::V_LSHLREV_B16_e64:
3518   case AMDGPU::V_LSHLREV_B16_e32_vi:
3519   case AMDGPU::V_LSHLREV_B16_e64_vi:
3520   case AMDGPU::V_LSHLREV_B16_gfx10:
3521 
3522   case AMDGPU::V_LSHRREV_B16_e32:
3523   case AMDGPU::V_LSHRREV_B16_e64:
3524   case AMDGPU::V_LSHRREV_B16_e32_vi:
3525   case AMDGPU::V_LSHRREV_B16_e64_vi:
3526   case AMDGPU::V_LSHRREV_B16_gfx10:
3527 
3528   case AMDGPU::V_ASHRREV_I16_e32:
3529   case AMDGPU::V_ASHRREV_I16_e64:
3530   case AMDGPU::V_ASHRREV_I16_e32_vi:
3531   case AMDGPU::V_ASHRREV_I16_e64_vi:
3532   case AMDGPU::V_ASHRREV_I16_gfx10:
3533 
3534   case AMDGPU::V_LSHLREV_B64_e64:
3535   case AMDGPU::V_LSHLREV_B64_gfx10:
3536   case AMDGPU::V_LSHLREV_B64_vi:
3537 
3538   case AMDGPU::V_LSHRREV_B64_e64:
3539   case AMDGPU::V_LSHRREV_B64_gfx10:
3540   case AMDGPU::V_LSHRREV_B64_vi:
3541 
3542   case AMDGPU::V_ASHRREV_I64_e64:
3543   case AMDGPU::V_ASHRREV_I64_gfx10:
3544   case AMDGPU::V_ASHRREV_I64_vi:
3545 
3546   case AMDGPU::V_PK_LSHLREV_B16:
3547   case AMDGPU::V_PK_LSHLREV_B16_gfx10:
3548   case AMDGPU::V_PK_LSHLREV_B16_vi:
3549 
3550   case AMDGPU::V_PK_LSHRREV_B16:
3551   case AMDGPU::V_PK_LSHRREV_B16_gfx10:
3552   case AMDGPU::V_PK_LSHRREV_B16_vi:
3553   case AMDGPU::V_PK_ASHRREV_I16:
3554   case AMDGPU::V_PK_ASHRREV_I16_gfx10:
3555   case AMDGPU::V_PK_ASHRREV_I16_vi:
3556     return true;
3557   default:
3558     return false;
3559   }
3560 }
3561 
3562 bool AMDGPUAsmParser::validateLdsDirect(const MCInst &Inst) {
3563 
3564   using namespace SIInstrFlags;
3565   const unsigned Opcode = Inst.getOpcode();
3566   const MCInstrDesc &Desc = MII.get(Opcode);
3567 
3568   // lds_direct register is defined so that it can be used
3569   // with 9-bit operands only. Ignore encodings which do not accept these.
3570   if ((Desc.TSFlags & (VOP1 | VOP2 | VOP3 | VOPC | VOP3P | SIInstrFlags::SDWA)) == 0)
3571     return true;
3572 
3573   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
3574   const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
3575   const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
3576 
3577   const int SrcIndices[] = { Src1Idx, Src2Idx };
3578 
3579   // lds_direct cannot be specified as either src1 or src2.
3580   for (int SrcIdx : SrcIndices) {
3581     if (SrcIdx == -1) break;
3582     const MCOperand &Src = Inst.getOperand(SrcIdx);
3583     if (Src.isReg() && Src.getReg() == LDS_DIRECT) {
3584       return false;
3585     }
3586   }
3587 
3588   if (Src0Idx == -1)
3589     return true;
3590 
3591   const MCOperand &Src = Inst.getOperand(Src0Idx);
3592   if (!Src.isReg() || Src.getReg() != LDS_DIRECT)
3593     return true;
3594 
3595   // lds_direct is specified as src0. Check additional limitations.
3596   return (Desc.TSFlags & SIInstrFlags::SDWA) == 0 && !IsRevOpcode(Opcode);
3597 }
3598 
3599 SMLoc AMDGPUAsmParser::getFlatOffsetLoc(const OperandVector &Operands) const {
3600   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
3601     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
3602     if (Op.isFlatOffset())
3603       return Op.getStartLoc();
3604   }
3605   return getLoc();
3606 }
3607 
3608 bool AMDGPUAsmParser::validateFlatOffset(const MCInst &Inst,
3609                                          const OperandVector &Operands) {
3610   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
3611   if ((TSFlags & SIInstrFlags::FLAT) == 0)
3612     return true;
3613 
3614   auto Opcode = Inst.getOpcode();
3615   auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
3616   assert(OpNum != -1);
3617 
3618   const auto &Op = Inst.getOperand(OpNum);
3619   if (!hasFlatOffsets() && Op.getImm() != 0) {
3620     Error(getFlatOffsetLoc(Operands),
3621           "flat offset modifier is not supported on this GPU");
3622     return false;
3623   }
3624 
3625   // For the plain FLAT segment the offset must be positive; the MSB is
3626   // ignored and forced to zero. GLOBAL and SCRATCH accept a signed offset.
3627   if (TSFlags & (SIInstrFlags::IsFlatGlobal | SIInstrFlags::IsFlatScratch)) {
3628     unsigned OffsetSize = AMDGPU::getNumFlatOffsetBits(getSTI(), true);
3629     if (!isIntN(OffsetSize, Op.getImm())) {
3630       Error(getFlatOffsetLoc(Operands),
3631             Twine("expected a ") + Twine(OffsetSize) + "-bit signed offset");
3632       return false;
3633     }
3634   } else {
3635     unsigned OffsetSize = AMDGPU::getNumFlatOffsetBits(getSTI(), false);
3636     if (!isUIntN(OffsetSize, Op.getImm())) {
3637       Error(getFlatOffsetLoc(Operands),
3638             Twine("expected a ") + Twine(OffsetSize) + "-bit unsigned offset");
3639       return false;
3640     }
3641   }
3642 
3643   return true;
3644 }
3645 
3646 SMLoc AMDGPUAsmParser::getSMEMOffsetLoc(const OperandVector &Operands) const {
3647   // Start with second operand because SMEM Offset cannot be dst or src0.
3648   for (unsigned i = 2, e = Operands.size(); i != e; ++i) {
3649     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
3650     if (Op.isSMEMOffset())
3651       return Op.getStartLoc();
3652   }
3653   return getLoc();
3654 }
3655 
3656 bool AMDGPUAsmParser::validateSMEMOffset(const MCInst &Inst,
3657                                          const OperandVector &Operands) {
3658   if (isCI() || isSI())
3659     return true;
3660 
3661   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
3662   if ((TSFlags & SIInstrFlags::SMRD) == 0)
3663     return true;
3664 
3665   auto Opcode = Inst.getOpcode();
3666   auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
3667   if (OpNum == -1)
3668     return true;
3669 
3670   const auto &Op = Inst.getOperand(OpNum);
3671   if (!Op.isImm())
3672     return true;
3673 
3674   uint64_t Offset = Op.getImm();
3675   bool IsBuffer = AMDGPU::getSMEMIsBuffer(Opcode);
3676   if (AMDGPU::isLegalSMRDEncodedUnsignedOffset(getSTI(), Offset) ||
3677       AMDGPU::isLegalSMRDEncodedSignedOffset(getSTI(), Offset, IsBuffer))
3678     return true;
3679 
3680   Error(getSMEMOffsetLoc(Operands),
3681         (isVI() || IsBuffer) ? "expected a 20-bit unsigned offset" :
3682                                "expected a 21-bit signed offset");
3683 
3684   return false;
3685 }
3686 
3687 bool AMDGPUAsmParser::validateSOPLiteral(const MCInst &Inst) const {
3688   unsigned Opcode = Inst.getOpcode();
3689   const MCInstrDesc &Desc = MII.get(Opcode);
3690   if (!(Desc.TSFlags & (SIInstrFlags::SOP2 | SIInstrFlags::SOPC)))
3691     return true;
3692 
3693   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
3694   const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
3695 
3696   const int OpIndices[] = { Src0Idx, Src1Idx };
3697 
3698   unsigned NumExprs = 0;
3699   unsigned NumLiterals = 0;
3700   uint32_t LiteralValue;
3701 
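  // Count distinct literal values and expressions; the same literal value
  // reused by both sources counts only once.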
3702   for (int OpIdx : OpIndices) {
3703     if (OpIdx == -1) break;
3704 
3705     const MCOperand &MO = Inst.getOperand(OpIdx);
3706     // Exclude special imm operands (like that used by s_set_gpr_idx_on)
3707     if (AMDGPU::isSISrcOperand(Desc, OpIdx)) {
3708       if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) {
3709         uint32_t Value = static_cast<uint32_t>(MO.getImm());
3710         if (NumLiterals == 0 || LiteralValue != Value) {
3711           LiteralValue = Value;
3712           ++NumLiterals;
3713         }
3714       } else if (MO.isExpr()) {
3715         ++NumExprs;
3716       }
3717     }
3718   }
3719 
3720   return NumLiterals + NumExprs <= 1;
3721 }
3722 
3723 bool AMDGPUAsmParser::validateOpSel(const MCInst &Inst) {
3724   const unsigned Opc = Inst.getOpcode();
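  // For permlane16/permlanex16, op_sel carries the fi and bound_ctrl bits,
  // so only the two low bits may be set.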
3725   if (Opc == AMDGPU::V_PERMLANE16_B32_gfx10 ||
3726       Opc == AMDGPU::V_PERMLANEX16_B32_gfx10) {
3727     int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
3728     unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
3729 
3730     if (OpSel & ~3)
3731       return false;
3732   }
3733   return true;
3734 }
3735 
3736 // Check if VCC register matches wavefront size
3737 bool AMDGPUAsmParser::validateVccOperand(unsigned Reg) const {
3738   auto FB = getFeatureBits();
3739   return (FB[AMDGPU::FeatureWavefrontSize64] && Reg == AMDGPU::VCC) ||
3740     (FB[AMDGPU::FeatureWavefrontSize32] && Reg == AMDGPU::VCC_LO);
3741 }
3742 
3743 // VOP3 literal is only allowed in GFX10+ and only one can be used
3744 bool AMDGPUAsmParser::validateVOP3Literal(const MCInst &Inst,
3745                                           const OperandVector &Operands) {
3746   unsigned Opcode = Inst.getOpcode();
3747   const MCInstrDesc &Desc = MII.get(Opcode);
3748   if (!(Desc.TSFlags & (SIInstrFlags::VOP3 | SIInstrFlags::VOP3P)))
3749     return true;
3750 
3751   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
3752   const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
3753   const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
3754 
3755   const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx };
3756 
3757   unsigned NumExprs = 0;
3758   unsigned NumLiterals = 0;
3759   uint32_t LiteralValue;
3760 
3761   for (int OpIdx : OpIndices) {
3762     if (OpIdx == -1) break;
3763 
3764     const MCOperand &MO = Inst.getOperand(OpIdx);
3765     if (!MO.isImm() && !MO.isExpr())
3766       continue;
3767     if (!AMDGPU::isSISrcOperand(Desc, OpIdx))
3768       continue;
3769 
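    // Subtargets with the MFMA inline literal bug cannot use an inline
    // constant, literal, or expression as src2 of MAI instructions.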
3770     if (OpIdx == Src2Idx && (Desc.TSFlags & SIInstrFlags::IsMAI) &&
3771         getFeatureBits()[AMDGPU::FeatureMFMAInlineLiteralBug]) {
3772       Error(getConstLoc(Operands),
3773             "inline constants are not allowed for this operand");
3774       return false;
3775     }
3776 
3777     if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) {
3778       uint32_t Value = static_cast<uint32_t>(MO.getImm());
3779       if (NumLiterals == 0 || LiteralValue != Value) {
3780         LiteralValue = Value;
3781         ++NumLiterals;
3782       }
3783     } else if (MO.isExpr()) {
3784       ++NumExprs;
3785     }
3786   }
3787   NumLiterals += NumExprs;
3788 
3789   if (!NumLiterals)
3790     return true;
3791 
3792   if (!getFeatureBits()[AMDGPU::FeatureVOP3Literal]) {
3793     Error(getLitLoc(Operands), "literal operands are not supported");
3794     return false;
3795   }
3796 
3797   if (NumLiterals > 1) {
3798     Error(getLitLoc(Operands), "only one literal operand is allowed");
3799     return false;
3800   }
3801 
3802   return true;
3803 }
3804 
3805 bool AMDGPUAsmParser::validateCoherencyBits(const MCInst &Inst,
3806                                             const OperandVector &Operands,
3807                                             const SMLoc &IDLoc) {
3808   int GLCPos = AMDGPU::getNamedOperandIdx(Inst.getOpcode(),
3809                                           AMDGPU::OpName::glc1);
3810   if (GLCPos != -1) {
3811     // -1 is set by GLC_1 default operand. In all cases "glc" must be present
3812     // in the asm string, and the default value means it is not present.
3813     if (Inst.getOperand(GLCPos).getImm() == -1) {
3814       Error(IDLoc, "instruction must use glc");
3815       return false;
3816     }
3817   }
3818 
3819   return true;
3820 }
3821 
3822 bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst,
3823                                           const SMLoc &IDLoc,
3824                                           const OperandVector &Operands) {
3825   if (!validateLdsDirect(Inst)) {
3826     Error(getRegLoc(AMDGPU::LDS_DIRECT, Operands),
3827       "invalid use of lds_direct");
3828     return false;
3829   }
3830   if (!validateSOPLiteral(Inst)) {
3831     Error(getLitLoc(Operands),
3832       "only one literal operand is allowed");
3833     return false;
3834   }
3835   if (!validateVOP3Literal(Inst, Operands)) {
3836     return false;
3837   }
3838   if (!validateConstantBusLimitations(Inst, Operands)) {
3839     return false;
3840   }
3841   if (!validateEarlyClobberLimitations(Inst, Operands)) {
3842     return false;
3843   }
3844   if (!validateIntClampSupported(Inst)) {
3845     Error(getImmLoc(AMDGPUOperand::ImmTyClampSI, Operands),
3846       "integer clamping is not supported on this GPU");
3847     return false;
3848   }
3849   if (!validateOpSel(Inst)) {
3850     Error(getImmLoc(AMDGPUOperand::ImmTyOpSel, Operands),
3851       "invalid op_sel operand");
3852     return false;
3853   }
3854   // For MUBUF/MTBUF, d16 is part of the opcode, so there is nothing to validate.
3855   if (!validateMIMGD16(Inst)) {
3856     Error(getImmLoc(AMDGPUOperand::ImmTyD16, Operands),
3857       "d16 modifier is not supported on this GPU");
3858     return false;
3859   }
3860   if (!validateMIMGDim(Inst)) {
3861     Error(IDLoc, "dim modifier is required on this GPU");
3862     return false;
3863   }
3864   if (!validateMIMGDataSize(Inst)) {
3865     Error(IDLoc,
3866       "image data size does not match dmask and tfe");
3867     return false;
3868   }
3869   if (!validateMIMGAddrSize(Inst)) {
3870     Error(IDLoc,
3871       "image address size does not match dim and a16");
3872     return false;
3873   }
3874   if (!validateMIMGAtomicDMask(Inst)) {
3875     Error(getImmLoc(AMDGPUOperand::ImmTyDMask, Operands),
3876       "invalid atomic image dmask");
3877     return false;
3878   }
3879   if (!validateMIMGGatherDMask(Inst)) {
3880     Error(getImmLoc(AMDGPUOperand::ImmTyDMask, Operands),
3881       "invalid image_gather dmask: only one bit must be set");
3882     return false;
3883   }
3884   if (!validateMovrels(Inst, Operands)) {
3885     return false;
3886   }
3887   if (!validateFlatOffset(Inst, Operands)) {
3888     return false;
3889   }
3890   if (!validateSMEMOffset(Inst, Operands)) {
3891     return false;
3892   }
3893   if (!validateMAIAccWrite(Inst, Operands)) {
3894     return false;
3895   }
3896   if (!validateDivScale(Inst)) {
3897     Error(IDLoc, "ABS not allowed in VOP3B instructions");
3898     return false;
3899   }
3900   if (!validateCoherencyBits(Inst, Operands, IDLoc)) {
3901     return false;
3902   }
3903 
3904   return true;
3905 }
3906 
3907 static std::string AMDGPUMnemonicSpellCheck(StringRef S,
3908                                             const FeatureBitset &FBS,
3909                                             unsigned VariantID = 0);
3910 
3911 static bool AMDGPUCheckMnemonic(StringRef Mnemonic,
3912                                 const FeatureBitset &AvailableFeatures,
3913                                 unsigned VariantID);
3914 
3915 bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo,
3916                                        const FeatureBitset &FBS) {
3917   return isSupportedMnemo(Mnemo, FBS, getAllVariants());
3918 }
3919 
3920 bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo,
3921                                        const FeatureBitset &FBS,
3922                                        ArrayRef<unsigned> Variants) {
3923   for (auto Variant : Variants) {
3924     if (AMDGPUCheckMnemonic(Mnemo, FBS, Variant))
3925       return true;
3926   }
3927 
3928   return false;
3929 }
3930 
3931 bool AMDGPUAsmParser::checkUnsupportedInstruction(StringRef Mnemo,
3932                                                   const SMLoc &IDLoc) {
3933   FeatureBitset FBS = ComputeAvailableFeatures(getSTI().getFeatureBits());
3934 
3935   // Check if requested instruction variant is supported.
3936   if (isSupportedMnemo(Mnemo, FBS, getMatchedVariants()))
3937     return false;
3938 
3939   // This instruction is not supported.
3940   // Clear any other pending errors because they are no longer relevant.
3941   getParser().clearPendingErrors();
3942 
3943   // Requested instruction variant is not supported.
3944   // Check if any other variants are supported.
3945   StringRef VariantName = getMatchedVariantName();
3946   if (!VariantName.empty() && isSupportedMnemo(Mnemo, FBS)) {
3947     return Error(IDLoc,
3948                  Twine(VariantName,
3949                        " variant of this instruction is not supported"));
3950   }
3951 
3952   // Finally check if this instruction is supported on any other GPU.
3953   if (isSupportedMnemo(Mnemo, FeatureBitset().set())) {
3954     return Error(IDLoc, "instruction not supported on this GPU");
3955   }
3956 
3957   // Instruction not supported on any GPU. Probably a typo.
3958   std::string Suggestion = AMDGPUMnemonicSpellCheck(Mnemo, FBS);
3959   return Error(IDLoc, "invalid instruction" + Suggestion);
3960 }
3961 
3962 bool AMDGPUAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
3963                                               OperandVector &Operands,
3964                                               MCStreamer &Out,
3965                                               uint64_t &ErrorInfo,
3966                                               bool MatchingInlineAsm) {
3967   MCInst Inst;
3968   unsigned Result = Match_Success;
3969   for (auto Variant : getMatchedVariants()) {
3970     uint64_t EI;
3971     auto R = MatchInstructionImpl(Operands, Inst, EI, MatchingInlineAsm,
3972                                   Variant);
3973     // Match statuses are ordered from least to most specific, and the most
3974     // specific status found is used as the result:
3975     // Match_MnemonicFail < Match_InvalidOperand < Match_MissingFeature < Match_PreferE32
3976     if ((R == Match_Success) ||
3977         (R == Match_PreferE32) ||
3978         (R == Match_MissingFeature && Result != Match_PreferE32) ||
3979         (R == Match_InvalidOperand && Result != Match_MissingFeature
3980                                    && Result != Match_PreferE32) ||
3981         (R == Match_MnemonicFail   && Result != Match_InvalidOperand
3982                                    && Result != Match_MissingFeature
3983                                    && Result != Match_PreferE32)) {
3984       Result = R;
3985       ErrorInfo = EI;
3986     }
3987     if (R == Match_Success)
3988       break;
3989   }
3990 
3991   if (Result == Match_Success) {
3992     if (!validateInstruction(Inst, IDLoc, Operands)) {
3993       return true;
3994     }
3995     Inst.setLoc(IDLoc);
3996     Out.emitInstruction(Inst, getSTI());
3997     return false;
3998   }
3999 
4000   StringRef Mnemo = ((AMDGPUOperand &)*Operands[0]).getToken();
4001   if (checkUnsupportedInstruction(Mnemo, IDLoc)) {
4002     return true;
4003   }
4004 
4005   switch (Result) {
4006   default: break;
4007   case Match_MissingFeature:
4008     // It has been verified that the specified instruction
4009     // mnemonic is valid. A match was found but it requires
4010     // features which are not supported on this GPU.
4011     return Error(IDLoc, "operands are not valid for this GPU or mode");
4012 
4013   case Match_InvalidOperand: {
4014     SMLoc ErrorLoc = IDLoc;
4015     if (ErrorInfo != ~0ULL) {
4016       if (ErrorInfo >= Operands.size()) {
4017         return Error(IDLoc, "too few operands for instruction");
4018       }
4019       ErrorLoc = ((AMDGPUOperand &)*Operands[ErrorInfo]).getStartLoc();
4020       if (ErrorLoc == SMLoc())
4021         ErrorLoc = IDLoc;
4022     }
4023     return Error(ErrorLoc, "invalid operand for instruction");
4024   }
4025 
4026   case Match_PreferE32:
4027     return Error(IDLoc, "internal error: instruction without _e64 suffix "
4028                         "should be encoded as e32");
4029   case Match_MnemonicFail:
4030     llvm_unreachable("Invalid instructions should have been handled already");
4031   }
4032   llvm_unreachable("Implement any new match types added!");
4033 }
4034 
4035 bool AMDGPUAsmParser::ParseAsAbsoluteExpression(uint32_t &Ret) {
4036   int64_t Tmp = -1;
4037   if (!isToken(AsmToken::Integer) && !isToken(AsmToken::Identifier)) {
4038     return true;
4039   }
4040   if (getParser().parseAbsoluteExpression(Tmp)) {
4041     return true;
4042   }
4043   Ret = static_cast<uint32_t>(Tmp);
4044   return false;
4045 }
4046 
4047 bool AMDGPUAsmParser::ParseDirectiveMajorMinor(uint32_t &Major,
4048                                                uint32_t &Minor) {
4049   if (ParseAsAbsoluteExpression(Major))
4050     return TokError("invalid major version");
4051 
4052   if (!trySkipToken(AsmToken::Comma))
4053     return TokError("minor version number required, comma expected");
4054 
4055   if (ParseAsAbsoluteExpression(Minor))
4056     return TokError("invalid minor version");
4057 
4058   return false;
4059 }
4060 
4061 bool AMDGPUAsmParser::ParseDirectiveAMDGCNTarget() {
4062   if (getSTI().getTargetTriple().getArch() != Triple::amdgcn)
4063     return TokError("directive only supported for amdgcn architecture");
4064 
4065   std::string Target;
4066 
4067   SMLoc TargetStart = getLoc();
4068   if (getParser().parseEscapedString(Target))
4069     return true;
4070   SMRange TargetRange = SMRange(TargetStart, getLoc());
4071 
4072   std::string ExpectedTarget;
4073   raw_string_ostream ExpectedTargetOS(ExpectedTarget);
4074   IsaInfo::streamIsaVersion(&getSTI(), ExpectedTargetOS);
4075 
4076   if (Target != ExpectedTargetOS.str())
4077     return Error(TargetRange.Start, "target must match options", TargetRange);
4078 
4079   getTargetStreamer().EmitDirectiveAMDGCNTarget(Target);
4080   return false;
4081 }
4082 
4083 bool AMDGPUAsmParser::OutOfRangeError(SMRange Range) {
4084   return Error(Range.Start, "value out of range", Range);
4085 }
4086 
4087 bool AMDGPUAsmParser::calculateGPRBlocks(
4088     const FeatureBitset &Features, bool VCCUsed, bool FlatScrUsed,
4089     bool XNACKUsed, Optional<bool> EnableWavefrontSize32, unsigned NextFreeVGPR,
4090     SMRange VGPRRange, unsigned NextFreeSGPR, SMRange SGPRRange,
4091     unsigned &VGPRBlocks, unsigned &SGPRBlocks) {
4092   // TODO(scott.linder): These calculations are duplicated from
4093   // AMDGPUAsmPrinter::getSIProgramInfo and could be unified.
4094   IsaVersion Version = getIsaVersion(getSTI().getCPU());
4095 
4096   unsigned NumVGPRs = NextFreeVGPR;
4097   unsigned NumSGPRs = NextFreeSGPR;
4098 
4099   if (Version.Major >= 10)
4100     NumSGPRs = 0;
4101   else {
4102     unsigned MaxAddressableNumSGPRs =
4103         IsaInfo::getAddressableNumSGPRs(&getSTI());
4104 
4105     if (Version.Major >= 8 && !Features.test(FeatureSGPRInitBug) &&
4106         NumSGPRs > MaxAddressableNumSGPRs)
4107       return OutOfRangeError(SGPRRange);
4108 
4109     NumSGPRs +=
4110         IsaInfo::getNumExtraSGPRs(&getSTI(), VCCUsed, FlatScrUsed, XNACKUsed);
4111 
4112     if ((Version.Major <= 7 || Features.test(FeatureSGPRInitBug)) &&
4113         NumSGPRs > MaxAddressableNumSGPRs)
4114       return OutOfRangeError(SGPRRange);
4115 
4116     if (Features.test(FeatureSGPRInitBug))
4117       NumSGPRs = IsaInfo::FIXED_NUM_SGPRS_FOR_INIT_BUG;
4118   }
4119 
4120   VGPRBlocks =
4121       IsaInfo::getNumVGPRBlocks(&getSTI(), NumVGPRs, EnableWavefrontSize32);
4122   SGPRBlocks = IsaInfo::getNumSGPRBlocks(&getSTI(), NumSGPRs);
4123 
4124   return false;
4125 }
4126 
4127 bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
4128   if (getSTI().getTargetTriple().getArch() != Triple::amdgcn)
4129     return TokError("directive only supported for amdgcn architecture");
4130 
4131   if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA)
4132     return TokError("directive only supported for amdhsa OS");
4133 
4134   StringRef KernelName;
4135   if (getParser().parseIdentifier(KernelName))
4136     return true;
4137 
4138   kernel_descriptor_t KD = getDefaultAmdhsaKernelDescriptor(&getSTI());
4139 
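  // Track the .amdhsa_ directives seen so far so that repeats can be
  // diagnosed.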
4140   StringSet<> Seen;
4141 
4142   IsaVersion IVersion = getIsaVersion(getSTI().getCPU());
4143 
4144   SMRange VGPRRange;
4145   uint64_t NextFreeVGPR = 0;
4146   SMRange SGPRRange;
4147   uint64_t NextFreeSGPR = 0;
4148   unsigned UserSGPRCount = 0;
4149   bool ReserveVCC = true;
4150   bool ReserveFlatScr = true;
4151   bool ReserveXNACK = hasXNACK();
4152   Optional<bool> EnableWavefrontSize32;
4153 
4154   while (true) {
4155     while (trySkipToken(AsmToken::EndOfStatement));
4156 
4157     StringRef ID;
4158     SMRange IDRange = getTok().getLocRange();
4159     if (!parseId(ID, "expected .amdhsa_ directive or .end_amdhsa_kernel"))
4160       return true;
4161 
4162     if (ID == ".end_amdhsa_kernel")
4163       break;
4164 
4165     if (Seen.find(ID) != Seen.end())
4166       return TokError(".amdhsa_ directives cannot be repeated");
4167     Seen.insert(ID);
4168 
4169     SMLoc ValStart = getLoc();
4170     int64_t IVal;
4171     if (getParser().parseAbsoluteExpression(IVal))
4172       return true;
4173     SMLoc ValEnd = getLoc();
4174     SMRange ValRange = SMRange(ValStart, ValEnd);
4175 
4176     if (IVal < 0)
4177       return OutOfRangeError(ValRange);
4178 
4179     uint64_t Val = IVal;
4180 
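// Range-check VALUE against the bit width of ENTRY and set the
// corresponding bit-field of FIELD in the kernel descriptor.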
4181 #define PARSE_BITS_ENTRY(FIELD, ENTRY, VALUE, RANGE)                           \
4182   if (!isUInt<ENTRY##_WIDTH>(VALUE))                                           \
4183     return OutOfRangeError(RANGE);                                             \
4184   AMDHSA_BITS_SET(FIELD, ENTRY, VALUE);
4185 
4186     if (ID == ".amdhsa_group_segment_fixed_size") {
4187       if (!isUInt<sizeof(KD.group_segment_fixed_size) * CHAR_BIT>(Val))
4188         return OutOfRangeError(ValRange);
4189       KD.group_segment_fixed_size = Val;
4190     } else if (ID == ".amdhsa_private_segment_fixed_size") {
4191       if (!isUInt<sizeof(KD.private_segment_fixed_size) * CHAR_BIT>(Val))
4192         return OutOfRangeError(ValRange);
4193       KD.private_segment_fixed_size = Val;
4194     } else if (ID == ".amdhsa_user_sgpr_private_segment_buffer") {
4195       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4196                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER,
4197                        Val, ValRange);
4198       if (Val)
4199         UserSGPRCount += 4;
4200     } else if (ID == ".amdhsa_user_sgpr_dispatch_ptr") {
4201       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4202                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR, Val,
4203                        ValRange);
4204       if (Val)
4205         UserSGPRCount += 2;
4206     } else if (ID == ".amdhsa_user_sgpr_queue_ptr") {
4207       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4208                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR, Val,
4209                        ValRange);
4210       if (Val)
4211         UserSGPRCount += 2;
4212     } else if (ID == ".amdhsa_user_sgpr_kernarg_segment_ptr") {
4213       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4214                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR,
4215                        Val, ValRange);
4216       if (Val)
4217         UserSGPRCount += 2;
4218     } else if (ID == ".amdhsa_user_sgpr_dispatch_id") {
4219       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4220                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID, Val,
4221                        ValRange);
4222       if (Val)
4223         UserSGPRCount += 2;
4224     } else if (ID == ".amdhsa_user_sgpr_flat_scratch_init") {
4225       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4226                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT, Val,
4227                        ValRange);
4228       if (Val)
4229         UserSGPRCount += 2;
4230     } else if (ID == ".amdhsa_user_sgpr_private_segment_size") {
4231       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4232                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE,
4233                        Val, ValRange);
4234       if (Val)
4235         UserSGPRCount += 1;
4236     } else if (ID == ".amdhsa_wavefront_size32") {
4237       if (IVersion.Major < 10)
4238         return Error(IDRange.Start, "directive requires gfx10+", IDRange);
4239       EnableWavefrontSize32 = Val;
4240       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4241                        KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32,
4242                        Val, ValRange);
4243     } else if (ID == ".amdhsa_system_sgpr_private_segment_wavefront_offset") {
4244       PARSE_BITS_ENTRY(
4245           KD.compute_pgm_rsrc2,
4246           COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, Val,
4247           ValRange);
4248     } else if (ID == ".amdhsa_system_sgpr_workgroup_id_x") {
4249       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4250                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, Val,
4251                        ValRange);
4252     } else if (ID == ".amdhsa_system_sgpr_workgroup_id_y") {
4253       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4254                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y, Val,
4255                        ValRange);
4256     } else if (ID == ".amdhsa_system_sgpr_workgroup_id_z") {
4257       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4258                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z, Val,
4259                        ValRange);
4260     } else if (ID == ".amdhsa_system_sgpr_workgroup_info") {
4261       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4262                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO, Val,
4263                        ValRange);
4264     } else if (ID == ".amdhsa_system_vgpr_workitem_id") {
4265       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4266                        COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID, Val,
4267                        ValRange);
4268     } else if (ID == ".amdhsa_next_free_vgpr") {
4269       VGPRRange = ValRange;
4270       NextFreeVGPR = Val;
4271     } else if (ID == ".amdhsa_next_free_sgpr") {
4272       SGPRRange = ValRange;
4273       NextFreeSGPR = Val;
4274     } else if (ID == ".amdhsa_reserve_vcc") {
4275       if (!isUInt<1>(Val))
4276         return OutOfRangeError(ValRange);
4277       ReserveVCC = Val;
4278     } else if (ID == ".amdhsa_reserve_flat_scratch") {
4279       if (IVersion.Major < 7)
4280         return Error(IDRange.Start, "directive requires gfx7+", IDRange);
4281       if (!isUInt<1>(Val))
4282         return OutOfRangeError(ValRange);
4283       ReserveFlatScr = Val;
4284     } else if (ID == ".amdhsa_reserve_xnack_mask") {
4285       if (IVersion.Major < 8)
4286         return Error(IDRange.Start, "directive requires gfx8+", IDRange);
4287       if (!isUInt<1>(Val))
4288         return OutOfRangeError(ValRange);
4289       ReserveXNACK = Val;
4290     } else if (ID == ".amdhsa_float_round_mode_32") {
4291       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
4292                        COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32, Val, ValRange);
4293     } else if (ID == ".amdhsa_float_round_mode_16_64") {
4294       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
4295                        COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64, Val, ValRange);
4296     } else if (ID == ".amdhsa_float_denorm_mode_32") {
4297       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
4298                        COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32, Val, ValRange);
4299     } else if (ID == ".amdhsa_float_denorm_mode_16_64") {
4300       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
4301                        COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64, Val,
4302                        ValRange);
4303     } else if (ID == ".amdhsa_dx10_clamp") {
4304       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
4305                        COMPUTE_PGM_RSRC1_ENABLE_DX10_CLAMP, Val, ValRange);
4306     } else if (ID == ".amdhsa_ieee_mode") {
4307       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_ENABLE_IEEE_MODE,
4308                        Val, ValRange);
4309     } else if (ID == ".amdhsa_fp16_overflow") {
4310       if (IVersion.Major < 9)
4311         return Error(IDRange.Start, "directive requires gfx9+", IDRange);
4312       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FP16_OVFL, Val,
4313                        ValRange);
4314     } else if (ID == ".amdhsa_workgroup_processor_mode") {
4315       if (IVersion.Major < 10)
4316         return Error(IDRange.Start, "directive requires gfx10+", IDRange);
4317       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_WGP_MODE, Val,
4318                        ValRange);
4319     } else if (ID == ".amdhsa_memory_ordered") {
4320       if (IVersion.Major < 10)
4321         return Error(IDRange.Start, "directive requires gfx10+", IDRange);
4322       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_MEM_ORDERED, Val,
4323                        ValRange);
4324     } else if (ID == ".amdhsa_forward_progress") {
4325       if (IVersion.Major < 10)
4326         return Error(IDRange.Start, "directive requires gfx10+", IDRange);
4327       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FWD_PROGRESS, Val,
4328                        ValRange);
4329     } else if (ID == ".amdhsa_exception_fp_ieee_invalid_op") {
4330       PARSE_BITS_ENTRY(
4331           KD.compute_pgm_rsrc2,
4332           COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION, Val,
4333           ValRange);
4334     } else if (ID == ".amdhsa_exception_fp_denorm_src") {
4335       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4336                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE,
4337                        Val, ValRange);
4338     } else if (ID == ".amdhsa_exception_fp_ieee_div_zero") {
4339       PARSE_BITS_ENTRY(
4340           KD.compute_pgm_rsrc2,
4341           COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO, Val,
4342           ValRange);
4343     } else if (ID == ".amdhsa_exception_fp_ieee_overflow") {
4344       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4345                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW,
4346                        Val, ValRange);
4347     } else if (ID == ".amdhsa_exception_fp_ieee_underflow") {
4348       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4349                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW,
4350                        Val, ValRange);
4351     } else if (ID == ".amdhsa_exception_fp_ieee_inexact") {
4352       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4353                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT,
4354                        Val, ValRange);
4355     } else if (ID == ".amdhsa_exception_int_div_zero") {
4356       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4357                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO,
4358                        Val, ValRange);
4359     } else {
4360       return Error(IDRange.Start, "unknown .amdhsa_kernel directive", IDRange);
4361     }
4362 
4363 #undef PARSE_BITS_ENTRY
4364   }
4365 
4366   if (Seen.find(".amdhsa_next_free_vgpr") == Seen.end())
4367     return TokError(".amdhsa_next_free_vgpr directive is required");
4368 
4369   if (Seen.find(".amdhsa_next_free_sgpr") == Seen.end())
4370     return TokError(".amdhsa_next_free_sgpr directive is required");
4371 
4372   unsigned VGPRBlocks;
4373   unsigned SGPRBlocks;
4374   if (calculateGPRBlocks(getFeatureBits(), ReserveVCC, ReserveFlatScr,
4375                          ReserveXNACK, EnableWavefrontSize32, NextFreeVGPR,
4376                          VGPRRange, NextFreeSGPR, SGPRRange, VGPRBlocks,
4377                          SGPRBlocks))
4378     return true;
4379 
4380   if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_WIDTH>(
4381           VGPRBlocks))
4382     return OutOfRangeError(VGPRRange);
4383   AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
4384                   COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT, VGPRBlocks);
4385 
4386   if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_WIDTH>(
4387           SGPRBlocks))
4388     return OutOfRangeError(SGPRRange);
4389   AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
4390                   COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT,
4391                   SGPRBlocks);
4392 
4393   if (!isUInt<COMPUTE_PGM_RSRC2_USER_SGPR_COUNT_WIDTH>(UserSGPRCount))
4394     return TokError("too many user SGPRs enabled");
4395   AMDHSA_BITS_SET(KD.compute_pgm_rsrc2, COMPUTE_PGM_RSRC2_USER_SGPR_COUNT,
4396                   UserSGPRCount);
4397 
4398   getTargetStreamer().EmitAmdhsaKernelDescriptor(
4399       getSTI(), KernelName, KD, NextFreeVGPR, NextFreeSGPR, ReserveVCC,
4400       ReserveFlatScr, ReserveXNACK);
4401   return false;
4402 }
4403 
4404 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectVersion() {
4405   uint32_t Major;
4406   uint32_t Minor;
4407 
4408   if (ParseDirectiveMajorMinor(Major, Minor))
4409     return true;
4410 
4411   getTargetStreamer().EmitDirectiveHSACodeObjectVersion(Major, Minor);
4412   return false;
4413 }
4414 
4415 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectISA() {
4416   uint32_t Major;
4417   uint32_t Minor;
4418   uint32_t Stepping;
4419   StringRef VendorName;
4420   StringRef ArchName;
4421 
4422   // If this directive has no arguments, then use the ISA version for the
4423   // targeted GPU.
4424   if (isToken(AsmToken::EndOfStatement)) {
4425     AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
4426     getTargetStreamer().EmitDirectiveHSACodeObjectISA(ISA.Major, ISA.Minor,
4427                                                       ISA.Stepping,
4428                                                       "AMD", "AMDGPU");
4429     return false;
4430   }
4431 
4432   if (ParseDirectiveMajorMinor(Major, Minor))
4433     return true;
4434 
4435   if (!trySkipToken(AsmToken::Comma))
4436     return TokError("stepping version number required, comma expected");
4437 
4438   if (ParseAsAbsoluteExpression(Stepping))
4439     return TokError("invalid stepping version");
4440 
4441   if (!trySkipToken(AsmToken::Comma))
4442     return TokError("vendor name required, comma expected");
4443 
4444   if (!parseString(VendorName, "invalid vendor name"))
4445     return true;
4446 
4447   if (!trySkipToken(AsmToken::Comma))
4448     return TokError("arch name required, comma expected");
4449 
4450   if (!parseString(ArchName, "invalid arch name"))
4451     return true;
4452 
4453   getTargetStreamer().EmitDirectiveHSACodeObjectISA(Major, Minor, Stepping,
4454                                                     VendorName, ArchName);
4455   return false;
4456 }
4457 
4458 bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID,
4459                                                amd_kernel_code_t &Header) {
4460   // max_scratch_backing_memory_byte_size is deprecated. Ignore it while parsing
4461   // assembly for backwards compatibility.
4462   if (ID == "max_scratch_backing_memory_byte_size") {
4463     Parser.eatToEndOfStatement();
4464     return false;
4465   }
4466 
4467   SmallString<40> ErrStr;
4468   raw_svector_ostream Err(ErrStr);
4469   if (!parseAmdKernelCodeField(ID, getParser(), Header, Err)) {
4470     return TokError(Err.str());
4471   }
4472   Lex();
4473 
4474   if (ID == "enable_wavefront_size32") {
4475     if (Header.code_properties & AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32) {
4476       if (!isGFX10Plus())
4477         return TokError("enable_wavefront_size32=1 is only allowed on GFX10+");
4478       if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32])
4479         return TokError("enable_wavefront_size32=1 requires +WavefrontSize32");
4480     } else {
4481       if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64])
4482         return TokError("enable_wavefront_size32=0 requires +WavefrontSize64");
4483     }
4484   }
4485 
4486   if (ID == "wavefront_size") {
4487     if (Header.wavefront_size == 5) {
4488       if (!isGFX10Plus())
4489         return TokError("wavefront_size=5 is only allowed on GFX10+");
4490       if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32])
4491         return TokError("wavefront_size=5 requires +WavefrontSize32");
4492     } else if (Header.wavefront_size == 6) {
4493       if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64])
4494         return TokError("wavefront_size=6 requires +WavefrontSize64");
4495     }
4496   }
4497 
4498   if (ID == "enable_wgp_mode") {
4499     if (G_00B848_WGP_MODE(Header.compute_pgm_resource_registers) &&
4500         !isGFX10Plus())
4501       return TokError("enable_wgp_mode=1 is only allowed on GFX10+");
4502   }
4503 
4504   if (ID == "enable_mem_ordered") {
4505     if (G_00B848_MEM_ORDERED(Header.compute_pgm_resource_registers) &&
4506         !isGFX10Plus())
4507       return TokError("enable_mem_ordered=1 is only allowed on GFX10+");
4508   }
4509 
4510   if (ID == "enable_fwd_progress") {
4511     if (G_00B848_FWD_PROGRESS(Header.compute_pgm_resource_registers) &&
4512         !isGFX10Plus())
4513       return TokError("enable_fwd_progress=1 is only allowed on GFX10+");
4514   }
4515 
4516   return false;
4517 }
4518 
4519 bool AMDGPUAsmParser::ParseDirectiveAMDKernelCodeT() {
4520   amd_kernel_code_t Header;
4521   AMDGPU::initDefaultAMDKernelCodeT(Header, &getSTI());
4522 
4523   while (true) {
4524     // Lex EndOfStatement.  This is in a while loop, because lexing a comment
4525     // will set the current token to EndOfStatement.
4526     while(trySkipToken(AsmToken::EndOfStatement));
4527 
4528     StringRef ID;
4529     if (!parseId(ID, "expected value identifier or .end_amd_kernel_code_t"))
4530       return true;
4531 
4532     if (ID == ".end_amd_kernel_code_t")
4533       break;
4534 
4535     if (ParseAMDKernelCodeTValue(ID, Header))
4536       return true;
4537   }
4538 
4539   getTargetStreamer().EmitAMDKernelCodeT(Header);
4540 
4541   return false;
4542 }
4543 
4544 bool AMDGPUAsmParser::ParseDirectiveAMDGPUHsaKernel() {
4545   StringRef KernelName;
4546   if (!parseId(KernelName, "expected symbol name"))
4547     return true;
4548 
4549   getTargetStreamer().EmitAMDGPUSymbolType(KernelName,
4550                                            ELF::STT_AMDGPU_HSA_KERNEL);
4551 
4552   KernelScope.initialize(getContext());
4553   return false;
4554 }
4555 
4556 bool AMDGPUAsmParser::ParseDirectiveISAVersion() {
4557   if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) {
4558     return Error(getLoc(),
4559                  ".amd_amdgpu_isa directive is not available on non-amdgcn "
4560                  "architectures");
4561   }
4562 
4563   auto ISAVersionStringFromASM = getToken().getStringContents();
4564 
4565   std::string ISAVersionStringFromSTI;
4566   raw_string_ostream ISAVersionStreamFromSTI(ISAVersionStringFromSTI);
4567   IsaInfo::streamIsaVersion(&getSTI(), ISAVersionStreamFromSTI);
4568 
4569   if (ISAVersionStringFromASM != ISAVersionStreamFromSTI.str()) {
4570     return Error(getLoc(),
4571                  ".amd_amdgpu_isa directive does not match triple and/or mcpu "
4572                  "arguments specified through the command line");
4573   }
4574 
4575   getTargetStreamer().EmitISAVersion(ISAVersionStreamFromSTI.str());
4576   Lex();
4577 
4578   return false;
4579 }
4580 
4581 bool AMDGPUAsmParser::ParseDirectiveHSAMetadata() {
4582   const char *AssemblerDirectiveBegin;
4583   const char *AssemblerDirectiveEnd;
4584   std::tie(AssemblerDirectiveBegin, AssemblerDirectiveEnd) =
4585       isHsaAbiVersion3(&getSTI())
4586           ? std::make_tuple(HSAMD::V3::AssemblerDirectiveBegin,
4587                             HSAMD::V3::AssemblerDirectiveEnd)
4588           : std::make_tuple(HSAMD::AssemblerDirectiveBegin,
4589                             HSAMD::AssemblerDirectiveEnd);
4590 
4591   if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA) {
4592     return Error(getLoc(),
4593                  (Twine(AssemblerDirectiveBegin) + Twine(" directive is "
4594                  "not available on non-amdhsa OSes")).str());
4595   }
4596 
4597   std::string HSAMetadataString;
4598   if (ParseToEndDirective(AssemblerDirectiveBegin, AssemblerDirectiveEnd,
4599                           HSAMetadataString))
4600     return true;
4601 
4602   if (isHsaAbiVersion3(&getSTI())) {
4603     if (!getTargetStreamer().EmitHSAMetadataV3(HSAMetadataString))
4604       return Error(getLoc(), "invalid HSA metadata");
4605   } else {
4606     if (!getTargetStreamer().EmitHSAMetadataV2(HSAMetadataString))
4607       return Error(getLoc(), "invalid HSA metadata");
4608   }
4609 
4610   return false;
4611 }
4612 
4613 /// Common code to parse out a block of text (typically YAML) between start and
4614 /// end directives.
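///
/// An illustrative sketch of the expected input; the concrete directive
/// spellings are whatever AssemblerDirectiveBegin/End the caller passes in
/// (e.g. the HSA or PAL metadata directive pairs):
///
///   <AssemblerDirectiveBegin>
///     ...free-form text, typically YAML, collected verbatim...
///   <AssemblerDirectiveEnd>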
4615 bool AMDGPUAsmParser::ParseToEndDirective(const char *AssemblerDirectiveBegin,
4616                                           const char *AssemblerDirectiveEnd,
4617                                           std::string &CollectString) {
4618 
4619   raw_string_ostream CollectStream(CollectString);
4620 
4621   getLexer().setSkipSpace(false);
4622 
4623   bool FoundEnd = false;
4624   while (!isToken(AsmToken::Eof)) {
4625     while (isToken(AsmToken::Space)) {
4626       CollectStream << getTokenStr();
4627       Lex();
4628     }
4629 
4630     if (trySkipId(AssemblerDirectiveEnd)) {
4631       FoundEnd = true;
4632       break;
4633     }
4634 
4635     CollectStream << Parser.parseStringToEndOfStatement()
4636                   << getContext().getAsmInfo()->getSeparatorString();
4637 
4638     Parser.eatToEndOfStatement();
4639   }
4640 
4641   getLexer().setSkipSpace(true);
4642 
4643   if (isToken(AsmToken::Eof) && !FoundEnd) {
4644     return TokError(Twine("expected directive ") +
4645                     Twine(AssemblerDirectiveEnd) + Twine(" not found"));
4646   }
4647 
4648   CollectStream.flush();
4649   return false;
4650 }
4651 
4652 /// Parse the assembler directive for new MsgPack-format PAL metadata.
4653 bool AMDGPUAsmParser::ParseDirectivePALMetadataBegin() {
4654   std::string String;
4655   if (ParseToEndDirective(AMDGPU::PALMD::AssemblerDirectiveBegin,
4656                           AMDGPU::PALMD::AssemblerDirectiveEnd, String))
4657     return true;
4658 
4659   auto PALMetadata = getTargetStreamer().getPALMetadata();
4660   if (!PALMetadata->setFromString(String))
4661     return Error(getLoc(), "invalid PAL metadata");
4662   return false;
4663 }
4664 
4665 /// Parse the assembler directive for old linear-format PAL metadata.
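/// An illustrative sketch, assuming the PALMD::AssemblerDirective spelling and
/// arbitrary register/value pairs (the directive takes an even number of
/// comma-separated values, consumed as key,value pairs below):
///   .amd_amdgpu_pal_metadata 0x1000,0x1,0x1001,0x2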
4666 bool AMDGPUAsmParser::ParseDirectivePALMetadata() {
4667   if (getSTI().getTargetTriple().getOS() != Triple::AMDPAL) {
4668     return Error(getLoc(),
4669                  (Twine(PALMD::AssemblerDirective) + Twine(" directive is "
4670                  "not available on non-amdpal OSes")).str());
4671   }
4672 
4673   auto PALMetadata = getTargetStreamer().getPALMetadata();
4674   PALMetadata->setLegacy();
4675   for (;;) {
4676     uint32_t Key, Value;
4677     if (ParseAsAbsoluteExpression(Key)) {
4678       return TokError(Twine("invalid value in ") +
4679                       Twine(PALMD::AssemblerDirective));
4680     }
4681     if (!trySkipToken(AsmToken::Comma)) {
4682       return TokError(Twine("expected an even number of values in ") +
4683                       Twine(PALMD::AssemblerDirective));
4684     }
4685     if (ParseAsAbsoluteExpression(Value)) {
4686       return TokError(Twine("invalid value in ") +
4687                       Twine(PALMD::AssemblerDirective));
4688     }
4689     PALMetadata->setRegister(Key, Value);
4690     if (!trySkipToken(AsmToken::Comma))
4691       break;
4692   }
4693   return false;
4694 }
4695 
4696 /// ParseDirectiveAMDGPULDS
4697 ///  ::= .amdgpu_lds identifier ',' size_expression [',' align_expression]
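///
///  An illustrative use (the symbol name is arbitrary):
///    .amdgpu_lds lds_buffer, 4096, 16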
4698 bool AMDGPUAsmParser::ParseDirectiveAMDGPULDS() {
4699   if (getParser().checkForValidSection())
4700     return true;
4701 
4702   StringRef Name;
4703   SMLoc NameLoc = getLoc();
4704   if (getParser().parseIdentifier(Name))
4705     return TokError("expected identifier in directive");
4706 
4707   MCSymbol *Symbol = getContext().getOrCreateSymbol(Name);
4708   if (parseToken(AsmToken::Comma, "expected ','"))
4709     return true;
4710 
4711   unsigned LocalMemorySize = AMDGPU::IsaInfo::getLocalMemorySize(&getSTI());
4712 
4713   int64_t Size;
4714   SMLoc SizeLoc = getLoc();
4715   if (getParser().parseAbsoluteExpression(Size))
4716     return true;
4717   if (Size < 0)
4718     return Error(SizeLoc, "size must be non-negative");
4719   if (Size > LocalMemorySize)
4720     return Error(SizeLoc, "size is too large");
4721 
4722   int64_t Alignment = 4;
4723   if (trySkipToken(AsmToken::Comma)) {
4724     SMLoc AlignLoc = getLoc();
4725     if (getParser().parseAbsoluteExpression(Alignment))
4726       return true;
4727     if (Alignment < 0 || !isPowerOf2_64(Alignment))
4728       return Error(AlignLoc, "alignment must be a power of two");
4729 
4730     // Alignment larger than the size of LDS is possible in theory, as long
4731     // as the linker manages to place the symbol at address 0, but we do want
4732     // to make sure the alignment fits nicely into a 32-bit integer.
4733     if (Alignment >= 1u << 31)
4734       return Error(AlignLoc, "alignment is too large");
4735   }
4736 
4737   if (parseToken(AsmToken::EndOfStatement,
4738                  "unexpected token in '.amdgpu_lds' directive"))
4739     return true;
4740 
4741   Symbol->redefineIfPossible();
4742   if (!Symbol->isUndefined())
4743     return Error(NameLoc, "invalid symbol redefinition");
4744 
4745   getTargetStreamer().emitAMDGPULDS(Symbol, Size, Align(Alignment));
4746   return false;
4747 }
4748 
4749 bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) {
4750   StringRef IDVal = DirectiveID.getString();
4751 
4752   if (isHsaAbiVersion3(&getSTI())) {
4753     if (IDVal == ".amdgcn_target")
4754       return ParseDirectiveAMDGCNTarget();
4755 
4756     if (IDVal == ".amdhsa_kernel")
4757       return ParseDirectiveAMDHSAKernel();
4758 
4759     // TODO: Restructure/combine with PAL metadata directive.
4760     if (IDVal == AMDGPU::HSAMD::V3::AssemblerDirectiveBegin)
4761       return ParseDirectiveHSAMetadata();
4762   } else {
4763     if (IDVal == ".hsa_code_object_version")
4764       return ParseDirectiveHSACodeObjectVersion();
4765 
4766     if (IDVal == ".hsa_code_object_isa")
4767       return ParseDirectiveHSACodeObjectISA();
4768 
4769     if (IDVal == ".amd_kernel_code_t")
4770       return ParseDirectiveAMDKernelCodeT();
4771 
4772     if (IDVal == ".amdgpu_hsa_kernel")
4773       return ParseDirectiveAMDGPUHsaKernel();
4774 
4775     if (IDVal == ".amd_amdgpu_isa")
4776       return ParseDirectiveISAVersion();
4777 
4778     if (IDVal == AMDGPU::HSAMD::AssemblerDirectiveBegin)
4779       return ParseDirectiveHSAMetadata();
4780   }
4781 
4782   if (IDVal == ".amdgpu_lds")
4783     return ParseDirectiveAMDGPULDS();
4784 
4785   if (IDVal == PALMD::AssemblerDirectiveBegin)
4786     return ParseDirectivePALMetadataBegin();
4787 
4788   if (IDVal == PALMD::AssemblerDirective)
4789     return ParseDirectivePALMetadata();
4790 
4791   return true;
4792 }
4793 
4794 bool AMDGPUAsmParser::subtargetHasRegister(const MCRegisterInfo &MRI,
4795                                            unsigned RegNo) const {
4796 
4797   for (MCRegAliasIterator R(AMDGPU::TTMP12_TTMP13_TTMP14_TTMP15, &MRI, true);
4798        R.isValid(); ++R) {
4799     if (*R == RegNo)
4800       return isGFX9Plus();
4801   }
4802 
4803   // GFX10 has 2 more SGPRs 104 and 105.
4804   for (MCRegAliasIterator R(AMDGPU::SGPR104_SGPR105, &MRI, true);
4805        R.isValid(); ++R) {
4806     if (*R == RegNo)
4807       return hasSGPR104_SGPR105();
4808   }
4809 
4810   switch (RegNo) {
4811   case AMDGPU::SRC_SHARED_BASE:
4812   case AMDGPU::SRC_SHARED_LIMIT:
4813   case AMDGPU::SRC_PRIVATE_BASE:
4814   case AMDGPU::SRC_PRIVATE_LIMIT:
4815   case AMDGPU::SRC_POPS_EXITING_WAVE_ID:
4816     return isGFX9Plus();
4817   case AMDGPU::TBA:
4818   case AMDGPU::TBA_LO:
4819   case AMDGPU::TBA_HI:
4820   case AMDGPU::TMA:
4821   case AMDGPU::TMA_LO:
4822   case AMDGPU::TMA_HI:
4823     return !isGFX9Plus();
4824   case AMDGPU::XNACK_MASK:
4825   case AMDGPU::XNACK_MASK_LO:
4826   case AMDGPU::XNACK_MASK_HI:
4827     return (isVI() || isGFX9()) && hasXNACK();
4828   case AMDGPU::SGPR_NULL:
4829     return isGFX10Plus();
4830   default:
4831     break;
4832   }
4833 
4834   if (isCI())
4835     return true;
4836 
4837   if (isSI() || isGFX10Plus()) {
4838     // No flat_scr on SI.
4839     // On GFX10 flat scratch is not a valid register operand and can only be
4840     // accessed with s_setreg/s_getreg.
4841     switch (RegNo) {
4842     case AMDGPU::FLAT_SCR:
4843     case AMDGPU::FLAT_SCR_LO:
4844     case AMDGPU::FLAT_SCR_HI:
4845       return false;
4846     default:
4847       return true;
4848     }
4849   }
4850 
4851   // VI only has 102 SGPRs, so make sure we aren't trying to use the 2 more that
4852   // SI/CI have.
4853   for (MCRegAliasIterator R(AMDGPU::SGPR102_SGPR103, &MRI, true);
4854        R.isValid(); ++R) {
4855     if (*R == RegNo)
4856       return hasSGPR102_SGPR103();
4857   }
4858 
4859   return true;
4860 }
4861 
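// Parses a single instruction operand. For GFX10+ MIMG instructions the caller
// may request NSA mode, where a bracketed list of address registers is
// accepted; an illustrative sketch (the surrounding operands are arbitrary):
//   image_sample v[16:19], [v0, v2, v4], s[4:11], s[100:103]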
4862 OperandMatchResultTy
4863 AMDGPUAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic,
4864                               OperandMode Mode) {
4865   // Try to parse with a custom parser
4866   OperandMatchResultTy ResTy = MatchOperandParserImpl(Operands, Mnemonic);
4867 
4868   // If we successfully parsed the operand or if there was an error parsing,
4869   // we are done.
4870   //
4871   // If we are parsing after we reach EndOfStatement then this means we
4872   // are appending default values to the Operands list.  This is only done
4873   // by the custom parser, so we shouldn't continue on to the generic parsing.
4874   if (ResTy == MatchOperand_Success || ResTy == MatchOperand_ParseFail ||
4875       isToken(AsmToken::EndOfStatement))
4876     return ResTy;
4877 
4878   SMLoc RBraceLoc;
4879   SMLoc LBraceLoc = getLoc();
4880   if (Mode == OperandMode_NSA && trySkipToken(AsmToken::LBrac)) {
4881     unsigned Prefix = Operands.size();
4882 
4883     for (;;) {
4884       ResTy = parseReg(Operands);
4885       if (ResTy != MatchOperand_Success)
4886         return ResTy;
4887 
4888       RBraceLoc = getLoc();
4889       if (trySkipToken(AsmToken::RBrac))
4890         break;
4891 
4892       if (!trySkipToken(AsmToken::Comma))
4893         return MatchOperand_ParseFail;
4894     }
4895 
4896     if (Operands.size() - Prefix > 1) {
4897       Operands.insert(Operands.begin() + Prefix,
4898                       AMDGPUOperand::CreateToken(this, "[", LBraceLoc));
4899       Operands.push_back(AMDGPUOperand::CreateToken(this, "]", RBraceLoc));
4900     }
4901 
4902     return MatchOperand_Success;
4903   }
4904 
4905   return parseRegOrImm(Operands);
4906 }
4907 
4908 StringRef AMDGPUAsmParser::parseMnemonicSuffix(StringRef Name) {
4909   // Clear any forced encodings from the previous instruction.
4910   setForcedEncodingSize(0);
4911   setForcedDPP(false);
4912   setForcedSDWA(false);
4913 
4914   if (Name.endswith("_e64")) {
4915     setForcedEncodingSize(64);
4916     return Name.substr(0, Name.size() - 4);
4917   } else if (Name.endswith("_e32")) {
4918     setForcedEncodingSize(32);
4919     return Name.substr(0, Name.size() - 4);
4920   } else if (Name.endswith("_dpp")) {
4921     setForcedDPP(true);
4922     return Name.substr(0, Name.size() - 4);
4923   } else if (Name.endswith("_sdwa")) {
4924     setForcedSDWA(true);
4925     return Name.substr(0, Name.size() - 5);
4926   }
4927   return Name;
4928 }
4929 
4930 bool AMDGPUAsmParser::ParseInstruction(ParseInstructionInfo &Info,
4931                                        StringRef Name,
4932                                        SMLoc NameLoc, OperandVector &Operands) {
4933   // Add the instruction mnemonic
4934   Name = parseMnemonicSuffix(Name);
4935   Operands.push_back(AMDGPUOperand::CreateToken(this, Name, NameLoc));
4936 
4937   bool IsMIMG = Name.startswith("image_");
4938 
4939   while (!trySkipToken(AsmToken::EndOfStatement)) {
4940     OperandMode Mode = OperandMode_Default;
4941     if (IsMIMG && isGFX10Plus() && Operands.size() == 2)
4942       Mode = OperandMode_NSA;
4943     OperandMatchResultTy Res = parseOperand(Operands, Name, Mode);
4944 
4945     // Eat the comma or space if there is one.
4946     trySkipToken(AsmToken::Comma);
4947 
4948     if (Res != MatchOperand_Success) {
4949       checkUnsupportedInstruction(Name, NameLoc);
4950       if (!Parser.hasPendingError()) {
4951         // FIXME: use real operand location rather than the current location.
4952         StringRef Msg =
4953           (Res == MatchOperand_ParseFail) ? "failed parsing operand." :
4954                                             "not a valid operand.";
4955         Error(getLoc(), Msg);
4956       }
4957       while (!trySkipToken(AsmToken::EndOfStatement)) {
4958         lex();
4959       }
4960       return true;
4961     }
4962   }
4963 
4964   return false;
4965 }
4966 
4967 //===----------------------------------------------------------------------===//
4968 // Utility functions
4969 //===----------------------------------------------------------------------===//
4970 
4971 OperandMatchResultTy
4972 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, int64_t &IntVal) {
4973 
4974   if (!trySkipId(Prefix, AsmToken::Colon))
4975     return MatchOperand_NoMatch;
4976 
4977   return parseExpr(IntVal) ? MatchOperand_Success : MatchOperand_ParseFail;
4978 }
4979 
4980 OperandMatchResultTy
4981 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
4982                                     AMDGPUOperand::ImmTy ImmTy,
4983                                     bool (*ConvertResult)(int64_t&)) {
4984   SMLoc S = getLoc();
4985   int64_t Value = 0;
4986 
4987   OperandMatchResultTy Res = parseIntWithPrefix(Prefix, Value);
4988   if (Res != MatchOperand_Success)
4989     return Res;
4990 
4991   if (ConvertResult && !ConvertResult(Value)) {
4992     Error(S, "invalid " + StringRef(Prefix) + " value.");
4993   }
4994 
4995   Operands.push_back(AMDGPUOperand::CreateImm(this, Value, S, ImmTy));
4996   return MatchOperand_Success;
4997 }
4998 
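// Parses a prefixed array of 0/1 flags, up to MaxSize (4) elements; the
// prefixes are supplied by the callers, e.g. (illustrative) op_sel:[0,0,1]
// or neg_lo:[1,0].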
4999 OperandMatchResultTy
5000 AMDGPUAsmParser::parseOperandArrayWithPrefix(const char *Prefix,
5001                                              OperandVector &Operands,
5002                                              AMDGPUOperand::ImmTy ImmTy,
5003                                              bool (*ConvertResult)(int64_t&)) {
5004   SMLoc S = getLoc();
5005   if (!trySkipId(Prefix, AsmToken::Colon))
5006     return MatchOperand_NoMatch;
5007 
5008   if (!skipToken(AsmToken::LBrac, "expected a left square bracket"))
5009     return MatchOperand_ParseFail;
5010 
5011   unsigned Val = 0;
5012   const unsigned MaxSize = 4;
5013 
5014   // FIXME: How to verify the number of elements matches the number of src
5015   // operands?
5016   for (int I = 0; ; ++I) {
5017     int64_t Op;
5018     SMLoc Loc = getLoc();
5019     if (!parseExpr(Op))
5020       return MatchOperand_ParseFail;
5021 
5022     if (Op != 0 && Op != 1) {
5023       Error(Loc, "invalid " + StringRef(Prefix) + " value.");
5024       return MatchOperand_ParseFail;
5025     }
5026 
5027     Val |= (Op << I);
5028 
5029     if (trySkipToken(AsmToken::RBrac))
5030       break;
5031 
5032     if (I + 1 == MaxSize) {
5033       Error(getLoc(), "expected a closing square bracket");
5034       return MatchOperand_ParseFail;
5035     }
5036 
5037     if (!skipToken(AsmToken::Comma, "expected a comma"))
5038       return MatchOperand_ParseFail;
5039   }
5040 
5041   Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S, ImmTy));
5042   return MatchOperand_Success;
5043 }
5044 
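// Parses a named single-bit modifier. Per the logic below, the bare name sets
// the bit (e.g., illustratively, "glc") and the same name with a "no" prefix
// clears it (e.g. "noglc"); any other identifier is not a match.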
5045 OperandMatchResultTy
5046 AMDGPUAsmParser::parseNamedBit(const char *Name, OperandVector &Operands,
5047                                AMDGPUOperand::ImmTy ImmTy) {
5048   int64_t Bit = 0;
5049   SMLoc S = getLoc();
5050 
5051   // If we are at the end of the statement, this is a default argument, so
5052   // use the default value.
5053   if (!isToken(AsmToken::EndOfStatement)) {
5054     switch(getTokenKind()) {
5055       case AsmToken::Identifier: {
5056         StringRef Tok = getTokenStr();
5057         if (Tok == Name) {
5058           if (Tok == "r128" && !hasMIMG_R128())
5059             Error(S, "r128 modifier is not supported on this GPU");
5060           if (Tok == "a16" && !isGFX9() && !hasGFX10A16())
5061             Error(S, "a16 modifier is not supported on this GPU");
5062           Bit = 1;
5063           Parser.Lex();
5064         } else if (Tok.startswith("no") && Tok.endswith(Name)) {
5065           Bit = 0;
5066           Parser.Lex();
5067         } else {
5068           return MatchOperand_NoMatch;
5069         }
5070         break;
5071       }
5072       default:
5073         return MatchOperand_NoMatch;
5074     }
5075   }
5076 
5077   if (!isGFX10Plus() && ImmTy == AMDGPUOperand::ImmTyDLC)
5078     return MatchOperand_ParseFail;
5079 
5080   if (isGFX9() && ImmTy == AMDGPUOperand::ImmTyA16)
5081     ImmTy = AMDGPUOperand::ImmTyR128A16;
5082 
5083   Operands.push_back(AMDGPUOperand::CreateImm(this, Bit, S, ImmTy));
5084   return MatchOperand_Success;
5085 }
5086 
5087 static void addOptionalImmOperand(
5088   MCInst& Inst, const OperandVector& Operands,
5089   AMDGPUAsmParser::OptionalImmIndexMap& OptionalIdx,
5090   AMDGPUOperand::ImmTy ImmT,
5091   int64_t Default = 0) {
5092   auto i = OptionalIdx.find(ImmT);
5093   if (i != OptionalIdx.end()) {
5094     unsigned Idx = i->second;
5095     ((AMDGPUOperand &)*Operands[Idx]).addImmOperands(Inst, 1);
5096   } else {
5097     Inst.addOperand(MCOperand::createImm(Default));
5098   }
5099 }
5100 
5101 OperandMatchResultTy
5102 AMDGPUAsmParser::parseStringWithPrefix(StringRef Prefix, StringRef &Value) {
5103   if (!trySkipId(Prefix, AsmToken::Colon))
5104     return MatchOperand_NoMatch;
5105 
5106   return parseId(Value) ? MatchOperand_Success : MatchOperand_ParseFail;
5107 }
5108 
5109 //===----------------------------------------------------------------------===//
5110 // MTBUF format
5111 //===----------------------------------------------------------------------===//
5112 
5113 bool AMDGPUAsmParser::tryParseFmt(const char *Pref,
5114                                   int64_t MaxVal,
5115                                   int64_t &Fmt) {
5116   int64_t Val;
5117   SMLoc Loc = getLoc();
5118 
5119   auto Res = parseIntWithPrefix(Pref, Val);
5120   if (Res == MatchOperand_ParseFail)
5121     return false;
5122   if (Res == MatchOperand_NoMatch)
5123     return true;
5124 
5125   if (Val < 0 || Val > MaxVal) {
5126     Error(Loc, Twine("out of range ", StringRef(Pref)));
5127     return false;
5128   }
5129 
5130   Fmt = Val;
5131   return true;
5132 }
5133 
5134 // dfmt and nfmt (in a tbuffer instruction) are parsed as one to allow their
5135 // values to live in a joint format operand in the MCInst encoding.
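// An illustrative sketch of the pre-GFX10 syntax handled here (the register
// operands and numeric values are arbitrary):
//   tbuffer_load_format_x v0, off, s[0:3], dfmt:1, nfmt:2, 0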
5136 OperandMatchResultTy
5137 AMDGPUAsmParser::parseDfmtNfmt(int64_t &Format) {
5138   using namespace llvm::AMDGPU::MTBUFFormat;
5139 
5140   int64_t Dfmt = DFMT_UNDEF;
5141   int64_t Nfmt = NFMT_UNDEF;
5142 
5143   // dfmt and nfmt can appear in either order, and each is optional.
5144   for (int I = 0; I < 2; ++I) {
5145     if (Dfmt == DFMT_UNDEF && !tryParseFmt("dfmt", DFMT_MAX, Dfmt))
5146       return MatchOperand_ParseFail;
5147 
5148     if (Nfmt == NFMT_UNDEF && !tryParseFmt("nfmt", NFMT_MAX, Nfmt)) {
5149       return MatchOperand_ParseFail;
5150     }
5151     // Skip optional comma between dfmt/nfmt
5152     // but guard against 2 commas following each other.
5153     if ((Dfmt == DFMT_UNDEF) != (Nfmt == NFMT_UNDEF) &&
5154         !peekToken().is(AsmToken::Comma)) {
5155       trySkipToken(AsmToken::Comma);
5156     }
5157   }
5158 
5159   if (Dfmt == DFMT_UNDEF && Nfmt == NFMT_UNDEF)
5160     return MatchOperand_NoMatch;
5161 
5162   Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt;
5163   Nfmt = (Nfmt == NFMT_UNDEF) ? NFMT_DEFAULT : Nfmt;
5164 
5165   Format = encodeDfmtNfmt(Dfmt, Nfmt);
5166   return MatchOperand_Success;
5167 }
5168 
5169 OperandMatchResultTy
5170 AMDGPUAsmParser::parseUfmt(int64_t &Format) {
5171   using namespace llvm::AMDGPU::MTBUFFormat;
5172 
5173   int64_t Fmt = UFMT_UNDEF;
5174 
5175   if (!tryParseFmt("format", UFMT_MAX, Fmt))
5176     return MatchOperand_ParseFail;
5177 
5178   if (Fmt == UFMT_UNDEF)
5179     return MatchOperand_NoMatch;
5180 
5181   Format = Fmt;
5182   return MatchOperand_Success;
5183 }
5184 
5185 bool AMDGPUAsmParser::matchDfmtNfmt(int64_t &Dfmt,
5186                                     int64_t &Nfmt,
5187                                     StringRef FormatStr,
5188                                     SMLoc Loc) {
5189   using namespace llvm::AMDGPU::MTBUFFormat;
5190   int64_t Format;
5191 
5192   Format = getDfmt(FormatStr);
5193   if (Format != DFMT_UNDEF) {
5194     Dfmt = Format;
5195     return true;
5196   }
5197 
5198   Format = getNfmt(FormatStr, getSTI());
5199   if (Format != NFMT_UNDEF) {
5200     Nfmt = Format;
5201     return true;
5202   }
5203 
5204   Error(Loc, "unsupported format");
5205   return false;
5206 }
5207 
5208 OperandMatchResultTy
5209 AMDGPUAsmParser::parseSymbolicSplitFormat(StringRef FormatStr,
5210                                           SMLoc FormatLoc,
5211                                           int64_t &Format) {
5212   using namespace llvm::AMDGPU::MTBUFFormat;
5213 
5214   int64_t Dfmt = DFMT_UNDEF;
5215   int64_t Nfmt = NFMT_UNDEF;
5216   if (!matchDfmtNfmt(Dfmt, Nfmt, FormatStr, FormatLoc))
5217     return MatchOperand_ParseFail;
5218 
5219   if (trySkipToken(AsmToken::Comma)) {
5220     StringRef Str;
5221     SMLoc Loc = getLoc();
5222     if (!parseId(Str, "expected a format string") ||
5223         !matchDfmtNfmt(Dfmt, Nfmt, Str, Loc)) {
5224       return MatchOperand_ParseFail;
5225     }
5226     if (Dfmt == DFMT_UNDEF) {
5227       Error(Loc, "duplicate numeric format");
5228       return MatchOperand_ParseFail;
5229     } else if (Nfmt == NFMT_UNDEF) {
5230       Error(Loc, "duplicate data format");
5231       return MatchOperand_ParseFail;
5232     }
5233   }
5234 
5235   Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt;
5236   Nfmt = (Nfmt == NFMT_UNDEF) ? NFMT_DEFAULT : Nfmt;
5237 
5238   if (isGFX10Plus()) {
5239     auto Ufmt = convertDfmtNfmt2Ufmt(Dfmt, Nfmt);
5240     if (Ufmt == UFMT_UNDEF) {
5241       Error(FormatLoc, "unsupported format");
5242       return MatchOperand_ParseFail;
5243     }
5244     Format = Ufmt;
5245   } else {
5246     Format = encodeDfmtNfmt(Dfmt, Nfmt);
5247   }
5248 
5249   return MatchOperand_Success;
5250 }
5251 
5252 OperandMatchResultTy
5253 AMDGPUAsmParser::parseSymbolicUnifiedFormat(StringRef FormatStr,
5254                                             SMLoc Loc,
5255                                             int64_t &Format) {
5256   using namespace llvm::AMDGPU::MTBUFFormat;
5257 
5258   auto Id = getUnifiedFormat(FormatStr);
5259   if (Id == UFMT_UNDEF)
5260     return MatchOperand_NoMatch;
5261 
5262   if (!isGFX10Plus()) {
5263     Error(Loc, "unified format is not supported on this GPU");
5264     return MatchOperand_ParseFail;
5265   }
5266 
5267   Format = Id;
5268   return MatchOperand_Success;
5269 }
5270 
5271 OperandMatchResultTy
5272 AMDGPUAsmParser::parseNumericFormat(int64_t &Format) {
5273   using namespace llvm::AMDGPU::MTBUFFormat;
5274   SMLoc Loc = getLoc();
5275 
5276   if (!parseExpr(Format))
5277     return MatchOperand_ParseFail;
5278   if (!isValidFormatEncoding(Format, getSTI())) {
5279     Error(Loc, "out of range format");
5280     return MatchOperand_ParseFail;
5281   }
5282 
5283   return MatchOperand_Success;
5284 }
5285 
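// Accepts either a numeric or a symbolic format specifier. Illustrative forms
// (the symbolic names are assumed to follow the BUF_* tables consulted by
// getUnifiedFormat/getDfmt/getNfmt):
//   format:22
//   format:[BUF_FMT_32_FLOAT]                           (GFX10+ unified)
//   format:[BUF_DATA_FORMAT_32, BUF_NUM_FORMAT_FLOAT]   (split dfmt/nfmt)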
5286 OperandMatchResultTy
5287 AMDGPUAsmParser::parseSymbolicOrNumericFormat(int64_t &Format) {
5288   using namespace llvm::AMDGPU::MTBUFFormat;
5289 
5290   if (!trySkipId("format", AsmToken::Colon))
5291     return MatchOperand_NoMatch;
5292 
5293   if (trySkipToken(AsmToken::LBrac)) {
5294     StringRef FormatStr;
5295     SMLoc Loc = getLoc();
5296     if (!parseId(FormatStr, "expected a format string"))
5297       return MatchOperand_ParseFail;
5298 
5299     auto Res = parseSymbolicUnifiedFormat(FormatStr, Loc, Format);
5300     if (Res == MatchOperand_NoMatch)
5301       Res = parseSymbolicSplitFormat(FormatStr, Loc, Format);
5302     if (Res != MatchOperand_Success)
5303       return Res;
5304 
5305     if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
5306       return MatchOperand_ParseFail;
5307 
5308     return MatchOperand_Success;
5309   }
5310 
5311   return parseNumericFormat(Format);
5312 }
5313 
5314 OperandMatchResultTy
5315 AMDGPUAsmParser::parseFORMAT(OperandVector &Operands) {
5316   using namespace llvm::AMDGPU::MTBUFFormat;
5317 
5318   int64_t Format = getDefaultFormatEncoding(getSTI());
5319   OperandMatchResultTy Res;
5320   SMLoc Loc = getLoc();
5321 
5322   // Parse legacy format syntax.
5323   Res = isGFX10Plus() ? parseUfmt(Format) : parseDfmtNfmt(Format);
5324   if (Res == MatchOperand_ParseFail)
5325     return Res;
5326 
5327   bool FormatFound = (Res == MatchOperand_Success);
5328 
5329   Operands.push_back(
5330     AMDGPUOperand::CreateImm(this, Format, Loc, AMDGPUOperand::ImmTyFORMAT));
5331 
5332   if (FormatFound)
5333     trySkipToken(AsmToken::Comma);
5334 
5335   if (isToken(AsmToken::EndOfStatement)) {
5336     // We are expecting an soffset operand,
5337     // but let the matcher handle the error.
5338     return MatchOperand_Success;
5339   }
5340 
5341   // Parse soffset.
5342   Res = parseRegOrImm(Operands);
5343   if (Res != MatchOperand_Success)
5344     return Res;
5345 
5346   trySkipToken(AsmToken::Comma);
5347 
5348   if (!FormatFound) {
5349     Res = parseSymbolicOrNumericFormat(Format);
5350     if (Res == MatchOperand_ParseFail)
5351       return Res;
5352     if (Res == MatchOperand_Success) {
5353       auto Size = Operands.size();
5354       AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands[Size - 2]);
5355       assert(Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyFORMAT);
5356       Op.setImm(Format);
5357     }
5358     return MatchOperand_Success;
5359   }
5360 
5361   if (isId("format") && peekToken().is(AsmToken::Colon)) {
5362     Error(getLoc(), "duplicate format");
5363     return MatchOperand_ParseFail;
5364   }
5365   return MatchOperand_Success;
5366 }
5367 
5368 //===----------------------------------------------------------------------===//
5369 // ds
5370 //===----------------------------------------------------------------------===//
5371 
5372 void AMDGPUAsmParser::cvtDSOffset01(MCInst &Inst,
5373                                     const OperandVector &Operands) {
5374   OptionalImmIndexMap OptionalIdx;
5375 
5376   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
5377     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
5378 
5379     // Add the register arguments
5380     if (Op.isReg()) {
5381       Op.addRegOperands(Inst, 1);
5382       continue;
5383     }
5384 
5385     // Handle optional arguments
5386     OptionalIdx[Op.getImmTy()] = i;
5387   }
5388 
5389   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset0);
5390   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset1);
5391   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS);
5392 
5393   Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0
5394 }
5395 
5396 void AMDGPUAsmParser::cvtDSImpl(MCInst &Inst, const OperandVector &Operands,
5397                                 bool IsGdsHardcoded) {
5398   OptionalImmIndexMap OptionalIdx;
5399 
5400   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
5401     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
5402 
5403     // Add the register arguments
5404     if (Op.isReg()) {
5405       Op.addRegOperands(Inst, 1);
5406       continue;
5407     }
5408 
5409     if (Op.isToken() && Op.getToken() == "gds") {
5410       IsGdsHardcoded = true;
5411       continue;
5412     }
5413 
5414     // Handle optional arguments
5415     OptionalIdx[Op.getImmTy()] = i;
5416   }
5417 
5418   AMDGPUOperand::ImmTy OffsetType =
5419     (Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_gfx10 ||
5420      Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_gfx6_gfx7 ||
5421      Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_vi) ? AMDGPUOperand::ImmTySwizzle :
5422                                                       AMDGPUOperand::ImmTyOffset;
5423 
5424   addOptionalImmOperand(Inst, Operands, OptionalIdx, OffsetType);
5425 
5426   if (!IsGdsHardcoded) {
5427     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS);
5428   }
5429   Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0
5430 }
5431 
5432 void AMDGPUAsmParser::cvtExp(MCInst &Inst, const OperandVector &Operands) {
5433   OptionalImmIndexMap OptionalIdx;
5434 
5435   unsigned OperandIdx[4];
5436   unsigned EnMask = 0;
5437   int SrcIdx = 0;
5438 
5439   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
5440     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
5441 
5442     // Add the register arguments
5443     if (Op.isReg()) {
5444       assert(SrcIdx < 4);
5445       OperandIdx[SrcIdx] = Inst.size();
5446       Op.addRegOperands(Inst, 1);
5447       ++SrcIdx;
5448       continue;
5449     }
5450 
5451     if (Op.isOff()) {
5452       assert(SrcIdx < 4);
5453       OperandIdx[SrcIdx] = Inst.size();
5454       Inst.addOperand(MCOperand::createReg(AMDGPU::NoRegister));
5455       ++SrcIdx;
5456       continue;
5457     }
5458 
5459     if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyExpTgt) {
5460       Op.addImmOperands(Inst, 1);
5461       continue;
5462     }
5463 
5464     if (Op.isToken() && Op.getToken() == "done")
5465       continue;
5466 
5467     // Handle optional arguments
5468     OptionalIdx[Op.getImmTy()] = i;
5469   }
5470 
5471   assert(SrcIdx == 4);
5472 
5473   bool Compr = false;
5474   if (OptionalIdx.find(AMDGPUOperand::ImmTyExpCompr) != OptionalIdx.end()) {
5475     Compr = true;
5476     Inst.getOperand(OperandIdx[1]) = Inst.getOperand(OperandIdx[2]);
5477     Inst.getOperand(OperandIdx[2]).setReg(AMDGPU::NoRegister);
5478     Inst.getOperand(OperandIdx[3]).setReg(AMDGPU::NoRegister);
5479   }
5480 
5481   for (auto i = 0; i < SrcIdx; ++i) {
5482     if (Inst.getOperand(OperandIdx[i]).getReg() != AMDGPU::NoRegister) {
5483       EnMask |= Compr? (0x3 << i * 2) : (0x1 << i);
5484     }
5485   }
5486 
5487   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpVM);
5488   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpCompr);
5489 
5490   Inst.addOperand(MCOperand::createImm(EnMask));
5491 }
5492 
5493 //===----------------------------------------------------------------------===//
5494 // s_waitcnt
5495 //===----------------------------------------------------------------------===//
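// Illustrative forms accepted by parseCnt/parseSWaitCntOps below. Counters may
// be separated with '&' or ',', and a counter name may carry a "_sat" suffix
// to saturate instead of reporting an out-of-range value:
//   s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
//   s_waitcnt lgkmcnt(1) & vmcnt(3)
//   s_waitcnt 0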
5496 
5497 static bool
5498 encodeCnt(
5499   const AMDGPU::IsaVersion ISA,
5500   int64_t &IntVal,
5501   int64_t CntVal,
5502   bool Saturate,
5503   unsigned (*encode)(const IsaVersion &Version, unsigned, unsigned),
5504   unsigned (*decode)(const IsaVersion &Version, unsigned))
5505 {
5506   bool Failed = false;
5507 
5508   IntVal = encode(ISA, IntVal, CntVal);
5509   if (CntVal != decode(ISA, IntVal)) {
5510     if (Saturate) {
5511       IntVal = encode(ISA, IntVal, -1);
5512     } else {
5513       Failed = true;
5514     }
5515   }
5516   return Failed;
5517 }
5518 
5519 bool AMDGPUAsmParser::parseCnt(int64_t &IntVal) {
5520 
5521   SMLoc CntLoc = getLoc();
5522   StringRef CntName = getTokenStr();
5523 
5524   if (!skipToken(AsmToken::Identifier, "expected a counter name") ||
5525       !skipToken(AsmToken::LParen, "expected a left parenthesis"))
5526     return false;
5527 
5528   int64_t CntVal;
5529   SMLoc ValLoc = getLoc();
5530   if (!parseExpr(CntVal))
5531     return false;
5532 
5533   AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
5534 
5535   bool Failed = true;
5536   bool Sat = CntName.endswith("_sat");
5537 
5538   if (CntName == "vmcnt" || CntName == "vmcnt_sat") {
5539     Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeVmcnt, decodeVmcnt);
5540   } else if (CntName == "expcnt" || CntName == "expcnt_sat") {
5541     Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeExpcnt, decodeExpcnt);
5542   } else if (CntName == "lgkmcnt" || CntName == "lgkmcnt_sat") {
5543     Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeLgkmcnt, decodeLgkmcnt);
5544   } else {
5545     Error(CntLoc, "invalid counter name " + CntName);
5546     return false;
5547   }
5548 
5549   if (Failed) {
5550     Error(ValLoc, "too large value for " + CntName);
5551     return false;
5552   }
5553 
5554   if (!skipToken(AsmToken::RParen, "expected a closing parenthesis"))
5555     return false;
5556 
5557   if (trySkipToken(AsmToken::Amp) || trySkipToken(AsmToken::Comma)) {
5558     if (isToken(AsmToken::EndOfStatement)) {
5559       Error(getLoc(), "expected a counter name");
5560       return false;
5561     }
5562   }
5563 
5564   return true;
5565 }
5566 
5567 OperandMatchResultTy
5568 AMDGPUAsmParser::parseSWaitCntOps(OperandVector &Operands) {
5569   AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
5570   int64_t Waitcnt = getWaitcntBitMask(ISA);
5571   SMLoc S = getLoc();
5572 
5573   if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) {
5574     while (!isToken(AsmToken::EndOfStatement)) {
5575       if (!parseCnt(Waitcnt))
5576         return MatchOperand_ParseFail;
5577     }
5578   } else {
5579     if (!parseExpr(Waitcnt))
5580       return MatchOperand_ParseFail;
5581   }
5582 
5583   Operands.push_back(AMDGPUOperand::CreateImm(this, Waitcnt, S));
5584   return MatchOperand_Success;
5585 }
5586 
5587 bool
5588 AMDGPUOperand::isSWaitCnt() const {
5589   return isImm();
5590 }
5591 
5592 //===----------------------------------------------------------------------===//
5593 // hwreg
5594 //===----------------------------------------------------------------------===//
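// Illustrative forms accepted by parseHwreg below. The symbolic register name
// is an assumption for the example; any name known to getHwregId, or a plain
// numeric code, may be used, and the offset/width pair is optional:
//   s_getreg_b32 s0, hwreg(HW_REG_MODE)
//   s_setreg_b32 hwreg(HW_REG_MODE, 0, 32), s0
//   s_getreg_b32 s0, hwreg(6, 0, 16)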
5595 
5596 bool
5597 AMDGPUAsmParser::parseHwregBody(OperandInfoTy &HwReg,
5598                                 OperandInfoTy &Offset,
5599                                 OperandInfoTy &Width) {
5600   using namespace llvm::AMDGPU::Hwreg;
5601 
5602   // The register may be specified by name or using a numeric code
5603   HwReg.Loc = getLoc();
5604   if (isToken(AsmToken::Identifier) &&
5605       (HwReg.Id = getHwregId(getTokenStr())) >= 0) {
5606     HwReg.IsSymbolic = true;
5607     lex(); // skip register name
5608   } else if (!parseExpr(HwReg.Id, "a register name")) {
5609     return false;
5610   }
5611 
5612   if (trySkipToken(AsmToken::RParen))
5613     return true;
5614 
5615   // parse optional params
5616   if (!skipToken(AsmToken::Comma, "expected a comma or a closing parenthesis"))
5617     return false;
5618 
5619   Offset.Loc = getLoc();
5620   if (!parseExpr(Offset.Id))
5621     return false;
5622 
5623   if (!skipToken(AsmToken::Comma, "expected a comma"))
5624     return false;
5625 
5626   Width.Loc = getLoc();
5627   return parseExpr(Width.Id) &&
5628          skipToken(AsmToken::RParen, "expected a closing parenthesis");
5629 }
5630 
5631 bool
5632 AMDGPUAsmParser::validateHwreg(const OperandInfoTy &HwReg,
5633                                const OperandInfoTy &Offset,
5634                                const OperandInfoTy &Width) {
5635 
5636   using namespace llvm::AMDGPU::Hwreg;
5637 
5638   if (HwReg.IsSymbolic && !isValidHwreg(HwReg.Id, getSTI())) {
5639     Error(HwReg.Loc,
5640           "specified hardware register is not supported on this GPU");
5641     return false;
5642   }
5643   if (!isValidHwreg(HwReg.Id)) {
5644     Error(HwReg.Loc,
5645           "invalid code of hardware register: only 6-bit values are legal");
5646     return false;
5647   }
5648   if (!isValidHwregOffset(Offset.Id)) {
5649     Error(Offset.Loc, "invalid bit offset: only 5-bit values are legal");
5650     return false;
5651   }
5652   if (!isValidHwregWidth(Width.Id)) {
5653     Error(Width.Loc,
5654           "invalid bitfield width: only values from 1 to 32 are legal");
5655     return false;
5656   }
5657   return true;
5658 }
5659 
5660 OperandMatchResultTy
5661 AMDGPUAsmParser::parseHwreg(OperandVector &Operands) {
5662   using namespace llvm::AMDGPU::Hwreg;
5663 
5664   int64_t ImmVal = 0;
5665   SMLoc Loc = getLoc();
5666 
5667   if (trySkipId("hwreg", AsmToken::LParen)) {
5668     OperandInfoTy HwReg(ID_UNKNOWN_);
5669     OperandInfoTy Offset(OFFSET_DEFAULT_);
5670     OperandInfoTy Width(WIDTH_DEFAULT_);
5671     if (parseHwregBody(HwReg, Offset, Width) &&
5672         validateHwreg(HwReg, Offset, Width)) {
5673       ImmVal = encodeHwreg(HwReg.Id, Offset.Id, Width.Id);
5674     } else {
5675       return MatchOperand_ParseFail;
5676     }
5677   } else if (parseExpr(ImmVal, "a hwreg macro")) {
5678     if (ImmVal < 0 || !isUInt<16>(ImmVal)) {
5679       Error(Loc, "invalid immediate: only 16-bit values are legal");
5680       return MatchOperand_ParseFail;
5681     }
5682   } else {
5683     return MatchOperand_ParseFail;
5684   }
5685 
5686   Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTyHwreg));
5687   return MatchOperand_Success;
5688 }
5689 
5690 bool AMDGPUOperand::isHwreg() const {
5691   return isImmTy(ImmTyHwreg);
5692 }
5693 
5694 //===----------------------------------------------------------------------===//
5695 // sendmsg
5696 //===----------------------------------------------------------------------===//
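// Illustrative forms accepted by parseSendMsgOp below. The symbolic names are
// assumptions for the example; any message/operation known to getMsgId and
// getMsgOpId, or plain integer codes, may be used:
//   s_sendmsg sendmsg(MSG_INTERRUPT)
//   s_sendmsg sendmsg(MSG_GS, GS_OP_EMIT, 0)
//   s_sendmsg 0x1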
5697 
5698 bool
5699 AMDGPUAsmParser::parseSendMsgBody(OperandInfoTy &Msg,
5700                                   OperandInfoTy &Op,
5701                                   OperandInfoTy &Stream) {
5702   using namespace llvm::AMDGPU::SendMsg;
5703 
5704   Msg.Loc = getLoc();
5705   if (isToken(AsmToken::Identifier) && (Msg.Id = getMsgId(getTokenStr())) >= 0) {
5706     Msg.IsSymbolic = true;
5707     lex(); // skip message name
5708   } else if (!parseExpr(Msg.Id, "a message name")) {
5709     return false;
5710   }
5711 
5712   if (trySkipToken(AsmToken::Comma)) {
5713     Op.IsDefined = true;
5714     Op.Loc = getLoc();
5715     if (isToken(AsmToken::Identifier) &&
5716         (Op.Id = getMsgOpId(Msg.Id, getTokenStr())) >= 0) {
5717       lex(); // skip operation name
5718     } else if (!parseExpr(Op.Id, "an operation name")) {
5719       return false;
5720     }
5721 
5722     if (trySkipToken(AsmToken::Comma)) {
5723       Stream.IsDefined = true;
5724       Stream.Loc = getLoc();
5725       if (!parseExpr(Stream.Id))
5726         return false;
5727     }
5728   }
5729 
5730   return skipToken(AsmToken::RParen, "expected a closing parenthesis");
5731 }
5732 
5733 bool
5734 AMDGPUAsmParser::validateSendMsg(const OperandInfoTy &Msg,
5735                                  const OperandInfoTy &Op,
5736                                  const OperandInfoTy &Stream) {
5737   using namespace llvm::AMDGPU::SendMsg;
5738 
5739   // Validation strictness depends on whether the message is specified
5740   // in a symbolic or in a numeric form. In the latter case
5741   // only the possibility of encoding is checked.
5742   bool Strict = Msg.IsSymbolic;
5743 
5744   if (!isValidMsgId(Msg.Id, getSTI(), Strict)) {
5745     Error(Msg.Loc, "invalid message id");
5746     return false;
5747   }
5748   if (Strict && (msgRequiresOp(Msg.Id) != Op.IsDefined)) {
5749     if (Op.IsDefined) {
5750       Error(Op.Loc, "message does not support operations");
5751     } else {
5752       Error(Msg.Loc, "missing message operation");
5753     }
5754     return false;
5755   }
5756   if (!isValidMsgOp(Msg.Id, Op.Id, Strict)) {
5757     Error(Op.Loc, "invalid operation id");
5758     return false;
5759   }
5760   if (Strict && !msgSupportsStream(Msg.Id, Op.Id) && Stream.IsDefined) {
5761     Error(Stream.Loc, "message operation does not support streams");
5762     return false;
5763   }
5764   if (!isValidMsgStream(Msg.Id, Op.Id, Stream.Id, Strict)) {
5765     Error(Stream.Loc, "invalid message stream id");
5766     return false;
5767   }
5768   return true;
5769 }
5770 
5771 OperandMatchResultTy
5772 AMDGPUAsmParser::parseSendMsgOp(OperandVector &Operands) {
5773   using namespace llvm::AMDGPU::SendMsg;
5774 
5775   int64_t ImmVal = 0;
5776   SMLoc Loc = getLoc();
5777 
5778   if (trySkipId("sendmsg", AsmToken::LParen)) {
5779     OperandInfoTy Msg(ID_UNKNOWN_);
5780     OperandInfoTy Op(OP_NONE_);
5781     OperandInfoTy Stream(STREAM_ID_NONE_);
5782     if (parseSendMsgBody(Msg, Op, Stream) &&
5783         validateSendMsg(Msg, Op, Stream)) {
5784       ImmVal = encodeMsg(Msg.Id, Op.Id, Stream.Id);
5785     } else {
5786       return MatchOperand_ParseFail;
5787     }
5788   } else if (parseExpr(ImmVal, "a sendmsg macro")) {
5789     if (ImmVal < 0 || !isUInt<16>(ImmVal)) {
5790       Error(Loc, "invalid immediate: only 16-bit values are legal");
5791       return MatchOperand_ParseFail;
5792     }
5793   } else {
5794     return MatchOperand_ParseFail;
5795   }
5796 
5797   Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTySendMsg));
5798   return MatchOperand_Success;
5799 }
5800 
5801 bool AMDGPUOperand::isSendMsg() const {
5802   return isImmTy(ImmTySendMsg);
5803 }
5804 
5805 //===----------------------------------------------------------------------===//
5806 // v_interp
5807 //===----------------------------------------------------------------------===//
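// Illustrative operand forms handled below. parseInterpSlot accepts p10, p20
// and p0; parseInterpAttr accepts attr<N>.<chan> with chan in {x, y, z, w} and
// N in [0, 63]. The instruction mnemonics here are only for illustration:
//   v_interp_p1_f32 v0, v1, attr0.x
//   v_interp_mov_f32 v2, p10, attr3.w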
5808 
5809 OperandMatchResultTy AMDGPUAsmParser::parseInterpSlot(OperandVector &Operands) {
5810   StringRef Str;
5811   SMLoc S = getLoc();
5812 
5813   if (!parseId(Str))
5814     return MatchOperand_NoMatch;
5815 
5816   int Slot = StringSwitch<int>(Str)
5817     .Case("p10", 0)
5818     .Case("p20", 1)
5819     .Case("p0", 2)
5820     .Default(-1);
5821 
5822   if (Slot == -1) {
5823     Error(S, "invalid interpolation slot");
5824     return MatchOperand_ParseFail;
5825   }
5826 
5827   Operands.push_back(AMDGPUOperand::CreateImm(this, Slot, S,
5828                                               AMDGPUOperand::ImmTyInterpSlot));
5829   return MatchOperand_Success;
5830 }
5831 
5832 OperandMatchResultTy AMDGPUAsmParser::parseInterpAttr(OperandVector &Operands) {
5833   StringRef Str;
5834   SMLoc S = getLoc();
5835 
5836   if (!parseId(Str))
5837     return MatchOperand_NoMatch;
5838 
5839   if (!Str.startswith("attr")) {
5840     Error(S, "invalid interpolation attribute");
5841     return MatchOperand_ParseFail;
5842   }
5843 
5844   StringRef Chan = Str.take_back(2);
5845   int AttrChan = StringSwitch<int>(Chan)
5846     .Case(".x", 0)
5847     .Case(".y", 1)
5848     .Case(".z", 2)
5849     .Case(".w", 3)
5850     .Default(-1);
5851   if (AttrChan == -1) {
5852     Error(S, "invalid or missing interpolation attribute channel");
5853     return MatchOperand_ParseFail;
5854   }
5855 
5856   Str = Str.drop_back(2).drop_front(4);
5857 
5858   uint8_t Attr;
5859   if (Str.getAsInteger(10, Attr)) {
5860     Error(S, "invalid or missing interpolation attribute number");
5861     return MatchOperand_ParseFail;
5862   }
5863 
5864   if (Attr > 63) {
5865     Error(S, "out of bounds interpolation attribute number");
5866     return MatchOperand_ParseFail;
5867   }
5868 
5869   SMLoc SChan = SMLoc::getFromPointer(Chan.data());
5870 
5871   Operands.push_back(AMDGPUOperand::CreateImm(this, Attr, S,
5872                                               AMDGPUOperand::ImmTyInterpAttr));
5873   Operands.push_back(AMDGPUOperand::CreateImm(this, AttrChan, SChan,
5874                                               AMDGPUOperand::ImmTyAttrChan));
5875   return MatchOperand_Success;
5876 }
5877 
5878 //===----------------------------------------------------------------------===//
5879 // exp
5880 //===----------------------------------------------------------------------===//
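// Illustrative exp targets recognized by parseExpTgtImpl below: null, mrt0-7,
// mrtz, pos0-3 (pos4 on GFX10+), prim (GFX10+) and param0-31. The source
// registers and the done/vm modifiers in the examples are arbitrary:
//   exp mrt0 v0, v1, v2, v3 done vm
//   exp pos0 v4, v5, v6, v7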
5881 
5882 OperandMatchResultTy AMDGPUAsmParser::parseExpTgtImpl(StringRef Str,
5883                                                       uint8_t &Val) {
5884   if (Str == "null") {
5885     Val = Exp::ET_NULL;
5886     return MatchOperand_Success;
5887   }
5888 
5889   if (Str.startswith("mrt")) {
5890     Str = Str.drop_front(3);
5891     if (Str == "z") { // == mrtz
5892       Val = Exp::ET_MRTZ;
5893       return MatchOperand_Success;
5894     }
5895 
5896     if (Str.getAsInteger(10, Val))
5897       return MatchOperand_ParseFail;
5898 
5899     if (Val > Exp::ET_MRT7)
5900       return MatchOperand_ParseFail;
5901 
5902     return MatchOperand_Success;
5903   }
5904 
5905   if (Str.startswith("pos")) {
5906     Str = Str.drop_front(3);
5907     if (Str.getAsInteger(10, Val))
5908       return MatchOperand_ParseFail;
5909 
5910     if (Val > (isGFX10Plus() ? 4 : 3))
5911       return MatchOperand_ParseFail;
5912 
5913     Val += Exp::ET_POS0;
5914     return MatchOperand_Success;
5915   }
5916 
5917   if (isGFX10Plus() && Str == "prim") {
5918     Val = Exp::ET_PRIM;
5919     return MatchOperand_Success;
5920   }
5921 
5922   if (Str.startswith("param")) {
5923     Str = Str.drop_front(5);
5924     if (Str.getAsInteger(10, Val))
5925       return MatchOperand_ParseFail;
5926 
5927     if (Val >= 32)
5928       return MatchOperand_ParseFail;
5929 
5930     Val += Exp::ET_PARAM0;
5931     return MatchOperand_Success;
5932   }
5933 
5934   return MatchOperand_ParseFail;
5935 }
5936 
5937 OperandMatchResultTy AMDGPUAsmParser::parseExpTgt(OperandVector &Operands) {
5938   StringRef Str;
5939   SMLoc S = getLoc();
5940 
5941   if (!parseId(Str))
5942     return MatchOperand_NoMatch;
5943 
5944   uint8_t Val;
5945   auto Res = parseExpTgtImpl(Str, Val);
5946   if (Res != MatchOperand_Success) {
5947     Error(S, "invalid exp target");
5948     return Res;
5949   }
5950 
5951   Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S,
5952                                               AMDGPUOperand::ImmTyExpTgt));
5953   return MatchOperand_Success;
5954 }
5955 
5956 //===----------------------------------------------------------------------===//
5957 // parser helpers
5958 //===----------------------------------------------------------------------===//
5959 
5960 bool
5961 AMDGPUAsmParser::isId(const AsmToken &Token, const StringRef Id) const {
5962   return Token.is(AsmToken::Identifier) && Token.getString() == Id;
5963 }
5964 
5965 bool
5966 AMDGPUAsmParser::isId(const StringRef Id) const {
5967   return isId(getToken(), Id);
5968 }
5969 
5970 bool
5971 AMDGPUAsmParser::isToken(const AsmToken::TokenKind Kind) const {
5972   return getTokenKind() == Kind;
5973 }
5974 
5975 bool
5976 AMDGPUAsmParser::trySkipId(const StringRef Id) {
5977   if (isId(Id)) {
5978     lex();
5979     return true;
5980   }
5981   return false;
5982 }
5983 
5984 bool
5985 AMDGPUAsmParser::trySkipId(const StringRef Id, const AsmToken::TokenKind Kind) {
5986   if (isId(Id) && peekToken().is(Kind)) {
5987     lex();
5988     lex();
5989     return true;
5990   }
5991   return false;
5992 }
5993 
5994 bool
5995 AMDGPUAsmParser::trySkipToken(const AsmToken::TokenKind Kind) {
5996   if (isToken(Kind)) {
5997     lex();
5998     return true;
5999   }
6000   return false;
6001 }
6002 
6003 bool
6004 AMDGPUAsmParser::skipToken(const AsmToken::TokenKind Kind,
6005                            const StringRef ErrMsg) {
6006   if (!trySkipToken(Kind)) {
6007     Error(getLoc(), ErrMsg);
6008     return false;
6009   }
6010   return true;
6011 }
6012 
6013 bool
6014 AMDGPUAsmParser::parseExpr(int64_t &Imm, StringRef Expected) {
6015   SMLoc S = getLoc();
6016 
6017   const MCExpr *Expr;
6018   if (Parser.parseExpression(Expr))
6019     return false;
6020 
6021   if (Expr->evaluateAsAbsolute(Imm))
6022     return true;
6023 
6024   if (Expected.empty()) {
6025     Error(S, "expected absolute expression");
6026   } else {
6027     Error(S, Twine("expected ", Expected) +
6028              Twine(" or an absolute expression"));
6029   }
6030   return false;
6031 }
6032 
6033 bool
6034 AMDGPUAsmParser::parseExpr(OperandVector &Operands) {
6035   SMLoc S = getLoc();
6036 
6037   const MCExpr *Expr;
6038   if (Parser.parseExpression(Expr))
6039     return false;
6040 
6041   int64_t IntVal;
6042   if (Expr->evaluateAsAbsolute(IntVal)) {
6043     Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S));
6044   } else {
6045     Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S));
6046   }
6047   return true;
6048 }
6049 
6050 bool
6051 AMDGPUAsmParser::parseString(StringRef &Val, const StringRef ErrMsg) {
6052   if (isToken(AsmToken::String)) {
6053     Val = getToken().getStringContents();
6054     lex();
6055     return true;
6056   } else {
6057     Error(getLoc(), ErrMsg);
6058     return false;
6059   }
6060 }
6061 
6062 bool
6063 AMDGPUAsmParser::parseId(StringRef &Val, const StringRef ErrMsg) {
6064   if (isToken(AsmToken::Identifier)) {
6065     Val = getTokenStr();
6066     lex();
6067     return true;
6068   } else {
6069     if (!ErrMsg.empty())
6070       Error(getLoc(), ErrMsg);
6071     return false;
6072   }
6073 }
6074 
6075 AsmToken
6076 AMDGPUAsmParser::getToken() const {
6077   return Parser.getTok();
6078 }
6079 
6080 AsmToken
6081 AMDGPUAsmParser::peekToken() {
6082   return isToken(AsmToken::EndOfStatement) ? getToken() : getLexer().peekTok();
6083 }
6084 
6085 void
6086 AMDGPUAsmParser::peekTokens(MutableArrayRef<AsmToken> Tokens) {
6087   auto TokCount = getLexer().peekTokens(Tokens);
6088 
6089   for (auto Idx = TokCount; Idx < Tokens.size(); ++Idx)
6090     Tokens[Idx] = AsmToken(AsmToken::Error, "");
6091 }
6092 
6093 AsmToken::TokenKind
6094 AMDGPUAsmParser::getTokenKind() const {
6095   return getLexer().getKind();
6096 }
6097 
6098 SMLoc
6099 AMDGPUAsmParser::getLoc() const {
6100   return getToken().getLoc();
6101 }
6102 
6103 StringRef
6104 AMDGPUAsmParser::getTokenStr() const {
6105   return getToken().getString();
6106 }
6107 
6108 void
6109 AMDGPUAsmParser::lex() {
6110   Parser.Lex();
6111 }
6112 
6113 SMLoc
6114 AMDGPUAsmParser::getOperandLoc(std::function<bool(const AMDGPUOperand&)> Test,
6115                                const OperandVector &Operands) const {
6116   for (unsigned i = Operands.size() - 1; i > 0; --i) {
6117     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
6118     if (Test(Op))
6119       return Op.getStartLoc();
6120   }
6121   return ((AMDGPUOperand &)*Operands[0]).getStartLoc();
6122 }
6123 
6124 SMLoc
6125 AMDGPUAsmParser::getImmLoc(AMDGPUOperand::ImmTy Type,
6126                            const OperandVector &Operands) const {
6127   auto Test = [=](const AMDGPUOperand& Op) { return Op.isImmTy(Type); };
6128   return getOperandLoc(Test, Operands);
6129 }
6130 
6131 SMLoc
6132 AMDGPUAsmParser::getRegLoc(unsigned Reg,
6133                            const OperandVector &Operands) const {
6134   auto Test = [=](const AMDGPUOperand& Op) {
6135     return Op.isRegKind() && Op.getReg() == Reg;
6136   };
6137   return getOperandLoc(Test, Operands);
6138 }
6139 
6140 SMLoc
6141 AMDGPUAsmParser::getLitLoc(const OperandVector &Operands) const {
6142   auto Test = [](const AMDGPUOperand& Op) {
6143     return Op.IsImmKindLiteral() || Op.isExpr();
6144   };
6145   return getOperandLoc(Test, Operands);
6146 }
6147 
6148 SMLoc
6149 AMDGPUAsmParser::getConstLoc(const OperandVector &Operands) const {
6150   auto Test = [](const AMDGPUOperand& Op) {
6151     return Op.isImmKindConst();
6152   };
6153   return getOperandLoc(Test, Operands);
6154 }
6155 
6156 //===----------------------------------------------------------------------===//
6157 // swizzle
6158 //===----------------------------------------------------------------------===//
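// Illustrative ds_swizzle_b32 offsets built from the swizzle() macro parsed
// below; the mode names come from Swizzle::IdSymbolic and the register
// operands are arbitrary:
//   ds_swizzle_b32 v0, v1 offset:swizzle(QUAD_PERM, 0, 1, 2, 3)
//   ds_swizzle_b32 v0, v1 offset:swizzle(BITMASK_PERM, "01pi0")
//   ds_swizzle_b32 v0, v1 offset:swizzle(BROADCAST, 8, 0)
//   ds_swizzle_b32 v0, v1 offset:swizzle(SWAP, 4)
//   ds_swizzle_b32 v0, v1 offset:swizzle(REVERSE, 8)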
6159 
6160 LLVM_READNONE
6161 static unsigned
6162 encodeBitmaskPerm(const unsigned AndMask,
6163                   const unsigned OrMask,
6164                   const unsigned XorMask) {
6165   using namespace llvm::AMDGPU::Swizzle;
6166 
6167   return BITMASK_PERM_ENC |
6168          (AndMask << BITMASK_AND_SHIFT) |
6169          (OrMask  << BITMASK_OR_SHIFT)  |
6170          (XorMask << BITMASK_XOR_SHIFT);
6171 }
6172 
6173 bool
6174 AMDGPUAsmParser::parseSwizzleOperand(int64_t &Op,
6175                                      const unsigned MinVal,
6176                                      const unsigned MaxVal,
6177                                      const StringRef ErrMsg,
6178                                      SMLoc &Loc) {
6179   if (!skipToken(AsmToken::Comma, "expected a comma")) {
6180     return false;
6181   }
6182   Loc = getLoc();
6183   if (!parseExpr(Op)) {
6184     return false;
6185   }
6186   if (Op < MinVal || Op > MaxVal) {
6187     Error(Loc, ErrMsg);
6188     return false;
6189   }
6190 
6191   return true;
6192 }
6193 
6194 bool
6195 AMDGPUAsmParser::parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
6196                                       const unsigned MinVal,
6197                                       const unsigned MaxVal,
6198                                       const StringRef ErrMsg) {
6199   SMLoc Loc;
6200   for (unsigned i = 0; i < OpNum; ++i) {
6201     if (!parseSwizzleOperand(Op[i], MinVal, MaxVal, ErrMsg, Loc))
6202       return false;
6203   }
6204 
6205   return true;
6206 }
6207 
6208 bool
6209 AMDGPUAsmParser::parseSwizzleQuadPerm(int64_t &Imm) {
6210   using namespace llvm::AMDGPU::Swizzle;
6211 
6212   int64_t Lane[LANE_NUM];
6213   if (parseSwizzleOperands(LANE_NUM, Lane, 0, LANE_MAX,
6214                            "expected a 2-bit lane id")) {
6215     Imm = QUAD_PERM_ENC;
6216     for (unsigned I = 0; I < LANE_NUM; ++I) {
6217       Imm |= Lane[I] << (LANE_SHIFT * I);
6218     }
6219     return true;
6220   }
6221   return false;
6222 }
6223 
6224 bool
6225 AMDGPUAsmParser::parseSwizzleBroadcast(int64_t &Imm) {
6226   using namespace llvm::AMDGPU::Swizzle;
6227 
6228   SMLoc Loc;
6229   int64_t GroupSize;
6230   int64_t LaneIdx;
6231 
6232   if (!parseSwizzleOperand(GroupSize,
6233                            2, 32,
6234                            "group size must be in the interval [2,32]",
6235                            Loc)) {
6236     return false;
6237   }
6238   if (!isPowerOf2_64(GroupSize)) {
6239     Error(Loc, "group size must be a power of two");
6240     return false;
6241   }
6242   if (parseSwizzleOperand(LaneIdx,
6243                           0, GroupSize - 1,
6244                           "lane id must be in the interval [0,group size - 1]",
6245                           Loc)) {
6246     Imm = encodeBitmaskPerm(BITMASK_MAX - GroupSize + 1, LaneIdx, 0);
6247     return true;
6248   }
6249   return false;
6250 }
6251 
6252 bool
6253 AMDGPUAsmParser::parseSwizzleReverse(int64_t &Imm) {
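  // Illustrative example: offset:swizzle(REVERSE, 8)
  // reverses the lane order within each group of 8 lanes.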
6254   using namespace llvm::AMDGPU::Swizzle;
6255 
6256   SMLoc Loc;
6257   int64_t GroupSize;
6258 
6259   if (!parseSwizzleOperand(GroupSize,
6260                            2, 32,
6261                            "group size must be in the interval [2,32]",
6262                            Loc)) {
6263     return false;
6264   }
6265   if (!isPowerOf2_64(GroupSize)) {
6266     Error(Loc, "group size must be a power of two");
6267     return false;
6268   }
6269 
6270   Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize - 1);
6271   return true;
6272 }
6273 
6274 bool
6275 AMDGPUAsmParser::parseSwizzleSwap(int64_t &Imm) {
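  // Illustrative example: offset:swizzle(SWAP, 4)
  // swaps neighboring groups of 4 lanes.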
6276   using namespace llvm::AMDGPU::Swizzle;
6277 
6278   SMLoc Loc;
6279   int64_t GroupSize;
6280 
6281   if (!parseSwizzleOperand(GroupSize,
6282                            1, 16,
6283                            "group size must be in the interval [1,16]",
6284                            Loc)) {
6285     return false;
6286   }
6287   if (!isPowerOf2_64(GroupSize)) {
6288     Error(Loc, "group size must be a power of two");
6289     return false;
6290   }
6291 
6292   Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize);
6293   return true;
6294 }
6295 
6296 bool
6297 AMDGPUAsmParser::parseSwizzleBitmaskPerm(int64_t &Imm) {
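  // Illustrative example: offset:swizzle(BITMASK_PERM, "01pi0").
  // The 5-character mask describes the lane id bits, MSB first:
  // '0'/'1' force the bit, 'p' preserves it, 'i' inverts it.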
6298   using namespace llvm::AMDGPU::Swizzle;
6299 
6300   if (!skipToken(AsmToken::Comma, "expected a comma")) {
6301     return false;
6302   }
6303 
6304   StringRef Ctl;
6305   SMLoc StrLoc = getLoc();
6306   if (!parseString(Ctl)) {
6307     return false;
6308   }
6309   if (Ctl.size() != BITMASK_WIDTH) {
6310     Error(StrLoc, "expected a 5-character mask");
6311     return false;
6312   }
6313 
6314   unsigned AndMask = 0;
6315   unsigned OrMask = 0;
6316   unsigned XorMask = 0;
6317 
6318   for (size_t i = 0; i < Ctl.size(); ++i) {
6319     unsigned Mask = 1 << (BITMASK_WIDTH - 1 - i);
6320     switch(Ctl[i]) {
6321     default:
6322       Error(StrLoc, "invalid mask");
6323       return false;
6324     case '0':
6325       break;
6326     case '1':
6327       OrMask |= Mask;
6328       break;
6329     case 'p':
6330       AndMask |= Mask;
6331       break;
6332     case 'i':
6333       AndMask |= Mask;
6334       XorMask |= Mask;
6335       break;
6336     }
6337   }
6338 
6339   Imm = encodeBitmaskPerm(AndMask, OrMask, XorMask);
6340   return true;
6341 }
6342 
6343 bool
6344 AMDGPUAsmParser::parseSwizzleOffset(int64_t &Imm) {
6345 
6346   SMLoc OffsetLoc = getLoc();
6347 
6348   if (!parseExpr(Imm, "a swizzle macro")) {
6349     return false;
6350   }
6351   if (!isUInt<16>(Imm)) {
6352     Error(OffsetLoc, "expected a 16-bit offset");
6353     return false;
6354   }
6355   return true;
6356 }
6357 
6358 bool
6359 AMDGPUAsmParser::parseSwizzleMacro(int64_t &Imm) {
6360   using namespace llvm::AMDGPU::Swizzle;
6361 
  if (skipToken(AsmToken::LParen, "expected a left parenthesis")) {
6363 
6364     SMLoc ModeLoc = getLoc();
6365     bool Ok = false;
6366 
6367     if (trySkipId(IdSymbolic[ID_QUAD_PERM])) {
6368       Ok = parseSwizzleQuadPerm(Imm);
6369     } else if (trySkipId(IdSymbolic[ID_BITMASK_PERM])) {
6370       Ok = parseSwizzleBitmaskPerm(Imm);
6371     } else if (trySkipId(IdSymbolic[ID_BROADCAST])) {
6372       Ok = parseSwizzleBroadcast(Imm);
6373     } else if (trySkipId(IdSymbolic[ID_SWAP])) {
6374       Ok = parseSwizzleSwap(Imm);
6375     } else if (trySkipId(IdSymbolic[ID_REVERSE])) {
6376       Ok = parseSwizzleReverse(Imm);
6377     } else {
6378       Error(ModeLoc, "expected a swizzle mode");
6379     }
6380 
    return Ok && skipToken(AsmToken::RParen, "expected a closing parenthesis");
6382   }
6383 
6384   return false;
6385 }
6386 
6387 OperandMatchResultTy
6388 AMDGPUAsmParser::parseSwizzleOp(OperandVector &Operands) {
6389   SMLoc S = getLoc();
6390   int64_t Imm = 0;
6391 
6392   if (trySkipId("offset")) {
6393 
6394     bool Ok = false;
6395     if (skipToken(AsmToken::Colon, "expected a colon")) {
6396       if (trySkipId("swizzle")) {
6397         Ok = parseSwizzleMacro(Imm);
6398       } else {
6399         Ok = parseSwizzleOffset(Imm);
6400       }
6401     }
6402 
6403     Operands.push_back(AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTySwizzle));
6404 
6405     return Ok? MatchOperand_Success : MatchOperand_ParseFail;
6406   } else {
6407     // Swizzle "offset" operand is optional.
6408     // If it is omitted, try parsing other optional operands.
6409     return parseOptionalOpr(Operands);
6410   }
6411 }
6412 
6413 bool
6414 AMDGPUOperand::isSwizzle() const {
6415   return isImmTy(ImmTySwizzle);
6416 }
6417 
6418 //===----------------------------------------------------------------------===//
6419 // VGPR Index Mode
6420 //===----------------------------------------------------------------------===//
6421 
6422 int64_t AMDGPUAsmParser::parseGPRIdxMacro() {
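  // Parses the body of a gpr_idx macro, e.g. (illustrative):
  //   s_set_gpr_idx_on s0, gpr_idx(SRC0,SRC1,DST)
  // Mode names come from VGPRIndexMode::IdSymbolic.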
6423 
6424   using namespace llvm::AMDGPU::VGPRIndexMode;
6425 
6426   if (trySkipToken(AsmToken::RParen)) {
6427     return OFF;
6428   }
6429 
6430   int64_t Imm = 0;
6431 
6432   while (true) {
6433     unsigned Mode = 0;
6434     SMLoc S = getLoc();
6435 
6436     for (unsigned ModeId = ID_MIN; ModeId <= ID_MAX; ++ModeId) {
6437       if (trySkipId(IdSymbolic[ModeId])) {
6438         Mode = 1 << ModeId;
6439         break;
6440       }
6441     }
6442 
6443     if (Mode == 0) {
6444       Error(S, (Imm == 0)?
6445                "expected a VGPR index mode or a closing parenthesis" :
6446                "expected a VGPR index mode");
6447       return UNDEF;
6448     }
6449 
6450     if (Imm & Mode) {
6451       Error(S, "duplicate VGPR index mode");
6452       return UNDEF;
6453     }
6454     Imm |= Mode;
6455 
6456     if (trySkipToken(AsmToken::RParen))
6457       break;
6458     if (!skipToken(AsmToken::Comma,
6459                    "expected a comma or a closing parenthesis"))
6460       return UNDEF;
6461   }
6462 
6463   return Imm;
6464 }
6465 
6466 OperandMatchResultTy
6467 AMDGPUAsmParser::parseGPRIdxMode(OperandVector &Operands) {
6468 
6469   using namespace llvm::AMDGPU::VGPRIndexMode;
6470 
6471   int64_t Imm = 0;
6472   SMLoc S = getLoc();
6473 
6474   if (trySkipId("gpr_idx", AsmToken::LParen)) {
6475     Imm = parseGPRIdxMacro();
6476     if (Imm == UNDEF)
6477       return MatchOperand_ParseFail;
6478   } else {
6479     if (getParser().parseAbsoluteExpression(Imm))
6480       return MatchOperand_ParseFail;
6481     if (Imm < 0 || !isUInt<4>(Imm)) {
6482       Error(S, "invalid immediate: only 4-bit values are legal");
6483       return MatchOperand_ParseFail;
6484     }
6485   }
6486 
6487   Operands.push_back(
6488       AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyGprIdxMode));
6489   return MatchOperand_Success;
6490 }
6491 
6492 bool AMDGPUOperand::isGPRIdxMode() const {
6493   return isImmTy(ImmTyGprIdxMode);
6494 }
6495 
6496 //===----------------------------------------------------------------------===//
6497 // sopp branch targets
6498 //===----------------------------------------------------------------------===//
6499 
6500 OperandMatchResultTy
6501 AMDGPUAsmParser::parseSOppBrTarget(OperandVector &Operands) {
6502 
6503   // Make sure we are not parsing something
6504   // that looks like a label or an expression but is not.
6505   // This will improve error messages.
6506   if (isRegister() || isModifier())
6507     return MatchOperand_NoMatch;
6508 
6509   if (!parseExpr(Operands))
6510     return MatchOperand_ParseFail;
6511 
6512   AMDGPUOperand &Opr = ((AMDGPUOperand &)*Operands[Operands.size() - 1]);
6513   assert(Opr.isImm() || Opr.isExpr());
6514   SMLoc Loc = Opr.getStartLoc();
6515 
6516   // Currently we do not support arbitrary expressions as branch targets.
6517   // Only labels and absolute expressions are accepted.
6518   if (Opr.isExpr() && !Opr.isSymbolRefExpr()) {
6519     Error(Loc, "expected an absolute expression or a label");
6520   } else if (Opr.isImm() && !Opr.isS16Imm()) {
6521     Error(Loc, "expected a 16-bit signed jump offset");
6522   }
6523 
6524   return MatchOperand_Success;
6525 }
6526 
6527 //===----------------------------------------------------------------------===//
6528 // Boolean holding registers
6529 //===----------------------------------------------------------------------===//
6530 
6531 OperandMatchResultTy
6532 AMDGPUAsmParser::parseBoolReg(OperandVector &Operands) {
6533   return parseReg(Operands);
6534 }
6535 
6536 //===----------------------------------------------------------------------===//
6537 // mubuf
6538 //===----------------------------------------------------------------------===//
6539 
6540 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultDLC() const {
6541   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDLC);
6542 }
6543 
6544 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultGLC() const {
6545   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyGLC);
6546 }
6547 
6548 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultGLC_1() const {
6549   return AMDGPUOperand::CreateImm(this, -1, SMLoc(), AMDGPUOperand::ImmTyGLC);
6550 }
6551 
6552 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSLC() const {
6553   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTySLC);
6554 }
6555 
6556 void AMDGPUAsmParser::cvtMubufImpl(MCInst &Inst,
6557                                const OperandVector &Operands,
6558                                bool IsAtomic,
6559                                bool IsAtomicReturn,
6560                                bool IsLds) {
6561   bool IsLdsOpcode = IsLds;
6562   bool HasLdsModifier = false;
6563   OptionalImmIndexMap OptionalIdx;
6564   assert(IsAtomicReturn ? IsAtomic : true);
6565   unsigned FirstOperandIdx = 1;
6566 
6567   for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) {
6568     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
6569 
6570     // Add the register arguments
6571     if (Op.isReg()) {
6572       Op.addRegOperands(Inst, 1);
6573       // Insert a tied src for atomic return dst.
6574       // This cannot be postponed as subsequent calls to
6575       // addImmOperands rely on correct number of MC operands.
6576       if (IsAtomicReturn && i == FirstOperandIdx)
6577         Op.addRegOperands(Inst, 1);
6578       continue;
6579     }
6580 
6581     // Handle the case where soffset is an immediate
6582     if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
6583       Op.addImmOperands(Inst, 1);
6584       continue;
6585     }
6586 
6587     HasLdsModifier |= Op.isLDS();
6588 
6589     // Handle tokens like 'offen' which are sometimes hard-coded into the
6590     // asm string.  There are no MCInst operands for these.
6591     if (Op.isToken()) {
6592       continue;
6593     }
6594     assert(Op.isImm());
6595 
6596     // Handle optional arguments
6597     OptionalIdx[Op.getImmTy()] = i;
6598   }
6599 
  // This is a workaround for an llvm quirk which may result in an
  // incorrect instruction selection. Lds and non-lds versions of
  // MUBUF instructions are identical except that lds versions
  // have a mandatory 'lds' modifier. However, this modifier follows
  // optional modifiers, and the llvm asm matcher regards this 'lds'
  // modifier as an optional one. As a result, an lds version
  // of an opcode may be selected even if it has no 'lds' modifier.
6607   if (IsLdsOpcode && !HasLdsModifier) {
6608     int NoLdsOpcode = AMDGPU::getMUBUFNoLdsInst(Inst.getOpcode());
6609     if (NoLdsOpcode != -1) { // Got lds version - correct it.
6610       Inst.setOpcode(NoLdsOpcode);
6611       IsLdsOpcode = false;
6612     }
6613   }
6614 
6615   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset);
6616   if (!IsAtomic || IsAtomicReturn) {
6617     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC,
6618                           IsAtomicReturn ? -1 : 0);
6619   }
6620   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC);
6621 
6622   if (!IsLdsOpcode) { // tfe is not legal with lds opcodes
6623     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
6624   }
6625 
6626   if (isGFX10Plus())
6627     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDLC);
6628 }
6629 
6630 void AMDGPUAsmParser::cvtMtbuf(MCInst &Inst, const OperandVector &Operands) {
6631   OptionalImmIndexMap OptionalIdx;
6632 
6633   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
6634     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
6635 
6636     // Add the register arguments
6637     if (Op.isReg()) {
6638       Op.addRegOperands(Inst, 1);
6639       continue;
6640     }
6641 
6642     // Handle the case where soffset is an immediate
6643     if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
6644       Op.addImmOperands(Inst, 1);
6645       continue;
6646     }
6647 
6648     // Handle tokens like 'offen' which are sometimes hard-coded into the
6649     // asm string.  There are no MCInst operands for these.
6650     if (Op.isToken()) {
6651       continue;
6652     }
6653     assert(Op.isImm());
6654 
6655     // Handle optional arguments
6656     OptionalIdx[Op.getImmTy()] = i;
6657   }
6658 
6659   addOptionalImmOperand(Inst, Operands, OptionalIdx,
6660                         AMDGPUOperand::ImmTyOffset);
6661   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyFORMAT);
6662   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC);
6663   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC);
6664   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
6665 
6666   if (isGFX10Plus())
6667     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDLC);
6668 }
6669 
6670 //===----------------------------------------------------------------------===//
6671 // mimg
6672 //===----------------------------------------------------------------------===//
6673 
6674 void AMDGPUAsmParser::cvtMIMG(MCInst &Inst, const OperandVector &Operands,
6675                               bool IsAtomic) {
6676   unsigned I = 1;
6677   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
6678   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
6679     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
6680   }
6681 
6682   if (IsAtomic) {
6683     // Add src, same as dst
6684     assert(Desc.getNumDefs() == 1);
6685     ((AMDGPUOperand &)*Operands[I - 1]).addRegOperands(Inst, 1);
6686   }
6687 
6688   OptionalImmIndexMap OptionalIdx;
6689 
6690   for (unsigned E = Operands.size(); I != E; ++I) {
6691     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
6692 
6693     // Add the register arguments
6694     if (Op.isReg()) {
6695       Op.addRegOperands(Inst, 1);
6696     } else if (Op.isImmModifier()) {
6697       OptionalIdx[Op.getImmTy()] = I;
6698     } else if (!Op.isToken()) {
6699       llvm_unreachable("unexpected operand type");
6700     }
6701   }
6702 
6703   bool IsGFX10Plus = isGFX10Plus();
6704 
6705   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDMask);
6706   if (IsGFX10Plus)
6707     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDim, -1);
6708   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyUNorm);
6709   if (IsGFX10Plus)
6710     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDLC);
6711   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC);
6712   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC);
6713   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyR128A16);
6714   if (IsGFX10Plus)
6715     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyA16);
6716   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
6717   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyLWE);
6718   if (!IsGFX10Plus)
6719     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDA);
6720   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyD16);
6721 }
6722 
6723 void AMDGPUAsmParser::cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands) {
6724   cvtMIMG(Inst, Operands, true);
6725 }
6726 
6727 void AMDGPUAsmParser::cvtIntersectRay(MCInst &Inst,
6728                                       const OperandVector &Operands) {
6729   for (unsigned I = 1; I < Operands.size(); ++I) {
6730     auto &Operand = (AMDGPUOperand &)*Operands[I];
6731     if (Operand.isReg())
6732       Operand.addRegOperands(Inst, 1);
6733   }
6734 
6735   Inst.addOperand(MCOperand::createImm(1)); // a16
6736 }
6737 
6738 //===----------------------------------------------------------------------===//
6739 // smrd
6740 //===----------------------------------------------------------------------===//
6741 
6742 bool AMDGPUOperand::isSMRDOffset8() const {
6743   return isImm() && isUInt<8>(getImm());
6744 }
6745 
6746 bool AMDGPUOperand::isSMEMOffset() const {
6747   return isImm(); // Offset range is checked later by validator.
6748 }
6749 
6750 bool AMDGPUOperand::isSMRDLiteralOffset() const {
  // 32-bit literals are only supported on CI and we only want to use them
  // when the offset does not fit in 8 bits.
6753   return isImm() && !isUInt<8>(getImm()) && isUInt<32>(getImm());
6754 }
6755 
6756 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDOffset8() const {
6757   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
6758 }
6759 
6760 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMEMOffset() const {
6761   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
6762 }
6763 
6764 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDLiteralOffset() const {
6765   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
6766 }
6767 
6768 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultFlatOffset() const {
6769   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
6770 }
6771 
6772 //===----------------------------------------------------------------------===//
6773 // vop3
6774 //===----------------------------------------------------------------------===//
6775 
6776 static bool ConvertOmodMul(int64_t &Mul) {
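  // Converts the value of a 'mul:N' modifier into the omod operand encoding,
  // assumed to be 0 = none, 1 = *2, 2 = *4 (3 = /2 is handled by
  // ConvertOmodDiv below).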
6777   if (Mul != 1 && Mul != 2 && Mul != 4)
6778     return false;
6779 
6780   Mul >>= 1;
6781   return true;
6782 }
6783 
6784 static bool ConvertOmodDiv(int64_t &Div) {
6785   if (Div == 1) {
6786     Div = 0;
6787     return true;
6788   }
6789 
6790   if (Div == 2) {
6791     Div = 3;
6792     return true;
6793   }
6794 
6795   return false;
6796 }
6797 
6798 static bool ConvertBoundCtrl(int64_t &BoundCtrl) {
6799   if (BoundCtrl == 0) {
6800     BoundCtrl = 1;
6801     return true;
6802   }
6803 
6804   if (BoundCtrl == -1) {
6805     BoundCtrl = 0;
6806     return true;
6807   }
6808 
6809   return false;
6810 }
6811 
6812 // Note: the order in this table matches the order of operands in AsmString.
6813 static const OptionalOperand AMDGPUOptionalOperandTable[] = {
6814   {"offen",   AMDGPUOperand::ImmTyOffen, true, nullptr},
6815   {"idxen",   AMDGPUOperand::ImmTyIdxen, true, nullptr},
6816   {"addr64",  AMDGPUOperand::ImmTyAddr64, true, nullptr},
6817   {"offset0", AMDGPUOperand::ImmTyOffset0, false, nullptr},
6818   {"offset1", AMDGPUOperand::ImmTyOffset1, false, nullptr},
6819   {"gds",     AMDGPUOperand::ImmTyGDS, true, nullptr},
6820   {"lds",     AMDGPUOperand::ImmTyLDS, true, nullptr},
6821   {"offset",  AMDGPUOperand::ImmTyOffset, false, nullptr},
6822   {"inst_offset", AMDGPUOperand::ImmTyInstOffset, false, nullptr},
6823   {"dlc",     AMDGPUOperand::ImmTyDLC, true, nullptr},
6824   {"glc",     AMDGPUOperand::ImmTyGLC, true, nullptr},
6825   {"slc",     AMDGPUOperand::ImmTySLC, true, nullptr},
6826   {"swz",     AMDGPUOperand::ImmTySWZ, true, nullptr},
6827   {"tfe",     AMDGPUOperand::ImmTyTFE, true, nullptr},
6828   {"d16",     AMDGPUOperand::ImmTyD16, true, nullptr},
6829   {"high",    AMDGPUOperand::ImmTyHigh, true, nullptr},
6830   {"clamp",   AMDGPUOperand::ImmTyClampSI, true, nullptr},
6831   {"omod",    AMDGPUOperand::ImmTyOModSI, false, ConvertOmodMul},
6832   {"unorm",   AMDGPUOperand::ImmTyUNorm, true, nullptr},
6833   {"da",      AMDGPUOperand::ImmTyDA,    true, nullptr},
6834   {"r128",    AMDGPUOperand::ImmTyR128A16,  true, nullptr},
6835   {"a16",     AMDGPUOperand::ImmTyA16,  true, nullptr},
6836   {"lwe",     AMDGPUOperand::ImmTyLWE,   true, nullptr},
6837   {"d16",     AMDGPUOperand::ImmTyD16,   true, nullptr},
6838   {"dmask",   AMDGPUOperand::ImmTyDMask, false, nullptr},
6839   {"dim",     AMDGPUOperand::ImmTyDim,   false, nullptr},
6840   {"row_mask",   AMDGPUOperand::ImmTyDppRowMask, false, nullptr},
6841   {"bank_mask",  AMDGPUOperand::ImmTyDppBankMask, false, nullptr},
6842   {"bound_ctrl", AMDGPUOperand::ImmTyDppBoundCtrl, false, ConvertBoundCtrl},
6843   {"fi",         AMDGPUOperand::ImmTyDppFi, false, nullptr},
6844   {"dst_sel",    AMDGPUOperand::ImmTySdwaDstSel, false, nullptr},
6845   {"src0_sel",   AMDGPUOperand::ImmTySdwaSrc0Sel, false, nullptr},
6846   {"src1_sel",   AMDGPUOperand::ImmTySdwaSrc1Sel, false, nullptr},
6847   {"dst_unused", AMDGPUOperand::ImmTySdwaDstUnused, false, nullptr},
6848   {"compr", AMDGPUOperand::ImmTyExpCompr, true, nullptr },
6849   {"vm", AMDGPUOperand::ImmTyExpVM, true, nullptr},
6850   {"op_sel", AMDGPUOperand::ImmTyOpSel, false, nullptr},
6851   {"op_sel_hi", AMDGPUOperand::ImmTyOpSelHi, false, nullptr},
6852   {"neg_lo", AMDGPUOperand::ImmTyNegLo, false, nullptr},
6853   {"neg_hi", AMDGPUOperand::ImmTyNegHi, false, nullptr},
6854   {"blgp", AMDGPUOperand::ImmTyBLGP, false, nullptr},
6855   {"cbsz", AMDGPUOperand::ImmTyCBSZ, false, nullptr},
6856   {"abid", AMDGPUOperand::ImmTyABID, false, nullptr}
6857 };
6858 
6859 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOperand(OperandVector &Operands) {
6860 
6861   OperandMatchResultTy res = parseOptionalOpr(Operands);
6862 
  // This is a hack to enable hardcoded mandatory operands which follow
  // optional operands.
  //
  // The current design assumes that all operands after the first optional
  // operand are also optional. However, the implementation of some
  // instructions violates this rule (see e.g. flat/global atomics, which have
  // hardcoded 'glc' operands).
  //
  // To alleviate this problem, we have to (implicitly) parse extra operands
  // to make sure the autogenerated parser of custom operands never hits
  // hardcoded mandatory operands.
6873 
6874   for (unsigned i = 0; i < MAX_OPR_LOOKAHEAD; ++i) {
6875     if (res != MatchOperand_Success ||
6876         isToken(AsmToken::EndOfStatement))
6877       break;
6878 
6879     trySkipToken(AsmToken::Comma);
6880     res = parseOptionalOpr(Operands);
6881   }
6882 
6883   return res;
6884 }
6885 
6886 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOpr(OperandVector &Operands) {
6887   OperandMatchResultTy res;
6888   for (const OptionalOperand &Op : AMDGPUOptionalOperandTable) {
6889     // try to parse any optional operand here
6890     if (Op.IsBit) {
6891       res = parseNamedBit(Op.Name, Operands, Op.Type);
6892     } else if (Op.Type == AMDGPUOperand::ImmTyOModSI) {
6893       res = parseOModOperand(Operands);
6894     } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstSel ||
6895                Op.Type == AMDGPUOperand::ImmTySdwaSrc0Sel ||
6896                Op.Type == AMDGPUOperand::ImmTySdwaSrc1Sel) {
6897       res = parseSDWASel(Operands, Op.Name, Op.Type);
6898     } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstUnused) {
6899       res = parseSDWADstUnused(Operands);
6900     } else if (Op.Type == AMDGPUOperand::ImmTyOpSel ||
6901                Op.Type == AMDGPUOperand::ImmTyOpSelHi ||
6902                Op.Type == AMDGPUOperand::ImmTyNegLo ||
6903                Op.Type == AMDGPUOperand::ImmTyNegHi) {
6904       res = parseOperandArrayWithPrefix(Op.Name, Operands, Op.Type,
6905                                         Op.ConvertResult);
6906     } else if (Op.Type == AMDGPUOperand::ImmTyDim) {
6907       res = parseDim(Operands);
6908     } else {
6909       res = parseIntWithPrefix(Op.Name, Operands, Op.Type, Op.ConvertResult);
6910     }
6911     if (res != MatchOperand_NoMatch) {
6912       return res;
6913     }
6914   }
6915   return MatchOperand_NoMatch;
6916 }
6917 
6918 OperandMatchResultTy AMDGPUAsmParser::parseOModOperand(OperandVector &Operands) {
6919   StringRef Name = getTokenStr();
6920   if (Name == "mul") {
6921     return parseIntWithPrefix("mul", Operands,
6922                               AMDGPUOperand::ImmTyOModSI, ConvertOmodMul);
6923   }
6924 
6925   if (Name == "div") {
6926     return parseIntWithPrefix("div", Operands,
6927                               AMDGPUOperand::ImmTyOModSI, ConvertOmodDiv);
6928   }
6929 
6930   return MatchOperand_NoMatch;
6931 }
6932 
6933 void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands) {
6934   cvtVOP3P(Inst, Operands);
6935 
6936   int Opc = Inst.getOpcode();
6937 
6938   int SrcNum;
6939   const int Ops[] = { AMDGPU::OpName::src0,
6940                       AMDGPU::OpName::src1,
6941                       AMDGPU::OpName::src2 };
6942   for (SrcNum = 0;
6943        SrcNum < 3 && AMDGPU::getNamedOperandIdx(Opc, Ops[SrcNum]) != -1;
6944        ++SrcNum);
6945   assert(SrcNum > 0);
6946 
6947   int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
6948   unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
6949 
6950   if ((OpSel & (1 << SrcNum)) != 0) {
6951     int ModIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers);
6952     uint32_t ModVal = Inst.getOperand(ModIdx).getImm();
6953     Inst.getOperand(ModIdx).setImm(ModVal | SISrcMods::DST_OP_SEL);
6954   }
6955 }
6956 
6957 static bool isRegOrImmWithInputMods(const MCInstrDesc &Desc, unsigned OpNum) {
      // 1. This operand is an input-modifiers operand
  return Desc.OpInfo[OpNum].OperandType == AMDGPU::OPERAND_INPUT_MODS
      // 2. This is not the last operand
      && Desc.NumOperands > (OpNum + 1)
      // 3. The next operand has a register class
      && Desc.OpInfo[OpNum + 1].RegClass != -1
      // 4. The next register is not tied to any other operand
      && Desc.getOperandConstraint(OpNum + 1, MCOI::OperandConstraint::TIED_TO) == -1;
6966 }
6967 
6968 void AMDGPUAsmParser::cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands)
6969 {
6970   OptionalImmIndexMap OptionalIdx;
6971   unsigned Opc = Inst.getOpcode();
6972 
6973   unsigned I = 1;
6974   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
6975   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
6976     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
6977   }
6978 
6979   for (unsigned E = Operands.size(); I != E; ++I) {
6980     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
6981     if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
6982       Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
6983     } else if (Op.isInterpSlot() ||
6984                Op.isInterpAttr() ||
6985                Op.isAttrChan()) {
6986       Inst.addOperand(MCOperand::createImm(Op.getImm()));
6987     } else if (Op.isImmModifier()) {
6988       OptionalIdx[Op.getImmTy()] = I;
6989     } else {
6990       llvm_unreachable("unhandled operand type");
6991     }
6992   }
6993 
6994   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::high) != -1) {
6995     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyHigh);
6996   }
6997 
6998   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) {
6999     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI);
7000   }
7001 
7002   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) {
7003     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI);
7004   }
7005 }
7006 
7007 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands,
7008                               OptionalImmIndexMap &OptionalIdx) {
7009   unsigned Opc = Inst.getOpcode();
7010 
7011   unsigned I = 1;
7012   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
7013   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
7014     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
7015   }
7016 
7017   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers) != -1) {
7018     // This instruction has src modifiers
7019     for (unsigned E = Operands.size(); I != E; ++I) {
7020       AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
7021       if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
7022         Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
7023       } else if (Op.isImmModifier()) {
7024         OptionalIdx[Op.getImmTy()] = I;
7025       } else if (Op.isRegOrImm()) {
7026         Op.addRegOrImmOperands(Inst, 1);
7027       } else {
7028         llvm_unreachable("unhandled operand type");
7029       }
7030     }
7031   } else {
7032     // No src modifiers
7033     for (unsigned E = Operands.size(); I != E; ++I) {
7034       AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
7035       if (Op.isMod()) {
7036         OptionalIdx[Op.getImmTy()] = I;
7037       } else {
7038         Op.addRegOrImmOperands(Inst, 1);
7039       }
7040     }
7041   }
7042 
7043   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) {
7044     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI);
7045   }
7046 
7047   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) {
7048     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI);
7049   }
7050 
  // Special case v_mac_{f16, f32} and v_fmac_{f16, f32} (gfx906/gfx10+):
  // they have a src2 register operand that is tied to the dst operand.
  // We don't allow modifiers for this operand in the assembler, so
  // src2_modifiers should be 0.
7055   if (Opc == AMDGPU::V_MAC_F32_e64_gfx6_gfx7 ||
7056       Opc == AMDGPU::V_MAC_F32_e64_gfx10 ||
7057       Opc == AMDGPU::V_MAC_F32_e64_vi ||
7058       Opc == AMDGPU::V_MAC_LEGACY_F32_e64_gfx6_gfx7 ||
7059       Opc == AMDGPU::V_MAC_LEGACY_F32_e64_gfx10 ||
7060       Opc == AMDGPU::V_MAC_F16_e64_vi ||
7061       Opc == AMDGPU::V_FMAC_F32_e64_gfx10 ||
7062       Opc == AMDGPU::V_FMAC_F32_e64_vi ||
7063       Opc == AMDGPU::V_FMAC_LEGACY_F32_e64_gfx10 ||
7064       Opc == AMDGPU::V_FMAC_F16_e64_gfx10) {
7065     auto it = Inst.begin();
7066     std::advance(it, AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2_modifiers));
7067     it = Inst.insert(it, MCOperand::createImm(0)); // no modifiers for src2
7068     ++it;
7069     // Copy the operand to ensure it's not invalidated when Inst grows.
7070     Inst.insert(it, MCOperand(Inst.getOperand(0))); // src2 = dst
7071   }
7072 }
7073 
7074 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands) {
7075   OptionalImmIndexMap OptionalIdx;
7076   cvtVOP3(Inst, Operands, OptionalIdx);
7077 }
7078 
7079 void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst,
7080                                const OperandVector &Operands) {
7081   OptionalImmIndexMap OptIdx;
7082   const int Opc = Inst.getOpcode();
7083   const MCInstrDesc &Desc = MII.get(Opc);
7084 
7085   const bool IsPacked = (Desc.TSFlags & SIInstrFlags::IsPacked) != 0;
7086 
7087   cvtVOP3(Inst, Operands, OptIdx);
7088 
7089   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst_in) != -1) {
7090     assert(!IsPacked);
7091     Inst.addOperand(Inst.getOperand(0));
7092   }
7093 
  // FIXME: This is messy. Parse the modifiers as if this were a normal VOP3
  // instruction, and then figure out where to actually put the modifiers.
7096 
7097   addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSel);
7098 
7099   int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi);
7100   if (OpSelHiIdx != -1) {
7101     int DefaultVal = IsPacked ? -1 : 0;
7102     addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSelHi,
7103                           DefaultVal);
7104   }
7105 
7106   int NegLoIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_lo);
7107   if (NegLoIdx != -1) {
7108     assert(IsPacked);
7109     addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegLo);
7110     addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegHi);
7111   }
7112 
7113   const int Ops[] = { AMDGPU::OpName::src0,
7114                       AMDGPU::OpName::src1,
7115                       AMDGPU::OpName::src2 };
7116   const int ModOps[] = { AMDGPU::OpName::src0_modifiers,
7117                          AMDGPU::OpName::src1_modifiers,
7118                          AMDGPU::OpName::src2_modifiers };
7119 
7120   int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
7121 
7122   unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
7123   unsigned OpSelHi = 0;
7124   unsigned NegLo = 0;
7125   unsigned NegHi = 0;
7126 
7127   if (OpSelHiIdx != -1) {
7128     OpSelHi = Inst.getOperand(OpSelHiIdx).getImm();
7129   }
7130 
7131   if (NegLoIdx != -1) {
7132     int NegHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_hi);
7133     NegLo = Inst.getOperand(NegLoIdx).getImm();
7134     NegHi = Inst.getOperand(NegHiIdx).getImm();
7135   }
7136 
7137   for (int J = 0; J < 3; ++J) {
7138     int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]);
7139     if (OpIdx == -1)
7140       break;
7141 
7142     uint32_t ModVal = 0;
7143 
7144     if ((OpSel & (1 << J)) != 0)
7145       ModVal |= SISrcMods::OP_SEL_0;
7146 
7147     if ((OpSelHi & (1 << J)) != 0)
7148       ModVal |= SISrcMods::OP_SEL_1;
7149 
7150     if ((NegLo & (1 << J)) != 0)
7151       ModVal |= SISrcMods::NEG;
7152 
7153     if ((NegHi & (1 << J)) != 0)
7154       ModVal |= SISrcMods::NEG_HI;
7155 
7156     int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]);
7157 
7158     Inst.getOperand(ModIdx).setImm(Inst.getOperand(ModIdx).getImm() | ModVal);
7159   }
7160 }
7161 
7162 //===----------------------------------------------------------------------===//
7163 // dpp
7164 //===----------------------------------------------------------------------===//
7165 
7166 bool AMDGPUOperand::isDPP8() const {
7167   return isImmTy(ImmTyDPP8);
7168 }
7169 
7170 bool AMDGPUOperand::isDPPCtrl() const {
7171   using namespace AMDGPU::DPP;
7172 
7173   bool result = isImm() && getImmTy() == ImmTyDppCtrl && isUInt<9>(getImm());
7174   if (result) {
7175     int64_t Imm = getImm();
7176     return (Imm >= DppCtrl::QUAD_PERM_FIRST && Imm <= DppCtrl::QUAD_PERM_LAST) ||
7177            (Imm >= DppCtrl::ROW_SHL_FIRST && Imm <= DppCtrl::ROW_SHL_LAST) ||
7178            (Imm >= DppCtrl::ROW_SHR_FIRST && Imm <= DppCtrl::ROW_SHR_LAST) ||
7179            (Imm >= DppCtrl::ROW_ROR_FIRST && Imm <= DppCtrl::ROW_ROR_LAST) ||
7180            (Imm == DppCtrl::WAVE_SHL1) ||
7181            (Imm == DppCtrl::WAVE_ROL1) ||
7182            (Imm == DppCtrl::WAVE_SHR1) ||
7183            (Imm == DppCtrl::WAVE_ROR1) ||
7184            (Imm == DppCtrl::ROW_MIRROR) ||
7185            (Imm == DppCtrl::ROW_HALF_MIRROR) ||
7186            (Imm == DppCtrl::BCAST15) ||
7187            (Imm == DppCtrl::BCAST31) ||
7188            (Imm >= DppCtrl::ROW_SHARE_FIRST && Imm <= DppCtrl::ROW_SHARE_LAST) ||
7189            (Imm >= DppCtrl::ROW_XMASK_FIRST && Imm <= DppCtrl::ROW_XMASK_LAST);
7190   }
7191   return false;
7192 }
7193 
7194 //===----------------------------------------------------------------------===//
7195 // mAI
7196 //===----------------------------------------------------------------------===//
7197 
7198 bool AMDGPUOperand::isBLGP() const {
7199   return isImm() && getImmTy() == ImmTyBLGP && isUInt<3>(getImm());
7200 }
7201 
7202 bool AMDGPUOperand::isCBSZ() const {
7203   return isImm() && getImmTy() == ImmTyCBSZ && isUInt<3>(getImm());
7204 }
7205 
7206 bool AMDGPUOperand::isABID() const {
7207   return isImm() && getImmTy() == ImmTyABID && isUInt<4>(getImm());
7208 }
7209 
7210 bool AMDGPUOperand::isS16Imm() const {
7211   return isImm() && (isInt<16>(getImm()) || isUInt<16>(getImm()));
7212 }
7213 
7214 bool AMDGPUOperand::isU16Imm() const {
7215   return isImm() && isUInt<16>(getImm());
7216 }
7217 
7218 OperandMatchResultTy AMDGPUAsmParser::parseDim(OperandVector &Operands) {
7219   if (!isGFX10Plus())
7220     return MatchOperand_NoMatch;
7221 
7222   SMLoc S = getLoc();
7223 
7224   if (!trySkipId("dim", AsmToken::Colon))
7225     return MatchOperand_NoMatch;
7226 
7227   // We want to allow "dim:1D" etc., but the initial 1 is tokenized as an
7228   // integer.
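  // Accepted forms include e.g. 'dim:2D' and 'dim:SQ_RSRC_IMG_2D'
  // (illustrative examples).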
7229   std::string Token;
7230   if (isToken(AsmToken::Integer)) {
7231     SMLoc Loc = getToken().getEndLoc();
7232     Token = std::string(getTokenStr());
7233     lex();
7234     if (getLoc() != Loc)
7235       return MatchOperand_ParseFail;
7236   }
7237   if (!isToken(AsmToken::Identifier))
7238     return MatchOperand_ParseFail;
7239   Token += getTokenStr();
7240 
7241   StringRef DimId = Token;
7242   if (DimId.startswith("SQ_RSRC_IMG_"))
7243     DimId = DimId.substr(12);
7244 
7245   const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByAsmSuffix(DimId);
7246   if (!DimInfo)
7247     return MatchOperand_ParseFail;
7248 
7249   lex();
7250 
7251   Operands.push_back(AMDGPUOperand::CreateImm(this, DimInfo->Encoding, S,
7252                                               AMDGPUOperand::ImmTyDim));
7253   return MatchOperand_Success;
7254 }
7255 
7256 OperandMatchResultTy AMDGPUAsmParser::parseDPP8(OperandVector &Operands) {
7257   SMLoc S = getLoc();
7258 
7259   if (!isGFX10Plus() || !trySkipId("dpp8", AsmToken::Colon))
7260     return MatchOperand_NoMatch;
7261 
7262   // dpp8:[%d,%d,%d,%d,%d,%d,%d,%d]
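  // e.g. (illustrative): v_mov_b32_dpp v0, v1 dpp8:[7,6,5,4,3,2,1,0]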
7263 
7264   int64_t Sels[8];
7265 
7266   if (!skipToken(AsmToken::LBrac, "expected an opening square bracket"))
7267     return MatchOperand_ParseFail;
7268 
7269   for (size_t i = 0; i < 8; ++i) {
7270     if (i > 0 && !skipToken(AsmToken::Comma, "expected a comma"))
7271       return MatchOperand_ParseFail;
7272 
7273     SMLoc Loc = getLoc();
7274     if (getParser().parseAbsoluteExpression(Sels[i]))
7275       return MatchOperand_ParseFail;
7276     if (0 > Sels[i] || 7 < Sels[i]) {
7277       Error(Loc, "expected a 3-bit value");
7278       return MatchOperand_ParseFail;
7279     }
7280   }
7281 
7282   if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
7283     return MatchOperand_ParseFail;
7284 
7285   unsigned DPP8 = 0;
7286   for (size_t i = 0; i < 8; ++i)
7287     DPP8 |= (Sels[i] << (i * 3));
7288 
7289   Operands.push_back(AMDGPUOperand::CreateImm(this, DPP8, S, AMDGPUOperand::ImmTyDPP8));
7290   return MatchOperand_Success;
7291 }
7292 
7293 bool
7294 AMDGPUAsmParser::isSupportedDPPCtrl(StringRef Ctrl,
7295                                     const OperandVector &Operands) {
7296   if (Ctrl == "row_share" ||
7297       Ctrl == "row_xmask")
7298     return isGFX10Plus();
7299 
7300   if (Ctrl == "wave_shl" ||
7301       Ctrl == "wave_shr" ||
7302       Ctrl == "wave_rol" ||
7303       Ctrl == "wave_ror" ||
7304       Ctrl == "row_bcast")
7305     return isVI() || isGFX9();
7306 
7307   return Ctrl == "row_mirror" ||
7308          Ctrl == "row_half_mirror" ||
7309          Ctrl == "quad_perm" ||
7310          Ctrl == "row_shl" ||
7311          Ctrl == "row_shr" ||
7312          Ctrl == "row_ror";
7313 }
7314 
7315 int64_t
7316 AMDGPUAsmParser::parseDPPCtrlPerm() {
7317   // quad_perm:[%d,%d,%d,%d]
7318 
7319   if (!skipToken(AsmToken::LBrac, "expected an opening square bracket"))
7320     return -1;
7321 
7322   int64_t Val = 0;
7323   for (int i = 0; i < 4; ++i) {
7324     if (i > 0 && !skipToken(AsmToken::Comma, "expected a comma"))
7325       return -1;
7326 
7327     int64_t Temp;
7328     SMLoc Loc = getLoc();
7329     if (getParser().parseAbsoluteExpression(Temp))
7330       return -1;
7331     if (Temp < 0 || Temp > 3) {
7332       Error(Loc, "expected a 2-bit value");
7333       return -1;
7334     }
7335 
7336     Val += (Temp << i * 2);
7337   }
7338 
7339   if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
7340     return -1;
7341 
7342   return Val;
7343 }
7344 
7345 int64_t
7346 AMDGPUAsmParser::parseDPPCtrlSel(StringRef Ctrl) {
7347   using namespace AMDGPU::DPP;
7348 
7349   // sel:%d
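  // e.g. 'row_shl:1', 'row_share:5', or 'row_bcast:15' (illustrative).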
7350 
7351   int64_t Val;
7352   SMLoc Loc = getLoc();
7353 
7354   if (getParser().parseAbsoluteExpression(Val))
7355     return -1;
7356 
7357   struct DppCtrlCheck {
7358     int64_t Ctrl;
7359     int Lo;
7360     int Hi;
7361   };
7362 
7363   DppCtrlCheck Check = StringSwitch<DppCtrlCheck>(Ctrl)
7364     .Case("wave_shl",  {DppCtrl::WAVE_SHL1,       1,  1})
7365     .Case("wave_rol",  {DppCtrl::WAVE_ROL1,       1,  1})
7366     .Case("wave_shr",  {DppCtrl::WAVE_SHR1,       1,  1})
7367     .Case("wave_ror",  {DppCtrl::WAVE_ROR1,       1,  1})
7368     .Case("row_shl",   {DppCtrl::ROW_SHL0,        1, 15})
7369     .Case("row_shr",   {DppCtrl::ROW_SHR0,        1, 15})
7370     .Case("row_ror",   {DppCtrl::ROW_ROR0,        1, 15})
7371     .Case("row_share", {DppCtrl::ROW_SHARE_FIRST, 0, 15})
7372     .Case("row_xmask", {DppCtrl::ROW_XMASK_FIRST, 0, 15})
7373     .Default({-1, 0, 0});
7374 
7375   bool Valid;
7376   if (Check.Ctrl == -1) {
7377     Valid = (Ctrl == "row_bcast" && (Val == 15 || Val == 31));
7378     Val = (Val == 15)? DppCtrl::BCAST15 : DppCtrl::BCAST31;
7379   } else {
7380     Valid = Check.Lo <= Val && Val <= Check.Hi;
7381     Val = (Check.Lo == Check.Hi) ? Check.Ctrl : (Check.Ctrl | Val);
7382   }
7383 
7384   if (!Valid) {
7385     Error(Loc, Twine("invalid ", Ctrl) + Twine(" value"));
7386     return -1;
7387   }
7388 
7389   return Val;
7390 }
7391 
7392 OperandMatchResultTy
7393 AMDGPUAsmParser::parseDPPCtrl(OperandVector &Operands) {
7394   using namespace AMDGPU::DPP;
7395 
7396   if (!isToken(AsmToken::Identifier) ||
7397       !isSupportedDPPCtrl(getTokenStr(), Operands))
7398     return MatchOperand_NoMatch;
7399 
7400   SMLoc S = getLoc();
7401   int64_t Val = -1;
7402   StringRef Ctrl;
7403 
7404   parseId(Ctrl);
7405 
7406   if (Ctrl == "row_mirror") {
7407     Val = DppCtrl::ROW_MIRROR;
7408   } else if (Ctrl == "row_half_mirror") {
7409     Val = DppCtrl::ROW_HALF_MIRROR;
7410   } else {
7411     if (skipToken(AsmToken::Colon, "expected a colon")) {
7412       if (Ctrl == "quad_perm") {
7413         Val = parseDPPCtrlPerm();
7414       } else {
7415         Val = parseDPPCtrlSel(Ctrl);
7416       }
7417     }
7418   }
7419 
7420   if (Val == -1)
7421     return MatchOperand_ParseFail;
7422 
7423   Operands.push_back(
7424     AMDGPUOperand::CreateImm(this, Val, S, AMDGPUOperand::ImmTyDppCtrl));
7425   return MatchOperand_Success;
7426 }
7427 
7428 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultRowMask() const {
7429   return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppRowMask);
7430 }
7431 
7432 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultEndpgmImmOperands() const {
7433   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyEndpgm);
7434 }
7435 
7436 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBankMask() const {
7437   return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppBankMask);
7438 }
7439 
7440 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBoundCtrl() const {
7441   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppBoundCtrl);
7442 }
7443 
7444 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultFI() const {
7445   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppFi);
7446 }
7447 
7448 void AMDGPUAsmParser::cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8) {
7449   OptionalImmIndexMap OptionalIdx;
7450 
7451   unsigned I = 1;
7452   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
7453   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
7454     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
7455   }
7456 
7457   int Fi = 0;
7458   for (unsigned E = Operands.size(); I != E; ++I) {
7459     auto TiedTo = Desc.getOperandConstraint(Inst.getNumOperands(),
7460                                             MCOI::TIED_TO);
7461     if (TiedTo != -1) {
7462       assert((unsigned)TiedTo < Inst.getNumOperands());
7463       // handle tied old or src2 for MAC instructions
7464       Inst.addOperand(Inst.getOperand(TiedTo));
7465     }
7466     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
7467     // Add the register arguments
7468     if (Op.isReg() && validateVccOperand(Op.getReg())) {
      // VOP2b (v_add_u32, v_sub_u32 ...) dpp instructions use the "vcc" token.
      // Skip it.
7471       continue;
7472     }
7473 
7474     if (IsDPP8) {
7475       if (Op.isDPP8()) {
7476         Op.addImmOperands(Inst, 1);
7477       } else if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
7478         Op.addRegWithFPInputModsOperands(Inst, 2);
7479       } else if (Op.isFI()) {
7480         Fi = Op.getImm();
7481       } else if (Op.isReg()) {
7482         Op.addRegOperands(Inst, 1);
7483       } else {
7484         llvm_unreachable("Invalid operand type");
7485       }
7486     } else {
7487       if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
7488         Op.addRegWithFPInputModsOperands(Inst, 2);
7489       } else if (Op.isDPPCtrl()) {
7490         Op.addImmOperands(Inst, 1);
7491       } else if (Op.isImm()) {
7492         // Handle optional arguments
7493         OptionalIdx[Op.getImmTy()] = I;
7494       } else {
7495         llvm_unreachable("Invalid operand type");
7496       }
7497     }
7498   }
7499 
7500   if (IsDPP8) {
7501     using namespace llvm::AMDGPU::DPP;
7502     Inst.addOperand(MCOperand::createImm(Fi? DPP8_FI_1 : DPP8_FI_0));
7503   } else {
7504     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppRowMask, 0xf);
7505     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBankMask, 0xf);
7506     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBoundCtrl);
7507     if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::fi) != -1) {
7508       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppFi);
7509     }
7510   }
7511 }
7512 
7513 //===----------------------------------------------------------------------===//
7514 // sdwa
7515 //===----------------------------------------------------------------------===//
7516 
7517 OperandMatchResultTy
7518 AMDGPUAsmParser::parseSDWASel(OperandVector &Operands, StringRef Prefix,
7519                               AMDGPUOperand::ImmTy Type) {
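  // Parses selectors such as 'dst_sel:DWORD' or 'src0_sel:BYTE_0'
  // (illustrative examples).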
7520   using namespace llvm::AMDGPU::SDWA;
7521 
7522   SMLoc S = getLoc();
7523   StringRef Value;
7524   OperandMatchResultTy res;
7525 
7526   res = parseStringWithPrefix(Prefix, Value);
7527   if (res != MatchOperand_Success) {
7528     return res;
7529   }
7530 
7531   int64_t Int;
7532   Int = StringSwitch<int64_t>(Value)
7533         .Case("BYTE_0", SdwaSel::BYTE_0)
7534         .Case("BYTE_1", SdwaSel::BYTE_1)
7535         .Case("BYTE_2", SdwaSel::BYTE_2)
7536         .Case("BYTE_3", SdwaSel::BYTE_3)
7537         .Case("WORD_0", SdwaSel::WORD_0)
7538         .Case("WORD_1", SdwaSel::WORD_1)
7539         .Case("DWORD", SdwaSel::DWORD)
7540         .Default(0xffffffff);
7541 
7542   if (Int == 0xffffffff) {
7543     return MatchOperand_ParseFail;
7544   }
7545 
7546   Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, Type));
7547   return MatchOperand_Success;
7548 }
7549 
7550 OperandMatchResultTy
7551 AMDGPUAsmParser::parseSDWADstUnused(OperandVector &Operands) {
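  // Parses e.g. 'dst_unused:UNUSED_PRESERVE' (illustrative example).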
7552   using namespace llvm::AMDGPU::SDWA;
7553 
7554   SMLoc S = getLoc();
7555   StringRef Value;
7556   OperandMatchResultTy res;
7557 
7558   res = parseStringWithPrefix("dst_unused", Value);
7559   if (res != MatchOperand_Success) {
7560     return res;
7561   }
7562 
7563   int64_t Int;
7564   Int = StringSwitch<int64_t>(Value)
7565         .Case("UNUSED_PAD", DstUnused::UNUSED_PAD)
7566         .Case("UNUSED_SEXT", DstUnused::UNUSED_SEXT)
7567         .Case("UNUSED_PRESERVE", DstUnused::UNUSED_PRESERVE)
7568         .Default(0xffffffff);
7569 
7570   if (Int == 0xffffffff) {
7571     return MatchOperand_ParseFail;
7572   }
7573 
7574   Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, AMDGPUOperand::ImmTySdwaDstUnused));
7575   return MatchOperand_Success;
7576 }
7577 
7578 void AMDGPUAsmParser::cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands) {
7579   cvtSDWA(Inst, Operands, SIInstrFlags::VOP1);
7580 }
7581 
7582 void AMDGPUAsmParser::cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands) {
7583   cvtSDWA(Inst, Operands, SIInstrFlags::VOP2);
7584 }
7585 
7586 void AMDGPUAsmParser::cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands) {
7587   cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, true, true);
7588 }
7589 
7590 void AMDGPUAsmParser::cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands) {
7591   cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, false, true);
7592 }
7593 
7594 void AMDGPUAsmParser::cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands) {
7595   cvtSDWA(Inst, Operands, SIInstrFlags::VOPC, isVI());
7596 }
7597 
7598 void AMDGPUAsmParser::cvtSDWA(MCInst &Inst, const OperandVector &Operands,
7599                               uint64_t BasicInstType,
7600                               bool SkipDstVcc,
7601                               bool SkipSrcVcc) {
7602   using namespace llvm::AMDGPU::SDWA;
7603 
7604   OptionalImmIndexMap OptionalIdx;
7605   bool SkipVcc = SkipDstVcc || SkipSrcVcc;
7606   bool SkippedVcc = false;
7607 
7608   unsigned I = 1;
7609   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
7610   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
7611     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
7612   }
7613 
7614   for (unsigned E = Operands.size(); I != E; ++I) {
7615     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
7616     if (SkipVcc && !SkippedVcc && Op.isReg() &&
7617         (Op.getReg() == AMDGPU::VCC || Op.getReg() == AMDGPU::VCC_LO)) {
      // VOP2b (v_add_u32, v_sub_u32 ...) sdwa instructions use the "vcc" token as dst.
7619       // Skip it if it's 2nd (e.g. v_add_i32_sdwa v1, vcc, v2, v3)
7620       // or 4th (v_addc_u32_sdwa v1, vcc, v2, v3, vcc) operand.
7621       // Skip VCC only if we didn't skip it on previous iteration.
7622       // Note that src0 and src1 occupy 2 slots each because of modifiers.
7623       if (BasicInstType == SIInstrFlags::VOP2 &&
7624           ((SkipDstVcc && Inst.getNumOperands() == 1) ||
7625            (SkipSrcVcc && Inst.getNumOperands() == 5))) {
7626         SkippedVcc = true;
7627         continue;
7628       } else if (BasicInstType == SIInstrFlags::VOPC &&
7629                  Inst.getNumOperands() == 0) {
7630         SkippedVcc = true;
7631         continue;
7632       }
7633     }
7634     if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
7635       Op.addRegOrImmWithInputModsOperands(Inst, 2);
7636     } else if (Op.isImm()) {
7637       // Handle optional arguments
7638       OptionalIdx[Op.getImmTy()] = I;
7639     } else {
7640       llvm_unreachable("Invalid operand type");
7641     }
7642     SkippedVcc = false;
7643   }
7644 
7645   if (Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx10 &&
7646       Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx9 &&
7647       Inst.getOpcode() != AMDGPU::V_NOP_sdwa_vi) {
    // v_nop_sdwa (vi/gfx9/gfx10) has no optional sdwa arguments
7649     switch (BasicInstType) {
7650     case SIInstrFlags::VOP1:
7651       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
7652       if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) {
7653         addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
7654       }
7655       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD);
7656       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE);
7657       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
7658       break;
7659 
7660     case SIInstrFlags::VOP2:
7661       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
7662       if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) {
7663         addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
7664       }
7665       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD);
7666       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE);
7667       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
7668       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD);
7669       break;
7670 
7671     case SIInstrFlags::VOPC:
7672       if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::clamp) != -1)
7673         addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
7674       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
7675       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD);
7676       break;
7677 
7678     default:
7679       llvm_unreachable("Invalid instruction type. Only VOP1, VOP2 and VOPC allowed");
7680     }
7681   }
7682 
7683   // special case v_mac_{f16, f32}:
7684   // it has src2 register operand that is tied to dst operand
7685   if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
7686       Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi)  {
7687     auto it = Inst.begin();
7688     std::advance(
7689       it, AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::src2));
7690     Inst.insert(it, Inst.getOperand(0)); // src2 = dst
7691   }
7692 }
7693 
7694 //===----------------------------------------------------------------------===//
7695 // mAI
7696 //===----------------------------------------------------------------------===//
7697 
7698 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBLGP() const {
7699   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyBLGP);
7700 }
7701 
7702 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultCBSZ() const {
7703   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyCBSZ);
7704 }
7705 
7706 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultABID() const {
7707   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyABID);
7708 }
7709 
7710 /// Force static initialization.
7711 extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUAsmParser() {
7712   RegisterMCAsmParser<AMDGPUAsmParser> A(getTheAMDGPUTarget());
7713   RegisterMCAsmParser<AMDGPUAsmParser> B(getTheGCNTarget());
7714 }
7715 
7716 #define GET_REGISTER_MATCHER
7717 #define GET_MATCHER_IMPLEMENTATION
7718 #define GET_MNEMONIC_SPELL_CHECKER
7719 #define GET_MNEMONIC_CHECKER
7720 #include "AMDGPUGenAsmMatcher.inc"
7721 
// This function should be defined after the auto-generated include so that we
// have the MatchClassKind enum defined
7724 unsigned AMDGPUAsmParser::validateTargetOperandClass(MCParsedAsmOperand &Op,
7725                                                      unsigned Kind) {
  // Tokens like "glc" would be parsed as immediate operands in ParseOperand().
  // But MatchInstructionImpl() expects to meet a token and fails to validate
  // the operand. This method checks whether we were given an immediate operand
  // when the matcher expects the corresponding token.
7730   AMDGPUOperand &Operand = (AMDGPUOperand&)Op;
7731   switch (Kind) {
7732   case MCK_addr64:
7733     return Operand.isAddr64() ? Match_Success : Match_InvalidOperand;
7734   case MCK_gds:
7735     return Operand.isGDS() ? Match_Success : Match_InvalidOperand;
7736   case MCK_lds:
7737     return Operand.isLDS() ? Match_Success : Match_InvalidOperand;
7738   case MCK_glc:
7739     return Operand.isGLC() ? Match_Success : Match_InvalidOperand;
7740   case MCK_idxen:
7741     return Operand.isIdxen() ? Match_Success : Match_InvalidOperand;
7742   case MCK_offen:
7743     return Operand.isOffen() ? Match_Success : Match_InvalidOperand;
7744   case MCK_SSrcB32:
7745     // When operands have expression values, they will return true for isToken,
7746     // because it is not possible to distinguish between a token and an
7747     // expression at parse time. MatchInstructionImpl() will always try to
7748     // match an operand as a token, when isToken returns true, and when the
7749     // name of the expression is not a valid token, the match will fail,
7750     // so we need to handle it here.
7751     return Operand.isSSrcB32() ? Match_Success : Match_InvalidOperand;
7752   case MCK_SSrcF32:
7753     return Operand.isSSrcF32() ? Match_Success : Match_InvalidOperand;
7754   case MCK_SoppBrTarget:
7755     return Operand.isSoppBrTarget() ? Match_Success : Match_InvalidOperand;
7756   case MCK_VReg32OrOff:
7757     return Operand.isVReg32OrOff() ? Match_Success : Match_InvalidOperand;
7758   case MCK_InterpSlot:
7759     return Operand.isInterpSlot() ? Match_Success : Match_InvalidOperand;
7760   case MCK_Attr:
7761     return Operand.isInterpAttr() ? Match_Success : Match_InvalidOperand;
7762   case MCK_AttrChan:
7763     return Operand.isAttrChan() ? Match_Success : Match_InvalidOperand;
7764   case MCK_ImmSMEMOffset:
7765     return Operand.isSMEMOffset() ? Match_Success : Match_InvalidOperand;
7766   case MCK_SReg_64:
7767   case MCK_SReg_64_XEXEC:
7768     // Null is defined as a 32-bit register but
7769     // it should also be enabled with 64-bit operands.
7770     // The following code enables it for SReg_64 operands
7771     // used as source and destination. Remaining source
7772     // operands are handled in isInlinableImm.
7773     return Operand.isNull() ? Match_Success : Match_InvalidOperand;
7774   default:
7775     return Match_InvalidOperand;
7776   }
7777 }
7778 
7779 //===----------------------------------------------------------------------===//
7780 // endpgm
7781 //===----------------------------------------------------------------------===//
7782 
7783 OperandMatchResultTy AMDGPUAsmParser::parseEndpgmOp(OperandVector &Operands) {
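  // Parses the optional 16-bit immediate of s_endpgm, e.g. 's_endpgm 3'
  // (illustrative example).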
7784   SMLoc S = getLoc();
7785   int64_t Imm = 0;
7786 
7787   if (!parseExpr(Imm)) {
7788     // The operand is optional, if not present default to 0
7789     Imm = 0;
7790   }
7791 
7792   if (!isUInt<16>(Imm)) {
7793     Error(S, "expected a 16-bit value");
7794     return MatchOperand_ParseFail;
7795   }
7796 
7797   Operands.push_back(
7798       AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyEndpgm));
7799   return MatchOperand_Success;
7800 }
7801 
7802 bool AMDGPUOperand::isEndpgm() const { return isImmTy(ImmTyEndpgm); }
7803