1 //===- AMDGPUAsmParser.cpp - Parse SI asm to MCInst instructions ----------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "AMDKernelCodeT.h"
10 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
11 #include "MCTargetDesc/AMDGPUTargetStreamer.h"
12 #include "SIDefines.h"
13 #include "SIInstrInfo.h"
14 #include "TargetInfo/AMDGPUTargetInfo.h"
15 #include "Utils/AMDGPUAsmUtils.h"
16 #include "Utils/AMDGPUBaseInfo.h"
17 #include "Utils/AMDKernelCodeTUtils.h"
18 #include "llvm/ADT/APFloat.h"
19 #include "llvm/ADT/SmallBitVector.h"
20 #include "llvm/ADT/StringSet.h"
21 #include "llvm/ADT/Twine.h"
22 #include "llvm/MC/MCAsmInfo.h"
23 #include "llvm/MC/MCContext.h"
24 #include "llvm/MC/MCExpr.h"
25 #include "llvm/MC/MCInst.h"
26 #include "llvm/MC/MCParser/MCAsmParser.h"
27 #include "llvm/MC/MCParser/MCParsedAsmOperand.h"
28 #include "llvm/MC/MCParser/MCTargetAsmParser.h"
29 #include "llvm/MC/MCSymbol.h"
30 #include "llvm/Support/AMDGPUMetadata.h"
31 #include "llvm/Support/AMDHSAKernelDescriptor.h"
32 #include "llvm/Support/Casting.h"
33 #include "llvm/Support/MachineValueType.h"
34 #include "llvm/Support/TargetParser.h"
35 #include "llvm/Support/TargetRegistry.h"
36 
37 using namespace llvm;
38 using namespace llvm::AMDGPU;
39 using namespace llvm::amdhsa;
40 
41 namespace {
42 
43 class AMDGPUAsmParser;
44 
45 enum RegisterKind { IS_UNKNOWN, IS_VGPR, IS_SGPR, IS_AGPR, IS_TTMP, IS_SPECIAL };
46 
47 //===----------------------------------------------------------------------===//
48 // Operand
49 //===----------------------------------------------------------------------===//
50 
51 class AMDGPUOperand : public MCParsedAsmOperand {
52   enum KindTy {
53     Token,
54     Immediate,
55     Register,
56     Expression
57   } Kind;
58 
59   SMLoc StartLoc, EndLoc;
60   const AMDGPUAsmParser *AsmParser;
61 
62 public:
63   AMDGPUOperand(KindTy Kind_, const AMDGPUAsmParser *AsmParser_)
64     : MCParsedAsmOperand(), Kind(Kind_), AsmParser(AsmParser_) {}
65 
66   using Ptr = std::unique_ptr<AMDGPUOperand>;
67 
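  // Source operand modifiers: abs/neg apply to floating-point sources, sext to
  // integer sources. For example, a VOP3 source written as "-|v0|" sets both
  // Neg and Abs, which getModifiersOperand() encodes as
  // SISrcMods::NEG | SISrcMods::ABS.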
68   struct Modifiers {
69     bool Abs = false;
70     bool Neg = false;
71     bool Sext = false;
72 
73     bool hasFPModifiers() const { return Abs || Neg; }
74     bool hasIntModifiers() const { return Sext; }
75     bool hasModifiers() const { return hasFPModifiers() || hasIntModifiers(); }
76 
77     int64_t getFPModifiersOperand() const {
78       int64_t Operand = 0;
79       Operand |= Abs ? SISrcMods::ABS : 0u;
80       Operand |= Neg ? SISrcMods::NEG : 0u;
81       return Operand;
82     }
83 
84     int64_t getIntModifiersOperand() const {
85       int64_t Operand = 0;
86       Operand |= Sext ? SISrcMods::SEXT : 0u;
87       return Operand;
88     }
89 
90     int64_t getModifiersOperand() const {
91       assert(!(hasFPModifiers() && hasIntModifiers())
92            && "fp and int modifiers should not be used simultaneously");
93       if (hasFPModifiers()) {
94         return getFPModifiersOperand();
95       } else if (hasIntModifiers()) {
96         return getIntModifiersOperand();
97       } else {
98         return 0;
99       }
100     }
101 
102     friend raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods);
103   };
104 
105   enum ImmTy {
106     ImmTyNone,
107     ImmTyGDS,
108     ImmTyLDS,
109     ImmTyOffen,
110     ImmTyIdxen,
111     ImmTyAddr64,
112     ImmTyOffset,
113     ImmTyInstOffset,
114     ImmTyOffset0,
115     ImmTyOffset1,
116     ImmTyDLC,
117     ImmTyGLC,
118     ImmTySLC,
119     ImmTySWZ,
120     ImmTyTFE,
121     ImmTyD16,
122     ImmTyClampSI,
123     ImmTyOModSI,
124     ImmTyDPP8,
125     ImmTyDppCtrl,
126     ImmTyDppRowMask,
127     ImmTyDppBankMask,
128     ImmTyDppBoundCtrl,
129     ImmTyDppFi,
130     ImmTySdwaDstSel,
131     ImmTySdwaSrc0Sel,
132     ImmTySdwaSrc1Sel,
133     ImmTySdwaDstUnused,
134     ImmTyDMask,
135     ImmTyDim,
136     ImmTyUNorm,
137     ImmTyDA,
138     ImmTyR128A16,
139     ImmTyA16,
140     ImmTyLWE,
141     ImmTyExpTgt,
142     ImmTyExpCompr,
143     ImmTyExpVM,
144     ImmTyFORMAT,
145     ImmTyHwreg,
146     ImmTyOff,
147     ImmTySendMsg,
148     ImmTyInterpSlot,
149     ImmTyInterpAttr,
150     ImmTyAttrChan,
151     ImmTyOpSel,
152     ImmTyOpSelHi,
153     ImmTyNegLo,
154     ImmTyNegHi,
155     ImmTySwizzle,
156     ImmTyGprIdxMode,
157     ImmTyHigh,
158     ImmTyBLGP,
159     ImmTyCBSZ,
160     ImmTyABID,
161     ImmTyEndpgm,
162   };
163 
164   enum ImmKindTy {
165     ImmKindTyNone,
166     ImmKindTyLiteral,
167     ImmKindTyConst,
168   };
169 
170 private:
171   struct TokOp {
172     const char *Data;
173     unsigned Length;
174   };
175 
176   struct ImmOp {
177     int64_t Val;
178     ImmTy Type;
179     bool IsFPImm;
180     mutable ImmKindTy Kind;
181     Modifiers Mods;
182   };
183 
184   struct RegOp {
185     unsigned RegNo;
186     Modifiers Mods;
187   };
188 
189   union {
190     TokOp Tok;
191     ImmOp Imm;
192     RegOp Reg;
193     const MCExpr *Expr;
194   };
195 
196 public:
197   bool isToken() const override {
198     if (Kind == Token)
199       return true;
200 
201     // When parsing operands, we can't always tell if something was meant to be
202     // a token, like 'gds', or an expression that references a global variable.
203     // In this case, we assume the string is an expression, and if we need to
    // interpret it as a token, then we treat the symbol name as the token.
205     return isSymbolRefExpr();
206   }
207 
208   bool isSymbolRefExpr() const {
209     return isExpr() && Expr && isa<MCSymbolRefExpr>(Expr);
210   }
211 
212   bool isImm() const override {
213     return Kind == Immediate;
214   }
215 
216   void setImmKindNone() const {
217     assert(isImm());
218     Imm.Kind = ImmKindTyNone;
219   }
220 
221   void setImmKindLiteral() const {
222     assert(isImm());
223     Imm.Kind = ImmKindTyLiteral;
224   }
225 
226   void setImmKindConst() const {
227     assert(isImm());
228     Imm.Kind = ImmKindTyConst;
229   }
230 
231   bool IsImmKindLiteral() const {
232     return isImm() && Imm.Kind == ImmKindTyLiteral;
233   }
234 
235   bool isImmKindConst() const {
236     return isImm() && Imm.Kind == ImmKindTyConst;
237   }
238 
239   bool isInlinableImm(MVT type) const;
240   bool isLiteralImm(MVT type) const;
241 
242   bool isRegKind() const {
243     return Kind == Register;
244   }
245 
246   bool isReg() const override {
247     return isRegKind() && !hasModifiers();
248   }
249 
250   bool isRegOrImmWithInputMods(unsigned RCID, MVT type) const {
251     return isRegClass(RCID) || isInlinableImm(type) || isLiteralImm(type);
252   }
253 
254   bool isRegOrImmWithInt16InputMods() const {
255     return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i16);
256   }
257 
258   bool isRegOrImmWithInt32InputMods() const {
259     return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i32);
260   }
261 
262   bool isRegOrImmWithInt64InputMods() const {
263     return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::i64);
264   }
265 
266   bool isRegOrImmWithFP16InputMods() const {
267     return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f16);
268   }
269 
270   bool isRegOrImmWithFP32InputMods() const {
271     return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f32);
272   }
273 
274   bool isRegOrImmWithFP64InputMods() const {
275     return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::f64);
276   }
277 
278   bool isVReg() const {
279     return isRegClass(AMDGPU::VGPR_32RegClassID) ||
280            isRegClass(AMDGPU::VReg_64RegClassID) ||
281            isRegClass(AMDGPU::VReg_96RegClassID) ||
282            isRegClass(AMDGPU::VReg_128RegClassID) ||
283            isRegClass(AMDGPU::VReg_160RegClassID) ||
284            isRegClass(AMDGPU::VReg_192RegClassID) ||
285            isRegClass(AMDGPU::VReg_256RegClassID) ||
286            isRegClass(AMDGPU::VReg_512RegClassID) ||
287            isRegClass(AMDGPU::VReg_1024RegClassID);
288   }
289 
290   bool isVReg32() const {
291     return isRegClass(AMDGPU::VGPR_32RegClassID);
292   }
293 
294   bool isVReg32OrOff() const {
295     return isOff() || isVReg32();
296   }
297 
298   bool isNull() const {
299     return isRegKind() && getReg() == AMDGPU::SGPR_NULL;
300   }
301 
302   bool isSDWAOperand(MVT type) const;
303   bool isSDWAFP16Operand() const;
304   bool isSDWAFP32Operand() const;
305   bool isSDWAInt16Operand() const;
306   bool isSDWAInt32Operand() const;
307 
308   bool isImmTy(ImmTy ImmT) const {
309     return isImm() && Imm.Type == ImmT;
310   }
311 
312   bool isImmModifier() const {
313     return isImm() && Imm.Type != ImmTyNone;
314   }
315 
316   bool isClampSI() const { return isImmTy(ImmTyClampSI); }
317   bool isOModSI() const { return isImmTy(ImmTyOModSI); }
318   bool isDMask() const { return isImmTy(ImmTyDMask); }
319   bool isDim() const { return isImmTy(ImmTyDim); }
320   bool isUNorm() const { return isImmTy(ImmTyUNorm); }
321   bool isDA() const { return isImmTy(ImmTyDA); }
322   bool isR128A16() const { return isImmTy(ImmTyR128A16); }
323   bool isGFX10A16() const { return isImmTy(ImmTyA16); }
324   bool isLWE() const { return isImmTy(ImmTyLWE); }
325   bool isOff() const { return isImmTy(ImmTyOff); }
326   bool isExpTgt() const { return isImmTy(ImmTyExpTgt); }
327   bool isExpVM() const { return isImmTy(ImmTyExpVM); }
328   bool isExpCompr() const { return isImmTy(ImmTyExpCompr); }
329   bool isOffen() const { return isImmTy(ImmTyOffen); }
330   bool isIdxen() const { return isImmTy(ImmTyIdxen); }
331   bool isAddr64() const { return isImmTy(ImmTyAddr64); }
332   bool isOffset() const { return isImmTy(ImmTyOffset) && isUInt<16>(getImm()); }
333   bool isOffset0() const { return isImmTy(ImmTyOffset0) && isUInt<8>(getImm()); }
334   bool isOffset1() const { return isImmTy(ImmTyOffset1) && isUInt<8>(getImm()); }
335 
336   bool isFlatOffset() const { return isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset); }
337   bool isGDS() const { return isImmTy(ImmTyGDS); }
338   bool isLDS() const { return isImmTy(ImmTyLDS); }
339   bool isDLC() const { return isImmTy(ImmTyDLC); }
340   bool isGLC() const { return isImmTy(ImmTyGLC); }
341   // "GLC_1" is a MatchClass of the GLC_1 operand with the default and forced
342   // value of the GLC operand.
343   bool isGLC_1() const { return isImmTy(ImmTyGLC); }
344   bool isSLC() const { return isImmTy(ImmTySLC); }
345   bool isSWZ() const { return isImmTy(ImmTySWZ); }
346   bool isTFE() const { return isImmTy(ImmTyTFE); }
347   bool isD16() const { return isImmTy(ImmTyD16); }
348   bool isFORMAT() const { return isImmTy(ImmTyFORMAT) && isUInt<7>(getImm()); }
349   bool isBankMask() const { return isImmTy(ImmTyDppBankMask); }
350   bool isRowMask() const { return isImmTy(ImmTyDppRowMask); }
351   bool isBoundCtrl() const { return isImmTy(ImmTyDppBoundCtrl); }
352   bool isFI() const { return isImmTy(ImmTyDppFi); }
353   bool isSDWADstSel() const { return isImmTy(ImmTySdwaDstSel); }
354   bool isSDWASrc0Sel() const { return isImmTy(ImmTySdwaSrc0Sel); }
355   bool isSDWASrc1Sel() const { return isImmTy(ImmTySdwaSrc1Sel); }
356   bool isSDWADstUnused() const { return isImmTy(ImmTySdwaDstUnused); }
357   bool isInterpSlot() const { return isImmTy(ImmTyInterpSlot); }
358   bool isInterpAttr() const { return isImmTy(ImmTyInterpAttr); }
359   bool isAttrChan() const { return isImmTy(ImmTyAttrChan); }
360   bool isOpSel() const { return isImmTy(ImmTyOpSel); }
361   bool isOpSelHi() const { return isImmTy(ImmTyOpSelHi); }
362   bool isNegLo() const { return isImmTy(ImmTyNegLo); }
363   bool isNegHi() const { return isImmTy(ImmTyNegHi); }
364   bool isHigh() const { return isImmTy(ImmTyHigh); }
365 
366   bool isMod() const {
367     return isClampSI() || isOModSI();
368   }
369 
370   bool isRegOrImm() const {
371     return isReg() || isImm();
372   }
373 
374   bool isRegClass(unsigned RCID) const;
375 
376   bool isInlineValue() const;
377 
378   bool isRegOrInlineNoMods(unsigned RCID, MVT type) const {
379     return (isRegClass(RCID) || isInlinableImm(type)) && !hasModifiers();
380   }
381 
382   bool isSCSrcB16() const {
383     return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i16);
384   }
385 
386   bool isSCSrcV2B16() const {
387     return isSCSrcB16();
388   }
389 
390   bool isSCSrcB32() const {
391     return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i32);
392   }
393 
394   bool isSCSrcB64() const {
395     return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::i64);
396   }
397 
398   bool isBoolReg() const;
399 
400   bool isSCSrcF16() const {
401     return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f16);
402   }
403 
404   bool isSCSrcV2F16() const {
405     return isSCSrcF16();
406   }
407 
408   bool isSCSrcF32() const {
409     return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f32);
410   }
411 
412   bool isSCSrcF64() const {
413     return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::f64);
414   }
415 
416   bool isSSrcB32() const {
417     return isSCSrcB32() || isLiteralImm(MVT::i32) || isExpr();
418   }
419 
420   bool isSSrcB16() const {
421     return isSCSrcB16() || isLiteralImm(MVT::i16);
422   }
423 
424   bool isSSrcV2B16() const {
425     llvm_unreachable("cannot happen");
426     return isSSrcB16();
427   }
428 
429   bool isSSrcB64() const {
430     // TODO: Find out how SALU supports extension of 32-bit literals to 64 bits.
431     // See isVSrc64().
432     return isSCSrcB64() || isLiteralImm(MVT::i64);
433   }
434 
435   bool isSSrcF32() const {
436     return isSCSrcB32() || isLiteralImm(MVT::f32) || isExpr();
437   }
438 
439   bool isSSrcF64() const {
440     return isSCSrcB64() || isLiteralImm(MVT::f64);
441   }
442 
443   bool isSSrcF16() const {
444     return isSCSrcB16() || isLiteralImm(MVT::f16);
445   }
446 
447   bool isSSrcV2F16() const {
448     llvm_unreachable("cannot happen");
449     return isSSrcF16();
450   }
451 
452   bool isSSrcOrLdsB32() const {
453     return isRegOrInlineNoMods(AMDGPU::SRegOrLds_32RegClassID, MVT::i32) ||
454            isLiteralImm(MVT::i32) || isExpr();
455   }
456 
457   bool isVCSrcB32() const {
458     return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i32);
459   }
460 
461   bool isVCSrcB64() const {
462     return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::i64);
463   }
464 
465   bool isVCSrcB16() const {
466     return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i16);
467   }
468 
469   bool isVCSrcV2B16() const {
470     return isVCSrcB16();
471   }
472 
473   bool isVCSrcF32() const {
474     return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f32);
475   }
476 
477   bool isVCSrcF64() const {
478     return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::f64);
479   }
480 
481   bool isVCSrcF16() const {
482     return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f16);
483   }
484 
485   bool isVCSrcV2F16() const {
486     return isVCSrcF16();
487   }
488 
489   bool isVSrcB32() const {
490     return isVCSrcF32() || isLiteralImm(MVT::i32) || isExpr();
491   }
492 
493   bool isVSrcB64() const {
494     return isVCSrcF64() || isLiteralImm(MVT::i64);
495   }
496 
497   bool isVSrcB16() const {
498     return isVCSrcB16() || isLiteralImm(MVT::i16);
499   }
500 
501   bool isVSrcV2B16() const {
502     return isVSrcB16() || isLiteralImm(MVT::v2i16);
503   }
504 
505   bool isVSrcF32() const {
506     return isVCSrcF32() || isLiteralImm(MVT::f32) || isExpr();
507   }
508 
509   bool isVSrcF64() const {
510     return isVCSrcF64() || isLiteralImm(MVT::f64);
511   }
512 
513   bool isVSrcF16() const {
514     return isVCSrcF16() || isLiteralImm(MVT::f16);
515   }
516 
517   bool isVSrcV2F16() const {
518     return isVSrcF16() || isLiteralImm(MVT::v2f16);
519   }
520 
521   bool isVISrcB32() const {
522     return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i32);
523   }
524 
525   bool isVISrcB16() const {
526     return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i16);
527   }
528 
529   bool isVISrcV2B16() const {
530     return isVISrcB16();
531   }
532 
533   bool isVISrcF32() const {
534     return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f32);
535   }
536 
537   bool isVISrcF16() const {
538     return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f16);
539   }
540 
541   bool isVISrcV2F16() const {
542     return isVISrcF16() || isVISrcB32();
543   }
544 
545   bool isAISrcB32() const {
546     return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i32);
547   }
548 
549   bool isAISrcB16() const {
550     return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i16);
551   }
552 
553   bool isAISrcV2B16() const {
554     return isAISrcB16();
555   }
556 
557   bool isAISrcF32() const {
558     return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f32);
559   }
560 
561   bool isAISrcF16() const {
562     return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f16);
563   }
564 
565   bool isAISrcV2F16() const {
566     return isAISrcF16() || isAISrcB32();
567   }
568 
569   bool isAISrc_128B32() const {
570     return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i32);
571   }
572 
573   bool isAISrc_128B16() const {
574     return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i16);
575   }
576 
577   bool isAISrc_128V2B16() const {
578     return isAISrc_128B16();
579   }
580 
581   bool isAISrc_128F32() const {
582     return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f32);
583   }
584 
585   bool isAISrc_128F16() const {
586     return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f16);
587   }
588 
589   bool isAISrc_128V2F16() const {
590     return isAISrc_128F16() || isAISrc_128B32();
591   }
592 
593   bool isAISrc_512B32() const {
594     return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i32);
595   }
596 
597   bool isAISrc_512B16() const {
598     return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i16);
599   }
600 
601   bool isAISrc_512V2B16() const {
602     return isAISrc_512B16();
603   }
604 
605   bool isAISrc_512F32() const {
606     return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f32);
607   }
608 
609   bool isAISrc_512F16() const {
610     return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f16);
611   }
612 
613   bool isAISrc_512V2F16() const {
614     return isAISrc_512F16() || isAISrc_512B32();
615   }
616 
617   bool isAISrc_1024B32() const {
618     return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i32);
619   }
620 
621   bool isAISrc_1024B16() const {
622     return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i16);
623   }
624 
625   bool isAISrc_1024V2B16() const {
626     return isAISrc_1024B16();
627   }
628 
629   bool isAISrc_1024F32() const {
630     return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f32);
631   }
632 
633   bool isAISrc_1024F16() const {
634     return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f16);
635   }
636 
637   bool isAISrc_1024V2F16() const {
638     return isAISrc_1024F16() || isAISrc_1024B32();
639   }
640 
641   bool isKImmFP32() const {
642     return isLiteralImm(MVT::f32);
643   }
644 
645   bool isKImmFP16() const {
646     return isLiteralImm(MVT::f16);
647   }
648 
649   bool isMem() const override {
650     return false;
651   }
652 
653   bool isExpr() const {
654     return Kind == Expression;
655   }
656 
657   bool isSoppBrTarget() const {
658     return isExpr() || isImm();
659   }
660 
661   bool isSWaitCnt() const;
662   bool isHwreg() const;
663   bool isSendMsg() const;
664   bool isSwizzle() const;
665   bool isSMRDOffset8() const;
666   bool isSMEMOffset() const;
667   bool isSMRDLiteralOffset() const;
668   bool isDPP8() const;
669   bool isDPPCtrl() const;
670   bool isBLGP() const;
671   bool isCBSZ() const;
672   bool isABID() const;
673   bool isGPRIdxMode() const;
674   bool isS16Imm() const;
675   bool isU16Imm() const;
676   bool isEndpgm() const;
677 
678   StringRef getExpressionAsToken() const {
679     assert(isExpr());
680     const MCSymbolRefExpr *S = cast<MCSymbolRefExpr>(Expr);
681     return S->getSymbol().getName();
682   }
683 
684   StringRef getToken() const {
685     assert(isToken());
686 
687     if (Kind == Expression)
688       return getExpressionAsToken();
689 
690     return StringRef(Tok.Data, Tok.Length);
691   }
692 
693   int64_t getImm() const {
694     assert(isImm());
695     return Imm.Val;
696   }
697 
698   void setImm(int64_t Val) {
699     assert(isImm());
700     Imm.Val = Val;
701   }
702 
703   ImmTy getImmTy() const {
704     assert(isImm());
705     return Imm.Type;
706   }
707 
708   unsigned getReg() const override {
709     assert(isRegKind());
710     return Reg.RegNo;
711   }
712 
713   SMLoc getStartLoc() const override {
714     return StartLoc;
715   }
716 
717   SMLoc getEndLoc() const override {
718     return EndLoc;
719   }
720 
721   SMRange getLocRange() const {
722     return SMRange(StartLoc, EndLoc);
723   }
724 
725   Modifiers getModifiers() const {
726     assert(isRegKind() || isImmTy(ImmTyNone));
727     return isRegKind() ? Reg.Mods : Imm.Mods;
728   }
729 
730   void setModifiers(Modifiers Mods) {
731     assert(isRegKind() || isImmTy(ImmTyNone));
732     if (isRegKind())
733       Reg.Mods = Mods;
734     else
735       Imm.Mods = Mods;
736   }
737 
738   bool hasModifiers() const {
739     return getModifiers().hasModifiers();
740   }
741 
742   bool hasFPModifiers() const {
743     return getModifiers().hasFPModifiers();
744   }
745 
746   bool hasIntModifiers() const {
747     return getModifiers().hasIntModifiers();
748   }
749 
750   uint64_t applyInputFPModifiers(uint64_t Val, unsigned Size) const;
751 
752   void addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers = true) const;
753 
754   void addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const;
755 
756   template <unsigned Bitwidth>
757   void addKImmFPOperands(MCInst &Inst, unsigned N) const;
758 
759   void addKImmFP16Operands(MCInst &Inst, unsigned N) const {
760     addKImmFPOperands<16>(Inst, N);
761   }
762 
763   void addKImmFP32Operands(MCInst &Inst, unsigned N) const {
764     addKImmFPOperands<32>(Inst, N);
765   }
766 
767   void addRegOperands(MCInst &Inst, unsigned N) const;
768 
769   void addBoolRegOperands(MCInst &Inst, unsigned N) const {
770     addRegOperands(Inst, N);
771   }
772 
773   void addRegOrImmOperands(MCInst &Inst, unsigned N) const {
774     if (isRegKind())
775       addRegOperands(Inst, N);
776     else if (isExpr())
777       Inst.addOperand(MCOperand::createExpr(Expr));
778     else
779       addImmOperands(Inst, N);
780   }
781 
782   void addRegOrImmWithInputModsOperands(MCInst &Inst, unsigned N) const {
783     Modifiers Mods = getModifiers();
784     Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
785     if (isRegKind()) {
786       addRegOperands(Inst, N);
787     } else {
788       addImmOperands(Inst, N, false);
789     }
790   }
791 
792   void addRegOrImmWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
793     assert(!hasIntModifiers());
794     addRegOrImmWithInputModsOperands(Inst, N);
795   }
796 
797   void addRegOrImmWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
798     assert(!hasFPModifiers());
799     addRegOrImmWithInputModsOperands(Inst, N);
800   }
801 
802   void addRegWithInputModsOperands(MCInst &Inst, unsigned N) const {
803     Modifiers Mods = getModifiers();
804     Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
805     assert(isRegKind());
806     addRegOperands(Inst, N);
807   }
808 
809   void addRegWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
810     assert(!hasIntModifiers());
811     addRegWithInputModsOperands(Inst, N);
812   }
813 
814   void addRegWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
815     assert(!hasFPModifiers());
816     addRegWithInputModsOperands(Inst, N);
817   }
818 
819   void addSoppBrTargetOperands(MCInst &Inst, unsigned N) const {
820     if (isImm())
821       addImmOperands(Inst, N);
822     else {
823       assert(isExpr());
824       Inst.addOperand(MCOperand::createExpr(Expr));
825     }
826   }
827 
828   static void printImmTy(raw_ostream& OS, ImmTy Type) {
829     switch (Type) {
830     case ImmTyNone: OS << "None"; break;
831     case ImmTyGDS: OS << "GDS"; break;
832     case ImmTyLDS: OS << "LDS"; break;
833     case ImmTyOffen: OS << "Offen"; break;
834     case ImmTyIdxen: OS << "Idxen"; break;
835     case ImmTyAddr64: OS << "Addr64"; break;
836     case ImmTyOffset: OS << "Offset"; break;
837     case ImmTyInstOffset: OS << "InstOffset"; break;
838     case ImmTyOffset0: OS << "Offset0"; break;
839     case ImmTyOffset1: OS << "Offset1"; break;
840     case ImmTyDLC: OS << "DLC"; break;
841     case ImmTyGLC: OS << "GLC"; break;
842     case ImmTySLC: OS << "SLC"; break;
843     case ImmTySWZ: OS << "SWZ"; break;
844     case ImmTyTFE: OS << "TFE"; break;
845     case ImmTyD16: OS << "D16"; break;
846     case ImmTyFORMAT: OS << "FORMAT"; break;
847     case ImmTyClampSI: OS << "ClampSI"; break;
848     case ImmTyOModSI: OS << "OModSI"; break;
849     case ImmTyDPP8: OS << "DPP8"; break;
850     case ImmTyDppCtrl: OS << "DppCtrl"; break;
851     case ImmTyDppRowMask: OS << "DppRowMask"; break;
852     case ImmTyDppBankMask: OS << "DppBankMask"; break;
853     case ImmTyDppBoundCtrl: OS << "DppBoundCtrl"; break;
854     case ImmTyDppFi: OS << "FI"; break;
855     case ImmTySdwaDstSel: OS << "SdwaDstSel"; break;
856     case ImmTySdwaSrc0Sel: OS << "SdwaSrc0Sel"; break;
857     case ImmTySdwaSrc1Sel: OS << "SdwaSrc1Sel"; break;
858     case ImmTySdwaDstUnused: OS << "SdwaDstUnused"; break;
859     case ImmTyDMask: OS << "DMask"; break;
860     case ImmTyDim: OS << "Dim"; break;
861     case ImmTyUNorm: OS << "UNorm"; break;
862     case ImmTyDA: OS << "DA"; break;
863     case ImmTyR128A16: OS << "R128A16"; break;
864     case ImmTyA16: OS << "A16"; break;
865     case ImmTyLWE: OS << "LWE"; break;
866     case ImmTyOff: OS << "Off"; break;
867     case ImmTyExpTgt: OS << "ExpTgt"; break;
868     case ImmTyExpCompr: OS << "ExpCompr"; break;
869     case ImmTyExpVM: OS << "ExpVM"; break;
870     case ImmTyHwreg: OS << "Hwreg"; break;
871     case ImmTySendMsg: OS << "SendMsg"; break;
872     case ImmTyInterpSlot: OS << "InterpSlot"; break;
873     case ImmTyInterpAttr: OS << "InterpAttr"; break;
874     case ImmTyAttrChan: OS << "AttrChan"; break;
875     case ImmTyOpSel: OS << "OpSel"; break;
876     case ImmTyOpSelHi: OS << "OpSelHi"; break;
877     case ImmTyNegLo: OS << "NegLo"; break;
878     case ImmTyNegHi: OS << "NegHi"; break;
879     case ImmTySwizzle: OS << "Swizzle"; break;
880     case ImmTyGprIdxMode: OS << "GprIdxMode"; break;
881     case ImmTyHigh: OS << "High"; break;
882     case ImmTyBLGP: OS << "BLGP"; break;
883     case ImmTyCBSZ: OS << "CBSZ"; break;
884     case ImmTyABID: OS << "ABID"; break;
885     case ImmTyEndpgm: OS << "Endpgm"; break;
886     }
887   }
888 
889   void print(raw_ostream &OS) const override {
890     switch (Kind) {
891     case Register:
892       OS << "<register " << getReg() << " mods: " << Reg.Mods << '>';
893       break;
894     case Immediate:
895       OS << '<' << getImm();
896       if (getImmTy() != ImmTyNone) {
897         OS << " type: "; printImmTy(OS, getImmTy());
898       }
899       OS << " mods: " << Imm.Mods << '>';
900       break;
901     case Token:
902       OS << '\'' << getToken() << '\'';
903       break;
904     case Expression:
905       OS << "<expr " << *Expr << '>';
906       break;
907     }
908   }
909 
910   static AMDGPUOperand::Ptr CreateImm(const AMDGPUAsmParser *AsmParser,
911                                       int64_t Val, SMLoc Loc,
912                                       ImmTy Type = ImmTyNone,
913                                       bool IsFPImm = false) {
914     auto Op = std::make_unique<AMDGPUOperand>(Immediate, AsmParser);
915     Op->Imm.Val = Val;
916     Op->Imm.IsFPImm = IsFPImm;
917     Op->Imm.Kind = ImmKindTyNone;
918     Op->Imm.Type = Type;
919     Op->Imm.Mods = Modifiers();
920     Op->StartLoc = Loc;
921     Op->EndLoc = Loc;
922     return Op;
923   }
924 
925   static AMDGPUOperand::Ptr CreateToken(const AMDGPUAsmParser *AsmParser,
926                                         StringRef Str, SMLoc Loc,
927                                         bool HasExplicitEncodingSize = true) {
928     auto Res = std::make_unique<AMDGPUOperand>(Token, AsmParser);
929     Res->Tok.Data = Str.data();
930     Res->Tok.Length = Str.size();
931     Res->StartLoc = Loc;
932     Res->EndLoc = Loc;
933     return Res;
934   }
935 
936   static AMDGPUOperand::Ptr CreateReg(const AMDGPUAsmParser *AsmParser,
937                                       unsigned RegNo, SMLoc S,
938                                       SMLoc E) {
939     auto Op = std::make_unique<AMDGPUOperand>(Register, AsmParser);
940     Op->Reg.RegNo = RegNo;
941     Op->Reg.Mods = Modifiers();
942     Op->StartLoc = S;
943     Op->EndLoc = E;
944     return Op;
945   }
946 
947   static AMDGPUOperand::Ptr CreateExpr(const AMDGPUAsmParser *AsmParser,
948                                        const class MCExpr *Expr, SMLoc S) {
949     auto Op = std::make_unique<AMDGPUOperand>(Expression, AsmParser);
950     Op->Expr = Expr;
951     Op->StartLoc = S;
952     Op->EndLoc = S;
953     return Op;
954   }
955 };
956 
957 raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods) {
  OS << "abs:" << Mods.Abs << " neg:" << Mods.Neg << " sext:" << Mods.Sext;
959   return OS;
960 }
961 
962 //===----------------------------------------------------------------------===//
963 // AsmParser
964 //===----------------------------------------------------------------------===//
965 
966 // Holds info related to the current kernel, e.g. count of SGPRs used.
// Kernel scope begins at the .amdgpu_hsa_kernel directive and ends at the next
// .amdgpu_hsa_kernel directive or at EOF.
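// The running maxima are published as the MC symbols ".kernel.sgpr_count" and
// ".kernel.vgpr_count" so that later code in the same scope can refer to them.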
969 class KernelScopeInfo {
970   int SgprIndexUnusedMin = -1;
971   int VgprIndexUnusedMin = -1;
972   MCContext *Ctx = nullptr;
973 
974   void usesSgprAt(int i) {
975     if (i >= SgprIndexUnusedMin) {
976       SgprIndexUnusedMin = ++i;
977       if (Ctx) {
978         MCSymbol * const Sym = Ctx->getOrCreateSymbol(Twine(".kernel.sgpr_count"));
979         Sym->setVariableValue(MCConstantExpr::create(SgprIndexUnusedMin, *Ctx));
980       }
981     }
982   }
983 
984   void usesVgprAt(int i) {
985     if (i >= VgprIndexUnusedMin) {
986       VgprIndexUnusedMin = ++i;
987       if (Ctx) {
988         MCSymbol * const Sym = Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count"));
989         Sym->setVariableValue(MCConstantExpr::create(VgprIndexUnusedMin, *Ctx));
990       }
991     }
992   }
993 
994 public:
995   KernelScopeInfo() = default;
996 
997   void initialize(MCContext &Context) {
998     Ctx = &Context;
999     usesSgprAt(SgprIndexUnusedMin = -1);
1000     usesVgprAt(VgprIndexUnusedMin = -1);
1001   }
1002 
1003   void usesRegister(RegisterKind RegKind, unsigned DwordRegIndex, unsigned RegWidth) {
1004     switch (RegKind) {
1005       case IS_SGPR: usesSgprAt(DwordRegIndex + RegWidth - 1); break;
1006       case IS_AGPR: // fall through
1007       case IS_VGPR: usesVgprAt(DwordRegIndex + RegWidth - 1); break;
1008       default: break;
1009     }
1010   }
1011 };
1012 
1013 class AMDGPUAsmParser : public MCTargetAsmParser {
1014   MCAsmParser &Parser;
1015 
1016   // Number of extra operands parsed after the first optional operand.
1017   // This may be necessary to skip hardcoded mandatory operands.
1018   static const unsigned MAX_OPR_LOOKAHEAD = 8;
1019 
1020   unsigned ForcedEncodingSize = 0;
1021   bool ForcedDPP = false;
1022   bool ForcedSDWA = false;
1023   KernelScopeInfo KernelScope;
1024 
1025   /// @name Auto-generated Match Functions
1026   /// {
1027 
1028 #define GET_ASSEMBLER_HEADER
1029 #include "AMDGPUGenAsmMatcher.inc"
1030 
1031   /// }
1032 
1033 private:
1034   bool ParseAsAbsoluteExpression(uint32_t &Ret);
1035   bool OutOfRangeError(SMRange Range);
  /// Calculate the VGPR/SGPR blocks required for a given target, the reserved
  /// registers, and the user-specified NextFreeXGPR values.
1038   ///
1039   /// \param Features [in] Target features, used for bug corrections.
1040   /// \param VCCUsed [in] Whether VCC special SGPR is reserved.
1041   /// \param FlatScrUsed [in] Whether FLAT_SCRATCH special SGPR is reserved.
1042   /// \param XNACKUsed [in] Whether XNACK_MASK special SGPR is reserved.
1043   /// \param EnableWavefrontSize32 [in] Value of ENABLE_WAVEFRONT_SIZE32 kernel
1044   /// descriptor field, if valid.
1045   /// \param NextFreeVGPR [in] Max VGPR number referenced, plus one.
1046   /// \param VGPRRange [in] Token range, used for VGPR diagnostics.
1047   /// \param NextFreeSGPR [in] Max SGPR number referenced, plus one.
1048   /// \param SGPRRange [in] Token range, used for SGPR diagnostics.
1049   /// \param VGPRBlocks [out] Result VGPR block count.
1050   /// \param SGPRBlocks [out] Result SGPR block count.
1051   bool calculateGPRBlocks(const FeatureBitset &Features, bool VCCUsed,
1052                           bool FlatScrUsed, bool XNACKUsed,
1053                           Optional<bool> EnableWavefrontSize32, unsigned NextFreeVGPR,
1054                           SMRange VGPRRange, unsigned NextFreeSGPR,
1055                           SMRange SGPRRange, unsigned &VGPRBlocks,
1056                           unsigned &SGPRBlocks);
1057   bool ParseDirectiveAMDGCNTarget();
1058   bool ParseDirectiveAMDHSAKernel();
1059   bool ParseDirectiveMajorMinor(uint32_t &Major, uint32_t &Minor);
1060   bool ParseDirectiveHSACodeObjectVersion();
1061   bool ParseDirectiveHSACodeObjectISA();
1062   bool ParseAMDKernelCodeTValue(StringRef ID, amd_kernel_code_t &Header);
1063   bool ParseDirectiveAMDKernelCodeT();
1064   bool subtargetHasRegister(const MCRegisterInfo &MRI, unsigned RegNo) const;
1065   bool ParseDirectiveAMDGPUHsaKernel();
1066 
1067   bool ParseDirectiveISAVersion();
1068   bool ParseDirectiveHSAMetadata();
1069   bool ParseDirectivePALMetadataBegin();
1070   bool ParseDirectivePALMetadata();
1071   bool ParseDirectiveAMDGPULDS();
1072 
1073   /// Common code to parse out a block of text (typically YAML) between start and
1074   /// end directives.
1075   bool ParseToEndDirective(const char *AssemblerDirectiveBegin,
1076                            const char *AssemblerDirectiveEnd,
1077                            std::string &CollectString);
1078 
1079   bool AddNextRegisterToList(unsigned& Reg, unsigned& RegWidth,
1080                              RegisterKind RegKind, unsigned Reg1, SMLoc Loc);
1081   bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
1082                            unsigned &RegNum, unsigned &RegWidth,
1083                            bool RestoreOnFailure = false);
1084   bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
1085                            unsigned &RegNum, unsigned &RegWidth,
1086                            SmallVectorImpl<AsmToken> &Tokens);
1087   unsigned ParseRegularReg(RegisterKind &RegKind, unsigned &RegNum,
1088                            unsigned &RegWidth,
1089                            SmallVectorImpl<AsmToken> &Tokens);
1090   unsigned ParseSpecialReg(RegisterKind &RegKind, unsigned &RegNum,
1091                            unsigned &RegWidth,
1092                            SmallVectorImpl<AsmToken> &Tokens);
1093   unsigned ParseRegList(RegisterKind &RegKind, unsigned &RegNum,
1094                         unsigned &RegWidth, SmallVectorImpl<AsmToken> &Tokens);
1095   bool ParseRegRange(unsigned& Num, unsigned& Width);
1096   unsigned getRegularReg(RegisterKind RegKind,
1097                          unsigned RegNum,
1098                          unsigned RegWidth,
1099                          SMLoc Loc);
1100 
1101   bool isRegister();
1102   bool isRegister(const AsmToken &Token, const AsmToken &NextToken) const;
1103   Optional<StringRef> getGprCountSymbolName(RegisterKind RegKind);
1104   void initializeGprCountSymbol(RegisterKind RegKind);
1105   bool updateGprCountSymbols(RegisterKind RegKind, unsigned DwordRegIndex,
1106                              unsigned RegWidth);
1107   void cvtMubufImpl(MCInst &Inst, const OperandVector &Operands,
1108                     bool IsAtomic, bool IsAtomicReturn, bool IsLds = false);
1109   void cvtDSImpl(MCInst &Inst, const OperandVector &Operands,
1110                  bool IsGdsHardcoded);
1111 
1112 public:
1113   enum AMDGPUMatchResultTy {
1114     Match_PreferE32 = FIRST_TARGET_MATCH_RESULT_TY
1115   };
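  // Operand parsing mode. OperandMode_NSA is used for MIMG instructions whose
  // address operands are written as a list of individual VGPRs (the GFX10
  // non-sequential address encoding) rather than a single register range.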
1116   enum OperandMode {
1117     OperandMode_Default,
1118     OperandMode_NSA,
1119   };
1120 
1121   using OptionalImmIndexMap = std::map<AMDGPUOperand::ImmTy, unsigned>;
1122 
1123   AMDGPUAsmParser(const MCSubtargetInfo &STI, MCAsmParser &_Parser,
1124                const MCInstrInfo &MII,
1125                const MCTargetOptions &Options)
1126       : MCTargetAsmParser(Options, STI, MII), Parser(_Parser) {
1127     MCAsmParserExtension::Initialize(Parser);
1128 
1129     if (getFeatureBits().none()) {
1130       // Set default features.
1131       copySTI().ToggleFeature("southern-islands");
1132     }
1133 
1134     setAvailableFeatures(ComputeAvailableFeatures(getFeatureBits()));
1135 
1136     {
      // TODO: make these pre-defined variables read-only.
      // Currently there is no suitable machinery in core llvm-mc for this.
      // MCSymbol::isRedefinable is intended for another purpose, and
      // AsmParser::parseDirectiveSet() cannot be specialized for a specific
      // target.
1141       AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
1142       MCContext &Ctx = getContext();
1143       if (ISA.Major >= 6 && isHsaAbiVersion3(&getSTI())) {
1144         MCSymbol *Sym =
1145             Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_number"));
1146         Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
1147         Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_minor"));
1148         Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
1149         Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_stepping"));
1150         Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
1151       } else {
1152         MCSymbol *Sym =
1153             Ctx.getOrCreateSymbol(Twine(".option.machine_version_major"));
1154         Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
1155         Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_minor"));
1156         Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
1157         Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_stepping"));
1158         Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
1159       }
1160       if (ISA.Major >= 6 && isHsaAbiVersion3(&getSTI())) {
1161         initializeGprCountSymbol(IS_VGPR);
1162         initializeGprCountSymbol(IS_SGPR);
1163       } else
1164         KernelScope.initialize(getContext());
1165     }
1166   }
1167 
1168   bool hasXNACK() const {
1169     return AMDGPU::hasXNACK(getSTI());
1170   }
1171 
1172   bool hasMIMG_R128() const {
1173     return AMDGPU::hasMIMG_R128(getSTI());
1174   }
1175 
1176   bool hasPackedD16() const {
1177     return AMDGPU::hasPackedD16(getSTI());
1178   }
1179 
1180   bool hasGFX10A16() const {
1181     return AMDGPU::hasGFX10A16(getSTI());
1182   }
1183 
1184   bool isSI() const {
1185     return AMDGPU::isSI(getSTI());
1186   }
1187 
1188   bool isCI() const {
1189     return AMDGPU::isCI(getSTI());
1190   }
1191 
1192   bool isVI() const {
1193     return AMDGPU::isVI(getSTI());
1194   }
1195 
1196   bool isGFX9() const {
1197     return AMDGPU::isGFX9(getSTI());
1198   }
1199 
1200   bool isGFX9Plus() const {
1201     return AMDGPU::isGFX9Plus(getSTI());
1202   }
1203 
1204   bool isGFX10() const {
1205     return AMDGPU::isGFX10(getSTI());
1206   }
1207 
1208   bool isGFX10Plus() const { return AMDGPU::isGFX10Plus(getSTI()); }
1209 
1210   bool isGFX10_BEncoding() const {
1211     return AMDGPU::isGFX10_BEncoding(getSTI());
1212   }
1213 
1214   bool hasInv2PiInlineImm() const {
1215     return getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm];
1216   }
1217 
1218   bool hasFlatOffsets() const {
1219     return getFeatureBits()[AMDGPU::FeatureFlatInstOffsets];
1220   }
1221 
1222   bool hasSGPR102_SGPR103() const {
1223     return !isVI() && !isGFX9();
1224   }
1225 
1226   bool hasSGPR104_SGPR105() const { return isGFX10Plus(); }
1227 
1228   bool hasIntClamp() const {
1229     return getFeatureBits()[AMDGPU::FeatureIntClamp];
1230   }
1231 
1232   AMDGPUTargetStreamer &getTargetStreamer() {
1233     MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer();
1234     return static_cast<AMDGPUTargetStreamer &>(TS);
1235   }
1236 
1237   const MCRegisterInfo *getMRI() const {
1238     // We need this const_cast because for some reason getContext() is not const
1239     // in MCAsmParser.
1240     return const_cast<AMDGPUAsmParser*>(this)->getContext().getRegisterInfo();
1241   }
1242 
1243   const MCInstrInfo *getMII() const {
1244     return &MII;
1245   }
1246 
1247   const FeatureBitset &getFeatureBits() const {
1248     return getSTI().getFeatureBits();
1249   }
1250 
1251   void setForcedEncodingSize(unsigned Size) { ForcedEncodingSize = Size; }
1252   void setForcedDPP(bool ForceDPP_) { ForcedDPP = ForceDPP_; }
1253   void setForcedSDWA(bool ForceSDWA_) { ForcedSDWA = ForceSDWA_; }
1254 
1255   unsigned getForcedEncodingSize() const { return ForcedEncodingSize; }
1256   bool isForcedVOP3() const { return ForcedEncodingSize == 64; }
1257   bool isForcedDPP() const { return ForcedDPP; }
1258   bool isForcedSDWA() const { return ForcedSDWA; }
1259   ArrayRef<unsigned> getMatchedVariants() const;
1260   StringRef getMatchedVariantName() const;
1261 
1262   std::unique_ptr<AMDGPUOperand> parseRegister(bool RestoreOnFailure = false);
1263   bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc,
1264                      bool RestoreOnFailure);
1265   bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override;
1266   OperandMatchResultTy tryParseRegister(unsigned &RegNo, SMLoc &StartLoc,
1267                                         SMLoc &EndLoc) override;
1268   unsigned checkTargetMatchPredicate(MCInst &Inst) override;
1269   unsigned validateTargetOperandClass(MCParsedAsmOperand &Op,
1270                                       unsigned Kind) override;
1271   bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
1272                                OperandVector &Operands, MCStreamer &Out,
1273                                uint64_t &ErrorInfo,
1274                                bool MatchingInlineAsm) override;
1275   bool ParseDirective(AsmToken DirectiveID) override;
1276   OperandMatchResultTy parseOperand(OperandVector &Operands, StringRef Mnemonic,
1277                                     OperandMode Mode = OperandMode_Default);
1278   StringRef parseMnemonicSuffix(StringRef Name);
1279   bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
1280                         SMLoc NameLoc, OperandVector &Operands) override;
1281   //bool ProcessInstruction(MCInst &Inst);
1282 
1283   OperandMatchResultTy parseIntWithPrefix(const char *Prefix, int64_t &Int);
1284 
1285   OperandMatchResultTy
1286   parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
1287                      AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
1288                      bool (*ConvertResult)(int64_t &) = nullptr);
1289 
1290   OperandMatchResultTy
1291   parseOperandArrayWithPrefix(const char *Prefix,
1292                               OperandVector &Operands,
1293                               AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
1294                               bool (*ConvertResult)(int64_t&) = nullptr);
1295 
1296   OperandMatchResultTy
1297   parseNamedBit(StringRef Name, OperandVector &Operands,
1298                 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone);
1299   OperandMatchResultTy parseStringWithPrefix(StringRef Prefix,
1300                                              StringRef &Value,
1301                                              SMLoc &StringLoc);
1302 
1303   bool isModifier();
1304   bool isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1305   bool isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1306   bool isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1307   bool isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const;
1308   bool parseSP3NegModifier();
1309   OperandMatchResultTy parseImm(OperandVector &Operands, bool HasSP3AbsModifier = false);
1310   OperandMatchResultTy parseReg(OperandVector &Operands);
1311   OperandMatchResultTy parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod = false);
1312   OperandMatchResultTy parseRegOrImmWithFPInputMods(OperandVector &Operands, bool AllowImm = true);
1313   OperandMatchResultTy parseRegOrImmWithIntInputMods(OperandVector &Operands, bool AllowImm = true);
1314   OperandMatchResultTy parseRegWithFPInputMods(OperandVector &Operands);
1315   OperandMatchResultTy parseRegWithIntInputMods(OperandVector &Operands);
1316   OperandMatchResultTy parseVReg32OrOff(OperandVector &Operands);
1317   OperandMatchResultTy parseDfmtNfmt(int64_t &Format);
1318   OperandMatchResultTy parseUfmt(int64_t &Format);
1319   OperandMatchResultTy parseSymbolicSplitFormat(StringRef FormatStr, SMLoc Loc, int64_t &Format);
1320   OperandMatchResultTy parseSymbolicUnifiedFormat(StringRef FormatStr, SMLoc Loc, int64_t &Format);
1321   OperandMatchResultTy parseFORMAT(OperandVector &Operands);
1322   OperandMatchResultTy parseSymbolicOrNumericFormat(int64_t &Format);
1323   OperandMatchResultTy parseNumericFormat(int64_t &Format);
1324   bool tryParseFmt(const char *Pref, int64_t MaxVal, int64_t &Val);
1325   bool matchDfmtNfmt(int64_t &Dfmt, int64_t &Nfmt, StringRef FormatStr, SMLoc Loc);
1326 
1327   void cvtDSOffset01(MCInst &Inst, const OperandVector &Operands);
1328   void cvtDS(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, false); }
1329   void cvtDSGds(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, true); }
1330   void cvtExp(MCInst &Inst, const OperandVector &Operands);
1331 
1332   bool parseCnt(int64_t &IntVal);
1333   OperandMatchResultTy parseSWaitCntOps(OperandVector &Operands);
1334   OperandMatchResultTy parseHwreg(OperandVector &Operands);
1335 
1336 private:
1337   struct OperandInfoTy {
1338     SMLoc Loc;
1339     int64_t Id;
1340     bool IsSymbolic = false;
1341     bool IsDefined = false;
1342 
1343     OperandInfoTy(int64_t Id_) : Id(Id_) {}
1344   };
1345 
1346   bool parseSendMsgBody(OperandInfoTy &Msg, OperandInfoTy &Op, OperandInfoTy &Stream);
1347   bool validateSendMsg(const OperandInfoTy &Msg,
1348                        const OperandInfoTy &Op,
1349                        const OperandInfoTy &Stream);
1350 
1351   bool parseHwregBody(OperandInfoTy &HwReg,
1352                       OperandInfoTy &Offset,
1353                       OperandInfoTy &Width);
1354   bool validateHwreg(const OperandInfoTy &HwReg,
1355                      const OperandInfoTy &Offset,
1356                      const OperandInfoTy &Width);
1357 
1358   SMLoc getFlatOffsetLoc(const OperandVector &Operands) const;
1359   SMLoc getSMEMOffsetLoc(const OperandVector &Operands) const;
1360 
1361   SMLoc getOperandLoc(std::function<bool(const AMDGPUOperand&)> Test,
1362                       const OperandVector &Operands) const;
1363   SMLoc getImmLoc(AMDGPUOperand::ImmTy Type, const OperandVector &Operands) const;
1364   SMLoc getRegLoc(unsigned Reg, const OperandVector &Operands) const;
1365   SMLoc getLitLoc(const OperandVector &Operands) const;
1366   SMLoc getConstLoc(const OperandVector &Operands) const;
1367 
1368   bool validateInstruction(const MCInst &Inst, const SMLoc &IDLoc, const OperandVector &Operands);
1369   bool validateFlatOffset(const MCInst &Inst, const OperandVector &Operands);
1370   bool validateSMEMOffset(const MCInst &Inst, const OperandVector &Operands);
1371   bool validateSOPLiteral(const MCInst &Inst) const;
1372   bool validateConstantBusLimitations(const MCInst &Inst, const OperandVector &Operands);
1373   bool validateEarlyClobberLimitations(const MCInst &Inst, const OperandVector &Operands);
1374   bool validateIntClampSupported(const MCInst &Inst);
1375   bool validateMIMGAtomicDMask(const MCInst &Inst);
1376   bool validateMIMGGatherDMask(const MCInst &Inst);
1377   bool validateMovrels(const MCInst &Inst, const OperandVector &Operands);
1378   bool validateMIMGDataSize(const MCInst &Inst);
1379   bool validateMIMGAddrSize(const MCInst &Inst);
1380   bool validateMIMGD16(const MCInst &Inst);
1381   bool validateMIMGDim(const MCInst &Inst);
1382   bool validateLdsDirect(const MCInst &Inst);
1383   bool validateOpSel(const MCInst &Inst);
1384   bool validateVccOperand(unsigned Reg) const;
1385   bool validateVOP3Literal(const MCInst &Inst, const OperandVector &Operands);
1386   bool validateMAIAccWrite(const MCInst &Inst, const OperandVector &Operands);
1387   bool validateDivScale(const MCInst &Inst);
1388   bool validateCoherencyBits(const MCInst &Inst, const OperandVector &Operands,
1389                              const SMLoc &IDLoc);
1390   unsigned getConstantBusLimit(unsigned Opcode) const;
1391   bool usesConstantBus(const MCInst &Inst, unsigned OpIdx);
1392   bool isInlineConstant(const MCInst &Inst, unsigned OpIdx) const;
1393   unsigned findImplicitSGPRReadInVOP(const MCInst &Inst) const;
1394 
1395   bool isSupportedMnemo(StringRef Mnemo,
1396                         const FeatureBitset &FBS);
1397   bool isSupportedMnemo(StringRef Mnemo,
1398                         const FeatureBitset &FBS,
1399                         ArrayRef<unsigned> Variants);
1400   bool checkUnsupportedInstruction(StringRef Name, const SMLoc &IDLoc);
1401 
1402   bool isId(const StringRef Id) const;
1403   bool isId(const AsmToken &Token, const StringRef Id) const;
1404   bool isToken(const AsmToken::TokenKind Kind) const;
1405   bool trySkipId(const StringRef Id);
1406   bool trySkipId(const StringRef Pref, const StringRef Id);
1407   bool trySkipId(const StringRef Id, const AsmToken::TokenKind Kind);
1408   bool trySkipToken(const AsmToken::TokenKind Kind);
1409   bool skipToken(const AsmToken::TokenKind Kind, const StringRef ErrMsg);
1410   bool parseString(StringRef &Val, const StringRef ErrMsg = "expected a string");
1411   bool parseId(StringRef &Val, const StringRef ErrMsg = "");
1412 
1413   void peekTokens(MutableArrayRef<AsmToken> Tokens);
1414   AsmToken::TokenKind getTokenKind() const;
1415   bool parseExpr(int64_t &Imm, StringRef Expected = "");
1416   bool parseExpr(OperandVector &Operands);
1417   StringRef getTokenStr() const;
1418   AsmToken peekToken();
1419   AsmToken getToken() const;
1420   SMLoc getLoc() const;
1421   void lex();
1422 
1423 public:
1424   OperandMatchResultTy parseOptionalOperand(OperandVector &Operands);
1425   OperandMatchResultTy parseOptionalOpr(OperandVector &Operands);
1426 
1427   OperandMatchResultTy parseExpTgt(OperandVector &Operands);
1428   OperandMatchResultTy parseSendMsgOp(OperandVector &Operands);
1429   OperandMatchResultTy parseInterpSlot(OperandVector &Operands);
1430   OperandMatchResultTy parseInterpAttr(OperandVector &Operands);
1431   OperandMatchResultTy parseSOppBrTarget(OperandVector &Operands);
1432   OperandMatchResultTy parseBoolReg(OperandVector &Operands);
1433 
1434   bool parseSwizzleOperand(int64_t &Op,
1435                            const unsigned MinVal,
1436                            const unsigned MaxVal,
1437                            const StringRef ErrMsg,
1438                            SMLoc &Loc);
1439   bool parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
1440                             const unsigned MinVal,
1441                             const unsigned MaxVal,
1442                             const StringRef ErrMsg);
1443   OperandMatchResultTy parseSwizzleOp(OperandVector &Operands);
1444   bool parseSwizzleOffset(int64_t &Imm);
1445   bool parseSwizzleMacro(int64_t &Imm);
1446   bool parseSwizzleQuadPerm(int64_t &Imm);
1447   bool parseSwizzleBitmaskPerm(int64_t &Imm);
1448   bool parseSwizzleBroadcast(int64_t &Imm);
1449   bool parseSwizzleSwap(int64_t &Imm);
1450   bool parseSwizzleReverse(int64_t &Imm);
1451 
1452   OperandMatchResultTy parseGPRIdxMode(OperandVector &Operands);
1453   int64_t parseGPRIdxMacro();
1454 
1455   void cvtMubuf(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, false); }
1456   void cvtMubufAtomic(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true, false); }
1457   void cvtMubufAtomicReturn(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true, true); }
1458   void cvtMubufLds(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, false, true); }
1459   void cvtMtbuf(MCInst &Inst, const OperandVector &Operands);
1460 
1461   AMDGPUOperand::Ptr defaultDLC() const;
1462   AMDGPUOperand::Ptr defaultGLC() const;
1463   AMDGPUOperand::Ptr defaultGLC_1() const;
1464   AMDGPUOperand::Ptr defaultSLC() const;
1465 
1466   AMDGPUOperand::Ptr defaultSMRDOffset8() const;
1467   AMDGPUOperand::Ptr defaultSMEMOffset() const;
1468   AMDGPUOperand::Ptr defaultSMRDLiteralOffset() const;
1469   AMDGPUOperand::Ptr defaultFlatOffset() const;
1470 
1471   OperandMatchResultTy parseOModOperand(OperandVector &Operands);
1472 
1473   void cvtVOP3(MCInst &Inst, const OperandVector &Operands,
1474                OptionalImmIndexMap &OptionalIdx);
1475   void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands);
1476   void cvtVOP3(MCInst &Inst, const OperandVector &Operands);
1477   void cvtVOP3P(MCInst &Inst, const OperandVector &Operands);
1478 
1479   void cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands);
1480 
1481   void cvtMIMG(MCInst &Inst, const OperandVector &Operands,
1482                bool IsAtomic = false);
1483   void cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands);
1484   void cvtIntersectRay(MCInst &Inst, const OperandVector &Operands);
1485 
1486   bool parseDimId(unsigned &Encoding);
1487   OperandMatchResultTy parseDim(OperandVector &Operands);
1488   OperandMatchResultTy parseDPP8(OperandVector &Operands);
1489   OperandMatchResultTy parseDPPCtrl(OperandVector &Operands);
1490   bool isSupportedDPPCtrl(StringRef Ctrl, const OperandVector &Operands);
1491   int64_t parseDPPCtrlSel(StringRef Ctrl);
1492   int64_t parseDPPCtrlPerm();
1493   AMDGPUOperand::Ptr defaultRowMask() const;
1494   AMDGPUOperand::Ptr defaultBankMask() const;
1495   AMDGPUOperand::Ptr defaultBoundCtrl() const;
1496   AMDGPUOperand::Ptr defaultFI() const;
1497   void cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8 = false);
1498   void cvtDPP8(MCInst &Inst, const OperandVector &Operands) { cvtDPP(Inst, Operands, true); }
1499 
1500   OperandMatchResultTy parseSDWASel(OperandVector &Operands, StringRef Prefix,
1501                                     AMDGPUOperand::ImmTy Type);
1502   OperandMatchResultTy parseSDWADstUnused(OperandVector &Operands);
1503   void cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands);
1504   void cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands);
1505   void cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands);
1506   void cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands);
1507   void cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands);
1508   void cvtSDWA(MCInst &Inst, const OperandVector &Operands,
1509                uint64_t BasicInstType,
1510                bool SkipDstVcc = false,
1511                bool SkipSrcVcc = false);
1512 
1513   AMDGPUOperand::Ptr defaultBLGP() const;
1514   AMDGPUOperand::Ptr defaultCBSZ() const;
1515   AMDGPUOperand::Ptr defaultABID() const;
1516 
1517   OperandMatchResultTy parseEndpgmOp(OperandVector &Operands);
1518   AMDGPUOperand::Ptr defaultEndpgmImmOperands() const;
1519 };
1520 
1521 struct OptionalOperand {
1522   const char *Name;
1523   AMDGPUOperand::ImmTy Type;
1524   bool IsBit;
1525   bool (*ConvertResult)(int64_t&);
1526 };
1527 
1528 } // end anonymous namespace
1529 
1530 // May be called with an integer type of equivalent bit width.
1531 static const fltSemantics *getFltSemantics(unsigned Size) {
1532   switch (Size) {
1533   case 4:
1534     return &APFloat::IEEEsingle();
1535   case 8:
1536     return &APFloat::IEEEdouble();
1537   case 2:
1538     return &APFloat::IEEEhalf();
1539   default:
1540     llvm_unreachable("unsupported fp type");
1541   }
1542 }
1543 
1544 static const fltSemantics *getFltSemantics(MVT VT) {
1545   return getFltSemantics(VT.getSizeInBits() / 8);
1546 }
1547 
1548 static const fltSemantics *getOpFltSemantics(uint8_t OperandType) {
1549   switch (OperandType) {
1550   case AMDGPU::OPERAND_REG_IMM_INT32:
1551   case AMDGPU::OPERAND_REG_IMM_FP32:
1552   case AMDGPU::OPERAND_REG_INLINE_C_INT32:
1553   case AMDGPU::OPERAND_REG_INLINE_C_FP32:
1554   case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
1555   case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
1556     return &APFloat::IEEEsingle();
1557   case AMDGPU::OPERAND_REG_IMM_INT64:
1558   case AMDGPU::OPERAND_REG_IMM_FP64:
1559   case AMDGPU::OPERAND_REG_INLINE_C_INT64:
1560   case AMDGPU::OPERAND_REG_INLINE_C_FP64:
1561     return &APFloat::IEEEdouble();
1562   case AMDGPU::OPERAND_REG_IMM_INT16:
1563   case AMDGPU::OPERAND_REG_IMM_FP16:
1564   case AMDGPU::OPERAND_REG_INLINE_C_INT16:
1565   case AMDGPU::OPERAND_REG_INLINE_C_FP16:
1566   case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
1567   case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
1568   case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
1569   case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
1570   case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
1571   case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
1572   case AMDGPU::OPERAND_REG_IMM_V2INT16:
1573   case AMDGPU::OPERAND_REG_IMM_V2FP16:
1574     return &APFloat::IEEEhalf();
1575   default:
1576     llvm_unreachable("unsupported fp type");
1577   }
1578 }
1579 
1580 //===----------------------------------------------------------------------===//
1581 // Operand
1582 //===----------------------------------------------------------------------===//
1583 
1584 static bool canLosslesslyConvertToFPType(APFloat &FPLiteral, MVT VT) {
1585   bool Lost;
1586 
1587   // Convert the literal to the floating-point semantics of VT
1588   APFloat::opStatus Status = FPLiteral.convert(*getFltSemantics(VT),
1589                                                APFloat::rmNearestTiesToEven,
1590                                                &Lost);
1591   // We allow precision loss but not overflow or underflow
1592   if (Status != APFloat::opOK &&
1593       Lost &&
1594       ((Status & APFloat::opOverflow)  != 0 ||
1595        (Status & APFloat::opUnderflow) != 0)) {
1596     return false;
1597   }
1598 
1599   return true;
1600 }
1601 
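// Check whether Val fits into Size bits either as an unsigned or as a signed
// value, e.g. for Size == 16 both 0xFFFF and -1 are accepted.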
1602 static bool isSafeTruncation(int64_t Val, unsigned Size) {
1603   return isUIntN(Size, Val) || isIntN(Size, Val);
1604 }
1605 
1606 static bool isInlineableLiteralOp16(int64_t Val, MVT VT, bool HasInv2Pi) {
1607   if (VT.getScalarType() == MVT::i16) {
1608     // FP inline immediates are broken for i16 operands; accept only integer inline literals.
1609     return isInlinableIntLiteral(Val);
1610   }
1611 
1612   // f16/v2f16 operands work correctly for all values.
1613   return AMDGPU::isInlinableLiteral16(Val, HasInv2Pi);
1614 }
1615 
1616 bool AMDGPUOperand::isInlinableImm(MVT type) const {
1617 
1618   // This is a hack to enable named inline values like
1619   // shared_base with both 32-bit and 64-bit operands.
1620   // Note that these values are defined as
1621   // 32-bit operands only.
1622   if (isInlineValue()) {
1623     return true;
1624   }
1625 
1626   if (!isImmTy(ImmTyNone)) {
1627     // Only plain immediates are inlinable (e.g. "clamp" attribute is not)
1628     return false;
1629   }
1630   // TODO: We should avoid using host float here. It would be better to
1631   // check the float bit values, which is what a few other places do.
1632   // We've had bot failures before due to weird NaN support on MIPS hosts.
1633 
1634   APInt Literal(64, Imm.Val);
1635 
1636   if (Imm.IsFPImm) { // We got fp literal token
1637     if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
1638       return AMDGPU::isInlinableLiteral64(Imm.Val,
1639                                           AsmParser->hasInv2PiInlineImm());
1640     }
1641 
1642     APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
1643     if (!canLosslesslyConvertToFPType(FPLiteral, type))
1644       return false;
1645 
1646     if (type.getScalarSizeInBits() == 16) {
1647       return isInlineableLiteralOp16(
1648         static_cast<int16_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
1649         type, AsmParser->hasInv2PiInlineImm());
1650     }
1651 
1652     // Check if the single-precision literal is inlinable
1653     return AMDGPU::isInlinableLiteral32(
1654       static_cast<int32_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
1655       AsmParser->hasInv2PiInlineImm());
1656   }
1657 
1658   // We got int literal token.
1659   if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
1660     return AMDGPU::isInlinableLiteral64(Imm.Val,
1661                                         AsmParser->hasInv2PiInlineImm());
1662   }
1663 
1664   if (!isSafeTruncation(Imm.Val, type.getScalarSizeInBits())) {
1665     return false;
1666   }
1667 
1668   if (type.getScalarSizeInBits() == 16) {
1669     return isInlineableLiteralOp16(
1670       static_cast<int16_t>(Literal.getLoBits(16).getSExtValue()),
1671       type, AsmParser->hasInv2PiInlineImm());
1672   }
1673 
1674   return AMDGPU::isInlinableLiteral32(
1675     static_cast<int32_t>(Literal.getLoBits(32).getZExtValue()),
1676     AsmParser->hasInv2PiInlineImm());
1677 }
1678 
1679 bool AMDGPUOperand::isLiteralImm(MVT type) const {
1680   // Check that this immediate can be added as a literal
1681   if (!isImmTy(ImmTyNone)) {
1682     return false;
1683   }
1684 
1685   if (!Imm.IsFPImm) {
1686     // We got int literal token.
1687 
1688     if (type == MVT::f64 && hasFPModifiers()) {
1689       // FP modifiers cannot be applied to int literals while preserving the same
1690       // semantics for VOP1/2/C and VOP3 because of integer truncation. To avoid
1691       // ambiguity, disallow these cases.
1692       return false;
1693     }
1694 
1695     unsigned Size = type.getSizeInBits();
1696     if (Size == 64)
1697       Size = 32;
1698 
1699     // FIXME: 64-bit operands can zero extend, sign extend, or pad zeroes for FP
1700     // types.
1701     return isSafeTruncation(Imm.Val, Size);
1702   }
1703 
1704   // We got fp literal token
1705   if (type == MVT::f64) { // Expected 64-bit fp operand
1706     // The low 32 bits of the literal will be set to zero, but we accept such literals
1707     return true;
1708   }
1709 
1710   if (type == MVT::i64) { // Expected 64-bit int operand
1711     // We don't allow fp literals in 64-bit integer instructions. It is
1712     // unclear how we should encode them.
1713     return false;
1714   }
1715 
1716   // We allow fp literals with f16x2 operands assuming that the specified
1717   // literal goes into the lower half and the upper half is zero. We also
1718   // require that the literal can be losslessly converted to f16.
1719   MVT ExpectedType = (type == MVT::v2f16)? MVT::f16 :
1720                      (type == MVT::v2i16)? MVT::i16 : type;
1721 
1722   APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
1723   return canLosslesslyConvertToFPType(FPLiteral, ExpectedType);
1724 }
1725 
1726 bool AMDGPUOperand::isRegClass(unsigned RCID) const {
1727   return isRegKind() && AsmParser->getMRI()->getRegClass(RCID).contains(getReg());
1728 }
1729 
1730 bool AMDGPUOperand::isSDWAOperand(MVT type) const {
1731   if (AsmParser->isVI())
1732     return isVReg32();
1733   else if (AsmParser->isGFX9Plus())
1734     return isRegClass(AMDGPU::VS_32RegClassID) || isInlinableImm(type);
1735   else
1736     return false;
1737 }
1738 
1739 bool AMDGPUOperand::isSDWAFP16Operand() const {
1740   return isSDWAOperand(MVT::f16);
1741 }
1742 
1743 bool AMDGPUOperand::isSDWAFP32Operand() const {
1744   return isSDWAOperand(MVT::f32);
1745 }
1746 
1747 bool AMDGPUOperand::isSDWAInt16Operand() const {
1748   return isSDWAOperand(MVT::i16);
1749 }
1750 
1751 bool AMDGPUOperand::isSDWAInt32Operand() const {
1752   return isSDWAOperand(MVT::i32);
1753 }
1754 
1755 bool AMDGPUOperand::isBoolReg() const {
1756   return (AsmParser->getFeatureBits()[AMDGPU::FeatureWavefrontSize64] && isSCSrcB64()) ||
1757          (AsmParser->getFeatureBits()[AMDGPU::FeatureWavefrontSize32] && isSCSrcB32());
1758 }
1759 
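// Fold unary FP modifiers into the literal bits: 'abs' clears the sign bit
// and 'neg' toggles it. For a 4-byte operand the sign mask is 0x80000000.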
1760 uint64_t AMDGPUOperand::applyInputFPModifiers(uint64_t Val, unsigned Size) const
1761 {
1762   assert(isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
1763   assert(Size == 2 || Size == 4 || Size == 8);
1764 
1765   const uint64_t FpSignMask = (1ULL << (Size * 8 - 1));
1766 
1767   if (Imm.Mods.Abs) {
1768     Val &= ~FpSignMask;
1769   }
1770   if (Imm.Mods.Neg) {
1771     Val ^= FpSignMask;
1772   }
1773 
1774   return Val;
1775 }
1776 
1777 void AMDGPUOperand::addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers) const {
1778   if (AMDGPU::isSISrcOperand(AsmParser->getMII()->get(Inst.getOpcode()),
1779                              Inst.getNumOperands())) {
1780     addLiteralImmOperand(Inst, Imm.Val,
1781                          ApplyModifiers &&
1782                          isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
1783   } else {
1784     assert(!isImmTy(ImmTyNone) || !hasModifiers());
1785     Inst.addOperand(MCOperand::createImm(Imm.Val));
1786     setImmKindNone();
1787   }
1788 }
1789 
1790 void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const {
1791   const auto& InstDesc = AsmParser->getMII()->get(Inst.getOpcode());
1792   auto OpNum = Inst.getNumOperands();
1793   // Check that this operand accepts literals
1794   assert(AMDGPU::isSISrcOperand(InstDesc, OpNum));
1795 
1796   if (ApplyModifiers) {
1797     assert(AMDGPU::isSISrcFPOperand(InstDesc, OpNum));
1798     const unsigned Size = Imm.IsFPImm ? sizeof(double) : getOperandSize(InstDesc, OpNum);
1799     Val = applyInputFPModifiers(Val, Size);
1800   }
1801 
1802   APInt Literal(64, Val);
1803   uint8_t OpTy = InstDesc.OpInfo[OpNum].OperandType;
1804 
1805   if (Imm.IsFPImm) { // We got fp literal token
1806     switch (OpTy) {
1807     case AMDGPU::OPERAND_REG_IMM_INT64:
1808     case AMDGPU::OPERAND_REG_IMM_FP64:
1809     case AMDGPU::OPERAND_REG_INLINE_C_INT64:
1810     case AMDGPU::OPERAND_REG_INLINE_C_FP64:
1811       if (AMDGPU::isInlinableLiteral64(Literal.getZExtValue(),
1812                                        AsmParser->hasInv2PiInlineImm())) {
1813         Inst.addOperand(MCOperand::createImm(Literal.getZExtValue()));
1814         setImmKindConst();
1815         return;
1816       }
1817 
1818       // Non-inlineable
1819       if (AMDGPU::isSISrcFPOperand(InstDesc, OpNum)) { // Expected 64-bit fp operand
1820         // For fp operands we check if the low 32 bits are zeros
1821         if (Literal.getLoBits(32) != 0) {
1822           const_cast<AMDGPUAsmParser *>(AsmParser)->Warning(Inst.getLoc(),
1823           "Can't encode literal as exact 64-bit floating-point operand. "
1824           "Low 32-bits will be set to zero");
1825         }
1826 
1827         Inst.addOperand(MCOperand::createImm(Literal.lshr(32).getZExtValue()));
1828         setImmKindLiteral();
1829         return;
1830       }
1831 
1832       // We don't allow fp literals in 64-bit integer instructions. It is
1833       // unclear how we should encode them. This case should be checked earlier
1834       // in predicate methods (isLiteralImm())
1835       llvm_unreachable("fp literal in 64-bit integer instruction.");
1836 
1837     case AMDGPU::OPERAND_REG_IMM_INT32:
1838     case AMDGPU::OPERAND_REG_IMM_FP32:
1839     case AMDGPU::OPERAND_REG_INLINE_C_INT32:
1840     case AMDGPU::OPERAND_REG_INLINE_C_FP32:
1841     case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
1842     case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
1843     case AMDGPU::OPERAND_REG_IMM_INT16:
1844     case AMDGPU::OPERAND_REG_IMM_FP16:
1845     case AMDGPU::OPERAND_REG_INLINE_C_INT16:
1846     case AMDGPU::OPERAND_REG_INLINE_C_FP16:
1847     case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
1848     case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
1849     case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
1850     case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
1851     case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
1852     case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
1853     case AMDGPU::OPERAND_REG_IMM_V2INT16:
1854     case AMDGPU::OPERAND_REG_IMM_V2FP16: {
1855       bool lost;
1856       APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
1857       // Convert the literal to the operand's floating-point semantics
1858       FPLiteral.convert(*getOpFltSemantics(OpTy),
1859                         APFloat::rmNearestTiesToEven, &lost);
1860       // We allow precision loss but not overflow or underflow. This should be
1861       // checked earlier in isLiteralImm()
1862 
1863       uint64_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue();
1864       Inst.addOperand(MCOperand::createImm(ImmVal));
1865       setImmKindLiteral();
1866       return;
1867     }
1868     default:
1869       llvm_unreachable("invalid operand size");
1870     }
1871 
1872     return;
1873   }
1874 
1875   // We got int literal token.
1876   // Only sign extend inline immediates.
1877   switch (OpTy) {
1878   case AMDGPU::OPERAND_REG_IMM_INT32:
1879   case AMDGPU::OPERAND_REG_IMM_FP32:
1880   case AMDGPU::OPERAND_REG_INLINE_C_INT32:
1881   case AMDGPU::OPERAND_REG_INLINE_C_FP32:
1882   case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
1883   case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
1884   case AMDGPU::OPERAND_REG_IMM_V2INT16:
1885   case AMDGPU::OPERAND_REG_IMM_V2FP16:
1886     if (isSafeTruncation(Val, 32) &&
1887         AMDGPU::isInlinableLiteral32(static_cast<int32_t>(Val),
1888                                      AsmParser->hasInv2PiInlineImm())) {
1889       Inst.addOperand(MCOperand::createImm(Val));
1890       setImmKindConst();
1891       return;
1892     }
1893 
1894     Inst.addOperand(MCOperand::createImm(Val & 0xffffffff));
1895     setImmKindLiteral();
1896     return;
1897 
1898   case AMDGPU::OPERAND_REG_IMM_INT64:
1899   case AMDGPU::OPERAND_REG_IMM_FP64:
1900   case AMDGPU::OPERAND_REG_INLINE_C_INT64:
1901   case AMDGPU::OPERAND_REG_INLINE_C_FP64:
1902     if (AMDGPU::isInlinableLiteral64(Val, AsmParser->hasInv2PiInlineImm())) {
1903       Inst.addOperand(MCOperand::createImm(Val));
1904       setImmKindConst();
1905       return;
1906     }
1907 
1908     Inst.addOperand(MCOperand::createImm(Lo_32(Val)));
1909     setImmKindLiteral();
1910     return;
1911 
1912   case AMDGPU::OPERAND_REG_IMM_INT16:
1913   case AMDGPU::OPERAND_REG_IMM_FP16:
1914   case AMDGPU::OPERAND_REG_INLINE_C_INT16:
1915   case AMDGPU::OPERAND_REG_INLINE_C_FP16:
1916   case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
1917   case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
1918     if (isSafeTruncation(Val, 16) &&
1919         AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val),
1920                                      AsmParser->hasInv2PiInlineImm())) {
1921       Inst.addOperand(MCOperand::createImm(Val));
1922       setImmKindConst();
1923       return;
1924     }
1925 
1926     Inst.addOperand(MCOperand::createImm(Val & 0xffff));
1927     setImmKindLiteral();
1928     return;
1929 
1930   case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
1931   case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
1932   case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
1933   case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: {
1934     assert(isSafeTruncation(Val, 16));
1935     assert(AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val),
1936                                         AsmParser->hasInv2PiInlineImm()));
1937 
1938     Inst.addOperand(MCOperand::createImm(Val));
1939     return;
1940   }
1941   default:
1942     llvm_unreachable("invalid operand size");
1943   }
1944 }
1945 
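// Add a KImm operand of the given bit width. Integer tokens are truncated to
// Bitwidth; fp tokens are rounded from double to the matching fp format.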
1946 template <unsigned Bitwidth>
1947 void AMDGPUOperand::addKImmFPOperands(MCInst &Inst, unsigned N) const {
1948   APInt Literal(64, Imm.Val);
1949   setImmKindNone();
1950 
1951   if (!Imm.IsFPImm) {
1952     // We got int literal token.
1953     Inst.addOperand(MCOperand::createImm(Literal.getLoBits(Bitwidth).getZExtValue()));
1954     return;
1955   }
1956 
1957   bool Lost;
1958   APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
1959   FPLiteral.convert(*getFltSemantics(Bitwidth / 8),
1960                     APFloat::rmNearestTiesToEven, &Lost);
1961   Inst.addOperand(MCOperand::createImm(FPLiteral.bitcastToAPInt().getZExtValue()));
1962 }
1963 
1964 void AMDGPUOperand::addRegOperands(MCInst &Inst, unsigned N) const {
1965   Inst.addOperand(MCOperand::createReg(AMDGPU::getMCReg(getReg(), AsmParser->getSTI())));
1966 }
1967 
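// Check whether Reg is one of the named inline constants (shared/private
// base and limit, pops_exiting_wave_id, vccz, execz, scc or null).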
1968 static bool isInlineValue(unsigned Reg) {
1969   switch (Reg) {
1970   case AMDGPU::SRC_SHARED_BASE:
1971   case AMDGPU::SRC_SHARED_LIMIT:
1972   case AMDGPU::SRC_PRIVATE_BASE:
1973   case AMDGPU::SRC_PRIVATE_LIMIT:
1974   case AMDGPU::SRC_POPS_EXITING_WAVE_ID:
1975     return true;
1976   case AMDGPU::SRC_VCCZ:
1977   case AMDGPU::SRC_EXECZ:
1978   case AMDGPU::SRC_SCC:
1979     return true;
1980   case AMDGPU::SGPR_NULL:
1981     return true;
1982   default:
1983     return false;
1984   }
1985 }
1986 
1987 bool AMDGPUOperand::isInlineValue() const {
1988   return isRegKind() && ::isInlineValue(getReg());
1989 }
1990 
1991 //===----------------------------------------------------------------------===//
1992 // AsmParser
1993 //===----------------------------------------------------------------------===//
1994 
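// Map a register kind and width (in 32-bit dwords) to the corresponding
// register class, e.g. an SGPR tuple of width 2 maps to SGPR_64.
// Returns -1 for unsupported widths.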
1995 static int getRegClass(RegisterKind Is, unsigned RegWidth) {
1996   if (Is == IS_VGPR) {
1997     switch (RegWidth) {
1998       default: return -1;
1999       case 1: return AMDGPU::VGPR_32RegClassID;
2000       case 2: return AMDGPU::VReg_64RegClassID;
2001       case 3: return AMDGPU::VReg_96RegClassID;
2002       case 4: return AMDGPU::VReg_128RegClassID;
2003       case 5: return AMDGPU::VReg_160RegClassID;
2004       case 6: return AMDGPU::VReg_192RegClassID;
2005       case 8: return AMDGPU::VReg_256RegClassID;
2006       case 16: return AMDGPU::VReg_512RegClassID;
2007       case 32: return AMDGPU::VReg_1024RegClassID;
2008     }
2009   } else if (Is == IS_TTMP) {
2010     switch (RegWidth) {
2011       default: return -1;
2012       case 1: return AMDGPU::TTMP_32RegClassID;
2013       case 2: return AMDGPU::TTMP_64RegClassID;
2014       case 4: return AMDGPU::TTMP_128RegClassID;
2015       case 8: return AMDGPU::TTMP_256RegClassID;
2016       case 16: return AMDGPU::TTMP_512RegClassID;
2017     }
2018   } else if (Is == IS_SGPR) {
2019     switch (RegWidth) {
2020       default: return -1;
2021       case 1: return AMDGPU::SGPR_32RegClassID;
2022       case 2: return AMDGPU::SGPR_64RegClassID;
2023       case 3: return AMDGPU::SGPR_96RegClassID;
2024       case 4: return AMDGPU::SGPR_128RegClassID;
2025       case 5: return AMDGPU::SGPR_160RegClassID;
2026       case 6: return AMDGPU::SGPR_192RegClassID;
2027       case 8: return AMDGPU::SGPR_256RegClassID;
2028       case 16: return AMDGPU::SGPR_512RegClassID;
2029     }
2030   } else if (Is == IS_AGPR) {
2031     switch (RegWidth) {
2032       default: return -1;
2033       case 1: return AMDGPU::AGPR_32RegClassID;
2034       case 2: return AMDGPU::AReg_64RegClassID;
2035       case 3: return AMDGPU::AReg_96RegClassID;
2036       case 4: return AMDGPU::AReg_128RegClassID;
2037       case 5: return AMDGPU::AReg_160RegClassID;
2038       case 6: return AMDGPU::AReg_192RegClassID;
2039       case 8: return AMDGPU::AReg_256RegClassID;
2040       case 16: return AMDGPU::AReg_512RegClassID;
2041       case 32: return AMDGPU::AReg_1024RegClassID;
2042     }
2043   }
2044   return -1;
2045 }
2046 
2047 static unsigned getSpecialRegForName(StringRef RegName) {
2048   return StringSwitch<unsigned>(RegName)
2049     .Case("exec", AMDGPU::EXEC)
2050     .Case("vcc", AMDGPU::VCC)
2051     .Case("flat_scratch", AMDGPU::FLAT_SCR)
2052     .Case("xnack_mask", AMDGPU::XNACK_MASK)
2053     .Case("shared_base", AMDGPU::SRC_SHARED_BASE)
2054     .Case("src_shared_base", AMDGPU::SRC_SHARED_BASE)
2055     .Case("shared_limit", AMDGPU::SRC_SHARED_LIMIT)
2056     .Case("src_shared_limit", AMDGPU::SRC_SHARED_LIMIT)
2057     .Case("private_base", AMDGPU::SRC_PRIVATE_BASE)
2058     .Case("src_private_base", AMDGPU::SRC_PRIVATE_BASE)
2059     .Case("private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
2060     .Case("src_private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
2061     .Case("pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
2062     .Case("src_pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
2063     .Case("lds_direct", AMDGPU::LDS_DIRECT)
2064     .Case("src_lds_direct", AMDGPU::LDS_DIRECT)
2065     .Case("m0", AMDGPU::M0)
2066     .Case("vccz", AMDGPU::SRC_VCCZ)
2067     .Case("src_vccz", AMDGPU::SRC_VCCZ)
2068     .Case("execz", AMDGPU::SRC_EXECZ)
2069     .Case("src_execz", AMDGPU::SRC_EXECZ)
2070     .Case("scc", AMDGPU::SRC_SCC)
2071     .Case("src_scc", AMDGPU::SRC_SCC)
2072     .Case("tba", AMDGPU::TBA)
2073     .Case("tma", AMDGPU::TMA)
2074     .Case("flat_scratch_lo", AMDGPU::FLAT_SCR_LO)
2075     .Case("flat_scratch_hi", AMDGPU::FLAT_SCR_HI)
2076     .Case("xnack_mask_lo", AMDGPU::XNACK_MASK_LO)
2077     .Case("xnack_mask_hi", AMDGPU::XNACK_MASK_HI)
2078     .Case("vcc_lo", AMDGPU::VCC_LO)
2079     .Case("vcc_hi", AMDGPU::VCC_HI)
2080     .Case("exec_lo", AMDGPU::EXEC_LO)
2081     .Case("exec_hi", AMDGPU::EXEC_HI)
2082     .Case("tma_lo", AMDGPU::TMA_LO)
2083     .Case("tma_hi", AMDGPU::TMA_HI)
2084     .Case("tba_lo", AMDGPU::TBA_LO)
2085     .Case("tba_hi", AMDGPU::TBA_HI)
2086     .Case("pc", AMDGPU::PC_REG)
2087     .Case("null", AMDGPU::SGPR_NULL)
2088     .Default(AMDGPU::NoRegister);
2089 }
2090 
2091 bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
2092                                     SMLoc &EndLoc, bool RestoreOnFailure) {
2093   auto R = parseRegister();
2094   if (!R) return true;
2095   assert(R->isReg());
2096   RegNo = R->getReg();
2097   StartLoc = R->getStartLoc();
2098   EndLoc = R->getEndLoc();
2099   return false;
2100 }
2101 
2102 bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
2103                                     SMLoc &EndLoc) {
2104   return ParseRegister(RegNo, StartLoc, EndLoc, /*RestoreOnFailure=*/false);
2105 }
2106 
2107 OperandMatchResultTy AMDGPUAsmParser::tryParseRegister(unsigned &RegNo,
2108                                                        SMLoc &StartLoc,
2109                                                        SMLoc &EndLoc) {
2110   bool Result =
2111       ParseRegister(RegNo, StartLoc, EndLoc, /*RestoreOnFailure=*/true);
2112   bool PendingErrors = getParser().hasPendingError();
2113   getParser().clearPendingErrors();
2114   if (PendingErrors)
2115     return MatchOperand_ParseFail;
2116   if (Result)
2117     return MatchOperand_NoMatch;
2118   return MatchOperand_Success;
2119 }
2120 
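// Append Reg1 to a register list being parsed, e.g. growing [s0,s1] by s2.
// Special registers may only be combined from their LO/HI halves; regular
// registers must have consecutive indices.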
2121 bool AMDGPUAsmParser::AddNextRegisterToList(unsigned &Reg, unsigned &RegWidth,
2122                                             RegisterKind RegKind, unsigned Reg1,
2123                                             SMLoc Loc) {
2124   switch (RegKind) {
2125   case IS_SPECIAL:
2126     if (Reg == AMDGPU::EXEC_LO && Reg1 == AMDGPU::EXEC_HI) {
2127       Reg = AMDGPU::EXEC;
2128       RegWidth = 2;
2129       return true;
2130     }
2131     if (Reg == AMDGPU::FLAT_SCR_LO && Reg1 == AMDGPU::FLAT_SCR_HI) {
2132       Reg = AMDGPU::FLAT_SCR;
2133       RegWidth = 2;
2134       return true;
2135     }
2136     if (Reg == AMDGPU::XNACK_MASK_LO && Reg1 == AMDGPU::XNACK_MASK_HI) {
2137       Reg = AMDGPU::XNACK_MASK;
2138       RegWidth = 2;
2139       return true;
2140     }
2141     if (Reg == AMDGPU::VCC_LO && Reg1 == AMDGPU::VCC_HI) {
2142       Reg = AMDGPU::VCC;
2143       RegWidth = 2;
2144       return true;
2145     }
2146     if (Reg == AMDGPU::TBA_LO && Reg1 == AMDGPU::TBA_HI) {
2147       Reg = AMDGPU::TBA;
2148       RegWidth = 2;
2149       return true;
2150     }
2151     if (Reg == AMDGPU::TMA_LO && Reg1 == AMDGPU::TMA_HI) {
2152       Reg = AMDGPU::TMA;
2153       RegWidth = 2;
2154       return true;
2155     }
2156     Error(Loc, "register does not fit in the list");
2157     return false;
2158   case IS_VGPR:
2159   case IS_SGPR:
2160   case IS_AGPR:
2161   case IS_TTMP:
2162     if (Reg1 != Reg + RegWidth) {
2163       Error(Loc, "registers in a list must have consecutive indices");
2164       return false;
2165     }
2166     RegWidth++;
2167     return true;
2168   default:
2169     llvm_unreachable("unexpected register kind");
2170   }
2171 }
2172 
2173 struct RegInfo {
2174   StringLiteral Name;
2175   RegisterKind Kind;
2176 };
2177 
2178 static constexpr RegInfo RegularRegisters[] = {
2179   {{"v"},    IS_VGPR},
2180   {{"s"},    IS_SGPR},
2181   {{"ttmp"}, IS_TTMP},
2182   {{"acc"},  IS_AGPR},
2183   {{"a"},    IS_AGPR},
2184 };
2185 
2186 static bool isRegularReg(RegisterKind Kind) {
2187   return Kind == IS_VGPR ||
2188          Kind == IS_SGPR ||
2189          Kind == IS_TTMP ||
2190          Kind == IS_AGPR;
2191 }
2192 
2193 static const RegInfo* getRegularRegInfo(StringRef Str) {
2194   for (const RegInfo &Reg : RegularRegisters)
2195     if (Str.startswith(Reg.Name))
2196       return &Reg;
2197   return nullptr;
2198 }
2199 
2200 static bool getRegNum(StringRef Str, unsigned& Num) {
2201   return !Str.getAsInteger(10, Num);
2202 }
2203 
2204 bool
2205 AMDGPUAsmParser::isRegister(const AsmToken &Token,
2206                             const AsmToken &NextToken) const {
2207 
2208   // A list of consecutive registers: [s0,s1,s2,s3]
2209   if (Token.is(AsmToken::LBrac))
2210     return true;
2211 
2212   if (!Token.is(AsmToken::Identifier))
2213     return false;
2214 
2215   // A single register like s0 or a range of registers like s[0:1]
2216 
2217   StringRef Str = Token.getString();
2218   const RegInfo *Reg = getRegularRegInfo(Str);
2219   if (Reg) {
2220     StringRef RegName = Reg->Name;
2221     StringRef RegSuffix = Str.substr(RegName.size());
2222     if (!RegSuffix.empty()) {
2223       unsigned Num;
2224       // A single register with an index: rXX
2225       if (getRegNum(RegSuffix, Num))
2226         return true;
2227     } else {
2228       // A range of registers: r[XX:YY].
2229       if (NextToken.is(AsmToken::LBrac))
2230         return true;
2231     }
2232   }
2233 
2234   return getSpecialRegForName(Str) != AMDGPU::NoRegister;
2235 }
2236 
2237 bool
2238 AMDGPUAsmParser::isRegister()
2239 {
2240   return isRegister(getToken(), peekToken());
2241 }
2242 
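// Translate a regular register spec (kind, first index, width in dwords)
// into an MC register. SGPR and TTMP tuples must start at an index aligned
// to min(width, 4), e.g. s[2:3] is accepted while s[1:2] is rejected.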
2243 unsigned
2244 AMDGPUAsmParser::getRegularReg(RegisterKind RegKind,
2245                                unsigned RegNum,
2246                                unsigned RegWidth,
2247                                SMLoc Loc) {
2248 
2249   assert(isRegularReg(RegKind));
2250 
2251   unsigned AlignSize = 1;
2252   if (RegKind == IS_SGPR || RegKind == IS_TTMP) {
2253     // SGPR and TTMP registers must be aligned.
2254     // Max required alignment is 4 dwords.
2255     AlignSize = std::min(RegWidth, 4u);
2256   }
2257 
2258   if (RegNum % AlignSize != 0) {
2259     Error(Loc, "invalid register alignment");
2260     return AMDGPU::NoRegister;
2261   }
2262 
2263   unsigned RegIdx = RegNum / AlignSize;
2264   int RCID = getRegClass(RegKind, RegWidth);
2265   if (RCID == -1) {
2266     Error(Loc, "invalid or unsupported register size");
2267     return AMDGPU::NoRegister;
2268   }
2269 
2270   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
2271   const MCRegisterClass RC = TRI->getRegClass(RCID);
2272   if (RegIdx >= RC.getNumRegs()) {
2273     Error(Loc, "register index is out of range");
2274     return AMDGPU::NoRegister;
2275   }
2276 
2277   return RC.getRegister(RegIdx);
2278 }
2279 
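// Parse a bracketed register index range such as "[0:3]" (Num = 0,
// Width = 4). A single index like "[5]" is also accepted and yields
// Width = 1.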
2280 bool
2281 AMDGPUAsmParser::ParseRegRange(unsigned& Num, unsigned& Width) {
2282   int64_t RegLo, RegHi;
2283   if (!skipToken(AsmToken::LBrac, "missing register index"))
2284     return false;
2285 
2286   SMLoc FirstIdxLoc = getLoc();
2287   SMLoc SecondIdxLoc;
2288 
2289   if (!parseExpr(RegLo))
2290     return false;
2291 
2292   if (trySkipToken(AsmToken::Colon)) {
2293     SecondIdxLoc = getLoc();
2294     if (!parseExpr(RegHi))
2295       return false;
2296   } else {
2297     RegHi = RegLo;
2298   }
2299 
2300   if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
2301     return false;
2302 
2303   if (!isUInt<32>(RegLo)) {
2304     Error(FirstIdxLoc, "invalid register index");
2305     return false;
2306   }
2307 
2308   if (!isUInt<32>(RegHi)) {
2309     Error(SecondIdxLoc, "invalid register index");
2310     return false;
2311   }
2312 
2313   if (RegLo > RegHi) {
2314     Error(FirstIdxLoc, "first register index should not exceed second index");
2315     return false;
2316   }
2317 
2318   Num = static_cast<unsigned>(RegLo);
2319   Width = (RegHi - RegLo) + 1;
2320   return true;
2321 }
2322 
2323 unsigned AMDGPUAsmParser::ParseSpecialReg(RegisterKind &RegKind,
2324                                           unsigned &RegNum, unsigned &RegWidth,
2325                                           SmallVectorImpl<AsmToken> &Tokens) {
2326   assert(isToken(AsmToken::Identifier));
2327   unsigned Reg = getSpecialRegForName(getTokenStr());
2328   if (Reg) {
2329     RegNum = 0;
2330     RegWidth = 1;
2331     RegKind = IS_SPECIAL;
2332     Tokens.push_back(getToken());
2333     lex(); // skip register name
2334   }
2335   return Reg;
2336 }
2337 
2338 unsigned AMDGPUAsmParser::ParseRegularReg(RegisterKind &RegKind,
2339                                           unsigned &RegNum, unsigned &RegWidth,
2340                                           SmallVectorImpl<AsmToken> &Tokens) {
2341   assert(isToken(AsmToken::Identifier));
2342   StringRef RegName = getTokenStr();
2343   auto Loc = getLoc();
2344 
2345   const RegInfo *RI = getRegularRegInfo(RegName);
2346   if (!RI) {
2347     Error(Loc, "invalid register name");
2348     return AMDGPU::NoRegister;
2349   }
2350 
2351   Tokens.push_back(getToken());
2352   lex(); // skip register name
2353 
2354   RegKind = RI->Kind;
2355   StringRef RegSuffix = RegName.substr(RI->Name.size());
2356   if (!RegSuffix.empty()) {
2357     // Single 32-bit register: vXX.
2358     if (!getRegNum(RegSuffix, RegNum)) {
2359       Error(Loc, "invalid register index");
2360       return AMDGPU::NoRegister;
2361     }
2362     RegWidth = 1;
2363   } else {
2364     // Range of registers: v[XX:YY]. ":YY" is optional.
2365     if (!ParseRegRange(RegNum, RegWidth))
2366       return AMDGPU::NoRegister;
2367   }
2368 
2369   return getRegularReg(RegKind, RegNum, RegWidth, Loc);
2370 }
2371 
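// Parse a bracketed list of single 32-bit registers of the same kind,
// e.g. [v0,v1,v2,v3], and fold it into one wide register.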
2372 unsigned AMDGPUAsmParser::ParseRegList(RegisterKind &RegKind, unsigned &RegNum,
2373                                        unsigned &RegWidth,
2374                                        SmallVectorImpl<AsmToken> &Tokens) {
2375   unsigned Reg = AMDGPU::NoRegister;
2376   auto ListLoc = getLoc();
2377 
2378   if (!skipToken(AsmToken::LBrac,
2379                  "expected a register or a list of registers")) {
2380     return AMDGPU::NoRegister;
2381   }
2382 
2383   // List of consecutive registers, e.g.: [s0,s1,s2,s3]
2384 
2385   auto Loc = getLoc();
2386   if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth))
2387     return AMDGPU::NoRegister;
2388   if (RegWidth != 1) {
2389     Error(Loc, "expected a single 32-bit register");
2390     return AMDGPU::NoRegister;
2391   }
2392 
2393   for (; trySkipToken(AsmToken::Comma); ) {
2394     RegisterKind NextRegKind;
2395     unsigned NextReg, NextRegNum, NextRegWidth;
2396     Loc = getLoc();
2397 
2398     if (!ParseAMDGPURegister(NextRegKind, NextReg,
2399                              NextRegNum, NextRegWidth,
2400                              Tokens)) {
2401       return AMDGPU::NoRegister;
2402     }
2403     if (NextRegWidth != 1) {
2404       Error(Loc, "expected a single 32-bit register");
2405       return AMDGPU::NoRegister;
2406     }
2407     if (NextRegKind != RegKind) {
2408       Error(Loc, "registers in a list must be of the same kind");
2409       return AMDGPU::NoRegister;
2410     }
2411     if (!AddNextRegisterToList(Reg, RegWidth, RegKind, NextReg, Loc))
2412       return AMDGPU::NoRegister;
2413   }
2414 
2415   if (!skipToken(AsmToken::RBrac,
2416                  "expected a comma or a closing square bracket")) {
2417     return AMDGPU::NoRegister;
2418   }
2419 
2420   if (isRegularReg(RegKind))
2421     Reg = getRegularReg(RegKind, RegNum, RegWidth, ListLoc);
2422 
2423   return Reg;
2424 }
2425 
2426 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
2427                                           unsigned &RegNum, unsigned &RegWidth,
2428                                           SmallVectorImpl<AsmToken> &Tokens) {
2429   auto Loc = getLoc();
2430   Reg = AMDGPU::NoRegister;
2431 
2432   if (isToken(AsmToken::Identifier)) {
2433     Reg = ParseSpecialReg(RegKind, RegNum, RegWidth, Tokens);
2434     if (Reg == AMDGPU::NoRegister)
2435       Reg = ParseRegularReg(RegKind, RegNum, RegWidth, Tokens);
2436   } else {
2437     Reg = ParseRegList(RegKind, RegNum, RegWidth, Tokens);
2438   }
2439 
2440   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
2441   if (Reg == AMDGPU::NoRegister) {
2442     assert(Parser.hasPendingError());
2443     return false;
2444   }
2445 
2446   if (!subtargetHasRegister(*TRI, Reg)) {
2447     if (Reg == AMDGPU::SGPR_NULL) {
2448       Error(Loc, "'null' operand is not supported on this GPU");
2449     } else {
2450       Error(Loc, "register not available on this GPU");
2451     }
2452     return false;
2453   }
2454 
2455   return true;
2456 }
2457 
2458 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
2459                                           unsigned &RegNum, unsigned &RegWidth,
2460                                           bool RestoreOnFailure /*=false*/) {
2461   Reg = AMDGPU::NoRegister;
2462 
2463   SmallVector<AsmToken, 1> Tokens;
2464   if (ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, Tokens)) {
2465     if (RestoreOnFailure) {
2466       while (!Tokens.empty()) {
2467         getLexer().UnLex(Tokens.pop_back_val());
2468       }
2469     }
2470     return true;
2471   }
2472   return false;
2473 }
2474 
2475 Optional<StringRef>
2476 AMDGPUAsmParser::getGprCountSymbolName(RegisterKind RegKind) {
2477   switch (RegKind) {
2478   case IS_VGPR:
2479     return StringRef(".amdgcn.next_free_vgpr");
2480   case IS_SGPR:
2481     return StringRef(".amdgcn.next_free_sgpr");
2482   default:
2483     return None;
2484   }
2485 }
2486 
2487 void AMDGPUAsmParser::initializeGprCountSymbol(RegisterKind RegKind) {
2488   auto SymbolName = getGprCountSymbolName(RegKind);
2489   assert(SymbolName && "initializing invalid register kind");
2490   MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);
2491   Sym->setVariableValue(MCConstantExpr::create(0, getContext()));
2492 }
2493 
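// Raise the .amdgcn.next_free_{v,s}gpr symbol so it stays one past the
// highest register index used so far.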
2494 bool AMDGPUAsmParser::updateGprCountSymbols(RegisterKind RegKind,
2495                                             unsigned DwordRegIndex,
2496                                             unsigned RegWidth) {
2497   // Symbols are only defined for GCN targets
2498   if (AMDGPU::getIsaVersion(getSTI().getCPU()).Major < 6)
2499     return true;
2500 
2501   auto SymbolName = getGprCountSymbolName(RegKind);
2502   if (!SymbolName)
2503     return true;
2504   MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);
2505 
2506   int64_t NewMax = DwordRegIndex + RegWidth - 1;
2507   int64_t OldCount;
2508 
2509   if (!Sym->isVariable())
2510     return !Error(getLoc(),
2511                   ".amdgcn.next_free_{v,s}gpr symbols must be variable");
2512   if (!Sym->getVariableValue(false)->evaluateAsAbsolute(OldCount))
2513     return !Error(
2514         getLoc(),
2515         ".amdgcn.next_free_{v,s}gpr symbols must be absolute expressions");
2516 
2517   if (OldCount <= NewMax)
2518     Sym->setVariableValue(MCConstantExpr::create(NewMax + 1, getContext()));
2519 
2520   return true;
2521 }
2522 
2523 std::unique_ptr<AMDGPUOperand>
2524 AMDGPUAsmParser::parseRegister(bool RestoreOnFailure) {
2525   const auto &Tok = getToken();
2526   SMLoc StartLoc = Tok.getLoc();
2527   SMLoc EndLoc = Tok.getEndLoc();
2528   RegisterKind RegKind;
2529   unsigned Reg, RegNum, RegWidth;
2530 
2531   if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth)) {
2532     return nullptr;
2533   }
2534   if (isHsaAbiVersion3(&getSTI())) {
2535     if (!updateGprCountSymbols(RegKind, RegNum, RegWidth))
2536       return nullptr;
2537   } else
2538     KernelScope.usesRegister(RegKind, RegNum, RegWidth);
2539   return AMDGPUOperand::CreateReg(this, Reg, StartLoc, EndLoc);
2540 }
2541 
2542 OperandMatchResultTy
2543 AMDGPUAsmParser::parseImm(OperandVector &Operands, bool HasSP3AbsModifier) {
2544   // TODO: add syntactic sugar for 1/(2*PI)
2545 
2546   assert(!isRegister());
2547   assert(!isModifier());
2548 
2549   const auto& Tok = getToken();
2550   const auto& NextTok = peekToken();
2551   bool IsReal = Tok.is(AsmToken::Real);
2552   SMLoc S = getLoc();
2553   bool Negate = false;
2554 
2555   if (!IsReal && Tok.is(AsmToken::Minus) && NextTok.is(AsmToken::Real)) {
2556     lex();
2557     IsReal = true;
2558     Negate = true;
2559   }
2560 
2561   if (IsReal) {
2562     // Floating-point expressions are not supported.
2563     // Only floating-point literals with an optional
2564     // sign are allowed.
2565 
2566     StringRef Num = getTokenStr();
2567     lex();
2568 
2569     APFloat RealVal(APFloat::IEEEdouble());
2570     auto roundMode = APFloat::rmNearestTiesToEven;
2571     if (errorToBool(RealVal.convertFromString(Num, roundMode).takeError())) {
2572       return MatchOperand_ParseFail;
2573     }
2574     if (Negate)
2575       RealVal.changeSign();
2576 
2577     Operands.push_back(
2578       AMDGPUOperand::CreateImm(this, RealVal.bitcastToAPInt().getZExtValue(), S,
2579                                AMDGPUOperand::ImmTyNone, true));
2580 
2581     return MatchOperand_Success;
2582 
2583   } else {
2584     int64_t IntVal;
2585     const MCExpr *Expr;
2586     SMLoc S = getLoc();
2587 
2588     if (HasSP3AbsModifier) {
2589       // This is a workaround for handling expressions
2590       // as arguments of the SP3 'abs' modifier, for example:
2591       //     |1.0|
2592       //     |-1|
2593       //     |1+x|
2594       // This syntax is not compatible with the syntax of standard
2595       // MC expressions (due to the trailing '|').
2596       SMLoc EndLoc;
2597       if (getParser().parsePrimaryExpr(Expr, EndLoc, nullptr))
2598         return MatchOperand_ParseFail;
2599     } else {
2600       if (Parser.parseExpression(Expr))
2601         return MatchOperand_ParseFail;
2602     }
2603 
2604     if (Expr->evaluateAsAbsolute(IntVal)) {
2605       Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S));
2606     } else {
2607       Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S));
2608     }
2609 
2610     return MatchOperand_Success;
2611   }
2612 
2613   return MatchOperand_NoMatch;
2614 }
2615 
2616 OperandMatchResultTy
2617 AMDGPUAsmParser::parseReg(OperandVector &Operands) {
2618   if (!isRegister())
2619     return MatchOperand_NoMatch;
2620 
2621   if (auto R = parseRegister()) {
2622     assert(R->isReg());
2623     Operands.push_back(std::move(R));
2624     return MatchOperand_Success;
2625   }
2626   return MatchOperand_ParseFail;
2627 }
2628 
2629 OperandMatchResultTy
2630 AMDGPUAsmParser::parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod) {
2631   auto res = parseReg(Operands);
2632   if (res != MatchOperand_NoMatch) {
2633     return res;
2634   } else if (isModifier()) {
2635     return MatchOperand_NoMatch;
2636   } else {
2637     return parseImm(Operands, HasSP3AbsMod);
2638   }
2639 }
2640 
2641 bool
2642 AMDGPUAsmParser::isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
2643   if (Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::LParen)) {
2644     const auto &str = Token.getString();
2645     return str == "abs" || str == "neg" || str == "sext";
2646   }
2647   return false;
2648 }
2649 
2650 bool
2651 AMDGPUAsmParser::isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const {
2652   return Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::Colon);
2653 }
2654 
2655 bool
2656 AMDGPUAsmParser::isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
2657   return isNamedOperandModifier(Token, NextToken) || Token.is(AsmToken::Pipe);
2658 }
2659 
2660 bool
2661 AMDGPUAsmParser::isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
2662   return isRegister(Token, NextToken) || isOperandModifier(Token, NextToken);
2663 }
2664 
2665 // Check if this is an operand modifier or an opcode modifier
2666 // which may look like an expression but is not. We should
2667 // avoid parsing these modifiers as expressions. Currently
2668 // recognized sequences are:
2669 //   |...|
2670 //   abs(...)
2671 //   neg(...)
2672 //   sext(...)
2673 //   -reg
2674 //   -|...|
2675 //   -abs(...)
2676 //   name:...
2677 // Note that simple opcode modifiers like 'gds' may be parsed as
2678 // expressions; this is a special case. See getExpressionAsToken.
2679 //
2680 bool
2681 AMDGPUAsmParser::isModifier() {
2682 
2683   AsmToken Tok = getToken();
2684   AsmToken NextToken[2];
2685   peekTokens(NextToken);
2686 
2687   return isOperandModifier(Tok, NextToken[0]) ||
2688          (Tok.is(AsmToken::Minus) && isRegOrOperandModifier(NextToken[0], NextToken[1])) ||
2689          isOpcodeModifierWithVal(Tok, NextToken[0]);
2690 }
2691 
2692 // Check if the current token is an SP3 'neg' modifier.
2693 // Currently this modifier is allowed in the following contexts:
2694 //
2695 // 1. Before a register, e.g. "-v0", "-v[...]" or "-[v0,v1]".
2696 // 2. Before an 'abs' modifier: -abs(...)
2697 // 3. Before an SP3 'abs' modifier: -|...|
2698 //
2699 // In all other cases "-" is handled as part
2700 // of an expression that follows the sign.
2701 //
2702 // Note: When "-" is followed by an integer literal,
2703 // this is interpreted as integer negation rather
2704 // than a floating-point NEG modifier applied to the literal.
2705 // Besides being counter-intuitive, such use of the floating-point
2706 // NEG modifier would have resulted in a different meaning
2707 // of integer literals used with VOP1/2/C and VOP3,
2708 // for example:
2709 //    v_exp_f32_e32 v5, -1 // VOP1: src0 = 0xFFFFFFFF
2710 //    v_exp_f32_e64 v5, -1 // VOP3: src0 = 0x80000001
2711 // Negative fp literals with a preceding "-" are
2712 // handled likewise for uniformity.
2713 //
2714 bool
2715 AMDGPUAsmParser::parseSP3NegModifier() {
2716 
2717   AsmToken NextToken[2];
2718   peekTokens(NextToken);
2719 
2720   if (isToken(AsmToken::Minus) &&
2721       (isRegister(NextToken[0], NextToken[1]) ||
2722        NextToken[0].is(AsmToken::Pipe) ||
2723        isId(NextToken[0], "abs"))) {
2724     lex();
2725     return true;
2726   }
2727 
2728   return false;
2729 }
2730 
2731 OperandMatchResultTy
2732 AMDGPUAsmParser::parseRegOrImmWithFPInputMods(OperandVector &Operands,
2733                                               bool AllowImm) {
2734   bool Neg, SP3Neg;
2735   bool Abs, SP3Abs;
2736   SMLoc Loc;
2737 
2738   // Disable ambiguous constructs like '--1' etc. Should use neg(-1) instead.
2739   if (isToken(AsmToken::Minus) && peekToken().is(AsmToken::Minus)) {
2740     Error(getLoc(), "invalid syntax, expected 'neg' modifier");
2741     return MatchOperand_ParseFail;
2742   }
2743 
2744   SP3Neg = parseSP3NegModifier();
2745 
2746   Loc = getLoc();
2747   Neg = trySkipId("neg");
2748   if (Neg && SP3Neg) {
2749     Error(Loc, "expected register or immediate");
2750     return MatchOperand_ParseFail;
2751   }
2752   if (Neg && !skipToken(AsmToken::LParen, "expected left paren after neg"))
2753     return MatchOperand_ParseFail;
2754 
2755   Abs = trySkipId("abs");
2756   if (Abs && !skipToken(AsmToken::LParen, "expected left paren after abs"))
2757     return MatchOperand_ParseFail;
2758 
2759   Loc = getLoc();
2760   SP3Abs = trySkipToken(AsmToken::Pipe);
2761   if (Abs && SP3Abs) {
2762     Error(Loc, "expected register or immediate");
2763     return MatchOperand_ParseFail;
2764   }
2765 
2766   OperandMatchResultTy Res;
2767   if (AllowImm) {
2768     Res = parseRegOrImm(Operands, SP3Abs);
2769   } else {
2770     Res = parseReg(Operands);
2771   }
2772   if (Res != MatchOperand_Success) {
2773     return (SP3Neg || Neg || SP3Abs || Abs)? MatchOperand_ParseFail : Res;
2774   }
2775 
2776   if (SP3Abs && !skipToken(AsmToken::Pipe, "expected vertical bar"))
2777     return MatchOperand_ParseFail;
2778   if (Abs && !skipToken(AsmToken::RParen, "expected closing parentheses"))
2779     return MatchOperand_ParseFail;
2780   if (Neg && !skipToken(AsmToken::RParen, "expected closing parentheses"))
2781     return MatchOperand_ParseFail;
2782 
2783   AMDGPUOperand::Modifiers Mods;
2784   Mods.Abs = Abs || SP3Abs;
2785   Mods.Neg = Neg || SP3Neg;
2786 
2787   if (Mods.hasFPModifiers()) {
2788     AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
2789     if (Op.isExpr()) {
2790       Error(Op.getStartLoc(), "expected an absolute expression");
2791       return MatchOperand_ParseFail;
2792     }
2793     Op.setModifiers(Mods);
2794   }
2795   return MatchOperand_Success;
2796 }
2797 
2798 OperandMatchResultTy
2799 AMDGPUAsmParser::parseRegOrImmWithIntInputMods(OperandVector &Operands,
2800                                                bool AllowImm) {
2801   bool Sext = trySkipId("sext");
2802   if (Sext && !skipToken(AsmToken::LParen, "expected left paren after sext"))
2803     return MatchOperand_ParseFail;
2804 
2805   OperandMatchResultTy Res;
2806   if (AllowImm) {
2807     Res = parseRegOrImm(Operands);
2808   } else {
2809     Res = parseReg(Operands);
2810   }
2811   if (Res != MatchOperand_Success) {
2812     return Sext? MatchOperand_ParseFail : Res;
2813   }
2814 
2815   if (Sext && !skipToken(AsmToken::RParen, "expected closing parentheses"))
2816     return MatchOperand_ParseFail;
2817 
2818   AMDGPUOperand::Modifiers Mods;
2819   Mods.Sext = Sext;
2820 
2821   if (Mods.hasIntModifiers()) {
2822     AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
2823     if (Op.isExpr()) {
2824       Error(Op.getStartLoc(), "expected an absolute expression");
2825       return MatchOperand_ParseFail;
2826     }
2827     Op.setModifiers(Mods);
2828   }
2829 
2830   return MatchOperand_Success;
2831 }
2832 
2833 OperandMatchResultTy
2834 AMDGPUAsmParser::parseRegWithFPInputMods(OperandVector &Operands) {
2835   return parseRegOrImmWithFPInputMods(Operands, false);
2836 }
2837 
2838 OperandMatchResultTy
2839 AMDGPUAsmParser::parseRegWithIntInputMods(OperandVector &Operands) {
2840   return parseRegOrImmWithIntInputMods(Operands, false);
2841 }
2842 
2843 OperandMatchResultTy AMDGPUAsmParser::parseVReg32OrOff(OperandVector &Operands) {
2844   auto Loc = getLoc();
2845   if (trySkipId("off")) {
2846     Operands.push_back(AMDGPUOperand::CreateImm(this, 0, Loc,
2847                                                 AMDGPUOperand::ImmTyOff, false));
2848     return MatchOperand_Success;
2849   }
2850 
2851   if (!isRegister())
2852     return MatchOperand_NoMatch;
2853 
2854   std::unique_ptr<AMDGPUOperand> Reg = parseRegister();
2855   if (Reg) {
2856     Operands.push_back(std::move(Reg));
2857     return MatchOperand_Success;
2858   }
2859 
2860   return MatchOperand_ParseFail;
2861 
2862 }
2863 
2864 unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) {
2865   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
2866 
2867   if ((getForcedEncodingSize() == 32 && (TSFlags & SIInstrFlags::VOP3)) ||
2868       (getForcedEncodingSize() == 64 && !(TSFlags & SIInstrFlags::VOP3)) ||
2869       (isForcedDPP() && !(TSFlags & SIInstrFlags::DPP)) ||
2870       (isForcedSDWA() && !(TSFlags & SIInstrFlags::SDWA)) )
2871     return Match_InvalidOperand;
2872 
2873   if ((TSFlags & SIInstrFlags::VOP3) &&
2874       (TSFlags & SIInstrFlags::VOPAsmPrefer32Bit) &&
2875       getForcedEncodingSize() != 64)
2876     return Match_PreferE32;
2877 
2878   if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
2879       Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) {
2880     // v_mac_f32/f16 allow only dst_sel == DWORD.
2881     auto OpNum =
2882         AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::dst_sel);
2883     const auto &Op = Inst.getOperand(OpNum);
2884     if (!Op.isImm() || Op.getImm() != AMDGPU::SDWA::SdwaSel::DWORD) {
2885       return Match_InvalidOperand;
2886     }
2887   }
2888 
2889   return Match_Success;
2890 }
2891 
2892 static ArrayRef<unsigned> getAllVariants() {
2893   static const unsigned Variants[] = {
2894     AMDGPUAsmVariants::DEFAULT, AMDGPUAsmVariants::VOP3,
2895     AMDGPUAsmVariants::SDWA, AMDGPUAsmVariants::SDWA9, AMDGPUAsmVariants::DPP
2896   };
2897 
2898   return makeArrayRef(Variants);
2899 }
2900 
2901 // What asm variants we should check
2902 ArrayRef<unsigned> AMDGPUAsmParser::getMatchedVariants() const {
2903   if (getForcedEncodingSize() == 32) {
2904     static const unsigned Variants[] = {AMDGPUAsmVariants::DEFAULT};
2905     return makeArrayRef(Variants);
2906   }
2907 
2908   if (isForcedVOP3()) {
2909     static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3};
2910     return makeArrayRef(Variants);
2911   }
2912 
2913   if (isForcedSDWA()) {
2914     static const unsigned Variants[] = {AMDGPUAsmVariants::SDWA,
2915                                         AMDGPUAsmVariants::SDWA9};
2916     return makeArrayRef(Variants);
2917   }
2918 
2919   if (isForcedDPP()) {
2920     static const unsigned Variants[] = {AMDGPUAsmVariants::DPP};
2921     return makeArrayRef(Variants);
2922   }
2923 
2924   return getAllVariants();
2925 }
2926 
2927 StringRef AMDGPUAsmParser::getMatchedVariantName() const {
2928   if (getForcedEncodingSize() == 32)
2929     return "e32";
2930 
2931   if (isForcedVOP3())
2932     return "e64";
2933 
2934   if (isForcedSDWA())
2935     return "sdwa";
2936 
2937   if (isForcedDPP())
2938     return "dpp";
2939 
2940   return "";
2941 }
2942 
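// Return the first implicit use of FLAT_SCR, VCC, VCC_LO, VCC_HI or M0
// by this instruction, or NoRegister if there is none.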
2943 unsigned AMDGPUAsmParser::findImplicitSGPRReadInVOP(const MCInst &Inst) const {
2944   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
2945   const unsigned Num = Desc.getNumImplicitUses();
2946   for (unsigned i = 0; i < Num; ++i) {
2947     unsigned Reg = Desc.ImplicitUses[i];
2948     switch (Reg) {
2949     case AMDGPU::FLAT_SCR:
2950     case AMDGPU::VCC:
2951     case AMDGPU::VCC_LO:
2952     case AMDGPU::VCC_HI:
2953     case AMDGPU::M0:
2954       return Reg;
2955     default:
2956       break;
2957     }
2958   }
2959   return AMDGPU::NoRegister;
2960 }
2961 
2962 // NB: This code is correct only when used to check constant
2963 // bus limitations because GFX7 supports no f16 inline constants.
2964 // Note that there are no cases when a GFX7 opcode violates
2965 // constant bus limitations due to the use of an f16 constant.
2966 bool AMDGPUAsmParser::isInlineConstant(const MCInst &Inst,
2967                                        unsigned OpIdx) const {
2968   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
2969 
2970   if (!AMDGPU::isSISrcOperand(Desc, OpIdx)) {
2971     return false;
2972   }
2973 
2974   const MCOperand &MO = Inst.getOperand(OpIdx);
2975 
2976   int64_t Val = MO.getImm();
2977   auto OpSize = AMDGPU::getOperandSize(Desc, OpIdx);
2978 
2979   switch (OpSize) { // expected operand size
2980   case 8:
2981     return AMDGPU::isInlinableLiteral64(Val, hasInv2PiInlineImm());
2982   case 4:
2983     return AMDGPU::isInlinableLiteral32(Val, hasInv2PiInlineImm());
2984   case 2: {
2985     const unsigned OperandType = Desc.OpInfo[OpIdx].OperandType;
2986     if (OperandType == AMDGPU::OPERAND_REG_IMM_INT16 ||
2987         OperandType == AMDGPU::OPERAND_REG_INLINE_C_INT16 ||
2988         OperandType == AMDGPU::OPERAND_REG_INLINE_AC_INT16)
2989       return AMDGPU::isInlinableIntLiteral(Val);
2990 
2991     if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2INT16 ||
2992         OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2INT16 ||
2993         OperandType == AMDGPU::OPERAND_REG_IMM_V2INT16)
2994       return AMDGPU::isInlinableIntLiteralV216(Val);
2995 
2996     if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2FP16 ||
2997         OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2FP16 ||
2998         OperandType == AMDGPU::OPERAND_REG_IMM_V2FP16)
2999       return AMDGPU::isInlinableLiteralV216(Val, hasInv2PiInlineImm());
3000 
3001     return AMDGPU::isInlinableLiteral16(Val, hasInv2PiInlineImm());
3002   }
3003   default:
3004     llvm_unreachable("invalid operand size");
3005   }
3006 }
3007 
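// Return how many scalar (SGPR or literal) values an instruction may read
// over the constant bus: one on pre-GFX10 targets and generally two on
// GFX10+, except for the 64-bit shifts listed below.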
3008 unsigned AMDGPUAsmParser::getConstantBusLimit(unsigned Opcode) const {
3009   if (!isGFX10Plus())
3010     return 1;
3011 
3012   switch (Opcode) {
3013   // 64-bit shift instructions can use only one scalar value input
3014   case AMDGPU::V_LSHLREV_B64_e64:
3015   case AMDGPU::V_LSHLREV_B64_gfx10:
3016   case AMDGPU::V_LSHRREV_B64_e64:
3017   case AMDGPU::V_LSHRREV_B64_gfx10:
3018   case AMDGPU::V_ASHRREV_I64_e64:
3019   case AMDGPU::V_ASHRREV_I64_gfx10:
3020   case AMDGPU::V_LSHL_B64_e64:
3021   case AMDGPU::V_LSHR_B64_e64:
3022   case AMDGPU::V_ASHR_I64_e64:
3023     return 1;
3024   default:
3025     return 2;
3026   }
3027 }
3028 
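// An operand occupies the constant bus if it is a non-inline immediate,
// an expression, or an SGPR other than null.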
3029 bool AMDGPUAsmParser::usesConstantBus(const MCInst &Inst, unsigned OpIdx) {
3030   const MCOperand &MO = Inst.getOperand(OpIdx);
3031   if (MO.isImm()) {
3032     return !isInlineConstant(Inst, OpIdx);
3033   } else if (MO.isReg()) {
3034     auto Reg = MO.getReg();
3035     const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3036     auto PReg = mc2PseudoReg(Reg);
3037     return isSGPR(PReg, TRI) && PReg != SGPR_NULL;
3038   } else {
3039     return true;
3040   }
3041 }
3042 
3043 bool
3044 AMDGPUAsmParser::validateConstantBusLimitations(const MCInst &Inst,
3045                                                 const OperandVector &Operands) {
3046   const unsigned Opcode = Inst.getOpcode();
3047   const MCInstrDesc &Desc = MII.get(Opcode);
3048   unsigned LastSGPR = AMDGPU::NoRegister;
3049   unsigned ConstantBusUseCount = 0;
3050   unsigned NumLiterals = 0;
3051   unsigned LiteralSize;
3052 
3053   if (Desc.TSFlags &
3054       (SIInstrFlags::VOPC |
3055        SIInstrFlags::VOP1 | SIInstrFlags::VOP2 |
3056        SIInstrFlags::VOP3 | SIInstrFlags::VOP3P |
3057        SIInstrFlags::SDWA)) {
3058     // Check special imm operands (used by madmk, etc)
3059     if (AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm) != -1) {
3060       ++ConstantBusUseCount;
3061     }
3062 
3063     SmallDenseSet<unsigned> SGPRsUsed;
3064     unsigned SGPRUsed = findImplicitSGPRReadInVOP(Inst);
3065     if (SGPRUsed != AMDGPU::NoRegister) {
3066       SGPRsUsed.insert(SGPRUsed);
3067       ++ConstantBusUseCount;
3068     }
3069 
3070     const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
3071     const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
3072     const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
3073 
3074     const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx };
3075 
3076     for (int OpIdx : OpIndices) {
3077       if (OpIdx == -1) break;
3078 
3079       const MCOperand &MO = Inst.getOperand(OpIdx);
3080       if (usesConstantBus(Inst, OpIdx)) {
3081         if (MO.isReg()) {
3082           LastSGPR = mc2PseudoReg(MO.getReg());
3083           // Pairs of registers with a partial intersection like these
3084           //   s0, s[0:1]
3085           //   flat_scratch_lo, flat_scratch
3086           //   flat_scratch_lo, flat_scratch_hi
3087           // are theoretically valid but are disabled anyway.
3088           // Note that this code mimics SIInstrInfo::verifyInstruction.
3089           if (!SGPRsUsed.count(LastSGPR)) {
3090             SGPRsUsed.insert(LastSGPR);
3091             ++ConstantBusUseCount;
3092           }
3093         } else { // Expression or a literal
3094 
3095           if (Desc.OpInfo[OpIdx].OperandType == MCOI::OPERAND_IMMEDIATE)
3096             continue; // special operand like VINTERP attr_chan
3097 
3098           // An instruction may use only one literal.
3099           // This has been validated in a previous step.
3100           // See validateVOP3Literal.
3101           // The same literal may be used by more than one operand.
3102           // If all of these operands have the same size,
3103           // the literal counts as one scalar value;
3104           // otherwise it counts as two scalar values.
3105           // See "GFX10 Shader Programming", section 3.6.2.3.
3106 
3107           unsigned Size = AMDGPU::getOperandSize(Desc, OpIdx);
3108           if (Size < 4) Size = 4;
3109 
3110           if (NumLiterals == 0) {
3111             NumLiterals = 1;
3112             LiteralSize = Size;
3113           } else if (LiteralSize != Size) {
3114             NumLiterals = 2;
3115           }
3116         }
3117       }
3118     }
3119   }
3120   ConstantBusUseCount += NumLiterals;
3121 
3122   if (ConstantBusUseCount <= getConstantBusLimit(Opcode))
3123     return true;
3124 
3125   SMLoc LitLoc = getLitLoc(Operands);
3126   SMLoc RegLoc = getRegLoc(LastSGPR, Operands);
3127   SMLoc Loc = (LitLoc.getPointer() < RegLoc.getPointer()) ? RegLoc : LitLoc;
3128   Error(Loc, "invalid operand (violates constant bus restrictions)");
3129   return false;
3130 }
3131 
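// For instructions whose vdst operand is marked early-clobber, the
// destination register must not overlap any of the source registers.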
3132 bool
3133 AMDGPUAsmParser::validateEarlyClobberLimitations(const MCInst &Inst,
3134                                                  const OperandVector &Operands) {
3135   const unsigned Opcode = Inst.getOpcode();
3136   const MCInstrDesc &Desc = MII.get(Opcode);
3137 
3138   const int DstIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vdst);
3139   if (DstIdx == -1 ||
3140       Desc.getOperandConstraint(DstIdx, MCOI::EARLY_CLOBBER) == -1) {
3141     return true;
3142   }
3143 
3144   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3145 
3146   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
3147   const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
3148   const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
3149 
3150   assert(DstIdx != -1);
3151   const MCOperand &Dst = Inst.getOperand(DstIdx);
3152   assert(Dst.isReg());
3153   const unsigned DstReg = mc2PseudoReg(Dst.getReg());
3154 
3155   const int SrcIndices[] = { Src0Idx, Src1Idx, Src2Idx };
3156 
3157   for (int SrcIdx : SrcIndices) {
3158     if (SrcIdx == -1) break;
3159     const MCOperand &Src = Inst.getOperand(SrcIdx);
3160     if (Src.isReg()) {
3161       const unsigned SrcReg = mc2PseudoReg(Src.getReg());
3162       if (isRegIntersect(DstReg, SrcReg, TRI)) {
3163         Error(getRegLoc(SrcReg, Operands),
3164           "destination must be different than all sources");
3165         return false;
3166       }
3167     }
3168   }
3169 
3170   return true;
3171 }
3172 
3173 bool AMDGPUAsmParser::validateIntClampSupported(const MCInst &Inst) {
3174 
3175   const unsigned Opc = Inst.getOpcode();
3176   const MCInstrDesc &Desc = MII.get(Opc);
3177 
3178   if ((Desc.TSFlags & SIInstrFlags::IntClamp) != 0 && !hasIntClamp()) {
3179     int ClampIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp);
3180     assert(ClampIdx != -1);
3181     return Inst.getOperand(ClampIdx).getImm() == 0;
3182   }
3183 
3184   return true;
3185 }
3186 
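// Check that the size of the vdata register tuple matches the number of
// enabled dmask channels plus one dword for tfe, with the count halved
// (rounded up) when d16 is used on targets with packed d16. Gather4 always
// returns four channels regardless of dmask.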
3187 bool AMDGPUAsmParser::validateMIMGDataSize(const MCInst &Inst) {
3188 
3189   const unsigned Opc = Inst.getOpcode();
3190   const MCInstrDesc &Desc = MII.get(Opc);
3191 
3192   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3193     return true;
3194 
3195   int VDataIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdata);
3196   int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
3197   int TFEIdx   = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::tfe);
3198 
3199   assert(VDataIdx != -1);
3200 
3201   if (DMaskIdx == -1 || TFEIdx == -1) // intersect_ray
3202     return true;
3203 
3204   unsigned VDataSize = AMDGPU::getRegOperandSize(getMRI(), Desc, VDataIdx);
3205   unsigned TFESize = Inst.getOperand(TFEIdx).getImm() ? 1 : 0;
3206   unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
3207   if (DMask == 0)
3208     DMask = 1;
3209 
3210   unsigned DataSize =
3211     (Desc.TSFlags & SIInstrFlags::Gather4) ? 4 : countPopulation(DMask);
3212   if (hasPackedD16()) {
3213     int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
3214     if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm())
3215       DataSize = (DataSize + 1) / 2;
3216   }
3217 
3218   return (VDataSize / 4) == DataSize + TFESize;
3219 }
3220 
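// On GFX10+, check that the number of address VGPRs (a single register
// tuple, or one vaddr operand per component in NSA form) matches what the
// dim and base opcode require.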
3221 bool AMDGPUAsmParser::validateMIMGAddrSize(const MCInst &Inst) {
3222   const unsigned Opc = Inst.getOpcode();
3223   const MCInstrDesc &Desc = MII.get(Opc);
3224 
3225   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0 || !isGFX10Plus())
3226     return true;
3227 
3228   const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc);
3229 
3230   const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
3231       AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode);
3232   int VAddr0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr0);
3233   int SrsrcIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::srsrc);
3234   int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
3235 
3236   assert(VAddr0Idx != -1);
3237   assert(SrsrcIdx != -1);
3238   assert(SrsrcIdx > VAddr0Idx);
3239 
3240   if (DimIdx == -1)
3241     return true; // intersect_ray
3242 
3243   unsigned Dim = Inst.getOperand(DimIdx).getImm();
3244   const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim);
3245   bool IsNSA = SrsrcIdx - VAddr0Idx > 1;
3246   unsigned VAddrSize =
3247       IsNSA ? SrsrcIdx - VAddr0Idx
3248             : AMDGPU::getRegOperandSize(getMRI(), Desc, VAddr0Idx) / 4;
3249 
3250   unsigned AddrSize = BaseOpcode->NumExtraArgs +
3251                       (BaseOpcode->Gradients ? DimInfo->NumGradients : 0) +
3252                       (BaseOpcode->Coordinates ? DimInfo->NumCoords : 0) +
3253                       (BaseOpcode->LodOrClampOrMip ? 1 : 0);
3254   if (!IsNSA) {
3255     if (AddrSize > 8)
3256       AddrSize = 16;
3257     else if (AddrSize > 4)
3258       AddrSize = 8;
3259   }
3260 
3261   return VAddrSize == AddrSize;
3262 }
3263 
3264 bool AMDGPUAsmParser::validateMIMGAtomicDMask(const MCInst &Inst) {
3265 
3266   const unsigned Opc = Inst.getOpcode();
3267   const MCInstrDesc &Desc = MII.get(Opc);
3268 
3269   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3270     return true;
3271   if (!Desc.mayLoad() || !Desc.mayStore())
3272     return true; // Not atomic
3273 
3274   int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
3275   unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
3276 
3277   // This is an incomplete check because image_atomic_cmpswap
3278   // may only use 0x3 and 0xf while other atomic operations
3279   // may use 0x1 and 0x3. However, these limitations are
3280   // verified when we check that dmask matches dst size.
3281   return DMask == 0x1 || DMask == 0x3 || DMask == 0xf;
3282 }
3283 
3284 bool AMDGPUAsmParser::validateMIMGGatherDMask(const MCInst &Inst) {
3285 
3286   const unsigned Opc = Inst.getOpcode();
3287   const MCInstrDesc &Desc = MII.get(Opc);
3288 
3289   if ((Desc.TSFlags & SIInstrFlags::Gather4) == 0)
3290     return true;
3291 
3292   int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
3293   unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
3294 
3295   // GATHER4 instructions use dmask in a different fashion compared to
3296   // other MIMG instructions. The only useful DMASK values are
3297   // 1=red, 2=green, 4=blue, 8=alpha. (e.g. 1 returns
3298   // (red,red,red,red) etc.) The ISA document doesn't mention
3299   // this.
3300   return DMask == 0x1 || DMask == 0x2 || DMask == 0x4 || DMask == 0x8;
3301 }
3302 
3303 static bool IsMovrelsSDWAOpcode(const unsigned Opcode) {
3305   switch (Opcode) {
3306   case AMDGPU::V_MOVRELS_B32_sdwa_gfx10:
3307   case AMDGPU::V_MOVRELSD_B32_sdwa_gfx10:
3308   case AMDGPU::V_MOVRELSD_2_B32_sdwa_gfx10:
3309     return true;
3310   default:
3311     return false;
3312   }
3313 }
3314 
3315 // movrels* opcodes should only allow VGPRs as src0.
3316 // This is specified in the .td description for vop1/vop3,
3317 // but sdwa is handled differently. See isSDWAOperand.
3318 bool AMDGPUAsmParser::validateMovrels(const MCInst &Inst,
3319                                       const OperandVector &Operands) {
3320 
3321   const unsigned Opc = Inst.getOpcode();
3322   const MCInstrDesc &Desc = MII.get(Opc);
3323 
3324   if ((Desc.TSFlags & SIInstrFlags::SDWA) == 0 || !IsMovrelsSDWAOpcode(Opc))
3325     return true;
3326 
3327   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
3328   assert(Src0Idx != -1);
3329 
3330   SMLoc ErrLoc;
3331   const MCOperand &Src0 = Inst.getOperand(Src0Idx);
3332   if (Src0.isReg()) {
3333     auto Reg = mc2PseudoReg(Src0.getReg());
3334     const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3335     if (!isSGPR(Reg, TRI))
3336       return true;
3337     ErrLoc = getRegLoc(Reg, Operands);
3338   } else {
3339     ErrLoc = getConstLoc(Operands);
3340   }
3341 
3342   Error(ErrLoc, "source operand must be a VGPR");
3343   return false;
3344 }
3345 
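// v_accvgpr_write accepts a VGPR or an inline constant as src0, but not an
// SGPR.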
3346 bool AMDGPUAsmParser::validateMAIAccWrite(const MCInst &Inst,
3347                                           const OperandVector &Operands) {
3348 
3349   const unsigned Opc = Inst.getOpcode();
3350 
3351   if (Opc != AMDGPU::V_ACCVGPR_WRITE_B32_vi)
3352     return true;
3353 
3354   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
3355   assert(Src0Idx != -1);
3356 
3357   const MCOperand &Src0 = Inst.getOperand(Src0Idx);
3358   if (!Src0.isReg())
3359     return true;
3360 
3361   auto Reg = mc2PseudoReg(Src0.getReg());
3362   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3363   if (isSGPR(Reg, TRI)) {
3364     Error(getRegLoc(Reg, Operands),
3365           "source operand must be either a VGPR or an inline constant");
3366     return false;
3367   }
3368 
3369   return true;
3370 }
3371 
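// The ABS source modifier is not allowed on v_div_scale_* (VOP3B)
// instructions.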
3372 bool AMDGPUAsmParser::validateDivScale(const MCInst &Inst) {
3373   switch (Inst.getOpcode()) {
3374   default:
3375     return true;
3376   case V_DIV_SCALE_F32_gfx6_gfx7:
3377   case V_DIV_SCALE_F32_vi:
3378   case V_DIV_SCALE_F32_gfx10:
3379   case V_DIV_SCALE_F64_gfx6_gfx7:
3380   case V_DIV_SCALE_F64_vi:
3381   case V_DIV_SCALE_F64_gfx10:
3382     break;
3383   }
3384 
3385   // TODO: Check that src0 = src1 or src2.
3386 
3387   for (auto Name : {AMDGPU::OpName::src0_modifiers,
3388                     AMDGPU::OpName::src1_modifiers,
3389                     AMDGPU::OpName::src2_modifiers}) {
3390     if (Inst.getOperand(AMDGPU::getNamedOperandIdx(Inst.getOpcode(), Name))
3391             .getImm() &
3392         SISrcMods::ABS) {
3393       return false;
3394     }
3395   }
3396 
3397   return true;
3398 }
3399 
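// The d16 modifier of MIMG instructions is not supported on SI/CI.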
3400 bool AMDGPUAsmParser::validateMIMGD16(const MCInst &Inst) {
3401 
3402   const unsigned Opc = Inst.getOpcode();
3403   const MCInstrDesc &Desc = MII.get(Opc);
3404 
3405   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3406     return true;
3407 
3408   int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
3409   if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm()) {
3410     if (isCI() || isSI())
3411       return false;
3412   }
3413 
3414   return true;
3415 }
3416 
3417 bool AMDGPUAsmParser::validateMIMGDim(const MCInst &Inst) {
3418   const unsigned Opc = Inst.getOpcode();
3419   const MCInstrDesc &Desc = MII.get(Opc);
3420 
3421   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3422     return true;
3423 
3424   int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
3425   if (DimIdx < 0)
3426     return true;
3427 
3428   long Imm = Inst.getOperand(DimIdx).getImm();
3429   if (Imm < 0 || Imm >= 8)
3430     return false;
3431 
3432   return true;
3433 }
3434 
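// "*rev" opcodes read their first two sources in reversed order compared to
// the corresponding non-rev opcode (e.g. v_subrev_f32 computes src1 - src0).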
3435 static bool IsRevOpcode(const unsigned Opcode) {
3437   switch (Opcode) {
3438   case AMDGPU::V_SUBREV_F32_e32:
3439   case AMDGPU::V_SUBREV_F32_e64:
3440   case AMDGPU::V_SUBREV_F32_e32_gfx10:
3441   case AMDGPU::V_SUBREV_F32_e32_gfx6_gfx7:
3442   case AMDGPU::V_SUBREV_F32_e32_vi:
3443   case AMDGPU::V_SUBREV_F32_e64_gfx10:
3444   case AMDGPU::V_SUBREV_F32_e64_gfx6_gfx7:
3445   case AMDGPU::V_SUBREV_F32_e64_vi:
3446 
3447   case AMDGPU::V_SUBREV_CO_U32_e32:
3448   case AMDGPU::V_SUBREV_CO_U32_e64:
3449   case AMDGPU::V_SUBREV_I32_e32_gfx6_gfx7:
3450   case AMDGPU::V_SUBREV_I32_e64_gfx6_gfx7:
3451 
3452   case AMDGPU::V_SUBBREV_U32_e32:
3453   case AMDGPU::V_SUBBREV_U32_e64:
3454   case AMDGPU::V_SUBBREV_U32_e32_gfx6_gfx7:
3455   case AMDGPU::V_SUBBREV_U32_e32_vi:
3456   case AMDGPU::V_SUBBREV_U32_e64_gfx6_gfx7:
3457   case AMDGPU::V_SUBBREV_U32_e64_vi:
3458 
3459   case AMDGPU::V_SUBREV_U32_e32:
3460   case AMDGPU::V_SUBREV_U32_e64:
3461   case AMDGPU::V_SUBREV_U32_e32_gfx9:
3462   case AMDGPU::V_SUBREV_U32_e32_vi:
3463   case AMDGPU::V_SUBREV_U32_e64_gfx9:
3464   case AMDGPU::V_SUBREV_U32_e64_vi:
3465 
3466   case AMDGPU::V_SUBREV_F16_e32:
3467   case AMDGPU::V_SUBREV_F16_e64:
3468   case AMDGPU::V_SUBREV_F16_e32_gfx10:
3469   case AMDGPU::V_SUBREV_F16_e32_vi:
3470   case AMDGPU::V_SUBREV_F16_e64_gfx10:
3471   case AMDGPU::V_SUBREV_F16_e64_vi:
3472 
3473   case AMDGPU::V_SUBREV_U16_e32:
3474   case AMDGPU::V_SUBREV_U16_e64:
3475   case AMDGPU::V_SUBREV_U16_e32_vi:
3476   case AMDGPU::V_SUBREV_U16_e64_vi:
3477 
3478   case AMDGPU::V_SUBREV_CO_U32_e32_gfx9:
3479   case AMDGPU::V_SUBREV_CO_U32_e64_gfx10:
3480   case AMDGPU::V_SUBREV_CO_U32_e64_gfx9:
3481 
3482   case AMDGPU::V_SUBBREV_CO_U32_e32_gfx9:
3483   case AMDGPU::V_SUBBREV_CO_U32_e64_gfx9:
3484 
3485   case AMDGPU::V_SUBREV_NC_U32_e32_gfx10:
3486   case AMDGPU::V_SUBREV_NC_U32_e64_gfx10:
3487 
3488   case AMDGPU::V_SUBREV_CO_CI_U32_e32_gfx10:
3489   case AMDGPU::V_SUBREV_CO_CI_U32_e64_gfx10:
3490 
3491   case AMDGPU::V_LSHRREV_B32_e32:
3492   case AMDGPU::V_LSHRREV_B32_e64:
3493   case AMDGPU::V_LSHRREV_B32_e32_gfx6_gfx7:
3494   case AMDGPU::V_LSHRREV_B32_e64_gfx6_gfx7:
3495   case AMDGPU::V_LSHRREV_B32_e32_vi:
3496   case AMDGPU::V_LSHRREV_B32_e64_vi:
3497   case AMDGPU::V_LSHRREV_B32_e32_gfx10:
3498   case AMDGPU::V_LSHRREV_B32_e64_gfx10:
3499 
3500   case AMDGPU::V_ASHRREV_I32_e32:
3501   case AMDGPU::V_ASHRREV_I32_e64:
3502   case AMDGPU::V_ASHRREV_I32_e32_gfx10:
3503   case AMDGPU::V_ASHRREV_I32_e32_gfx6_gfx7:
3504   case AMDGPU::V_ASHRREV_I32_e32_vi:
3505   case AMDGPU::V_ASHRREV_I32_e64_gfx10:
3506   case AMDGPU::V_ASHRREV_I32_e64_gfx6_gfx7:
3507   case AMDGPU::V_ASHRREV_I32_e64_vi:
3508 
3509   case AMDGPU::V_LSHLREV_B32_e32:
3510   case AMDGPU::V_LSHLREV_B32_e64:
3511   case AMDGPU::V_LSHLREV_B32_e32_gfx10:
3512   case AMDGPU::V_LSHLREV_B32_e32_gfx6_gfx7:
3513   case AMDGPU::V_LSHLREV_B32_e32_vi:
3514   case AMDGPU::V_LSHLREV_B32_e64_gfx10:
3515   case AMDGPU::V_LSHLREV_B32_e64_gfx6_gfx7:
3516   case AMDGPU::V_LSHLREV_B32_e64_vi:
3517 
3518   case AMDGPU::V_LSHLREV_B16_e32:
3519   case AMDGPU::V_LSHLREV_B16_e64:
3520   case AMDGPU::V_LSHLREV_B16_e32_vi:
3521   case AMDGPU::V_LSHLREV_B16_e64_vi:
3522   case AMDGPU::V_LSHLREV_B16_gfx10:
3523 
3524   case AMDGPU::V_LSHRREV_B16_e32:
3525   case AMDGPU::V_LSHRREV_B16_e64:
3526   case AMDGPU::V_LSHRREV_B16_e32_vi:
3527   case AMDGPU::V_LSHRREV_B16_e64_vi:
3528   case AMDGPU::V_LSHRREV_B16_gfx10:
3529 
3530   case AMDGPU::V_ASHRREV_I16_e32:
3531   case AMDGPU::V_ASHRREV_I16_e64:
3532   case AMDGPU::V_ASHRREV_I16_e32_vi:
3533   case AMDGPU::V_ASHRREV_I16_e64_vi:
3534   case AMDGPU::V_ASHRREV_I16_gfx10:
3535 
3536   case AMDGPU::V_LSHLREV_B64_e64:
3537   case AMDGPU::V_LSHLREV_B64_gfx10:
3538   case AMDGPU::V_LSHLREV_B64_vi:
3539 
3540   case AMDGPU::V_LSHRREV_B64_e64:
3541   case AMDGPU::V_LSHRREV_B64_gfx10:
3542   case AMDGPU::V_LSHRREV_B64_vi:
3543 
3544   case AMDGPU::V_ASHRREV_I64_e64:
3545   case AMDGPU::V_ASHRREV_I64_gfx10:
3546   case AMDGPU::V_ASHRREV_I64_vi:
3547 
3548   case AMDGPU::V_PK_LSHLREV_B16:
3549   case AMDGPU::V_PK_LSHLREV_B16_gfx10:
3550   case AMDGPU::V_PK_LSHLREV_B16_vi:
3551 
3552   case AMDGPU::V_PK_LSHRREV_B16:
3553   case AMDGPU::V_PK_LSHRREV_B16_gfx10:
3554   case AMDGPU::V_PK_LSHRREV_B16_vi:
3555   case AMDGPU::V_PK_ASHRREV_I16:
3556   case AMDGPU::V_PK_ASHRREV_I16_gfx10:
3557   case AMDGPU::V_PK_ASHRREV_I16_vi:
3558     return true;
3559   default:
3560     return false;
3561   }
3562 }
3563 
3564 bool AMDGPUAsmParser::validateLdsDirect(const MCInst &Inst) {
3565 
3566   using namespace SIInstrFlags;
3567   const unsigned Opcode = Inst.getOpcode();
3568   const MCInstrDesc &Desc = MII.get(Opcode);
3569 
3570   // The lds_direct register is defined so that it can be used
3571   // with 9-bit source operands only. Ignore encodings that do not accept them.
3572   if ((Desc.TSFlags & (VOP1 | VOP2 | VOP3 | VOPC | VOP3P | SIInstrFlags::SDWA)) == 0)
3573     return true;
3574 
3575   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
3576   const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
3577   const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
3578 
3579   const int SrcIndices[] = { Src1Idx, Src2Idx };
3580 
3581   // lds_direct cannot be specified as either src1 or src2.
3582   for (int SrcIdx : SrcIndices) {
3583     if (SrcIdx == -1) break;
3584     const MCOperand &Src = Inst.getOperand(SrcIdx);
3585     if (Src.isReg() && Src.getReg() == LDS_DIRECT) {
3586       return false;
3587     }
3588   }
3589 
3590   if (Src0Idx == -1)
3591     return true;
3592 
3593   const MCOperand &Src = Inst.getOperand(Src0Idx);
3594   if (!Src.isReg() || Src.getReg() != LDS_DIRECT)
3595     return true;
3596 
3597   // lds_direct is specified as src0. Check additional limitations.
3598   return (Desc.TSFlags & SIInstrFlags::SDWA) == 0 && !IsRevOpcode(Opcode);
3599 }
3600 
3601 SMLoc AMDGPUAsmParser::getFlatOffsetLoc(const OperandVector &Operands) const {
3602   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
3603     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
3604     if (Op.isFlatOffset())
3605       return Op.getStartLoc();
3606   }
3607   return getLoc();
3608 }
3609 
3610 bool AMDGPUAsmParser::validateFlatOffset(const MCInst &Inst,
3611                                          const OperandVector &Operands) {
3612   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
3613   if ((TSFlags & SIInstrFlags::FLAT) == 0)
3614     return true;
3615 
3616   auto Opcode = Inst.getOpcode();
3617   auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
3618   assert(OpNum != -1);
3619 
3620   const auto &Op = Inst.getOperand(OpNum);
3621   if (!hasFlatOffsets() && Op.getImm() != 0) {
3622     Error(getFlatOffsetLoc(Operands),
3623           "flat offset modifier is not supported on this GPU");
3624     return false;
3625   }
3626 
3627   // For the FLAT segment, the offset must be positive;
3628   // the MSB is ignored and forced to zero.
3629   if (TSFlags & (SIInstrFlags::IsFlatGlobal | SIInstrFlags::IsFlatScratch)) {
3630     unsigned OffsetSize = AMDGPU::getNumFlatOffsetBits(getSTI(), true);
3631     if (!isIntN(OffsetSize, Op.getImm())) {
3632       Error(getFlatOffsetLoc(Operands),
3633             Twine("expected a ") + Twine(OffsetSize) + "-bit signed offset");
3634       return false;
3635     }
3636   } else {
3637     unsigned OffsetSize = AMDGPU::getNumFlatOffsetBits(getSTI(), false);
3638     if (!isUIntN(OffsetSize, Op.getImm())) {
3639       Error(getFlatOffsetLoc(Operands),
3640             Twine("expected a ") + Twine(OffsetSize) + "-bit unsigned offset");
3641       return false;
3642     }
3643   }
3644 
3645   return true;
3646 }
3647 
3648 SMLoc AMDGPUAsmParser::getSMEMOffsetLoc(const OperandVector &Operands) const {
3649   // Start with the second operand because the SMEM offset cannot be dst or src0.
3650   for (unsigned i = 2, e = Operands.size(); i != e; ++i) {
3651     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
3652     if (Op.isSMEMOffset())
3653       return Op.getStartLoc();
3654   }
3655   return getLoc();
3656 }
3657 
3658 bool AMDGPUAsmParser::validateSMEMOffset(const MCInst &Inst,
3659                                          const OperandVector &Operands) {
3660   if (isCI() || isSI())
3661     return true;
3662 
3663   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
3664   if ((TSFlags & SIInstrFlags::SMRD) == 0)
3665     return true;
3666 
3667   auto Opcode = Inst.getOpcode();
3668   auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
3669   if (OpNum == -1)
3670     return true;
3671 
3672   const auto &Op = Inst.getOperand(OpNum);
3673   if (!Op.isImm())
3674     return true;
3675 
3676   uint64_t Offset = Op.getImm();
3677   bool IsBuffer = AMDGPU::getSMEMIsBuffer(Opcode);
3678   if (AMDGPU::isLegalSMRDEncodedUnsignedOffset(getSTI(), Offset) ||
3679       AMDGPU::isLegalSMRDEncodedSignedOffset(getSTI(), Offset, IsBuffer))
3680     return true;
3681 
3682   Error(getSMEMOffsetLoc(Operands),
3683         (isVI() || IsBuffer) ? "expected a 20-bit unsigned offset" :
3684                                "expected a 21-bit signed offset");
3685 
3686   return false;
3687 }
3688 
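// SOP2/SOPC instructions may encode at most one 32-bit literal, which may be
// shared by both sources if the values are identical.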
3689 bool AMDGPUAsmParser::validateSOPLiteral(const MCInst &Inst) const {
3690   unsigned Opcode = Inst.getOpcode();
3691   const MCInstrDesc &Desc = MII.get(Opcode);
3692   if (!(Desc.TSFlags & (SIInstrFlags::SOP2 | SIInstrFlags::SOPC)))
3693     return true;
3694 
3695   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
3696   const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
3697 
3698   const int OpIndices[] = { Src0Idx, Src1Idx };
3699 
3700   unsigned NumExprs = 0;
3701   unsigned NumLiterals = 0;
3702   uint32_t LiteralValue;
3703 
3704   for (int OpIdx : OpIndices) {
3705     if (OpIdx == -1) break;
3706 
3707     const MCOperand &MO = Inst.getOperand(OpIdx);
3708     // Exclude special imm operands (like the one used by s_set_gpr_idx_on)
3709     if (AMDGPU::isSISrcOperand(Desc, OpIdx)) {
3710       if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) {
3711         uint32_t Value = static_cast<uint32_t>(MO.getImm());
3712         if (NumLiterals == 0 || LiteralValue != Value) {
3713           LiteralValue = Value;
3714           ++NumLiterals;
3715         }
3716       } else if (MO.isExpr()) {
3717         ++NumExprs;
3718       }
3719     }
3720   }
3721 
3722   return NumLiterals + NumExprs <= 1;
3723 }
3724 
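// For v_permlane16 and v_permlanex16 only the two low op_sel bits may be set.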
3725 bool AMDGPUAsmParser::validateOpSel(const MCInst &Inst) {
3726   const unsigned Opc = Inst.getOpcode();
3727   if (Opc == AMDGPU::V_PERMLANE16_B32_gfx10 ||
3728       Opc == AMDGPU::V_PERMLANEX16_B32_gfx10) {
3729     int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
3730     unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
3731 
3732     if (OpSel & ~3)
3733       return false;
3734   }
3735   return true;
3736 }
3737 
3738 // Check if VCC register matches wavefront size
3739 bool AMDGPUAsmParser::validateVccOperand(unsigned Reg) const {
3740   auto FB = getFeatureBits();
3741   return (FB[AMDGPU::FeatureWavefrontSize64] && Reg == AMDGPU::VCC) ||
3742     (FB[AMDGPU::FeatureWavefrontSize32] && Reg == AMDGPU::VCC_LO);
3743 }
3744 
3745 // A VOP3 literal is only allowed on GFX10+, and only one may be used.
3746 bool AMDGPUAsmParser::validateVOP3Literal(const MCInst &Inst,
3747                                           const OperandVector &Operands) {
3748   unsigned Opcode = Inst.getOpcode();
3749   const MCInstrDesc &Desc = MII.get(Opcode);
3750   if (!(Desc.TSFlags & (SIInstrFlags::VOP3 | SIInstrFlags::VOP3P)))
3751     return true;
3752 
3753   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
3754   const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
3755   const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
3756 
3757   const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx };
3758 
3759   unsigned NumExprs = 0;
3760   unsigned NumLiterals = 0;
3761   uint32_t LiteralValue;
3762 
3763   for (int OpIdx : OpIndices) {
3764     if (OpIdx == -1) break;
3765 
3766     const MCOperand &MO = Inst.getOperand(OpIdx);
3767     if (!MO.isImm() && !MO.isExpr())
3768       continue;
3769     if (!AMDGPU::isSISrcOperand(Desc, OpIdx))
3770       continue;
3771 
3772     if (OpIdx == Src2Idx && (Desc.TSFlags & SIInstrFlags::IsMAI) &&
3773         getFeatureBits()[AMDGPU::FeatureMFMAInlineLiteralBug]) {
3774       Error(getConstLoc(Operands),
3775             "inline constants are not allowed for this operand");
3776       return false;
3777     }
3778 
3779     if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) {
3780       uint32_t Value = static_cast<uint32_t>(MO.getImm());
3781       if (NumLiterals == 0 || LiteralValue != Value) {
3782         LiteralValue = Value;
3783         ++NumLiterals;
3784       }
3785     } else if (MO.isExpr()) {
3786       ++NumExprs;
3787     }
3788   }
3789   NumLiterals += NumExprs;
3790 
3791   if (!NumLiterals)
3792     return true;
3793 
3794   if (!getFeatureBits()[AMDGPU::FeatureVOP3Literal]) {
3795     Error(getLitLoc(Operands), "literal operands are not supported");
3796     return false;
3797   }
3798 
3799   if (NumLiterals > 1) {
3800     Error(getLitLoc(Operands), "only one literal operand is allowed");
3801     return false;
3802   }
3803 
3804   return true;
3805 }
3806 
3807 bool AMDGPUAsmParser::validateCoherencyBits(const MCInst &Inst,
3808                                             const OperandVector &Operands,
3809                                             const SMLoc &IDLoc) {
3810   int GLCPos = AMDGPU::getNamedOperandIdx(Inst.getOpcode(),
3811                                           AMDGPU::OpName::glc1);
3812   if (GLCPos != -1) {
3813     // -1 is set by GLC_1 default operand. In all cases "glc" must be present
3814     // in the asm string, and the default value means it is not present.
3815     if (Inst.getOperand(GLCPos).getImm() == -1) {
3816       Error(IDLoc, "instruction must use glc");
3817       return false;
3818     }
3819   }
3820 
3821   return true;
3822 }
3823 
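// Run target-specific semantic checks that the instruction matcher cannot
// express. Each failed check reports a diagnostic, either here or inside the
// corresponding validate* helper.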
3824 bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst,
3825                                           const SMLoc &IDLoc,
3826                                           const OperandVector &Operands) {
3827   if (!validateLdsDirect(Inst)) {
3828     Error(getRegLoc(AMDGPU::LDS_DIRECT, Operands),
3829       "invalid use of lds_direct");
3830     return false;
3831   }
3832   if (!validateSOPLiteral(Inst)) {
3833     Error(getLitLoc(Operands),
3834       "only one literal operand is allowed");
3835     return false;
3836   }
3837   if (!validateVOP3Literal(Inst, Operands)) {
3838     return false;
3839   }
3840   if (!validateConstantBusLimitations(Inst, Operands)) {
3841     return false;
3842   }
3843   if (!validateEarlyClobberLimitations(Inst, Operands)) {
3844     return false;
3845   }
3846   if (!validateIntClampSupported(Inst)) {
3847     Error(getImmLoc(AMDGPUOperand::ImmTyClampSI, Operands),
3848       "integer clamping is not supported on this GPU");
3849     return false;
3850   }
3851   if (!validateOpSel(Inst)) {
3852     Error(getImmLoc(AMDGPUOperand::ImmTyOpSel, Operands),
3853       "invalid op_sel operand");
3854     return false;
3855   }
3856   // For MUBUF/MTBUF d16 is a part of opcode, so there is nothing to validate.
3857   if (!validateMIMGD16(Inst)) {
3858     Error(getImmLoc(AMDGPUOperand::ImmTyD16, Operands),
3859       "d16 modifier is not supported on this GPU");
3860     return false;
3861   }
3862   if (!validateMIMGDim(Inst)) {
3863     Error(IDLoc, "dim modifier is required on this GPU");
3864     return false;
3865   }
3866   if (!validateMIMGDataSize(Inst)) {
3867     Error(IDLoc,
3868       "image data size does not match dmask and tfe");
3869     return false;
3870   }
3871   if (!validateMIMGAddrSize(Inst)) {
3872     Error(IDLoc,
3873       "image address size does not match dim and a16");
3874     return false;
3875   }
3876   if (!validateMIMGAtomicDMask(Inst)) {
3877     Error(getImmLoc(AMDGPUOperand::ImmTyDMask, Operands),
3878       "invalid atomic image dmask");
3879     return false;
3880   }
3881   if (!validateMIMGGatherDMask(Inst)) {
3882     Error(getImmLoc(AMDGPUOperand::ImmTyDMask, Operands),
3883       "invalid image_gather dmask: only one bit must be set");
3884     return false;
3885   }
3886   if (!validateMovrels(Inst, Operands)) {
3887     return false;
3888   }
3889   if (!validateFlatOffset(Inst, Operands)) {
3890     return false;
3891   }
3892   if (!validateSMEMOffset(Inst, Operands)) {
3893     return false;
3894   }
3895   if (!validateMAIAccWrite(Inst, Operands)) {
3896     return false;
3897   }
3898   if (!validateDivScale(Inst)) {
3899     Error(IDLoc, "ABS not allowed in VOP3B instructions");
3900     return false;
3901   }
3902   if (!validateCoherencyBits(Inst, Operands, IDLoc)) {
3903     return false;
3904   }
3905 
3906   return true;
3907 }
3908 
3909 static std::string AMDGPUMnemonicSpellCheck(StringRef S,
3910                                             const FeatureBitset &FBS,
3911                                             unsigned VariantID = 0);
3912 
3913 static bool AMDGPUCheckMnemonic(StringRef Mnemonic,
3914                                 const FeatureBitset &AvailableFeatures,
3915                                 unsigned VariantID);
3916 
3917 bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo,
3918                                        const FeatureBitset &FBS) {
3919   return isSupportedMnemo(Mnemo, FBS, getAllVariants());
3920 }
3921 
3922 bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo,
3923                                        const FeatureBitset &FBS,
3924                                        ArrayRef<unsigned> Variants) {
3925   for (auto Variant : Variants) {
3926     if (AMDGPUCheckMnemonic(Mnemo, FBS, Variant))
3927       return true;
3928   }
3929 
3930   return false;
3931 }
3932 
3933 bool AMDGPUAsmParser::checkUnsupportedInstruction(StringRef Mnemo,
3934                                                   const SMLoc &IDLoc) {
3935   FeatureBitset FBS = ComputeAvailableFeatures(getSTI().getFeatureBits());
3936 
3937   // Check if requested instruction variant is supported.
3938   if (isSupportedMnemo(Mnemo, FBS, getMatchedVariants()))
3939     return false;
3940 
3941   // This instruction is not supported.
3942   // Clear any other pending errors because they are no longer relevant.
3943   getParser().clearPendingErrors();
3944 
3945   // Requested instruction variant is not supported.
3946   // Check if any other variants are supported.
3947   StringRef VariantName = getMatchedVariantName();
3948   if (!VariantName.empty() && isSupportedMnemo(Mnemo, FBS)) {
3949     return Error(IDLoc,
3950                  Twine(VariantName,
3951                        " variant of this instruction is not supported"));
3952   }
3953 
3954   // Finally check if this instruction is supported on any other GPU.
3955   if (isSupportedMnemo(Mnemo, FeatureBitset().set())) {
3956     return Error(IDLoc, "instruction not supported on this GPU");
3957   }
3958 
3959   // Instruction not supported on any GPU. Probably a typo.
3960   std::string Suggestion = AMDGPUMnemonicSpellCheck(Mnemo, FBS);
3961   return Error(IDLoc, "invalid instruction" + Suggestion);
3962 }
3963 
3964 bool AMDGPUAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
3965                                               OperandVector &Operands,
3966                                               MCStreamer &Out,
3967                                               uint64_t &ErrorInfo,
3968                                               bool MatchingInlineAsm) {
3969   MCInst Inst;
3970   unsigned Result = Match_Success;
3971   for (auto Variant : getMatchedVariants()) {
3972     uint64_t EI;
3973     auto R = MatchInstructionImpl(Operands, Inst, EI, MatchingInlineAsm,
3974                                   Variant);
3975     // We order match statuses from least to most specific and keep the
3976     // most specific status as the result:
3977     // Match_MnemonicFail < Match_InvalidOperand < Match_MissingFeature < Match_PreferE32
3978     if ((R == Match_Success) ||
3979         (R == Match_PreferE32) ||
3980         (R == Match_MissingFeature && Result != Match_PreferE32) ||
3981         (R == Match_InvalidOperand && Result != Match_MissingFeature
3982                                    && Result != Match_PreferE32) ||
3983         (R == Match_MnemonicFail   && Result != Match_InvalidOperand
3984                                    && Result != Match_MissingFeature
3985                                    && Result != Match_PreferE32)) {
3986       Result = R;
3987       ErrorInfo = EI;
3988     }
3989     if (R == Match_Success)
3990       break;
3991   }
3992 
3993   if (Result == Match_Success) {
3994     if (!validateInstruction(Inst, IDLoc, Operands)) {
3995       return true;
3996     }
3997     Inst.setLoc(IDLoc);
3998     Out.emitInstruction(Inst, getSTI());
3999     return false;
4000   }
4001 
4002   StringRef Mnemo = ((AMDGPUOperand &)*Operands[0]).getToken();
4003   if (checkUnsupportedInstruction(Mnemo, IDLoc)) {
4004     return true;
4005   }
4006 
4007   switch (Result) {
4008   default: break;
4009   case Match_MissingFeature:
4010     // It has been verified that the specified instruction
4011     // mnemonic is valid. A match was found but it requires
4012     // features which are not supported on this GPU.
4013     return Error(IDLoc, "operands are not valid for this GPU or mode");
4014 
4015   case Match_InvalidOperand: {
4016     SMLoc ErrorLoc = IDLoc;
4017     if (ErrorInfo != ~0ULL) {
4018       if (ErrorInfo >= Operands.size()) {
4019         return Error(IDLoc, "too few operands for instruction");
4020       }
4021       ErrorLoc = ((AMDGPUOperand &)*Operands[ErrorInfo]).getStartLoc();
4022       if (ErrorLoc == SMLoc())
4023         ErrorLoc = IDLoc;
4024     }
4025     return Error(ErrorLoc, "invalid operand for instruction");
4026   }
4027 
4028   case Match_PreferE32:
4029     return Error(IDLoc, "internal error: instruction without _e64 suffix "
4030                         "should be encoded as e32");
4031   case Match_MnemonicFail:
4032     llvm_unreachable("Invalid instructions should have been handled already");
4033   }
4034   llvm_unreachable("Implement any new match types added!");
4035 }
4036 
4037 bool AMDGPUAsmParser::ParseAsAbsoluteExpression(uint32_t &Ret) {
4038   int64_t Tmp = -1;
4039   if (!isToken(AsmToken::Integer) && !isToken(AsmToken::Identifier)) {
4040     return true;
4041   }
4042   if (getParser().parseAbsoluteExpression(Tmp)) {
4043     return true;
4044   }
4045   Ret = static_cast<uint32_t>(Tmp);
4046   return false;
4047 }
4048 
4049 bool AMDGPUAsmParser::ParseDirectiveMajorMinor(uint32_t &Major,
4050                                                uint32_t &Minor) {
4051   if (ParseAsAbsoluteExpression(Major))
4052     return TokError("invalid major version");
4053 
4054   if (!trySkipToken(AsmToken::Comma))
4055     return TokError("minor version number required, comma expected");
4056 
4057   if (ParseAsAbsoluteExpression(Minor))
4058     return TokError("invalid minor version");
4059 
4060   return false;
4061 }
4062 
4063 bool AMDGPUAsmParser::ParseDirectiveAMDGCNTarget() {
4064   if (getSTI().getTargetTriple().getArch() != Triple::amdgcn)
4065     return TokError("directive only supported for amdgcn architecture");
4066 
4067   std::string Target;
4068 
4069   SMLoc TargetStart = getLoc();
4070   if (getParser().parseEscapedString(Target))
4071     return true;
4072   SMRange TargetRange = SMRange(TargetStart, getLoc());
4073 
4074   std::string ExpectedTarget;
4075   raw_string_ostream ExpectedTargetOS(ExpectedTarget);
4076   IsaInfo::streamIsaVersion(&getSTI(), ExpectedTargetOS);
4077 
4078   if (Target != ExpectedTargetOS.str())
4079     return Error(TargetRange.Start, "target must match options", TargetRange);
4080 
4081   getTargetStreamer().EmitDirectiveAMDGCNTarget(Target);
4082   return false;
4083 }
4084 
4085 bool AMDGPUAsmParser::OutOfRangeError(SMRange Range) {
4086   return Error(Range.Start, "value out of range", Range);
4087 }
4088 
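// Convert raw VGPR/SGPR counts into the granulated block counts stored in the
// kernel descriptor, accounting for extra SGPRs used by VCC, flat scratch and
// XNACK.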
4089 bool AMDGPUAsmParser::calculateGPRBlocks(
4090     const FeatureBitset &Features, bool VCCUsed, bool FlatScrUsed,
4091     bool XNACKUsed, Optional<bool> EnableWavefrontSize32, unsigned NextFreeVGPR,
4092     SMRange VGPRRange, unsigned NextFreeSGPR, SMRange SGPRRange,
4093     unsigned &VGPRBlocks, unsigned &SGPRBlocks) {
4094   // TODO(scott.linder): These calculations are duplicated from
4095   // AMDGPUAsmPrinter::getSIProgramInfo and could be unified.
4096   IsaVersion Version = getIsaVersion(getSTI().getCPU());
4097 
4098   unsigned NumVGPRs = NextFreeVGPR;
4099   unsigned NumSGPRs = NextFreeSGPR;
4100 
4101   if (Version.Major >= 10)
4102     NumSGPRs = 0;
4103   else {
4104     unsigned MaxAddressableNumSGPRs =
4105         IsaInfo::getAddressableNumSGPRs(&getSTI());
4106 
4107     if (Version.Major >= 8 && !Features.test(FeatureSGPRInitBug) &&
4108         NumSGPRs > MaxAddressableNumSGPRs)
4109       return OutOfRangeError(SGPRRange);
4110 
4111     NumSGPRs +=
4112         IsaInfo::getNumExtraSGPRs(&getSTI(), VCCUsed, FlatScrUsed, XNACKUsed);
4113 
4114     if ((Version.Major <= 7 || Features.test(FeatureSGPRInitBug)) &&
4115         NumSGPRs > MaxAddressableNumSGPRs)
4116       return OutOfRangeError(SGPRRange);
4117 
4118     if (Features.test(FeatureSGPRInitBug))
4119       NumSGPRs = IsaInfo::FIXED_NUM_SGPRS_FOR_INIT_BUG;
4120   }
4121 
4122   VGPRBlocks =
4123       IsaInfo::getNumVGPRBlocks(&getSTI(), NumVGPRs, EnableWavefrontSize32);
4124   SGPRBlocks = IsaInfo::getNumSGPRBlocks(&getSTI(), NumSGPRs);
4125 
4126   return false;
4127 }
4128 
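// Parse an .amdhsa_kernel block up to .end_amdhsa_kernel. Illustrative
// (minimal) example:
//   .amdhsa_kernel my_kernel
//     .amdhsa_next_free_vgpr 8
//     .amdhsa_next_free_sgpr 16
//   .end_amdhsa_kernel
// Only .amdhsa_next_free_vgpr and .amdhsa_next_free_sgpr are mandatory; all
// other fields default to the values from getDefaultAmdhsaKernelDescriptor().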
4129 bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
4130   if (getSTI().getTargetTriple().getArch() != Triple::amdgcn)
4131     return TokError("directive only supported for amdgcn architecture");
4132 
4133   if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA)
4134     return TokError("directive only supported for amdhsa OS");
4135 
4136   StringRef KernelName;
4137   if (getParser().parseIdentifier(KernelName))
4138     return true;
4139 
4140   kernel_descriptor_t KD = getDefaultAmdhsaKernelDescriptor(&getSTI());
4141 
4142   StringSet<> Seen;
4143 
4144   IsaVersion IVersion = getIsaVersion(getSTI().getCPU());
4145 
4146   SMRange VGPRRange;
4147   uint64_t NextFreeVGPR = 0;
4148   SMRange SGPRRange;
4149   uint64_t NextFreeSGPR = 0;
4150   unsigned UserSGPRCount = 0;
4151   bool ReserveVCC = true;
4152   bool ReserveFlatScr = true;
4153   bool ReserveXNACK = hasXNACK();
4154   Optional<bool> EnableWavefrontSize32;
4155 
4156   while (true) {
4157     while (trySkipToken(AsmToken::EndOfStatement));
4158 
4159     StringRef ID;
4160     SMRange IDRange = getTok().getLocRange();
4161     if (!parseId(ID, "expected .amdhsa_ directive or .end_amdhsa_kernel"))
4162       return true;
4163 
4164     if (ID == ".end_amdhsa_kernel")
4165       break;
4166 
4167     if (Seen.find(ID) != Seen.end())
4168       return TokError(".amdhsa_ directives cannot be repeated");
4169     Seen.insert(ID);
4170 
4171     SMLoc ValStart = getLoc();
4172     int64_t IVal;
4173     if (getParser().parseAbsoluteExpression(IVal))
4174       return true;
4175     SMLoc ValEnd = getLoc();
4176     SMRange ValRange = SMRange(ValStart, ValEnd);
4177 
4178     if (IVal < 0)
4179       return OutOfRangeError(ValRange);
4180 
4181     uint64_t Val = IVal;
4182 
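// Range-check VALUE against the width of the kernel descriptor bit-field
// ENTRY and then store it into FIELD.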
4183 #define PARSE_BITS_ENTRY(FIELD, ENTRY, VALUE, RANGE)                           \
4184   if (!isUInt<ENTRY##_WIDTH>(VALUE))                                           \
4185     return OutOfRangeError(RANGE);                                             \
4186   AMDHSA_BITS_SET(FIELD, ENTRY, VALUE);
4187 
4188     if (ID == ".amdhsa_group_segment_fixed_size") {
4189       if (!isUInt<sizeof(KD.group_segment_fixed_size) * CHAR_BIT>(Val))
4190         return OutOfRangeError(ValRange);
4191       KD.group_segment_fixed_size = Val;
4192     } else if (ID == ".amdhsa_private_segment_fixed_size") {
4193       if (!isUInt<sizeof(KD.private_segment_fixed_size) * CHAR_BIT>(Val))
4194         return OutOfRangeError(ValRange);
4195       KD.private_segment_fixed_size = Val;
4196     } else if (ID == ".amdhsa_user_sgpr_private_segment_buffer") {
4197       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4198                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER,
4199                        Val, ValRange);
4200       if (Val)
4201         UserSGPRCount += 4;
4202     } else if (ID == ".amdhsa_user_sgpr_dispatch_ptr") {
4203       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4204                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR, Val,
4205                        ValRange);
4206       if (Val)
4207         UserSGPRCount += 2;
4208     } else if (ID == ".amdhsa_user_sgpr_queue_ptr") {
4209       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4210                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR, Val,
4211                        ValRange);
4212       if (Val)
4213         UserSGPRCount += 2;
4214     } else if (ID == ".amdhsa_user_sgpr_kernarg_segment_ptr") {
4215       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4216                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR,
4217                        Val, ValRange);
4218       if (Val)
4219         UserSGPRCount += 2;
4220     } else if (ID == ".amdhsa_user_sgpr_dispatch_id") {
4221       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4222                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID, Val,
4223                        ValRange);
4224       if (Val)
4225         UserSGPRCount += 2;
4226     } else if (ID == ".amdhsa_user_sgpr_flat_scratch_init") {
4227       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4228                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT, Val,
4229                        ValRange);
4230       if (Val)
4231         UserSGPRCount += 2;
4232     } else if (ID == ".amdhsa_user_sgpr_private_segment_size") {
4233       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4234                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE,
4235                        Val, ValRange);
4236       if (Val)
4237         UserSGPRCount += 1;
4238     } else if (ID == ".amdhsa_wavefront_size32") {
4239       if (IVersion.Major < 10)
4240         return Error(IDRange.Start, "directive requires gfx10+", IDRange);
4241       EnableWavefrontSize32 = Val;
4242       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4243                        KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32,
4244                        Val, ValRange);
4245     } else if (ID == ".amdhsa_system_sgpr_private_segment_wavefront_offset") {
4246       PARSE_BITS_ENTRY(
4247           KD.compute_pgm_rsrc2,
4248           COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, Val,
4249           ValRange);
4250     } else if (ID == ".amdhsa_system_sgpr_workgroup_id_x") {
4251       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4252                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, Val,
4253                        ValRange);
4254     } else if (ID == ".amdhsa_system_sgpr_workgroup_id_y") {
4255       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4256                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y, Val,
4257                        ValRange);
4258     } else if (ID == ".amdhsa_system_sgpr_workgroup_id_z") {
4259       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4260                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z, Val,
4261                        ValRange);
4262     } else if (ID == ".amdhsa_system_sgpr_workgroup_info") {
4263       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4264                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO, Val,
4265                        ValRange);
4266     } else if (ID == ".amdhsa_system_vgpr_workitem_id") {
4267       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4268                        COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID, Val,
4269                        ValRange);
4270     } else if (ID == ".amdhsa_next_free_vgpr") {
4271       VGPRRange = ValRange;
4272       NextFreeVGPR = Val;
4273     } else if (ID == ".amdhsa_next_free_sgpr") {
4274       SGPRRange = ValRange;
4275       NextFreeSGPR = Val;
4276     } else if (ID == ".amdhsa_reserve_vcc") {
4277       if (!isUInt<1>(Val))
4278         return OutOfRangeError(ValRange);
4279       ReserveVCC = Val;
4280     } else if (ID == ".amdhsa_reserve_flat_scratch") {
4281       if (IVersion.Major < 7)
4282         return Error(IDRange.Start, "directive requires gfx7+", IDRange);
4283       if (!isUInt<1>(Val))
4284         return OutOfRangeError(ValRange);
4285       ReserveFlatScr = Val;
4286     } else if (ID == ".amdhsa_reserve_xnack_mask") {
4287       if (IVersion.Major < 8)
4288         return Error(IDRange.Start, "directive requires gfx8+", IDRange);
4289       if (!isUInt<1>(Val))
4290         return OutOfRangeError(ValRange);
4291       ReserveXNACK = Val;
4292     } else if (ID == ".amdhsa_float_round_mode_32") {
4293       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
4294                        COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32, Val, ValRange);
4295     } else if (ID == ".amdhsa_float_round_mode_16_64") {
4296       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
4297                        COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64, Val, ValRange);
4298     } else if (ID == ".amdhsa_float_denorm_mode_32") {
4299       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
4300                        COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32, Val, ValRange);
4301     } else if (ID == ".amdhsa_float_denorm_mode_16_64") {
4302       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
4303                        COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64, Val,
4304                        ValRange);
4305     } else if (ID == ".amdhsa_dx10_clamp") {
4306       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
4307                        COMPUTE_PGM_RSRC1_ENABLE_DX10_CLAMP, Val, ValRange);
4308     } else if (ID == ".amdhsa_ieee_mode") {
4309       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_ENABLE_IEEE_MODE,
4310                        Val, ValRange);
4311     } else if (ID == ".amdhsa_fp16_overflow") {
4312       if (IVersion.Major < 9)
4313         return Error(IDRange.Start, "directive requires gfx9+", IDRange);
4314       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FP16_OVFL, Val,
4315                        ValRange);
4316     } else if (ID == ".amdhsa_workgroup_processor_mode") {
4317       if (IVersion.Major < 10)
4318         return Error(IDRange.Start, "directive requires gfx10+", IDRange);
4319       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_WGP_MODE, Val,
4320                        ValRange);
4321     } else if (ID == ".amdhsa_memory_ordered") {
4322       if (IVersion.Major < 10)
4323         return Error(IDRange.Start, "directive requires gfx10+", IDRange);
4324       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_MEM_ORDERED, Val,
4325                        ValRange);
4326     } else if (ID == ".amdhsa_forward_progress") {
4327       if (IVersion.Major < 10)
4328         return Error(IDRange.Start, "directive requires gfx10+", IDRange);
4329       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FWD_PROGRESS, Val,
4330                        ValRange);
4331     } else if (ID == ".amdhsa_exception_fp_ieee_invalid_op") {
4332       PARSE_BITS_ENTRY(
4333           KD.compute_pgm_rsrc2,
4334           COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION, Val,
4335           ValRange);
4336     } else if (ID == ".amdhsa_exception_fp_denorm_src") {
4337       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4338                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE,
4339                        Val, ValRange);
4340     } else if (ID == ".amdhsa_exception_fp_ieee_div_zero") {
4341       PARSE_BITS_ENTRY(
4342           KD.compute_pgm_rsrc2,
4343           COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO, Val,
4344           ValRange);
4345     } else if (ID == ".amdhsa_exception_fp_ieee_overflow") {
4346       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4347                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW,
4348                        Val, ValRange);
4349     } else if (ID == ".amdhsa_exception_fp_ieee_underflow") {
4350       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4351                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW,
4352                        Val, ValRange);
4353     } else if (ID == ".amdhsa_exception_fp_ieee_inexact") {
4354       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4355                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT,
4356                        Val, ValRange);
4357     } else if (ID == ".amdhsa_exception_int_div_zero") {
4358       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4359                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO,
4360                        Val, ValRange);
4361     } else {
4362       return Error(IDRange.Start, "unknown .amdhsa_kernel directive", IDRange);
4363     }
4364 
4365 #undef PARSE_BITS_ENTRY
4366   }
4367 
4368   if (Seen.find(".amdhsa_next_free_vgpr") == Seen.end())
4369     return TokError(".amdhsa_next_free_vgpr directive is required");
4370 
4371   if (Seen.find(".amdhsa_next_free_sgpr") == Seen.end())
4372     return TokError(".amdhsa_next_free_sgpr directive is required");
4373 
4374   unsigned VGPRBlocks;
4375   unsigned SGPRBlocks;
4376   if (calculateGPRBlocks(getFeatureBits(), ReserveVCC, ReserveFlatScr,
4377                          ReserveXNACK, EnableWavefrontSize32, NextFreeVGPR,
4378                          VGPRRange, NextFreeSGPR, SGPRRange, VGPRBlocks,
4379                          SGPRBlocks))
4380     return true;
4381 
4382   if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_WIDTH>(
4383           VGPRBlocks))
4384     return OutOfRangeError(VGPRRange);
4385   AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
4386                   COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT, VGPRBlocks);
4387 
4388   if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_WIDTH>(
4389           SGPRBlocks))
4390     return OutOfRangeError(SGPRRange);
4391   AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
4392                   COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT,
4393                   SGPRBlocks);
4394 
4395   if (!isUInt<COMPUTE_PGM_RSRC2_USER_SGPR_COUNT_WIDTH>(UserSGPRCount))
4396     return TokError("too many user SGPRs enabled");
4397   AMDHSA_BITS_SET(KD.compute_pgm_rsrc2, COMPUTE_PGM_RSRC2_USER_SGPR_COUNT,
4398                   UserSGPRCount);
4399 
4400   getTargetStreamer().EmitAmdhsaKernelDescriptor(
4401       getSTI(), KernelName, KD, NextFreeVGPR, NextFreeSGPR, ReserveVCC,
4402       ReserveFlatScr, ReserveXNACK);
4403   return false;
4404 }
4405 
4406 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectVersion() {
4407   uint32_t Major;
4408   uint32_t Minor;
4409 
4410   if (ParseDirectiveMajorMinor(Major, Minor))
4411     return true;
4412 
4413   getTargetStreamer().EmitDirectiveHSACodeObjectVersion(Major, Minor);
4414   return false;
4415 }
4416 
4417 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectISA() {
4418   uint32_t Major;
4419   uint32_t Minor;
4420   uint32_t Stepping;
4421   StringRef VendorName;
4422   StringRef ArchName;
4423 
4424   // If this directive has no arguments, then use the ISA version for the
4425   // targeted GPU.
4426   if (isToken(AsmToken::EndOfStatement)) {
4427     AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
4428     getTargetStreamer().EmitDirectiveHSACodeObjectISA(ISA.Major, ISA.Minor,
4429                                                       ISA.Stepping,
4430                                                       "AMD", "AMDGPU");
4431     return false;
4432   }
4433 
4434   if (ParseDirectiveMajorMinor(Major, Minor))
4435     return true;
4436 
4437   if (!trySkipToken(AsmToken::Comma))
4438     return TokError("stepping version number required, comma expected");
4439 
4440   if (ParseAsAbsoluteExpression(Stepping))
4441     return TokError("invalid stepping version");
4442 
4443   if (!trySkipToken(AsmToken::Comma))
4444     return TokError("vendor name required, comma expected");
4445 
4446   if (!parseString(VendorName, "invalid vendor name"))
4447     return true;
4448 
4449   if (!trySkipToken(AsmToken::Comma))
4450     return TokError("arch name required, comma expected");
4451 
4452   if (!parseString(ArchName, "invalid arch name"))
4453     return true;
4454 
4455   getTargetStreamer().EmitDirectiveHSACodeObjectISA(Major, Minor, Stepping,
4456                                                     VendorName, ArchName);
4457   return false;
4458 }
4459 
4460 bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID,
4461                                                amd_kernel_code_t &Header) {
4462   // max_scratch_backing_memory_byte_size is deprecated. Ignore it while parsing
4463   // assembly for backwards compatibility.
4464   if (ID == "max_scratch_backing_memory_byte_size") {
4465     Parser.eatToEndOfStatement();
4466     return false;
4467   }
4468 
4469   SmallString<40> ErrStr;
4470   raw_svector_ostream Err(ErrStr);
4471   if (!parseAmdKernelCodeField(ID, getParser(), Header, Err)) {
4472     return TokError(Err.str());
4473   }
4474   Lex();
4475 
4476   if (ID == "enable_wavefront_size32") {
4477     if (Header.code_properties & AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32) {
4478       if (!isGFX10Plus())
4479         return TokError("enable_wavefront_size32=1 is only allowed on GFX10+");
4480       if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32])
4481         return TokError("enable_wavefront_size32=1 requires +WavefrontSize32");
4482     } else {
4483       if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64])
4484         return TokError("enable_wavefront_size32=0 requires +WavefrontSize64");
4485     }
4486   }
4487 
4488   if (ID == "wavefront_size") {
4489     if (Header.wavefront_size == 5) {
4490       if (!isGFX10Plus())
4491         return TokError("wavefront_size=5 is only allowed on GFX10+");
4492       if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32])
4493         return TokError("wavefront_size=5 requires +WavefrontSize32");
4494     } else if (Header.wavefront_size == 6) {
4495       if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64])
4496         return TokError("wavefront_size=6 requires +WavefrontSize64");
4497     }
4498   }
4499 
4500   if (ID == "enable_wgp_mode") {
4501     if (G_00B848_WGP_MODE(Header.compute_pgm_resource_registers) &&
4502         !isGFX10Plus())
4503       return TokError("enable_wgp_mode=1 is only allowed on GFX10+");
4504   }
4505 
4506   if (ID == "enable_mem_ordered") {
4507     if (G_00B848_MEM_ORDERED(Header.compute_pgm_resource_registers) &&
4508         !isGFX10Plus())
4509       return TokError("enable_mem_ordered=1 is only allowed on GFX10+");
4510   }
4511 
4512   if (ID == "enable_fwd_progress") {
4513     if (G_00B848_FWD_PROGRESS(Header.compute_pgm_resource_registers) &&
4514         !isGFX10Plus())
4515       return TokError("enable_fwd_progress=1 is only allowed on GFX10+");
4516   }
4517 
4518   return false;
4519 }
4520 
4521 bool AMDGPUAsmParser::ParseDirectiveAMDKernelCodeT() {
4522   amd_kernel_code_t Header;
4523   AMDGPU::initDefaultAMDKernelCodeT(Header, &getSTI());
4524 
4525   while (true) {
4526     // Lex EndOfStatement.  This is in a while loop, because lexing a comment
4527     // will set the current token to EndOfStatement.
4528     while (trySkipToken(AsmToken::EndOfStatement));
4529 
4530     StringRef ID;
4531     if (!parseId(ID, "expected value identifier or .end_amd_kernel_code_t"))
4532       return true;
4533 
4534     if (ID == ".end_amd_kernel_code_t")
4535       break;
4536 
4537     if (ParseAMDKernelCodeTValue(ID, Header))
4538       return true;
4539   }
4540 
4541   getTargetStreamer().EmitAMDKernelCodeT(Header);
4542 
4543   return false;
4544 }
4545 
4546 bool AMDGPUAsmParser::ParseDirectiveAMDGPUHsaKernel() {
4547   StringRef KernelName;
4548   if (!parseId(KernelName, "expected symbol name"))
4549     return true;
4550 
4551   getTargetStreamer().EmitAMDGPUSymbolType(KernelName,
4552                                            ELF::STT_AMDGPU_HSA_KERNEL);
4553 
4554   KernelScope.initialize(getContext());
4555   return false;
4556 }
4557 
4558 bool AMDGPUAsmParser::ParseDirectiveISAVersion() {
4559   if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) {
4560     return Error(getLoc(),
4561                  ".amd_amdgpu_isa directive is not available on non-amdgcn "
4562                  "architectures");
4563   }
4564 
4565   auto ISAVersionStringFromASM = getToken().getStringContents();
4566 
4567   std::string ISAVersionStringFromSTI;
4568   raw_string_ostream ISAVersionStreamFromSTI(ISAVersionStringFromSTI);
4569   IsaInfo::streamIsaVersion(&getSTI(), ISAVersionStreamFromSTI);
4570 
4571   if (ISAVersionStringFromASM != ISAVersionStreamFromSTI.str()) {
4572     return Error(getLoc(),
4573                  ".amd_amdgpu_isa directive does not match triple and/or mcpu "
4574                  "arguments specified through the command line");
4575   }
4576 
4577   getTargetStreamer().EmitISAVersion(ISAVersionStreamFromSTI.str());
4578   Lex();
4579 
4580   return false;
4581 }
4582 
4583 bool AMDGPUAsmParser::ParseDirectiveHSAMetadata() {
4584   const char *AssemblerDirectiveBegin;
4585   const char *AssemblerDirectiveEnd;
4586   std::tie(AssemblerDirectiveBegin, AssemblerDirectiveEnd) =
4587       isHsaAbiVersion3(&getSTI())
4588           ? std::make_tuple(HSAMD::V3::AssemblerDirectiveBegin,
4589                             HSAMD::V3::AssemblerDirectiveEnd)
4590           : std::make_tuple(HSAMD::AssemblerDirectiveBegin,
4591                             HSAMD::AssemblerDirectiveEnd);
4592 
4593   if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA) {
4594     return Error(getLoc(),
4595                  (Twine(AssemblerDirectiveBegin) + Twine(" directive is "
4596                  "not available on non-amdhsa OSes")).str());
4597   }
4598 
4599   std::string HSAMetadataString;
4600   if (ParseToEndDirective(AssemblerDirectiveBegin, AssemblerDirectiveEnd,
4601                           HSAMetadataString))
4602     return true;
4603 
4604   if (isHsaAbiVersion3(&getSTI())) {
4605     if (!getTargetStreamer().EmitHSAMetadataV3(HSAMetadataString))
4606       return Error(getLoc(), "invalid HSA metadata");
4607   } else {
4608     if (!getTargetStreamer().EmitHSAMetadataV2(HSAMetadataString))
4609       return Error(getLoc(), "invalid HSA metadata");
4610   }
4611 
4612   return false;
4613 }
4614 
4615 /// Common code to parse out a block of text (typically YAML) between start and
4616 /// end directives.
4617 bool AMDGPUAsmParser::ParseToEndDirective(const char *AssemblerDirectiveBegin,
4618                                           const char *AssemblerDirectiveEnd,
4619                                           std::string &CollectString) {
4620 
4621   raw_string_ostream CollectStream(CollectString);
4622 
4623   getLexer().setSkipSpace(false);
4624 
4625   bool FoundEnd = false;
4626   while (!isToken(AsmToken::Eof)) {
4627     while (isToken(AsmToken::Space)) {
4628       CollectStream << getTokenStr();
4629       Lex();
4630     }
4631 
4632     if (trySkipId(AssemblerDirectiveEnd)) {
4633       FoundEnd = true;
4634       break;
4635     }
4636 
4637     CollectStream << Parser.parseStringToEndOfStatement()
4638                   << getContext().getAsmInfo()->getSeparatorString();
4639 
4640     Parser.eatToEndOfStatement();
4641   }
4642 
4643   getLexer().setSkipSpace(true);
4644 
4645   if (isToken(AsmToken::Eof) && !FoundEnd) {
4646     return TokError(Twine("expected directive ") +
4647                     Twine(AssemblerDirectiveEnd) + Twine(" not found"));
4648   }
4649 
4650   CollectStream.flush();
4651   return false;
4652 }
4653 
4654 /// Parse the assembler directive for new MsgPack-format PAL metadata.
4655 bool AMDGPUAsmParser::ParseDirectivePALMetadataBegin() {
4656   std::string String;
4657   if (ParseToEndDirective(AMDGPU::PALMD::AssemblerDirectiveBegin,
4658                           AMDGPU::PALMD::AssemblerDirectiveEnd, String))
4659     return true;
4660 
4661   auto PALMetadata = getTargetStreamer().getPALMetadata();
4662   if (!PALMetadata->setFromString(String))
4663     return Error(getLoc(), "invalid PAL metadata");
4664   return false;
4665 }
4666 
4667 /// Parse the assembler directive for old linear-format PAL metadata.
4668 bool AMDGPUAsmParser::ParseDirectivePALMetadata() {
4669   if (getSTI().getTargetTriple().getOS() != Triple::AMDPAL) {
4670     return Error(getLoc(),
4671                  (Twine(PALMD::AssemblerDirective) + Twine(" directive is "
4672                  "not available on non-amdpal OSes")).str());
4673   }
4674 
4675   auto PALMetadata = getTargetStreamer().getPALMetadata();
4676   PALMetadata->setLegacy();
4677   for (;;) {
4678     uint32_t Key, Value;
4679     if (ParseAsAbsoluteExpression(Key)) {
4680       return TokError(Twine("invalid value in ") +
4681                       Twine(PALMD::AssemblerDirective));
4682     }
4683     if (!trySkipToken(AsmToken::Comma)) {
4684       return TokError(Twine("expected an even number of values in ") +
4685                       Twine(PALMD::AssemblerDirective));
4686     }
4687     if (ParseAsAbsoluteExpression(Value)) {
4688       return TokError(Twine("invalid value in ") +
4689                       Twine(PALMD::AssemblerDirective));
4690     }
4691     PALMetadata->setRegister(Key, Value);
4692     if (!trySkipToken(AsmToken::Comma))
4693       break;
4694   }
4695   return false;
4696 }
4697 
4698 /// ParseDirectiveAMDGPULDS
4699 ///  ::= .amdgpu_lds identifier ',' size_expression [',' align_expression]
4700 bool AMDGPUAsmParser::ParseDirectiveAMDGPULDS() {
4701   if (getParser().checkForValidSection())
4702     return true;
4703 
4704   StringRef Name;
4705   SMLoc NameLoc = getLoc();
4706   if (getParser().parseIdentifier(Name))
4707     return TokError("expected identifier in directive");
4708 
4709   MCSymbol *Symbol = getContext().getOrCreateSymbol(Name);
4710   if (parseToken(AsmToken::Comma, "expected ','"))
4711     return true;
4712 
4713   unsigned LocalMemorySize = AMDGPU::IsaInfo::getLocalMemorySize(&getSTI());
4714 
4715   int64_t Size;
4716   SMLoc SizeLoc = getLoc();
4717   if (getParser().parseAbsoluteExpression(Size))
4718     return true;
4719   if (Size < 0)
4720     return Error(SizeLoc, "size must be non-negative");
4721   if (Size > LocalMemorySize)
4722     return Error(SizeLoc, "size is too large");
4723 
4724   int64_t Alignment = 4;
4725   if (trySkipToken(AsmToken::Comma)) {
4726     SMLoc AlignLoc = getLoc();
4727     if (getParser().parseAbsoluteExpression(Alignment))
4728       return true;
4729     if (Alignment < 0 || !isPowerOf2_64(Alignment))
4730       return Error(AlignLoc, "alignment must be a power of two");
4731 
4732     // Alignment larger than the size of LDS is possible in theory, as long
4733     // as the linker manages to place the symbol at address 0, but we do want
4734     // to make sure the alignment fits nicely into a 32-bit integer.
4735     if (Alignment >= 1u << 31)
4736       return Error(AlignLoc, "alignment is too large");
4737   }
4738 
4739   if (parseToken(AsmToken::EndOfStatement,
4740                  "unexpected token in '.amdgpu_lds' directive"))
4741     return true;
4742 
4743   Symbol->redefineIfPossible();
4744   if (!Symbol->isUndefined())
4745     return Error(NameLoc, "invalid symbol redefinition");
4746 
4747   getTargetStreamer().emitAMDGPULDS(Symbol, Size, Align(Alignment));
4748   return false;
4749 }
4750 
4751 bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) {
4752   StringRef IDVal = DirectiveID.getString();
4753 
4754   if (isHsaAbiVersion3(&getSTI())) {
4755     if (IDVal == ".amdgcn_target")
4756       return ParseDirectiveAMDGCNTarget();
4757 
4758     if (IDVal == ".amdhsa_kernel")
4759       return ParseDirectiveAMDHSAKernel();
4760 
4761     // TODO: Restructure/combine with PAL metadata directive.
4762     if (IDVal == AMDGPU::HSAMD::V3::AssemblerDirectiveBegin)
4763       return ParseDirectiveHSAMetadata();
4764   } else {
4765     if (IDVal == ".hsa_code_object_version")
4766       return ParseDirectiveHSACodeObjectVersion();
4767 
4768     if (IDVal == ".hsa_code_object_isa")
4769       return ParseDirectiveHSACodeObjectISA();
4770 
4771     if (IDVal == ".amd_kernel_code_t")
4772       return ParseDirectiveAMDKernelCodeT();
4773 
4774     if (IDVal == ".amdgpu_hsa_kernel")
4775       return ParseDirectiveAMDGPUHsaKernel();
4776 
4777     if (IDVal == ".amd_amdgpu_isa")
4778       return ParseDirectiveISAVersion();
4779 
4780     if (IDVal == AMDGPU::HSAMD::AssemblerDirectiveBegin)
4781       return ParseDirectiveHSAMetadata();
4782   }
4783 
4784   if (IDVal == ".amdgpu_lds")
4785     return ParseDirectiveAMDGPULDS();
4786 
4787   if (IDVal == PALMD::AssemblerDirectiveBegin)
4788     return ParseDirectivePALMetadataBegin();
4789 
4790   if (IDVal == PALMD::AssemblerDirective)
4791     return ParseDirectivePALMetadata();
4792 
4793   return true;
4794 }
4795 
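// Check whether a register accepted by the generic parser actually exists on
// the current subtarget (e.g. ttmp12..ttmp15 require GFX9+, and flat_scratch
// is not a valid register operand on SI or GFX10+).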
4796 bool AMDGPUAsmParser::subtargetHasRegister(const MCRegisterInfo &MRI,
4797                                            unsigned RegNo) const {
4798 
4799   for (MCRegAliasIterator R(AMDGPU::TTMP12_TTMP13_TTMP14_TTMP15, &MRI, true);
4800        R.isValid(); ++R) {
4801     if (*R == RegNo)
4802       return isGFX9Plus();
4803   }
4804 
4805   // GFX10 has 2 more SGPRs, 104 and 105.
4806   for (MCRegAliasIterator R(AMDGPU::SGPR104_SGPR105, &MRI, true);
4807        R.isValid(); ++R) {
4808     if (*R == RegNo)
4809       return hasSGPR104_SGPR105();
4810   }
4811 
4812   switch (RegNo) {
4813   case AMDGPU::SRC_SHARED_BASE:
4814   case AMDGPU::SRC_SHARED_LIMIT:
4815   case AMDGPU::SRC_PRIVATE_BASE:
4816   case AMDGPU::SRC_PRIVATE_LIMIT:
4817   case AMDGPU::SRC_POPS_EXITING_WAVE_ID:
4818     return isGFX9Plus();
4819   case AMDGPU::TBA:
4820   case AMDGPU::TBA_LO:
4821   case AMDGPU::TBA_HI:
4822   case AMDGPU::TMA:
4823   case AMDGPU::TMA_LO:
4824   case AMDGPU::TMA_HI:
4825     return !isGFX9Plus();
4826   case AMDGPU::XNACK_MASK:
4827   case AMDGPU::XNACK_MASK_LO:
4828   case AMDGPU::XNACK_MASK_HI:
4829     return (isVI() || isGFX9()) && hasXNACK();
4830   case AMDGPU::SGPR_NULL:
4831     return isGFX10Plus();
4832   default:
4833     break;
4834   }
4835 
4836   if (isCI())
4837     return true;
4838 
4839   if (isSI() || isGFX10Plus()) {
4840     // No flat_scr on SI.
4841     // On GFX10 flat scratch is not a valid register operand and can only be
4842     // accessed with s_setreg/s_getreg.
4843     switch (RegNo) {
4844     case AMDGPU::FLAT_SCR:
4845     case AMDGPU::FLAT_SCR_LO:
4846     case AMDGPU::FLAT_SCR_HI:
4847       return false;
4848     default:
4849       return true;
4850     }
4851   }
4852 
4853   // VI only has 102 SGPRs, so make sure we aren't trying to use the 2 more that
4854   // SI/CI have.
4855   for (MCRegAliasIterator R(AMDGPU::SGPR102_SGPR103, &MRI, true);
4856        R.isValid(); ++R) {
4857     if (*R == RegNo)
4858       return hasSGPR102_SGPR103();
4859   }
4860 
4861   return true;
4862 }
4863 
4864 OperandMatchResultTy
4865 AMDGPUAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic,
4866                               OperandMode Mode) {
4867   // Try to parse with a custom parser
4868   OperandMatchResultTy ResTy = MatchOperandParserImpl(Operands, Mnemonic);
4869 
4870   // If we successfully parsed the operand or if there was an error parsing,
4871   // we are done.
4872   //
4873   // If we are parsing after we reach EndOfStatement then this means we
4874   // are appending default values to the Operands list.  This is only done
4875   // by a custom parser, so we shouldn't continue on to the generic parsing.
4876   if (ResTy == MatchOperand_Success || ResTy == MatchOperand_ParseFail ||
4877       isToken(AsmToken::EndOfStatement))
4878     return ResTy;
4879 
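  // In NSA (non-sequential address) form, a GFX10+ MIMG address may be
  // written as a bracketed list of individual registers. Parse the list and,
  // when it contains more than one register, keep the surrounding "[" and "]"
  // as explicit tokens in the operand list.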
4880   SMLoc RBraceLoc;
4881   SMLoc LBraceLoc = getLoc();
4882   if (Mode == OperandMode_NSA && trySkipToken(AsmToken::LBrac)) {
4883     unsigned Prefix = Operands.size();
4884 
4885     for (;;) {
4886       auto Loc = getLoc();
4887       ResTy = parseReg(Operands);
4888       if (ResTy == MatchOperand_NoMatch)
4889         Error(Loc, "expected a register");
4890       if (ResTy != MatchOperand_Success)
4891         return MatchOperand_ParseFail;
4892 
4893       RBraceLoc = getLoc();
4894       if (trySkipToken(AsmToken::RBrac))
4895         break;
4896 
4897       if (!skipToken(AsmToken::Comma,
4898                      "expected a comma or a closing square bracket")) {
4899         return MatchOperand_ParseFail;
4900       }
4901     }
4902 
4903     if (Operands.size() - Prefix > 1) {
4904       Operands.insert(Operands.begin() + Prefix,
4905                       AMDGPUOperand::CreateToken(this, "[", LBraceLoc));
4906       Operands.push_back(AMDGPUOperand::CreateToken(this, "]", RBraceLoc));
4907     }
4908 
4909     return MatchOperand_Success;
4910   }
4911 
4912   return parseRegOrImm(Operands);
4913 }
4914 
4915 StringRef AMDGPUAsmParser::parseMnemonicSuffix(StringRef Name) {
4916   // Clear any forced encodings from the previous instruction.
4917   setForcedEncodingSize(0);
4918   setForcedDPP(false);
4919   setForcedSDWA(false);
4920 
4921   if (Name.endswith("_e64")) {
4922     setForcedEncodingSize(64);
4923     return Name.substr(0, Name.size() - 4);
4924   } else if (Name.endswith("_e32")) {
4925     setForcedEncodingSize(32);
4926     return Name.substr(0, Name.size() - 4);
4927   } else if (Name.endswith("_dpp")) {
4928     setForcedDPP(true);
4929     return Name.substr(0, Name.size() - 4);
4930   } else if (Name.endswith("_sdwa")) {
4931     setForcedSDWA(true);
4932     return Name.substr(0, Name.size() - 5);
4933   }
4934   return Name;
4935 }
4936 
4937 bool AMDGPUAsmParser::ParseInstruction(ParseInstructionInfo &Info,
4938                                        StringRef Name,
4939                                        SMLoc NameLoc, OperandVector &Operands) {
4940   // Add the instruction mnemonic
4941   Name = parseMnemonicSuffix(Name);
4942   Operands.push_back(AMDGPUOperand::CreateToken(this, Name, NameLoc));
4943 
4944   bool IsMIMG = Name.startswith("image_");
4945 
4946   while (!trySkipToken(AsmToken::EndOfStatement)) {
4947     OperandMode Mode = OperandMode_Default;
4948     if (IsMIMG && isGFX10Plus() && Operands.size() == 2)
4949       Mode = OperandMode_NSA;
4950     OperandMatchResultTy Res = parseOperand(Operands, Name, Mode);
4951 
4952     if (Res != MatchOperand_Success) {
4953       checkUnsupportedInstruction(Name, NameLoc);
4954       if (!Parser.hasPendingError()) {
4955         // FIXME: use real operand location rather than the current location.
4956         StringRef Msg =
4957           (Res == MatchOperand_ParseFail) ? "failed parsing operand." :
4958                                             "not a valid operand.";
4959         Error(getLoc(), Msg);
4960       }
4961       while (!trySkipToken(AsmToken::EndOfStatement)) {
4962         lex();
4963       }
4964       return true;
4965     }
4966 
4967     // Eat the comma or space if there is one.
4968     trySkipToken(AsmToken::Comma);
4969   }
4970 
4971   return false;
4972 }
4973 
4974 //===----------------------------------------------------------------------===//
4975 // Utility functions
4976 //===----------------------------------------------------------------------===//
4977 
4978 OperandMatchResultTy
4979 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, int64_t &IntVal) {
4980 
4981   if (!trySkipId(Prefix, AsmToken::Colon))
4982     return MatchOperand_NoMatch;
4983 
4984   return parseExpr(IntVal) ? MatchOperand_Success : MatchOperand_ParseFail;
4985 }
4986 
4987 OperandMatchResultTy
4988 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
4989                                     AMDGPUOperand::ImmTy ImmTy,
4990                                     bool (*ConvertResult)(int64_t&)) {
4991   SMLoc S = getLoc();
4992   int64_t Value = 0;
4993 
4994   OperandMatchResultTy Res = parseIntWithPrefix(Prefix, Value);
4995   if (Res != MatchOperand_Success)
4996     return Res;
4997 
4998   if (ConvertResult && !ConvertResult(Value)) {
4999     Error(S, "invalid " + StringRef(Prefix) + " value.");
5000   }
5001 
5002   Operands.push_back(AMDGPUOperand::CreateImm(this, Value, S, ImmTy));
5003   return MatchOperand_Success;
5004 }
5005 
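// Parse "<Prefix>:[<v>, <v>, ...]" where each element must be 0 or 1 and at
// most four elements are allowed; element i is packed into bit i of the
// resulting immediate.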
5006 OperandMatchResultTy
5007 AMDGPUAsmParser::parseOperandArrayWithPrefix(const char *Prefix,
5008                                              OperandVector &Operands,
5009                                              AMDGPUOperand::ImmTy ImmTy,
5010                                              bool (*ConvertResult)(int64_t&)) {
5011   SMLoc S = getLoc();
5012   if (!trySkipId(Prefix, AsmToken::Colon))
5013     return MatchOperand_NoMatch;
5014 
5015   if (!skipToken(AsmToken::LBrac, "expected a left square bracket"))
5016     return MatchOperand_ParseFail;
5017 
5018   unsigned Val = 0;
5019   const unsigned MaxSize = 4;
5020 
5021   // FIXME: How to verify the number of elements matches the number of src
5022   // operands?
5023   for (int I = 0; ; ++I) {
5024     int64_t Op;
5025     SMLoc Loc = getLoc();
5026     if (!parseExpr(Op))
5027       return MatchOperand_ParseFail;
5028 
5029     if (Op != 0 && Op != 1) {
5030       Error(Loc, "invalid " + StringRef(Prefix) + " value.");
5031       return MatchOperand_ParseFail;
5032     }
5033 
5034     Val |= (Op << I);
5035 
5036     if (trySkipToken(AsmToken::RBrac))
5037       break;
5038 
5039     if (I + 1 == MaxSize) {
5040       Error(getLoc(), "expected a closing square bracket");
5041       return MatchOperand_ParseFail;
5042     }
5043 
5044     if (!skipToken(AsmToken::Comma, "expected a comma"))
5045       return MatchOperand_ParseFail;
5046   }
5047 
5048   Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S, ImmTy));
5049   return MatchOperand_Success;
5050 }
5051 
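// Parse a named bit such as "r128": the bare name sets the bit to 1 and the
// "no"-prefixed form (e.g. "nor128") sets it to 0.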
5052 OperandMatchResultTy
5053 AMDGPUAsmParser::parseNamedBit(StringRef Name, OperandVector &Operands,
5054                                AMDGPUOperand::ImmTy ImmTy) {
5055   int64_t Bit;
5056   SMLoc S = getLoc();
5057 
5058   if (trySkipId(Name)) {
5059     Bit = 1;
5060   } else if (trySkipId("no", Name)) {
5061     Bit = 0;
5062   } else {
5063     return MatchOperand_NoMatch;
5064   }
5065 
5066   if (Name == "r128" && !hasMIMG_R128()) {
5067     Error(S, "r128 modifier is not supported on this GPU");
5068     return MatchOperand_ParseFail;
5069   }
5070   if (Name == "a16" && !isGFX9() && !hasGFX10A16()) {
5071     Error(S, "a16 modifier is not supported on this GPU");
5072     return MatchOperand_ParseFail;
5073   }
5074   if (!isGFX10Plus() && ImmTy == AMDGPUOperand::ImmTyDLC) {
5075     Error(S, "dlc modifier is not supported on this GPU");
5076     return MatchOperand_ParseFail;
5077   }
5078 
5079   if (isGFX9() && ImmTy == AMDGPUOperand::ImmTyA16)
5080     ImmTy = AMDGPUOperand::ImmTyR128A16;
5081 
5082   Operands.push_back(AMDGPUOperand::CreateImm(this, Bit, S, ImmTy));
5083   return MatchOperand_Success;
5084 }
5085 
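// Add an optional immediate operand to Inst: use the value recorded in
// OptionalIdx if the operand was present in the source, otherwise emit the
// provided Default.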
5086 static void addOptionalImmOperand(
5087   MCInst& Inst, const OperandVector& Operands,
5088   AMDGPUAsmParser::OptionalImmIndexMap& OptionalIdx,
5089   AMDGPUOperand::ImmTy ImmT,
5090   int64_t Default = 0) {
5091   auto i = OptionalIdx.find(ImmT);
5092   if (i != OptionalIdx.end()) {
5093     unsigned Idx = i->second;
5094     ((AMDGPUOperand &)*Operands[Idx]).addImmOperands(Inst, 1);
5095   } else {
5096     Inst.addOperand(MCOperand::createImm(Default));
5097   }
5098 }
5099 
5100 OperandMatchResultTy
5101 AMDGPUAsmParser::parseStringWithPrefix(StringRef Prefix,
5102                                        StringRef &Value,
5103                                        SMLoc &StringLoc) {
5104   if (!trySkipId(Prefix, AsmToken::Colon))
5105     return MatchOperand_NoMatch;
5106 
5107   StringLoc = getLoc();
5108   return parseId(Value, "expected an identifier") ? MatchOperand_Success
5109                                                   : MatchOperand_ParseFail;
5110 }
5111 
5112 //===----------------------------------------------------------------------===//
5113 // MTBUF format
5114 //===----------------------------------------------------------------------===//
5115 
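// Try to parse "<Pref>:<value>". Returns true if the prefix is absent
// (leaving Fmt unchanged) or if a value in [0, MaxVal] was parsed into Fmt;
// returns false on a parse error or an out-of-range value.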
5116 bool AMDGPUAsmParser::tryParseFmt(const char *Pref,
5117                                   int64_t MaxVal,
5118                                   int64_t &Fmt) {
5119   int64_t Val;
5120   SMLoc Loc = getLoc();
5121 
5122   auto Res = parseIntWithPrefix(Pref, Val);
5123   if (Res == MatchOperand_ParseFail)
5124     return false;
5125   if (Res == MatchOperand_NoMatch)
5126     return true;
5127 
5128   if (Val < 0 || Val > MaxVal) {
5129     Error(Loc, Twine("out of range ", StringRef(Pref)));
5130     return false;
5131   }
5132 
5133   Fmt = Val;
5134   return true;
5135 }
5136 
5137 // dfmt and nfmt (in a tbuffer instruction) are parsed as one to allow their
5138 // values to live in a joint format operand in the MCInst encoding.
5139 OperandMatchResultTy
5140 AMDGPUAsmParser::parseDfmtNfmt(int64_t &Format) {
5141   using namespace llvm::AMDGPU::MTBUFFormat;
5142 
5143   int64_t Dfmt = DFMT_UNDEF;
5144   int64_t Nfmt = NFMT_UNDEF;
5145 
5146   // dfmt and nfmt can appear in either order, and each is optional.
5147   for (int I = 0; I < 2; ++I) {
5148     if (Dfmt == DFMT_UNDEF && !tryParseFmt("dfmt", DFMT_MAX, Dfmt))
5149       return MatchOperand_ParseFail;
5150 
5151     if (Nfmt == NFMT_UNDEF && !tryParseFmt("nfmt", NFMT_MAX, Nfmt)) {
5152       return MatchOperand_ParseFail;
5153     }
5154     // Skip optional comma between dfmt/nfmt
5155     // but guard against 2 commas following each other.
5156     if ((Dfmt == DFMT_UNDEF) != (Nfmt == NFMT_UNDEF) &&
5157         !peekToken().is(AsmToken::Comma)) {
5158       trySkipToken(AsmToken::Comma);
5159     }
5160   }
5161 
5162   if (Dfmt == DFMT_UNDEF && Nfmt == NFMT_UNDEF)
5163     return MatchOperand_NoMatch;
5164 
5165   Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt;
5166   Nfmt = (Nfmt == NFMT_UNDEF) ? NFMT_DEFAULT : Nfmt;
5167 
5168   Format = encodeDfmtNfmt(Dfmt, Nfmt);
5169   return MatchOperand_Success;
5170 }
5171 
5172 OperandMatchResultTy
5173 AMDGPUAsmParser::parseUfmt(int64_t &Format) {
5174   using namespace llvm::AMDGPU::MTBUFFormat;
5175 
5176   int64_t Fmt = UFMT_UNDEF;
5177 
5178   if (!tryParseFmt("format", UFMT_MAX, Fmt))
5179     return MatchOperand_ParseFail;
5180 
5181   if (Fmt == UFMT_UNDEF)
5182     return MatchOperand_NoMatch;
5183 
5184   Format = Fmt;
5185   return MatchOperand_Success;
5186 }
5187 
5188 bool AMDGPUAsmParser::matchDfmtNfmt(int64_t &Dfmt,
5189                                     int64_t &Nfmt,
5190                                     StringRef FormatStr,
5191                                     SMLoc Loc) {
5192   using namespace llvm::AMDGPU::MTBUFFormat;
5193   int64_t Format;
5194 
5195   Format = getDfmt(FormatStr);
5196   if (Format != DFMT_UNDEF) {
5197     Dfmt = Format;
5198     return true;
5199   }
5200 
5201   Format = getNfmt(FormatStr, getSTI());
5202   if (Format != NFMT_UNDEF) {
5203     Nfmt = Format;
5204     return true;
5205   }
5206 
5207   Error(Loc, "unsupported format");
5208   return false;
5209 }
5210 
5211 OperandMatchResultTy
5212 AMDGPUAsmParser::parseSymbolicSplitFormat(StringRef FormatStr,
5213                                           SMLoc FormatLoc,
5214                                           int64_t &Format) {
5215   using namespace llvm::AMDGPU::MTBUFFormat;
5216 
5217   int64_t Dfmt = DFMT_UNDEF;
5218   int64_t Nfmt = NFMT_UNDEF;
5219   if (!matchDfmtNfmt(Dfmt, Nfmt, FormatStr, FormatLoc))
5220     return MatchOperand_ParseFail;
5221 
5222   if (trySkipToken(AsmToken::Comma)) {
5223     StringRef Str;
5224     SMLoc Loc = getLoc();
5225     if (!parseId(Str, "expected a format string") ||
5226         !matchDfmtNfmt(Dfmt, Nfmt, Str, Loc)) {
5227       return MatchOperand_ParseFail;
5228     }
5229     if (Dfmt == DFMT_UNDEF) {
5230       Error(Loc, "duplicate numeric format");
5231       return MatchOperand_ParseFail;
5232     } else if (Nfmt == NFMT_UNDEF) {
5233       Error(Loc, "duplicate data format");
5234       return MatchOperand_ParseFail;
5235     }
5236   }
5237 
5238   Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt;
5239   Nfmt = (Nfmt == NFMT_UNDEF) ? NFMT_DEFAULT : Nfmt;
5240 
5241   if (isGFX10Plus()) {
5242     auto Ufmt = convertDfmtNfmt2Ufmt(Dfmt, Nfmt);
5243     if (Ufmt == UFMT_UNDEF) {
5244       Error(FormatLoc, "unsupported format");
5245       return MatchOperand_ParseFail;
5246     }
5247     Format = Ufmt;
5248   } else {
5249     Format = encodeDfmtNfmt(Dfmt, Nfmt);
5250   }
5251 
5252   return MatchOperand_Success;
5253 }
5254 
5255 OperandMatchResultTy
5256 AMDGPUAsmParser::parseSymbolicUnifiedFormat(StringRef FormatStr,
5257                                             SMLoc Loc,
5258                                             int64_t &Format) {
5259   using namespace llvm::AMDGPU::MTBUFFormat;
5260 
5261   auto Id = getUnifiedFormat(FormatStr);
5262   if (Id == UFMT_UNDEF)
5263     return MatchOperand_NoMatch;
5264 
5265   if (!isGFX10Plus()) {
5266     Error(Loc, "unified format is not supported on this GPU");
5267     return MatchOperand_ParseFail;
5268   }
5269 
5270   Format = Id;
5271   return MatchOperand_Success;
5272 }
5273 
5274 OperandMatchResultTy
5275 AMDGPUAsmParser::parseNumericFormat(int64_t &Format) {
5276   using namespace llvm::AMDGPU::MTBUFFormat;
5277   SMLoc Loc = getLoc();
5278 
5279   if (!parseExpr(Format))
5280     return MatchOperand_ParseFail;
5281   if (!isValidFormatEncoding(Format, getSTI())) {
5282     Error(Loc, "out of range format");
5283     return MatchOperand_ParseFail;
5284   }
5285 
5286   return MatchOperand_Success;
5287 }
5288 
5289 OperandMatchResultTy
5290 AMDGPUAsmParser::parseSymbolicOrNumericFormat(int64_t &Format) {
5291   using namespace llvm::AMDGPU::MTBUFFormat;
5292 
5293   if (!trySkipId("format", AsmToken::Colon))
5294     return MatchOperand_NoMatch;
5295 
5296   if (trySkipToken(AsmToken::LBrac)) {
5297     StringRef FormatStr;
5298     SMLoc Loc = getLoc();
5299     if (!parseId(FormatStr, "expected a format string"))
5300       return MatchOperand_ParseFail;
5301 
5302     auto Res = parseSymbolicUnifiedFormat(FormatStr, Loc, Format);
5303     if (Res == MatchOperand_NoMatch)
5304       Res = parseSymbolicSplitFormat(FormatStr, Loc, Format);
5305     if (Res != MatchOperand_Success)
5306       return Res;
5307 
5308     if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
5309       return MatchOperand_ParseFail;
5310 
5311     return MatchOperand_Success;
5312   }
5313 
5314   return parseNumericFormat(Format);
5315 }
5316 
5317 OperandMatchResultTy
5318 AMDGPUAsmParser::parseFORMAT(OperandVector &Operands) {
5319   using namespace llvm::AMDGPU::MTBUFFormat;
5320 
5321   int64_t Format = getDefaultFormatEncoding(getSTI());
5322   OperandMatchResultTy Res;
5323   SMLoc Loc = getLoc();
5324 
5325   // Parse legacy format syntax.
5326   Res = isGFX10Plus() ? parseUfmt(Format) : parseDfmtNfmt(Format);
5327   if (Res == MatchOperand_ParseFail)
5328     return Res;
5329 
5330   bool FormatFound = (Res == MatchOperand_Success);
5331 
5332   Operands.push_back(
5333     AMDGPUOperand::CreateImm(this, Format, Loc, AMDGPUOperand::ImmTyFORMAT));
5334 
5335   if (FormatFound)
5336     trySkipToken(AsmToken::Comma);
5337 
5338   if (isToken(AsmToken::EndOfStatement)) {
5339     // We are expecting an soffset operand,
5340     // but let the matcher handle the error.
5341     return MatchOperand_Success;
5342   }
5343 
5344   // Parse soffset.
5345   Res = parseRegOrImm(Operands);
5346   if (Res != MatchOperand_Success)
5347     return Res;
5348 
5349   trySkipToken(AsmToken::Comma);
5350 
5351   if (!FormatFound) {
5352     Res = parseSymbolicOrNumericFormat(Format);
5353     if (Res == MatchOperand_ParseFail)
5354       return Res;
5355     if (Res == MatchOperand_Success) {
5356       auto Size = Operands.size();
5357       AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands[Size - 2]);
5358       assert(Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyFORMAT);
5359       Op.setImm(Format);
5360     }
5361     return MatchOperand_Success;
5362   }
5363 
5364   if (isId("format") && peekToken().is(AsmToken::Colon)) {
5365     Error(getLoc(), "duplicate format");
5366     return MatchOperand_ParseFail;
5367   }
5368   return MatchOperand_Success;
5369 }
5370 
5371 //===----------------------------------------------------------------------===//
5372 // ds
5373 //===----------------------------------------------------------------------===//
5374 
5375 void AMDGPUAsmParser::cvtDSOffset01(MCInst &Inst,
5376                                     const OperandVector &Operands) {
5377   OptionalImmIndexMap OptionalIdx;
5378 
5379   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
5380     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
5381 
5382     // Add the register arguments
5383     if (Op.isReg()) {
5384       Op.addRegOperands(Inst, 1);
5385       continue;
5386     }
5387 
5388     // Handle optional arguments
5389     OptionalIdx[Op.getImmTy()] = i;
5390   }
5391 
5392   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset0);
5393   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset1);
5394   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS);
5395 
5396   Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0
5397 }
5398 
5399 void AMDGPUAsmParser::cvtDSImpl(MCInst &Inst, const OperandVector &Operands,
5400                                 bool IsGdsHardcoded) {
5401   OptionalImmIndexMap OptionalIdx;
5402 
5403   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
5404     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
5405 
5406     // Add the register arguments
5407     if (Op.isReg()) {
5408       Op.addRegOperands(Inst, 1);
5409       continue;
5410     }
5411 
5412     if (Op.isToken() && Op.getToken() == "gds") {
5413       IsGdsHardcoded = true;
5414       continue;
5415     }
5416 
5417     // Handle optional arguments
5418     OptionalIdx[Op.getImmTy()] = i;
5419   }
5420 
5421   AMDGPUOperand::ImmTy OffsetType =
5422     (Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_gfx10 ||
5423      Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_gfx6_gfx7 ||
5424      Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_vi) ? AMDGPUOperand::ImmTySwizzle :
5425                                                       AMDGPUOperand::ImmTyOffset;
5426 
5427   addOptionalImmOperand(Inst, Operands, OptionalIdx, OffsetType);
5428 
5429   if (!IsGdsHardcoded) {
5430     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS);
5431   }
5432   Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0
5433 }
5434 
5435 void AMDGPUAsmParser::cvtExp(MCInst &Inst, const OperandVector &Operands) {
5436   OptionalImmIndexMap OptionalIdx;
5437 
5438   unsigned OperandIdx[4];
5439   unsigned EnMask = 0;
5440   int SrcIdx = 0;
5441 
5442   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
5443     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
5444 
5445     // Add the register arguments
5446     if (Op.isReg()) {
5447       assert(SrcIdx < 4);
5448       OperandIdx[SrcIdx] = Inst.size();
5449       Op.addRegOperands(Inst, 1);
5450       ++SrcIdx;
5451       continue;
5452     }
5453 
5454     if (Op.isOff()) {
5455       assert(SrcIdx < 4);
5456       OperandIdx[SrcIdx] = Inst.size();
5457       Inst.addOperand(MCOperand::createReg(AMDGPU::NoRegister));
5458       ++SrcIdx;
5459       continue;
5460     }
5461 
5462     if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyExpTgt) {
5463       Op.addImmOperands(Inst, 1);
5464       continue;
5465     }
5466 
5467     if (Op.isToken() && Op.getToken() == "done")
5468       continue;
5469 
5470     // Handle optional arguments
5471     OptionalIdx[Op.getImmTy()] = i;
5472   }
5473 
5474   assert(SrcIdx == 4);
5475 
5476   bool Compr = false;
5477   if (OptionalIdx.find(AMDGPUOperand::ImmTyExpCompr) != OptionalIdx.end()) {
5478     Compr = true;
5479     Inst.getOperand(OperandIdx[1]) = Inst.getOperand(OperandIdx[2]);
5480     Inst.getOperand(OperandIdx[2]).setReg(AMDGPU::NoRegister);
5481     Inst.getOperand(OperandIdx[3]).setReg(AMDGPU::NoRegister);
5482   }
5483 
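  // Compute the "en" channel-enable mask: one bit per enabled source, or two
  // bits per source pair when the export is compressed.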
5484   for (auto i = 0; i < SrcIdx; ++i) {
5485     if (Inst.getOperand(OperandIdx[i]).getReg() != AMDGPU::NoRegister) {
5486       EnMask |= Compr? (0x3 << i * 2) : (0x1 << i);
5487     }
5488   }
5489 
5490   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpVM);
5491   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpCompr);
5492 
5493   Inst.addOperand(MCOperand::createImm(EnMask));
5494 }
5495 
5496 //===----------------------------------------------------------------------===//
5497 // s_waitcnt
5498 //===----------------------------------------------------------------------===//
5499 
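// Encode a single counter value into the combined s_waitcnt immediate and
// check that it fits. If the value does not round-trip through decode() and
// Saturate is set, the field is clamped to its maximum; otherwise the
// overflow is reported as a failure.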
5500 static bool
5501 encodeCnt(
5502   const AMDGPU::IsaVersion ISA,
5503   int64_t &IntVal,
5504   int64_t CntVal,
5505   bool Saturate,
5506   unsigned (*encode)(const IsaVersion &Version, unsigned, unsigned),
5507   unsigned (*decode)(const IsaVersion &Version, unsigned))
5508 {
5509   bool Failed = false;
5510 
5511   IntVal = encode(ISA, IntVal, CntVal);
5512   if (CntVal != decode(ISA, IntVal)) {
5513     if (Saturate) {
5514       IntVal = encode(ISA, IntVal, -1);
5515     } else {
5516       Failed = true;
5517     }
5518   }
5519   return Failed;
5520 }
5521 
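// Parse one "<counter>(<value>)" term of an s_waitcnt operand, e.g. vmcnt(0)
// or lgkmcnt_sat(1). Successive terms may be separated by '&' or ','.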
5522 bool AMDGPUAsmParser::parseCnt(int64_t &IntVal) {
5523 
5524   SMLoc CntLoc = getLoc();
5525   StringRef CntName = getTokenStr();
5526 
5527   if (!skipToken(AsmToken::Identifier, "expected a counter name") ||
5528       !skipToken(AsmToken::LParen, "expected a left parenthesis"))
5529     return false;
5530 
5531   int64_t CntVal;
5532   SMLoc ValLoc = getLoc();
5533   if (!parseExpr(CntVal))
5534     return false;
5535 
5536   AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
5537 
5538   bool Failed = true;
5539   bool Sat = CntName.endswith("_sat");
5540 
5541   if (CntName == "vmcnt" || CntName == "vmcnt_sat") {
5542     Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeVmcnt, decodeVmcnt);
5543   } else if (CntName == "expcnt" || CntName == "expcnt_sat") {
5544     Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeExpcnt, decodeExpcnt);
5545   } else if (CntName == "lgkmcnt" || CntName == "lgkmcnt_sat") {
5546     Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeLgkmcnt, decodeLgkmcnt);
5547   } else {
5548     Error(CntLoc, "invalid counter name " + CntName);
5549     return false;
5550   }
5551 
5552   if (Failed) {
5553     Error(ValLoc, "too large value for " + CntName);
5554     return false;
5555   }
5556 
5557   if (!skipToken(AsmToken::RParen, "expected a closing parenthesis"))
5558     return false;
5559 
5560   if (trySkipToken(AsmToken::Amp) || trySkipToken(AsmToken::Comma)) {
5561     if (isToken(AsmToken::EndOfStatement)) {
5562       Error(getLoc(), "expected a counter name");
5563       return false;
5564     }
5565   }
5566 
5567   return true;
5568 }
5569 
5570 OperandMatchResultTy
5571 AMDGPUAsmParser::parseSWaitCntOps(OperandVector &Operands) {
5572   AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
5573   int64_t Waitcnt = getWaitcntBitMask(ISA);
5574   SMLoc S = getLoc();
5575 
5576   if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) {
5577     while (!isToken(AsmToken::EndOfStatement)) {
5578       if (!parseCnt(Waitcnt))
5579         return MatchOperand_ParseFail;
5580     }
5581   } else {
5582     if (!parseExpr(Waitcnt))
5583       return MatchOperand_ParseFail;
5584   }
5585 
5586   Operands.push_back(AMDGPUOperand::CreateImm(this, Waitcnt, S));
5587   return MatchOperand_Success;
5588 }
5589 
5590 bool
5591 AMDGPUOperand::isSWaitCnt() const {
5592   return isImm();
5593 }
5594 
5595 //===----------------------------------------------------------------------===//
5596 // hwreg
5597 //===----------------------------------------------------------------------===//
5598 
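// Parse the body of a hwreg(...) operand:
//   hwreg(<name or code> [, <bit offset>, <bit width>])
// The caller has already consumed the leading "hwreg(".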
5599 bool
5600 AMDGPUAsmParser::parseHwregBody(OperandInfoTy &HwReg,
5601                                 OperandInfoTy &Offset,
5602                                 OperandInfoTy &Width) {
5603   using namespace llvm::AMDGPU::Hwreg;
5604 
5605   // The register may be specified by name or using a numeric code
5606   HwReg.Loc = getLoc();
5607   if (isToken(AsmToken::Identifier) &&
5608       (HwReg.Id = getHwregId(getTokenStr())) >= 0) {
5609     HwReg.IsSymbolic = true;
5610     lex(); // skip register name
5611   } else if (!parseExpr(HwReg.Id, "a register name")) {
5612     return false;
5613   }
5614 
5615   if (trySkipToken(AsmToken::RParen))
5616     return true;
5617 
5618   // parse optional params
5619   if (!skipToken(AsmToken::Comma, "expected a comma or a closing parenthesis"))
5620     return false;
5621 
5622   Offset.Loc = getLoc();
5623   if (!parseExpr(Offset.Id))
5624     return false;
5625 
5626   if (!skipToken(AsmToken::Comma, "expected a comma"))
5627     return false;
5628 
5629   Width.Loc = getLoc();
5630   return parseExpr(Width.Id) &&
5631          skipToken(AsmToken::RParen, "expected a closing parenthesis");
5632 }
5633 
5634 bool
5635 AMDGPUAsmParser::validateHwreg(const OperandInfoTy &HwReg,
5636                                const OperandInfoTy &Offset,
5637                                const OperandInfoTy &Width) {
5638 
5639   using namespace llvm::AMDGPU::Hwreg;
5640 
5641   if (HwReg.IsSymbolic && !isValidHwreg(HwReg.Id, getSTI())) {
5642     Error(HwReg.Loc,
5643           "specified hardware register is not supported on this GPU");
5644     return false;
5645   }
5646   if (!isValidHwreg(HwReg.Id)) {
5647     Error(HwReg.Loc,
5648           "invalid code of hardware register: only 6-bit values are legal");
5649     return false;
5650   }
5651   if (!isValidHwregOffset(Offset.Id)) {
5652     Error(Offset.Loc, "invalid bit offset: only 5-bit values are legal");
5653     return false;
5654   }
5655   if (!isValidHwregWidth(Width.Id)) {
5656     Error(Width.Loc,
5657           "invalid bitfield width: only values from 1 to 32 are legal");
5658     return false;
5659   }
5660   return true;
5661 }
5662 
5663 OperandMatchResultTy
5664 AMDGPUAsmParser::parseHwreg(OperandVector &Operands) {
5665   using namespace llvm::AMDGPU::Hwreg;
5666 
5667   int64_t ImmVal = 0;
5668   SMLoc Loc = getLoc();
5669 
5670   if (trySkipId("hwreg", AsmToken::LParen)) {
5671     OperandInfoTy HwReg(ID_UNKNOWN_);
5672     OperandInfoTy Offset(OFFSET_DEFAULT_);
5673     OperandInfoTy Width(WIDTH_DEFAULT_);
5674     if (parseHwregBody(HwReg, Offset, Width) &&
5675         validateHwreg(HwReg, Offset, Width)) {
5676       ImmVal = encodeHwreg(HwReg.Id, Offset.Id, Width.Id);
5677     } else {
5678       return MatchOperand_ParseFail;
5679     }
5680   } else if (parseExpr(ImmVal, "a hwreg macro")) {
5681     if (ImmVal < 0 || !isUInt<16>(ImmVal)) {
5682       Error(Loc, "invalid immediate: only 16-bit values are legal");
5683       return MatchOperand_ParseFail;
5684     }
5685   } else {
5686     return MatchOperand_ParseFail;
5687   }
5688 
5689   Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTyHwreg));
5690   return MatchOperand_Success;
5691 }
5692 
5693 bool AMDGPUOperand::isHwreg() const {
5694   return isImmTy(ImmTyHwreg);
5695 }
5696 
5697 //===----------------------------------------------------------------------===//
5698 // sendmsg
5699 //===----------------------------------------------------------------------===//
5700 
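// Parse the body of a sendmsg(...) operand:
//   sendmsg(<message> [, <operation> [, <stream id>]])
// The caller has already consumed the leading "sendmsg(".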
5701 bool
5702 AMDGPUAsmParser::parseSendMsgBody(OperandInfoTy &Msg,
5703                                   OperandInfoTy &Op,
5704                                   OperandInfoTy &Stream) {
5705   using namespace llvm::AMDGPU::SendMsg;
5706 
5707   Msg.Loc = getLoc();
5708   if (isToken(AsmToken::Identifier) && (Msg.Id = getMsgId(getTokenStr())) >= 0) {
5709     Msg.IsSymbolic = true;
5710     lex(); // skip message name
5711   } else if (!parseExpr(Msg.Id, "a message name")) {
5712     return false;
5713   }
5714 
5715   if (trySkipToken(AsmToken::Comma)) {
5716     Op.IsDefined = true;
5717     Op.Loc = getLoc();
5718     if (isToken(AsmToken::Identifier) &&
5719         (Op.Id = getMsgOpId(Msg.Id, getTokenStr())) >= 0) {
5720       lex(); // skip operation name
5721     } else if (!parseExpr(Op.Id, "an operation name")) {
5722       return false;
5723     }
5724 
5725     if (trySkipToken(AsmToken::Comma)) {
5726       Stream.IsDefined = true;
5727       Stream.Loc = getLoc();
5728       if (!parseExpr(Stream.Id))
5729         return false;
5730     }
5731   }
5732 
5733   return skipToken(AsmToken::RParen, "expected a closing parenthesis");
5734 }
5735 
5736 bool
5737 AMDGPUAsmParser::validateSendMsg(const OperandInfoTy &Msg,
5738                                  const OperandInfoTy &Op,
5739                                  const OperandInfoTy &Stream) {
5740   using namespace llvm::AMDGPU::SendMsg;
5741 
5742   // Validation strictness depends on whether the message is specified
5743   // in a symbolic or in a numeric form. In the latter case
5744   // only the encoding possibility is checked.
5745   bool Strict = Msg.IsSymbolic;
5746 
5747   if (!isValidMsgId(Msg.Id, getSTI(), Strict)) {
5748     Error(Msg.Loc, "invalid message id");
5749     return false;
5750   }
5751   if (Strict && (msgRequiresOp(Msg.Id) != Op.IsDefined)) {
5752     if (Op.IsDefined) {
5753       Error(Op.Loc, "message does not support operations");
5754     } else {
5755       Error(Msg.Loc, "missing message operation");
5756     }
5757     return false;
5758   }
5759   if (!isValidMsgOp(Msg.Id, Op.Id, Strict)) {
5760     Error(Op.Loc, "invalid operation id");
5761     return false;
5762   }
5763   if (Strict && !msgSupportsStream(Msg.Id, Op.Id) && Stream.IsDefined) {
5764     Error(Stream.Loc, "message operation does not support streams");
5765     return false;
5766   }
5767   if (!isValidMsgStream(Msg.Id, Op.Id, Stream.Id, Strict)) {
5768     Error(Stream.Loc, "invalid message stream id");
5769     return false;
5770   }
5771   return true;
5772 }
5773 
5774 OperandMatchResultTy
5775 AMDGPUAsmParser::parseSendMsgOp(OperandVector &Operands) {
5776   using namespace llvm::AMDGPU::SendMsg;
5777 
5778   int64_t ImmVal = 0;
5779   SMLoc Loc = getLoc();
5780 
5781   if (trySkipId("sendmsg", AsmToken::LParen)) {
5782     OperandInfoTy Msg(ID_UNKNOWN_);
5783     OperandInfoTy Op(OP_NONE_);
5784     OperandInfoTy Stream(STREAM_ID_NONE_);
5785     if (parseSendMsgBody(Msg, Op, Stream) &&
5786         validateSendMsg(Msg, Op, Stream)) {
5787       ImmVal = encodeMsg(Msg.Id, Op.Id, Stream.Id);
5788     } else {
5789       return MatchOperand_ParseFail;
5790     }
5791   } else if (parseExpr(ImmVal, "a sendmsg macro")) {
5792     if (ImmVal < 0 || !isUInt<16>(ImmVal)) {
5793       Error(Loc, "invalid immediate: only 16-bit values are legal");
5794       return MatchOperand_ParseFail;
5795     }
5796   } else {
5797     return MatchOperand_ParseFail;
5798   }
5799 
5800   Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTySendMsg));
5801   return MatchOperand_Success;
5802 }
5803 
5804 bool AMDGPUOperand::isSendMsg() const {
5805   return isImmTy(ImmTySendMsg);
5806 }
5807 
5808 //===----------------------------------------------------------------------===//
5809 // v_interp
5810 //===----------------------------------------------------------------------===//
5811 
5812 OperandMatchResultTy AMDGPUAsmParser::parseInterpSlot(OperandVector &Operands) {
5813   StringRef Str;
5814   SMLoc S = getLoc();
5815 
5816   if (!parseId(Str))
5817     return MatchOperand_NoMatch;
5818 
5819   int Slot = StringSwitch<int>(Str)
5820     .Case("p10", 0)
5821     .Case("p20", 1)
5822     .Case("p0", 2)
5823     .Default(-1);
5824 
5825   if (Slot == -1) {
5826     Error(S, "invalid interpolation slot");
5827     return MatchOperand_ParseFail;
5828   }
5829 
5830   Operands.push_back(AMDGPUOperand::CreateImm(this, Slot, S,
5831                                               AMDGPUOperand::ImmTyInterpSlot));
5832   return MatchOperand_Success;
5833 }
5834 
5835 OperandMatchResultTy AMDGPUAsmParser::parseInterpAttr(OperandVector &Operands) {
5836   StringRef Str;
5837   SMLoc S = getLoc();
5838 
5839   if (!parseId(Str))
5840     return MatchOperand_NoMatch;
5841 
5842   if (!Str.startswith("attr")) {
5843     Error(S, "invalid interpolation attribute");
5844     return MatchOperand_ParseFail;
5845   }
5846 
5847   StringRef Chan = Str.take_back(2);
5848   int AttrChan = StringSwitch<int>(Chan)
5849     .Case(".x", 0)
5850     .Case(".y", 1)
5851     .Case(".z", 2)
5852     .Case(".w", 3)
5853     .Default(-1);
5854   if (AttrChan == -1) {
5855     Error(S, "invalid or missing interpolation attribute channel");
5856     return MatchOperand_ParseFail;
5857   }
5858 
5859   Str = Str.drop_back(2).drop_front(4);
5860 
5861   uint8_t Attr;
5862   if (Str.getAsInteger(10, Attr)) {
5863     Error(S, "invalid or missing interpolation attribute number");
5864     return MatchOperand_ParseFail;
5865   }
5866 
5867   if (Attr > 63) {
5868     Error(S, "out of bounds interpolation attribute number");
5869     return MatchOperand_ParseFail;
5870   }
5871 
5872   SMLoc SChan = SMLoc::getFromPointer(Chan.data());
5873 
5874   Operands.push_back(AMDGPUOperand::CreateImm(this, Attr, S,
5875                                               AMDGPUOperand::ImmTyInterpAttr));
5876   Operands.push_back(AMDGPUOperand::CreateImm(this, AttrChan, SChan,
5877                                               AMDGPUOperand::ImmTyAttrChan));
5878   return MatchOperand_Success;
5879 }
5880 
5881 //===----------------------------------------------------------------------===//
5882 // exp
5883 //===----------------------------------------------------------------------===//
5884 
5885 OperandMatchResultTy AMDGPUAsmParser::parseExpTgt(OperandVector &Operands) {
5886   using namespace llvm::AMDGPU::Exp;
5887 
5888   StringRef Str;
5889   SMLoc S = getLoc();
5890 
5891   if (!parseId(Str))
5892     return MatchOperand_NoMatch;
5893 
5894   unsigned Id = getTgtId(Str);
5895   if (Id == ET_INVALID || !isSupportedTgtId(Id, getSTI())) {
5896     Error(S, (Id == ET_INVALID) ?
5897                 "invalid exp target" :
5898                 "exp target is not supported on this GPU");
5899     return MatchOperand_ParseFail;
5900   }
5901 
5902   Operands.push_back(AMDGPUOperand::CreateImm(this, Id, S,
5903                                               AMDGPUOperand::ImmTyExpTgt));
5904   return MatchOperand_Success;
5905 }
5906 
5907 //===----------------------------------------------------------------------===//
5908 // parser helpers
5909 //===----------------------------------------------------------------------===//
5910 
5911 bool
5912 AMDGPUAsmParser::isId(const AsmToken &Token, const StringRef Id) const {
5913   return Token.is(AsmToken::Identifier) && Token.getString() == Id;
5914 }
5915 
5916 bool
5917 AMDGPUAsmParser::isId(const StringRef Id) const {
5918   return isId(getToken(), Id);
5919 }
5920 
5921 bool
5922 AMDGPUAsmParser::isToken(const AsmToken::TokenKind Kind) const {
5923   return getTokenKind() == Kind;
5924 }
5925 
5926 bool
5927 AMDGPUAsmParser::trySkipId(const StringRef Id) {
5928   if (isId(Id)) {
5929     lex();
5930     return true;
5931   }
5932   return false;
5933 }
5934 
5935 bool
5936 AMDGPUAsmParser::trySkipId(const StringRef Pref, const StringRef Id) {
5937   if (isToken(AsmToken::Identifier)) {
5938     StringRef Tok = getTokenStr();
5939     if (Tok.startswith(Pref) && Tok.drop_front(Pref.size()) == Id) {
5940       lex();
5941       return true;
5942     }
5943   }
5944   return false;
5945 }
5946 
5947 bool
5948 AMDGPUAsmParser::trySkipId(const StringRef Id, const AsmToken::TokenKind Kind) {
5949   if (isId(Id) && peekToken().is(Kind)) {
5950     lex();
5951     lex();
5952     return true;
5953   }
5954   return false;
5955 }
5956 
5957 bool
5958 AMDGPUAsmParser::trySkipToken(const AsmToken::TokenKind Kind) {
5959   if (isToken(Kind)) {
5960     lex();
5961     return true;
5962   }
5963   return false;
5964 }
5965 
5966 bool
5967 AMDGPUAsmParser::skipToken(const AsmToken::TokenKind Kind,
5968                            const StringRef ErrMsg) {
5969   if (!trySkipToken(Kind)) {
5970     Error(getLoc(), ErrMsg);
5971     return false;
5972   }
5973   return true;
5974 }
5975 
5976 bool
5977 AMDGPUAsmParser::parseExpr(int64_t &Imm, StringRef Expected) {
5978   SMLoc S = getLoc();
5979 
5980   const MCExpr *Expr;
5981   if (Parser.parseExpression(Expr))
5982     return false;
5983 
5984   if (Expr->evaluateAsAbsolute(Imm))
5985     return true;
5986 
5987   if (Expected.empty()) {
5988     Error(S, "expected absolute expression");
5989   } else {
5990     Error(S, Twine("expected ", Expected) +
5991              Twine(" or an absolute expression"));
5992   }
5993   return false;
5994 }
5995 
5996 bool
5997 AMDGPUAsmParser::parseExpr(OperandVector &Operands) {
5998   SMLoc S = getLoc();
5999 
6000   const MCExpr *Expr;
6001   if (Parser.parseExpression(Expr))
6002     return false;
6003 
6004   int64_t IntVal;
6005   if (Expr->evaluateAsAbsolute(IntVal)) {
6006     Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S));
6007   } else {
6008     Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S));
6009   }
6010   return true;
6011 }
6012 
6013 bool
6014 AMDGPUAsmParser::parseString(StringRef &Val, const StringRef ErrMsg) {
6015   if (isToken(AsmToken::String)) {
6016     Val = getToken().getStringContents();
6017     lex();
6018     return true;
6019   } else {
6020     Error(getLoc(), ErrMsg);
6021     return false;
6022   }
6023 }
6024 
6025 bool
6026 AMDGPUAsmParser::parseId(StringRef &Val, const StringRef ErrMsg) {
6027   if (isToken(AsmToken::Identifier)) {
6028     Val = getTokenStr();
6029     lex();
6030     return true;
6031   } else {
6032     if (!ErrMsg.empty())
6033       Error(getLoc(), ErrMsg);
6034     return false;
6035   }
6036 }
6037 
6038 AsmToken
6039 AMDGPUAsmParser::getToken() const {
6040   return Parser.getTok();
6041 }
6042 
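// Peek at the next token without consuming it. Lookahead never crosses an
// EndOfStatement token, so peeking at the end of a statement returns the
// EndOfStatement token itself.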
6043 AsmToken
6044 AMDGPUAsmParser::peekToken() {
6045   return isToken(AsmToken::EndOfStatement) ? getToken() : getLexer().peekTok();
6046 }
6047 
6048 void
6049 AMDGPUAsmParser::peekTokens(MutableArrayRef<AsmToken> Tokens) {
6050   auto TokCount = getLexer().peekTokens(Tokens);
6051 
6052   for (auto Idx = TokCount; Idx < Tokens.size(); ++Idx)
6053     Tokens[Idx] = AsmToken(AsmToken::Error, "");
6054 }
6055 
6056 AsmToken::TokenKind
6057 AMDGPUAsmParser::getTokenKind() const {
6058   return getLexer().getKind();
6059 }
6060 
6061 SMLoc
6062 AMDGPUAsmParser::getLoc() const {
6063   return getToken().getLoc();
6064 }
6065 
6066 StringRef
6067 AMDGPUAsmParser::getTokenStr() const {
6068   return getToken().getString();
6069 }
6070 
6071 void
6072 AMDGPUAsmParser::lex() {
6073   Parser.Lex();
6074 }
6075 
6076 SMLoc
6077 AMDGPUAsmParser::getOperandLoc(std::function<bool(const AMDGPUOperand&)> Test,
6078                                const OperandVector &Operands) const {
6079   for (unsigned i = Operands.size() - 1; i > 0; --i) {
6080     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
6081     if (Test(Op))
6082       return Op.getStartLoc();
6083   }
6084   return ((AMDGPUOperand &)*Operands[0]).getStartLoc();
6085 }
6086 
6087 SMLoc
6088 AMDGPUAsmParser::getImmLoc(AMDGPUOperand::ImmTy Type,
6089                            const OperandVector &Operands) const {
6090   auto Test = [=](const AMDGPUOperand& Op) { return Op.isImmTy(Type); };
6091   return getOperandLoc(Test, Operands);
6092 }
6093 
6094 SMLoc
6095 AMDGPUAsmParser::getRegLoc(unsigned Reg,
6096                            const OperandVector &Operands) const {
6097   auto Test = [=](const AMDGPUOperand& Op) {
6098     return Op.isRegKind() && Op.getReg() == Reg;
6099   };
6100   return getOperandLoc(Test, Operands);
6101 }
6102 
6103 SMLoc
6104 AMDGPUAsmParser::getLitLoc(const OperandVector &Operands) const {
6105   auto Test = [](const AMDGPUOperand& Op) {
6106     return Op.IsImmKindLiteral() || Op.isExpr();
6107   };
6108   return getOperandLoc(Test, Operands);
6109 }
6110 
6111 SMLoc
6112 AMDGPUAsmParser::getConstLoc(const OperandVector &Operands) const {
6113   auto Test = [](const AMDGPUOperand& Op) {
6114     return Op.isImmKindConst();
6115   };
6116   return getOperandLoc(Test, Operands);
6117 }
6118 
6119 //===----------------------------------------------------------------------===//
6120 // swizzle
6121 //===----------------------------------------------------------------------===//
6122 
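// Helpers for the ds_swizzle "bitmask" mode. The hardware derives each
// thread's source lane as ((lane & AndMask) | OrMask) ^ XorMask; the
// broadcast, swap and reverse forms below are encoded in terms of this mode.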
6123 LLVM_READNONE
6124 static unsigned
6125 encodeBitmaskPerm(const unsigned AndMask,
6126                   const unsigned OrMask,
6127                   const unsigned XorMask) {
6128   using namespace llvm::AMDGPU::Swizzle;
6129 
6130   return BITMASK_PERM_ENC |
6131          (AndMask << BITMASK_AND_SHIFT) |
6132          (OrMask  << BITMASK_OR_SHIFT)  |
6133          (XorMask << BITMASK_XOR_SHIFT);
6134 }
6135 
6136 bool
6137 AMDGPUAsmParser::parseSwizzleOperand(int64_t &Op,
6138                                      const unsigned MinVal,
6139                                      const unsigned MaxVal,
6140                                      const StringRef ErrMsg,
6141                                      SMLoc &Loc) {
6142   if (!skipToken(AsmToken::Comma, "expected a comma")) {
6143     return false;
6144   }
6145   Loc = getLoc();
6146   if (!parseExpr(Op)) {
6147     return false;
6148   }
6149   if (Op < MinVal || Op > MaxVal) {
6150     Error(Loc, ErrMsg);
6151     return false;
6152   }
6153 
6154   return true;
6155 }
6156 
6157 bool
6158 AMDGPUAsmParser::parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
6159                                       const unsigned MinVal,
6160                                       const unsigned MaxVal,
6161                                       const StringRef ErrMsg) {
6162   SMLoc Loc;
6163   for (unsigned i = 0; i < OpNum; ++i) {
6164     if (!parseSwizzleOperand(Op[i], MinVal, MaxVal, ErrMsg, Loc))
6165       return false;
6166   }
6167 
6168   return true;
6169 }
6170 
6171 bool
6172 AMDGPUAsmParser::parseSwizzleQuadPerm(int64_t &Imm) {
6173   using namespace llvm::AMDGPU::Swizzle;
6174 
6175   int64_t Lane[LANE_NUM];
6176   if (parseSwizzleOperands(LANE_NUM, Lane, 0, LANE_MAX,
6177                            "expected a 2-bit lane id")) {
6178     Imm = QUAD_PERM_ENC;
6179     for (unsigned I = 0; I < LANE_NUM; ++I) {
6180       Imm |= Lane[I] << (LANE_SHIFT * I);
6181     }
6182     return true;
6183   }
6184   return false;
6185 }
6186 
6187 bool
6188 AMDGPUAsmParser::parseSwizzleBroadcast(int64_t &Imm) {
6189   using namespace llvm::AMDGPU::Swizzle;
6190 
6191   SMLoc Loc;
6192   int64_t GroupSize;
6193   int64_t LaneIdx;
6194 
6195   if (!parseSwizzleOperand(GroupSize,
6196                            2, 32,
6197                            "group size must be in the interval [2,32]",
6198                            Loc)) {
6199     return false;
6200   }
6201   if (!isPowerOf2_64(GroupSize)) {
6202     Error(Loc, "group size must be a power of two");
6203     return false;
6204   }
6205   if (parseSwizzleOperand(LaneIdx,
6206                           0, GroupSize - 1,
6207                           "lane id must be in the interval [0,group size - 1]",
6208                           Loc)) {
6209     Imm = encodeBitmaskPerm(BITMASK_MAX - GroupSize + 1, LaneIdx, 0);
6210     return true;
6211   }
6212   return false;
6213 }
6214 
6215 bool
6216 AMDGPUAsmParser::parseSwizzleReverse(int64_t &Imm) {
6217   using namespace llvm::AMDGPU::Swizzle;
6218 
6219   SMLoc Loc;
6220   int64_t GroupSize;
6221 
6222   if (!parseSwizzleOperand(GroupSize,
6223                            2, 32,
6224                            "group size must be in the interval [2,32]",
6225                            Loc)) {
6226     return false;
6227   }
6228   if (!isPowerOf2_64(GroupSize)) {
6229     Error(Loc, "group size must be a power of two");
6230     return false;
6231   }
6232 
6233   Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize - 1);
6234   return true;
6235 }
6236 
6237 bool
6238 AMDGPUAsmParser::parseSwizzleSwap(int64_t &Imm) {
6239   using namespace llvm::AMDGPU::Swizzle;
6240 
6241   SMLoc Loc;
6242   int64_t GroupSize;
6243 
6244   if (!parseSwizzleOperand(GroupSize,
6245                            1, 16,
6246                            "group size must be in the interval [1,16]",
6247                            Loc)) {
6248     return false;
6249   }
6250   if (!isPowerOf2_64(GroupSize)) {
6251     Error(Loc, "group size must be a power of two");
6252     return false;
6253   }
6254 
6255   Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize);
6256   return true;
6257 }
6258 
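// Parse a BITMASK_PERM swizzle macro, e.g. swizzle(BITMASK_PERM, "01pip").
// Each character of the 5-character mask controls one bit of the lane id
// (most significant bit first): '0' and '1' force the bit, 'p' preserves it,
// and 'i' inverts it.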
6259 bool
6260 AMDGPUAsmParser::parseSwizzleBitmaskPerm(int64_t &Imm) {
6261   using namespace llvm::AMDGPU::Swizzle;
6262 
6263   if (!skipToken(AsmToken::Comma, "expected a comma")) {
6264     return false;
6265   }
6266 
6267   StringRef Ctl;
6268   SMLoc StrLoc = getLoc();
6269   if (!parseString(Ctl)) {
6270     return false;
6271   }
6272   if (Ctl.size() != BITMASK_WIDTH) {
6273     Error(StrLoc, "expected a 5-character mask");
6274     return false;
6275   }
6276 
6277   unsigned AndMask = 0;
6278   unsigned OrMask = 0;
6279   unsigned XorMask = 0;
6280 
6281   for (size_t i = 0; i < Ctl.size(); ++i) {
6282     unsigned Mask = 1 << (BITMASK_WIDTH - 1 - i);
6283     switch(Ctl[i]) {
6284     default:
6285       Error(StrLoc, "invalid mask");
6286       return false;
6287     case '0':
6288       break;
6289     case '1':
6290       OrMask |= Mask;
6291       break;
6292     case 'p':
6293       AndMask |= Mask;
6294       break;
6295     case 'i':
6296       AndMask |= Mask;
6297       XorMask |= Mask;
6298       break;
6299     }
6300   }
6301 
6302   Imm = encodeBitmaskPerm(AndMask, OrMask, XorMask);
6303   return true;
6304 }
6305 
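// Parse a raw swizzle offset given as an expression; it must fit in 16 bits.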
6306 bool
6307 AMDGPUAsmParser::parseSwizzleOffset(int64_t &Imm) {
6308 
6309   SMLoc OffsetLoc = getLoc();
6310 
6311   if (!parseExpr(Imm, "a swizzle macro")) {
6312     return false;
6313   }
6314   if (!isUInt<16>(Imm)) {
6315     Error(OffsetLoc, "expected a 16-bit offset");
6316     return false;
6317   }
6318   return true;
6319 }
6320 
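// Parse a swizzle(...) macro and dispatch to the parser for the requested
// swizzle mode.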
6321 bool
6322 AMDGPUAsmParser::parseSwizzleMacro(int64_t &Imm) {
6323   using namespace llvm::AMDGPU::Swizzle;
6324 
  if (skipToken(AsmToken::LParen, "expected a left parenthesis")) {
6326 
6327     SMLoc ModeLoc = getLoc();
6328     bool Ok = false;
6329 
6330     if (trySkipId(IdSymbolic[ID_QUAD_PERM])) {
6331       Ok = parseSwizzleQuadPerm(Imm);
6332     } else if (trySkipId(IdSymbolic[ID_BITMASK_PERM])) {
6333       Ok = parseSwizzleBitmaskPerm(Imm);
6334     } else if (trySkipId(IdSymbolic[ID_BROADCAST])) {
6335       Ok = parseSwizzleBroadcast(Imm);
6336     } else if (trySkipId(IdSymbolic[ID_SWAP])) {
6337       Ok = parseSwizzleSwap(Imm);
6338     } else if (trySkipId(IdSymbolic[ID_REVERSE])) {
6339       Ok = parseSwizzleReverse(Imm);
6340     } else {
6341       Error(ModeLoc, "expected a swizzle mode");
6342     }
6343 
    return Ok && skipToken(AsmToken::RParen, "expected a closing parenthesis");
6345   }
6346 
6347   return false;
6348 }
6349 
6350 OperandMatchResultTy
6351 AMDGPUAsmParser::parseSwizzleOp(OperandVector &Operands) {
6352   SMLoc S = getLoc();
6353   int64_t Imm = 0;
6354 
6355   if (trySkipId("offset")) {
6356 
6357     bool Ok = false;
6358     if (skipToken(AsmToken::Colon, "expected a colon")) {
6359       if (trySkipId("swizzle")) {
6360         Ok = parseSwizzleMacro(Imm);
6361       } else {
6362         Ok = parseSwizzleOffset(Imm);
6363       }
6364     }
6365 
6366     Operands.push_back(AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTySwizzle));
6367 
6368     return Ok? MatchOperand_Success : MatchOperand_ParseFail;
6369   } else {
6370     // Swizzle "offset" operand is optional.
6371     // If it is omitted, try parsing other optional operands.
6372     return parseOptionalOpr(Operands);
6373   }
6374 }
6375 
6376 bool
6377 AMDGPUOperand::isSwizzle() const {
6378   return isImmTy(ImmTySwizzle);
6379 }
6380 
6381 //===----------------------------------------------------------------------===//
6382 // VGPR Index Mode
6383 //===----------------------------------------------------------------------===//
6384 
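// Parse the body of a gpr_idx(...) macro: a comma-separated list of VGPR index
// modes. Returns the combined mode mask, OFF for an empty list, or UNDEF on a
// parse error.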
6385 int64_t AMDGPUAsmParser::parseGPRIdxMacro() {
6386 
6387   using namespace llvm::AMDGPU::VGPRIndexMode;
6388 
6389   if (trySkipToken(AsmToken::RParen)) {
6390     return OFF;
6391   }
6392 
6393   int64_t Imm = 0;
6394 
6395   while (true) {
6396     unsigned Mode = 0;
6397     SMLoc S = getLoc();
6398 
6399     for (unsigned ModeId = ID_MIN; ModeId <= ID_MAX; ++ModeId) {
6400       if (trySkipId(IdSymbolic[ModeId])) {
6401         Mode = 1 << ModeId;
6402         break;
6403       }
6404     }
6405 
6406     if (Mode == 0) {
6407       Error(S, (Imm == 0)?
6408                "expected a VGPR index mode or a closing parenthesis" :
6409                "expected a VGPR index mode");
6410       return UNDEF;
6411     }
6412 
6413     if (Imm & Mode) {
6414       Error(S, "duplicate VGPR index mode");
6415       return UNDEF;
6416     }
6417     Imm |= Mode;
6418 
6419     if (trySkipToken(AsmToken::RParen))
6420       break;
6421     if (!skipToken(AsmToken::Comma,
6422                    "expected a comma or a closing parenthesis"))
6423       return UNDEF;
6424   }
6425 
6426   return Imm;
6427 }
6428 
6429 OperandMatchResultTy
6430 AMDGPUAsmParser::parseGPRIdxMode(OperandVector &Operands) {
6431 
6432   using namespace llvm::AMDGPU::VGPRIndexMode;
6433 
6434   int64_t Imm = 0;
6435   SMLoc S = getLoc();
6436 
6437   if (trySkipId("gpr_idx", AsmToken::LParen)) {
6438     Imm = parseGPRIdxMacro();
6439     if (Imm == UNDEF)
6440       return MatchOperand_ParseFail;
6441   } else {
6442     if (getParser().parseAbsoluteExpression(Imm))
6443       return MatchOperand_ParseFail;
6444     if (Imm < 0 || !isUInt<4>(Imm)) {
6445       Error(S, "invalid immediate: only 4-bit values are legal");
6446       return MatchOperand_ParseFail;
6447     }
6448   }
6449 
6450   Operands.push_back(
6451       AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyGprIdxMode));
6452   return MatchOperand_Success;
6453 }
6454 
6455 bool AMDGPUOperand::isGPRIdxMode() const {
6456   return isImmTy(ImmTyGprIdxMode);
6457 }
6458 
6459 //===----------------------------------------------------------------------===//
6460 // sopp branch targets
6461 //===----------------------------------------------------------------------===//
6462 
6463 OperandMatchResultTy
6464 AMDGPUAsmParser::parseSOppBrTarget(OperandVector &Operands) {
6465 
6466   // Make sure we are not parsing something
6467   // that looks like a label or an expression but is not.
6468   // This will improve error messages.
6469   if (isRegister() || isModifier())
6470     return MatchOperand_NoMatch;
6471 
6472   if (!parseExpr(Operands))
6473     return MatchOperand_ParseFail;
6474 
6475   AMDGPUOperand &Opr = ((AMDGPUOperand &)*Operands[Operands.size() - 1]);
6476   assert(Opr.isImm() || Opr.isExpr());
6477   SMLoc Loc = Opr.getStartLoc();
6478 
6479   // Currently we do not support arbitrary expressions as branch targets.
6480   // Only labels and absolute expressions are accepted.
6481   if (Opr.isExpr() && !Opr.isSymbolRefExpr()) {
6482     Error(Loc, "expected an absolute expression or a label");
6483   } else if (Opr.isImm() && !Opr.isS16Imm()) {
6484     Error(Loc, "expected a 16-bit signed jump offset");
6485   }
6486 
6487   return MatchOperand_Success;
6488 }
6489 
6490 //===----------------------------------------------------------------------===//
6491 // Boolean holding registers
6492 //===----------------------------------------------------------------------===//
6493 
6494 OperandMatchResultTy
6495 AMDGPUAsmParser::parseBoolReg(OperandVector &Operands) {
6496   return parseReg(Operands);
6497 }
6498 
6499 //===----------------------------------------------------------------------===//
6500 // mubuf
6501 //===----------------------------------------------------------------------===//
6502 
6503 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultDLC() const {
6504   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDLC);
6505 }
6506 
6507 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultGLC() const {
6508   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyGLC);
6509 }
6510 
6511 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultGLC_1() const {
6512   return AMDGPUOperand::CreateImm(this, -1, SMLoc(), AMDGPUOperand::ImmTyGLC);
6513 }
6514 
6515 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSLC() const {
6516   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTySLC);
6517 }
6518 
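// Convert parsed MUBUF operands into MCInst operands, adding default values
// for any omitted optional modifiers. For atomics with return, a tied copy of
// the dst register is inserted as the data source.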
6519 void AMDGPUAsmParser::cvtMubufImpl(MCInst &Inst,
6520                                const OperandVector &Operands,
6521                                bool IsAtomic,
6522                                bool IsAtomicReturn,
6523                                bool IsLds) {
6524   bool IsLdsOpcode = IsLds;
6525   bool HasLdsModifier = false;
6526   OptionalImmIndexMap OptionalIdx;
6527   assert(IsAtomicReturn ? IsAtomic : true);
6528   unsigned FirstOperandIdx = 1;
6529 
6530   for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) {
6531     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
6532 
6533     // Add the register arguments
6534     if (Op.isReg()) {
6535       Op.addRegOperands(Inst, 1);
      // Insert a tied src for the atomic return dst.
      // This cannot be postponed as subsequent calls to
      // addImmOperands rely on the correct number of MC operands.
6539       if (IsAtomicReturn && i == FirstOperandIdx)
6540         Op.addRegOperands(Inst, 1);
6541       continue;
6542     }
6543 
6544     // Handle the case where soffset is an immediate
6545     if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
6546       Op.addImmOperands(Inst, 1);
6547       continue;
6548     }
6549 
6550     HasLdsModifier |= Op.isLDS();
6551 
6552     // Handle tokens like 'offen' which are sometimes hard-coded into the
6553     // asm string.  There are no MCInst operands for these.
6554     if (Op.isToken()) {
6555       continue;
6556     }
6557     assert(Op.isImm());
6558 
6559     // Handle optional arguments
6560     OptionalIdx[Op.getImmTy()] = i;
6561   }
6562 
  // This is a workaround for an llvm quirk which may result in an
  // incorrect instruction selection. Lds and non-lds versions of
  // MUBUF instructions are identical except that lds versions
  // have a mandatory 'lds' modifier. However, this modifier follows
  // optional modifiers, and the llvm asm matcher regards the 'lds'
  // modifier as an optional one. As a result, the lds version of an
  // opcode may be selected even if it has no 'lds' modifier.
6570   if (IsLdsOpcode && !HasLdsModifier) {
6571     int NoLdsOpcode = AMDGPU::getMUBUFNoLdsInst(Inst.getOpcode());
6572     if (NoLdsOpcode != -1) { // Got lds version - correct it.
6573       Inst.setOpcode(NoLdsOpcode);
6574       IsLdsOpcode = false;
6575     }
6576   }
6577 
6578   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset);
6579   if (!IsAtomic || IsAtomicReturn) {
6580     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC,
6581                           IsAtomicReturn ? -1 : 0);
6582   }
6583   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC);
6584 
6585   if (!IsLdsOpcode) { // tfe is not legal with lds opcodes
6586     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
6587   }
6588 
6589   if (isGFX10Plus())
6590     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDLC);
6591 }
6592 
6593 void AMDGPUAsmParser::cvtMtbuf(MCInst &Inst, const OperandVector &Operands) {
6594   OptionalImmIndexMap OptionalIdx;
6595 
6596   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
6597     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
6598 
6599     // Add the register arguments
6600     if (Op.isReg()) {
6601       Op.addRegOperands(Inst, 1);
6602       continue;
6603     }
6604 
6605     // Handle the case where soffset is an immediate
6606     if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
6607       Op.addImmOperands(Inst, 1);
6608       continue;
6609     }
6610 
6611     // Handle tokens like 'offen' which are sometimes hard-coded into the
6612     // asm string.  There are no MCInst operands for these.
6613     if (Op.isToken()) {
6614       continue;
6615     }
6616     assert(Op.isImm());
6617 
6618     // Handle optional arguments
6619     OptionalIdx[Op.getImmTy()] = i;
6620   }
6621 
6622   addOptionalImmOperand(Inst, Operands, OptionalIdx,
6623                         AMDGPUOperand::ImmTyOffset);
6624   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyFORMAT);
6625   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC);
6626   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC);
6627   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
6628 
6629   if (isGFX10Plus())
6630     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDLC);
6631 }
6632 
6633 //===----------------------------------------------------------------------===//
6634 // mimg
6635 //===----------------------------------------------------------------------===//
6636 
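// Convert parsed MIMG operands into MCInst operands, adding default values
// for any omitted optional modifiers. For atomics, the dst register is also
// appended as the tied data source.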
6637 void AMDGPUAsmParser::cvtMIMG(MCInst &Inst, const OperandVector &Operands,
6638                               bool IsAtomic) {
6639   unsigned I = 1;
6640   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
6641   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
6642     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
6643   }
6644 
6645   if (IsAtomic) {
6646     // Add src, same as dst
6647     assert(Desc.getNumDefs() == 1);
6648     ((AMDGPUOperand &)*Operands[I - 1]).addRegOperands(Inst, 1);
6649   }
6650 
6651   OptionalImmIndexMap OptionalIdx;
6652 
6653   for (unsigned E = Operands.size(); I != E; ++I) {
6654     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
6655 
6656     // Add the register arguments
6657     if (Op.isReg()) {
6658       Op.addRegOperands(Inst, 1);
6659     } else if (Op.isImmModifier()) {
6660       OptionalIdx[Op.getImmTy()] = I;
6661     } else if (!Op.isToken()) {
6662       llvm_unreachable("unexpected operand type");
6663     }
6664   }
6665 
6666   bool IsGFX10Plus = isGFX10Plus();
6667 
6668   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDMask);
6669   if (IsGFX10Plus)
6670     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDim, -1);
6671   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyUNorm);
6672   if (IsGFX10Plus)
6673     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDLC);
6674   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC);
6675   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC);
6676   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyR128A16);
6677   if (IsGFX10Plus)
6678     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyA16);
6679   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
6680   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyLWE);
6681   if (!IsGFX10Plus)
6682     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDA);
6683   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyD16);
6684 }
6685 
6686 void AMDGPUAsmParser::cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands) {
6687   cvtMIMG(Inst, Operands, true);
6688 }
6689 
6690 void AMDGPUAsmParser::cvtIntersectRay(MCInst &Inst,
6691                                       const OperandVector &Operands) {
6692   for (unsigned I = 1; I < Operands.size(); ++I) {
6693     auto &Operand = (AMDGPUOperand &)*Operands[I];
6694     if (Operand.isReg())
6695       Operand.addRegOperands(Inst, 1);
6696   }
6697 
6698   Inst.addOperand(MCOperand::createImm(1)); // a16
6699 }
6700 
6701 //===----------------------------------------------------------------------===//
6702 // smrd
6703 //===----------------------------------------------------------------------===//
6704 
6705 bool AMDGPUOperand::isSMRDOffset8() const {
6706   return isImm() && isUInt<8>(getImm());
6707 }
6708 
6709 bool AMDGPUOperand::isSMEMOffset() const {
6710   return isImm(); // Offset range is checked later by validator.
6711 }
6712 
6713 bool AMDGPUOperand::isSMRDLiteralOffset() const {
  // 32-bit literals are only supported on CI and we only want to use them
  // when the offset does not fit in 8 bits.
6716   return isImm() && !isUInt<8>(getImm()) && isUInt<32>(getImm());
6717 }
6718 
6719 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDOffset8() const {
6720   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
6721 }
6722 
6723 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMEMOffset() const {
6724   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
6725 }
6726 
6727 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDLiteralOffset() const {
6728   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
6729 }
6730 
6731 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultFlatOffset() const {
6732   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
6733 }
6734 
6735 //===----------------------------------------------------------------------===//
6736 // vop3
6737 //===----------------------------------------------------------------------===//
6738 
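// The omod asm modifiers mul:1/2/4 and div:2 are converted to the 2-bit OMOD
// field encoding (0 = none, 1 = *2, 2 = *4, 3 = /2).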
6739 static bool ConvertOmodMul(int64_t &Mul) {
6740   if (Mul != 1 && Mul != 2 && Mul != 4)
6741     return false;
6742 
6743   Mul >>= 1;
6744   return true;
6745 }
6746 
6747 static bool ConvertOmodDiv(int64_t &Div) {
6748   if (Div == 1) {
6749     Div = 0;
6750     return true;
6751   }
6752 
6753   if (Div == 2) {
6754     Div = 3;
6755     return true;
6756   }
6757 
6758   return false;
6759 }
6760 
6761 static bool ConvertBoundCtrl(int64_t &BoundCtrl) {
6762   if (BoundCtrl == 0) {
6763     BoundCtrl = 1;
6764     return true;
6765   }
6766 
6767   if (BoundCtrl == -1) {
6768     BoundCtrl = 0;
6769     return true;
6770   }
6771 
6772   return false;
6773 }
6774 
6775 // Note: the order in this table matches the order of operands in AsmString.
6776 static const OptionalOperand AMDGPUOptionalOperandTable[] = {
6777   {"offen",   AMDGPUOperand::ImmTyOffen, true, nullptr},
6778   {"idxen",   AMDGPUOperand::ImmTyIdxen, true, nullptr},
6779   {"addr64",  AMDGPUOperand::ImmTyAddr64, true, nullptr},
6780   {"offset0", AMDGPUOperand::ImmTyOffset0, false, nullptr},
6781   {"offset1", AMDGPUOperand::ImmTyOffset1, false, nullptr},
6782   {"gds",     AMDGPUOperand::ImmTyGDS, true, nullptr},
6783   {"lds",     AMDGPUOperand::ImmTyLDS, true, nullptr},
6784   {"offset",  AMDGPUOperand::ImmTyOffset, false, nullptr},
6785   {"inst_offset", AMDGPUOperand::ImmTyInstOffset, false, nullptr},
6786   {"dlc",     AMDGPUOperand::ImmTyDLC, true, nullptr},
6787   {"glc",     AMDGPUOperand::ImmTyGLC, true, nullptr},
6788   {"slc",     AMDGPUOperand::ImmTySLC, true, nullptr},
6789   {"swz",     AMDGPUOperand::ImmTySWZ, true, nullptr},
6790   {"tfe",     AMDGPUOperand::ImmTyTFE, true, nullptr},
6791   {"d16",     AMDGPUOperand::ImmTyD16, true, nullptr},
6792   {"high",    AMDGPUOperand::ImmTyHigh, true, nullptr},
6793   {"clamp",   AMDGPUOperand::ImmTyClampSI, true, nullptr},
6794   {"omod",    AMDGPUOperand::ImmTyOModSI, false, ConvertOmodMul},
6795   {"unorm",   AMDGPUOperand::ImmTyUNorm, true, nullptr},
6796   {"da",      AMDGPUOperand::ImmTyDA,    true, nullptr},
6797   {"r128",    AMDGPUOperand::ImmTyR128A16,  true, nullptr},
6798   {"a16",     AMDGPUOperand::ImmTyA16,  true, nullptr},
6799   {"lwe",     AMDGPUOperand::ImmTyLWE,   true, nullptr},
6801   {"dmask",   AMDGPUOperand::ImmTyDMask, false, nullptr},
6802   {"dim",     AMDGPUOperand::ImmTyDim,   false, nullptr},
6803   {"row_mask",   AMDGPUOperand::ImmTyDppRowMask, false, nullptr},
6804   {"bank_mask",  AMDGPUOperand::ImmTyDppBankMask, false, nullptr},
6805   {"bound_ctrl", AMDGPUOperand::ImmTyDppBoundCtrl, false, ConvertBoundCtrl},
6806   {"fi",         AMDGPUOperand::ImmTyDppFi, false, nullptr},
6807   {"dst_sel",    AMDGPUOperand::ImmTySdwaDstSel, false, nullptr},
6808   {"src0_sel",   AMDGPUOperand::ImmTySdwaSrc0Sel, false, nullptr},
6809   {"src1_sel",   AMDGPUOperand::ImmTySdwaSrc1Sel, false, nullptr},
6810   {"dst_unused", AMDGPUOperand::ImmTySdwaDstUnused, false, nullptr},
6811   {"compr", AMDGPUOperand::ImmTyExpCompr, true, nullptr },
6812   {"vm", AMDGPUOperand::ImmTyExpVM, true, nullptr},
6813   {"op_sel", AMDGPUOperand::ImmTyOpSel, false, nullptr},
6814   {"op_sel_hi", AMDGPUOperand::ImmTyOpSelHi, false, nullptr},
6815   {"neg_lo", AMDGPUOperand::ImmTyNegLo, false, nullptr},
6816   {"neg_hi", AMDGPUOperand::ImmTyNegHi, false, nullptr},
6817   {"blgp", AMDGPUOperand::ImmTyBLGP, false, nullptr},
6818   {"cbsz", AMDGPUOperand::ImmTyCBSZ, false, nullptr},
6819   {"abid", AMDGPUOperand::ImmTyABID, false, nullptr}
6820 };
6821 
6822 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOperand(OperandVector &Operands) {
6823 
6824   OperandMatchResultTy res = parseOptionalOpr(Operands);
6825 
  // This is a hack to enable hardcoded mandatory operands which follow
  // optional operands.
  //
  // The current design assumes that all operands after the first optional
  // operand are also optional. However, the implementation of some
  // instructions violates this rule (see e.g. flat/global atomics, which
  // have hardcoded 'glc' operands).
  //
  // To alleviate this problem, we have to (implicitly) parse extra operands
  // to make sure the autogenerated parser of custom operands never hits
  // hardcoded mandatory operands.
6836 
6837   for (unsigned i = 0; i < MAX_OPR_LOOKAHEAD; ++i) {
6838     if (res != MatchOperand_Success ||
6839         isToken(AsmToken::EndOfStatement))
6840       break;
6841 
6842     trySkipToken(AsmToken::Comma);
6843     res = parseOptionalOpr(Operands);
6844   }
6845 
6846   return res;
6847 }
6848 
6849 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOpr(OperandVector &Operands) {
6850   OperandMatchResultTy res;
6851   for (const OptionalOperand &Op : AMDGPUOptionalOperandTable) {
6852     // try to parse any optional operand here
6853     if (Op.IsBit) {
6854       res = parseNamedBit(Op.Name, Operands, Op.Type);
6855     } else if (Op.Type == AMDGPUOperand::ImmTyOModSI) {
6856       res = parseOModOperand(Operands);
6857     } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstSel ||
6858                Op.Type == AMDGPUOperand::ImmTySdwaSrc0Sel ||
6859                Op.Type == AMDGPUOperand::ImmTySdwaSrc1Sel) {
6860       res = parseSDWASel(Operands, Op.Name, Op.Type);
6861     } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstUnused) {
6862       res = parseSDWADstUnused(Operands);
6863     } else if (Op.Type == AMDGPUOperand::ImmTyOpSel ||
6864                Op.Type == AMDGPUOperand::ImmTyOpSelHi ||
6865                Op.Type == AMDGPUOperand::ImmTyNegLo ||
6866                Op.Type == AMDGPUOperand::ImmTyNegHi) {
6867       res = parseOperandArrayWithPrefix(Op.Name, Operands, Op.Type,
6868                                         Op.ConvertResult);
6869     } else if (Op.Type == AMDGPUOperand::ImmTyDim) {
6870       res = parseDim(Operands);
6871     } else {
6872       res = parseIntWithPrefix(Op.Name, Operands, Op.Type, Op.ConvertResult);
6873     }
6874     if (res != MatchOperand_NoMatch) {
6875       return res;
6876     }
6877   }
6878   return MatchOperand_NoMatch;
6879 }
6880 
6881 OperandMatchResultTy AMDGPUAsmParser::parseOModOperand(OperandVector &Operands) {
6882   StringRef Name = getTokenStr();
6883   if (Name == "mul") {
6884     return parseIntWithPrefix("mul", Operands,
6885                               AMDGPUOperand::ImmTyOModSI, ConvertOmodMul);
6886   }
6887 
6888   if (Name == "div") {
6889     return parseIntWithPrefix("div", Operands,
6890                               AMDGPUOperand::ImmTyOModSI, ConvertOmodDiv);
6891   }
6892 
6893   return MatchOperand_NoMatch;
6894 }
6895 
6896 void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands) {
6897   cvtVOP3P(Inst, Operands);
6898 
6899   int Opc = Inst.getOpcode();
6900 
6901   int SrcNum;
6902   const int Ops[] = { AMDGPU::OpName::src0,
6903                       AMDGPU::OpName::src1,
6904                       AMDGPU::OpName::src2 };
6905   for (SrcNum = 0;
6906        SrcNum < 3 && AMDGPU::getNamedOperandIdx(Opc, Ops[SrcNum]) != -1;
6907        ++SrcNum);
6908   assert(SrcNum > 0);
6909 
6910   int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
6911   unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
6912 
6913   if ((OpSel & (1 << SrcNum)) != 0) {
6914     int ModIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers);
6915     uint32_t ModVal = Inst.getOperand(ModIdx).getImm();
6916     Inst.getOperand(ModIdx).setImm(ModVal | SISrcMods::DST_OP_SEL);
6917   }
6918 }
6919 
6920 static bool isRegOrImmWithInputMods(const MCInstrDesc &Desc, unsigned OpNum) {
      // 1. This operand is an input modifiers operand
  return Desc.OpInfo[OpNum].OperandType == AMDGPU::OPERAND_INPUT_MODS
      // 2. This is not the last operand
      && Desc.NumOperands > (OpNum + 1)
      // 3. The next operand is a register class
      && Desc.OpInfo[OpNum + 1].RegClass != -1
      // 4. The next register is not tied to any other operand
      && Desc.getOperandConstraint(OpNum + 1, MCOI::OperandConstraint::TIED_TO) == -1;
6929 }
6930 
6931 void AMDGPUAsmParser::cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands)
6932 {
6933   OptionalImmIndexMap OptionalIdx;
6934   unsigned Opc = Inst.getOpcode();
6935 
6936   unsigned I = 1;
6937   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
6938   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
6939     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
6940   }
6941 
6942   for (unsigned E = Operands.size(); I != E; ++I) {
6943     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
6944     if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
6945       Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
6946     } else if (Op.isInterpSlot() ||
6947                Op.isInterpAttr() ||
6948                Op.isAttrChan()) {
6949       Inst.addOperand(MCOperand::createImm(Op.getImm()));
6950     } else if (Op.isImmModifier()) {
6951       OptionalIdx[Op.getImmTy()] = I;
6952     } else {
6953       llvm_unreachable("unhandled operand type");
6954     }
6955   }
6956 
6957   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::high) != -1) {
6958     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyHigh);
6959   }
6960 
6961   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) {
6962     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI);
6963   }
6964 
6965   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) {
6966     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI);
6967   }
6968 }
6969 
6970 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands,
6971                               OptionalImmIndexMap &OptionalIdx) {
6972   unsigned Opc = Inst.getOpcode();
6973 
6974   unsigned I = 1;
6975   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
6976   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
6977     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
6978   }
6979 
6980   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers) != -1) {
6981     // This instruction has src modifiers
6982     for (unsigned E = Operands.size(); I != E; ++I) {
6983       AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
6984       if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
6985         Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
6986       } else if (Op.isImmModifier()) {
6987         OptionalIdx[Op.getImmTy()] = I;
6988       } else if (Op.isRegOrImm()) {
6989         Op.addRegOrImmOperands(Inst, 1);
6990       } else {
6991         llvm_unreachable("unhandled operand type");
6992       }
6993     }
6994   } else {
6995     // No src modifiers
6996     for (unsigned E = Operands.size(); I != E; ++I) {
6997       AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
6998       if (Op.isMod()) {
6999         OptionalIdx[Op.getImmTy()] = I;
7000       } else {
7001         Op.addRegOrImmOperands(Inst, 1);
7002       }
7003     }
7004   }
7005 
7006   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) {
7007     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI);
7008   }
7009 
7010   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) {
7011     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI);
7012   }
7013 
  // Special case v_mac_{f16, f32} and v_fmac_{f16, f32} (gfx906/gfx10+):
  // they have a src2 register operand that is tied to the dst operand.
  // We don't allow modifiers for this operand in the assembler, so
  // src2_modifiers should be 0.
7018   if (Opc == AMDGPU::V_MAC_F32_e64_gfx6_gfx7 ||
7019       Opc == AMDGPU::V_MAC_F32_e64_gfx10 ||
7020       Opc == AMDGPU::V_MAC_F32_e64_vi ||
7021       Opc == AMDGPU::V_MAC_LEGACY_F32_e64_gfx6_gfx7 ||
7022       Opc == AMDGPU::V_MAC_LEGACY_F32_e64_gfx10 ||
7023       Opc == AMDGPU::V_MAC_F16_e64_vi ||
7024       Opc == AMDGPU::V_FMAC_F32_e64_gfx10 ||
7025       Opc == AMDGPU::V_FMAC_F32_e64_vi ||
7026       Opc == AMDGPU::V_FMAC_LEGACY_F32_e64_gfx10 ||
7027       Opc == AMDGPU::V_FMAC_F16_e64_gfx10) {
7028     auto it = Inst.begin();
7029     std::advance(it, AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2_modifiers));
7030     it = Inst.insert(it, MCOperand::createImm(0)); // no modifiers for src2
7031     ++it;
7032     // Copy the operand to ensure it's not invalidated when Inst grows.
7033     Inst.insert(it, MCOperand(Inst.getOperand(0))); // src2 = dst
7034   }
7035 }
7036 
7037 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands) {
7038   OptionalImmIndexMap OptionalIdx;
7039   cvtVOP3(Inst, Operands, OptionalIdx);
7040 }
7041 
7042 void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst,
7043                                const OperandVector &Operands) {
7044   OptionalImmIndexMap OptIdx;
7045   const int Opc = Inst.getOpcode();
7046   const MCInstrDesc &Desc = MII.get(Opc);
7047 
7048   const bool IsPacked = (Desc.TSFlags & SIInstrFlags::IsPacked) != 0;
7049 
7050   cvtVOP3(Inst, Operands, OptIdx);
7051 
7052   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst_in) != -1) {
7053     assert(!IsPacked);
7054     Inst.addOperand(Inst.getOperand(0));
7055   }
7056 
  // FIXME: This is messy. Parse the modifiers as if this were a normal VOP3
  // instruction, and then figure out where to actually put the modifiers.
7059 
7060   addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSel);
7061 
7062   int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi);
7063   if (OpSelHiIdx != -1) {
7064     int DefaultVal = IsPacked ? -1 : 0;
7065     addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSelHi,
7066                           DefaultVal);
7067   }
7068 
7069   int NegLoIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_lo);
7070   if (NegLoIdx != -1) {
7071     assert(IsPacked);
7072     addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegLo);
7073     addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegHi);
7074   }
7075 
7076   const int Ops[] = { AMDGPU::OpName::src0,
7077                       AMDGPU::OpName::src1,
7078                       AMDGPU::OpName::src2 };
7079   const int ModOps[] = { AMDGPU::OpName::src0_modifiers,
7080                          AMDGPU::OpName::src1_modifiers,
7081                          AMDGPU::OpName::src2_modifiers };
7082 
7083   int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
7084 
7085   unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
7086   unsigned OpSelHi = 0;
7087   unsigned NegLo = 0;
7088   unsigned NegHi = 0;
7089 
7090   if (OpSelHiIdx != -1) {
7091     OpSelHi = Inst.getOperand(OpSelHiIdx).getImm();
7092   }
7093 
7094   if (NegLoIdx != -1) {
7095     int NegHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_hi);
7096     NegLo = Inst.getOperand(NegLoIdx).getImm();
7097     NegHi = Inst.getOperand(NegHiIdx).getImm();
7098   }
7099 
7100   for (int J = 0; J < 3; ++J) {
7101     int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]);
7102     if (OpIdx == -1)
7103       break;
7104 
7105     uint32_t ModVal = 0;
7106 
7107     if ((OpSel & (1 << J)) != 0)
7108       ModVal |= SISrcMods::OP_SEL_0;
7109 
7110     if ((OpSelHi & (1 << J)) != 0)
7111       ModVal |= SISrcMods::OP_SEL_1;
7112 
7113     if ((NegLo & (1 << J)) != 0)
7114       ModVal |= SISrcMods::NEG;
7115 
7116     if ((NegHi & (1 << J)) != 0)
7117       ModVal |= SISrcMods::NEG_HI;
7118 
7119     int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]);
7120 
7121     Inst.getOperand(ModIdx).setImm(Inst.getOperand(ModIdx).getImm() | ModVal);
7122   }
7123 }
7124 
7125 //===----------------------------------------------------------------------===//
7126 // dpp
7127 //===----------------------------------------------------------------------===//
7128 
7129 bool AMDGPUOperand::isDPP8() const {
7130   return isImmTy(ImmTyDPP8);
7131 }
7132 
7133 bool AMDGPUOperand::isDPPCtrl() const {
7134   using namespace AMDGPU::DPP;
7135 
7136   bool result = isImm() && getImmTy() == ImmTyDppCtrl && isUInt<9>(getImm());
7137   if (result) {
7138     int64_t Imm = getImm();
7139     return (Imm >= DppCtrl::QUAD_PERM_FIRST && Imm <= DppCtrl::QUAD_PERM_LAST) ||
7140            (Imm >= DppCtrl::ROW_SHL_FIRST && Imm <= DppCtrl::ROW_SHL_LAST) ||
7141            (Imm >= DppCtrl::ROW_SHR_FIRST && Imm <= DppCtrl::ROW_SHR_LAST) ||
7142            (Imm >= DppCtrl::ROW_ROR_FIRST && Imm <= DppCtrl::ROW_ROR_LAST) ||
7143            (Imm == DppCtrl::WAVE_SHL1) ||
7144            (Imm == DppCtrl::WAVE_ROL1) ||
7145            (Imm == DppCtrl::WAVE_SHR1) ||
7146            (Imm == DppCtrl::WAVE_ROR1) ||
7147            (Imm == DppCtrl::ROW_MIRROR) ||
7148            (Imm == DppCtrl::ROW_HALF_MIRROR) ||
7149            (Imm == DppCtrl::BCAST15) ||
7150            (Imm == DppCtrl::BCAST31) ||
7151            (Imm >= DppCtrl::ROW_SHARE_FIRST && Imm <= DppCtrl::ROW_SHARE_LAST) ||
7152            (Imm >= DppCtrl::ROW_XMASK_FIRST && Imm <= DppCtrl::ROW_XMASK_LAST);
7153   }
7154   return false;
7155 }
7156 
7157 //===----------------------------------------------------------------------===//
7158 // mAI
7159 //===----------------------------------------------------------------------===//
7160 
7161 bool AMDGPUOperand::isBLGP() const {
7162   return isImm() && getImmTy() == ImmTyBLGP && isUInt<3>(getImm());
7163 }
7164 
7165 bool AMDGPUOperand::isCBSZ() const {
7166   return isImm() && getImmTy() == ImmTyCBSZ && isUInt<3>(getImm());
7167 }
7168 
7169 bool AMDGPUOperand::isABID() const {
7170   return isImm() && getImmTy() == ImmTyABID && isUInt<4>(getImm());
7171 }
7172 
7173 bool AMDGPUOperand::isS16Imm() const {
7174   return isImm() && (isInt<16>(getImm()) || isUInt<16>(getImm()));
7175 }
7176 
7177 bool AMDGPUOperand::isU16Imm() const {
7178   return isImm() && isUInt<16>(getImm());
7179 }
7180 
7181 //===----------------------------------------------------------------------===//
7182 // dim
7183 //===----------------------------------------------------------------------===//
7184 
7185 bool AMDGPUAsmParser::parseDimId(unsigned &Encoding) {
7186   // We want to allow "dim:1D" etc.,
7187   // but the initial 1 is tokenized as an integer.
7188   std::string Token;
7189   if (isToken(AsmToken::Integer)) {
7190     SMLoc Loc = getToken().getEndLoc();
7191     Token = std::string(getTokenStr());
7192     lex();
7193     if (getLoc() != Loc)
7194       return false;
7195   }
7196 
7197   StringRef Suffix;
7198   if (!parseId(Suffix))
7199     return false;
7200   Token += Suffix;
7201 
7202   StringRef DimId = Token;
7203   if (DimId.startswith("SQ_RSRC_IMG_"))
7204     DimId = DimId.drop_front(12);
7205 
7206   const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByAsmSuffix(DimId);
7207   if (!DimInfo)
7208     return false;
7209 
7210   Encoding = DimInfo->Encoding;
7211   return true;
7212 }
7213 
7214 OperandMatchResultTy AMDGPUAsmParser::parseDim(OperandVector &Operands) {
7215   if (!isGFX10Plus())
7216     return MatchOperand_NoMatch;
7217 
7218   SMLoc S = getLoc();
7219 
7220   if (!trySkipId("dim", AsmToken::Colon))
7221     return MatchOperand_NoMatch;
7222 
7223   unsigned Encoding;
7224   SMLoc Loc = getLoc();
7225   if (!parseDimId(Encoding)) {
7226     Error(Loc, "invalid dim value");
7227     return MatchOperand_ParseFail;
7228   }
7229 
7230   Operands.push_back(AMDGPUOperand::CreateImm(this, Encoding, S,
7231                                               AMDGPUOperand::ImmTyDim));
7232   return MatchOperand_Success;
7233 }
7234 
7235 //===----------------------------------------------------------------------===//
7236 // dpp
7237 //===----------------------------------------------------------------------===//
7238 
7239 OperandMatchResultTy AMDGPUAsmParser::parseDPP8(OperandVector &Operands) {
7240   SMLoc S = getLoc();
7241 
7242   if (!isGFX10Plus() || !trySkipId("dpp8", AsmToken::Colon))
7243     return MatchOperand_NoMatch;
7244 
7245   // dpp8:[%d,%d,%d,%d,%d,%d,%d,%d]
7246 
7247   int64_t Sels[8];
7248 
7249   if (!skipToken(AsmToken::LBrac, "expected an opening square bracket"))
7250     return MatchOperand_ParseFail;
7251 
7252   for (size_t i = 0; i < 8; ++i) {
7253     if (i > 0 && !skipToken(AsmToken::Comma, "expected a comma"))
7254       return MatchOperand_ParseFail;
7255 
7256     SMLoc Loc = getLoc();
7257     if (getParser().parseAbsoluteExpression(Sels[i]))
7258       return MatchOperand_ParseFail;
7259     if (0 > Sels[i] || 7 < Sels[i]) {
7260       Error(Loc, "expected a 3-bit value");
7261       return MatchOperand_ParseFail;
7262     }
7263   }
7264 
7265   if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
7266     return MatchOperand_ParseFail;
7267 
7268   unsigned DPP8 = 0;
7269   for (size_t i = 0; i < 8; ++i)
7270     DPP8 |= (Sels[i] << (i * 3));
7271 
7272   Operands.push_back(AMDGPUOperand::CreateImm(this, DPP8, S, AMDGPUOperand::ImmTyDPP8));
7273   return MatchOperand_Success;
7274 }
7275 
7276 bool
7277 AMDGPUAsmParser::isSupportedDPPCtrl(StringRef Ctrl,
7278                                     const OperandVector &Operands) {
7279   if (Ctrl == "row_share" ||
7280       Ctrl == "row_xmask")
7281     return isGFX10Plus();
7282 
7283   if (Ctrl == "wave_shl" ||
7284       Ctrl == "wave_shr" ||
7285       Ctrl == "wave_rol" ||
7286       Ctrl == "wave_ror" ||
7287       Ctrl == "row_bcast")
7288     return isVI() || isGFX9();
7289 
7290   return Ctrl == "row_mirror" ||
7291          Ctrl == "row_half_mirror" ||
7292          Ctrl == "quad_perm" ||
7293          Ctrl == "row_shl" ||
7294          Ctrl == "row_shr" ||
7295          Ctrl == "row_ror";
7296 }
7297 
7298 int64_t
7299 AMDGPUAsmParser::parseDPPCtrlPerm() {
7300   // quad_perm:[%d,%d,%d,%d]
7301 
7302   if (!skipToken(AsmToken::LBrac, "expected an opening square bracket"))
7303     return -1;
7304 
7305   int64_t Val = 0;
7306   for (int i = 0; i < 4; ++i) {
7307     if (i > 0 && !skipToken(AsmToken::Comma, "expected a comma"))
7308       return -1;
7309 
7310     int64_t Temp;
7311     SMLoc Loc = getLoc();
7312     if (getParser().parseAbsoluteExpression(Temp))
7313       return -1;
7314     if (Temp < 0 || Temp > 3) {
7315       Error(Loc, "expected a 2-bit value");
7316       return -1;
7317     }
7318 
7319     Val += (Temp << i * 2);
7320   }
7321 
7322   if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
7323     return -1;
7324 
7325   return Val;
7326 }
7327 
7328 int64_t
7329 AMDGPUAsmParser::parseDPPCtrlSel(StringRef Ctrl) {
7330   using namespace AMDGPU::DPP;
7331 
7332   // sel:%d
7333 
7334   int64_t Val;
7335   SMLoc Loc = getLoc();
7336 
7337   if (getParser().parseAbsoluteExpression(Val))
7338     return -1;
7339 
7340   struct DppCtrlCheck {
7341     int64_t Ctrl;
7342     int Lo;
7343     int Hi;
7344   };
7345 
7346   DppCtrlCheck Check = StringSwitch<DppCtrlCheck>(Ctrl)
7347     .Case("wave_shl",  {DppCtrl::WAVE_SHL1,       1,  1})
7348     .Case("wave_rol",  {DppCtrl::WAVE_ROL1,       1,  1})
7349     .Case("wave_shr",  {DppCtrl::WAVE_SHR1,       1,  1})
7350     .Case("wave_ror",  {DppCtrl::WAVE_ROR1,       1,  1})
7351     .Case("row_shl",   {DppCtrl::ROW_SHL0,        1, 15})
7352     .Case("row_shr",   {DppCtrl::ROW_SHR0,        1, 15})
7353     .Case("row_ror",   {DppCtrl::ROW_ROR0,        1, 15})
7354     .Case("row_share", {DppCtrl::ROW_SHARE_FIRST, 0, 15})
7355     .Case("row_xmask", {DppCtrl::ROW_XMASK_FIRST, 0, 15})
7356     .Default({-1, 0, 0});
7357 
7358   bool Valid;
7359   if (Check.Ctrl == -1) {
7360     Valid = (Ctrl == "row_bcast" && (Val == 15 || Val == 31));
7361     Val = (Val == 15)? DppCtrl::BCAST15 : DppCtrl::BCAST31;
7362   } else {
7363     Valid = Check.Lo <= Val && Val <= Check.Hi;
7364     Val = (Check.Lo == Check.Hi) ? Check.Ctrl : (Check.Ctrl | Val);
7365   }
7366 
7367   if (!Valid) {
7368     Error(Loc, Twine("invalid ", Ctrl) + Twine(" value"));
7369     return -1;
7370   }
7371 
7372   return Val;
7373 }
7374 
7375 OperandMatchResultTy
7376 AMDGPUAsmParser::parseDPPCtrl(OperandVector &Operands) {
7377   using namespace AMDGPU::DPP;
7378 
7379   if (!isToken(AsmToken::Identifier) ||
7380       !isSupportedDPPCtrl(getTokenStr(), Operands))
7381     return MatchOperand_NoMatch;
7382 
7383   SMLoc S = getLoc();
7384   int64_t Val = -1;
7385   StringRef Ctrl;
7386 
7387   parseId(Ctrl);
7388 
7389   if (Ctrl == "row_mirror") {
7390     Val = DppCtrl::ROW_MIRROR;
7391   } else if (Ctrl == "row_half_mirror") {
7392     Val = DppCtrl::ROW_HALF_MIRROR;
7393   } else {
7394     if (skipToken(AsmToken::Colon, "expected a colon")) {
7395       if (Ctrl == "quad_perm") {
7396         Val = parseDPPCtrlPerm();
7397       } else {
7398         Val = parseDPPCtrlSel(Ctrl);
7399       }
7400     }
7401   }
7402 
7403   if (Val == -1)
7404     return MatchOperand_ParseFail;
7405 
7406   Operands.push_back(
7407     AMDGPUOperand::CreateImm(this, Val, S, AMDGPUOperand::ImmTyDppCtrl));
7408   return MatchOperand_Success;
7409 }
7410 
7411 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultRowMask() const {
7412   return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppRowMask);
7413 }
7414 
7415 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultEndpgmImmOperands() const {
7416   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyEndpgm);
7417 }
7418 
7419 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBankMask() const {
7420   return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppBankMask);
7421 }
7422 
7423 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBoundCtrl() const {
7424   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppBoundCtrl);
7425 }
7426 
7427 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultFI() const {
7428   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppFi);
7429 }
7430 
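// Convert parsed DPP operands into MCInst operands. For dpp8 the lane selects
// are already packed into a single immediate; for dpp16 the optional row_mask,
// bank_mask, bound_ctrl and fi modifiers get default values if omitted.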
7431 void AMDGPUAsmParser::cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8) {
7432   OptionalImmIndexMap OptionalIdx;
7433 
7434   unsigned I = 1;
7435   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
7436   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
7437     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
7438   }
7439 
7440   int Fi = 0;
7441   for (unsigned E = Operands.size(); I != E; ++I) {
7442     auto TiedTo = Desc.getOperandConstraint(Inst.getNumOperands(),
7443                                             MCOI::TIED_TO);
7444     if (TiedTo != -1) {
7445       assert((unsigned)TiedTo < Inst.getNumOperands());
7446       // handle tied old or src2 for MAC instructions
7447       Inst.addOperand(Inst.getOperand(TiedTo));
7448     }
7449     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
7450     // Add the register arguments
7451     if (Op.isReg() && validateVccOperand(Op.getReg())) {
      // VOP2b (v_add_u32, v_sub_u32 ...) dpp uses the "vcc" token.
      // Skip it.
7454       continue;
7455     }
7456 
7457     if (IsDPP8) {
7458       if (Op.isDPP8()) {
7459         Op.addImmOperands(Inst, 1);
7460       } else if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
7461         Op.addRegWithFPInputModsOperands(Inst, 2);
7462       } else if (Op.isFI()) {
7463         Fi = Op.getImm();
7464       } else if (Op.isReg()) {
7465         Op.addRegOperands(Inst, 1);
7466       } else {
7467         llvm_unreachable("Invalid operand type");
7468       }
7469     } else {
7470       if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
7471         Op.addRegWithFPInputModsOperands(Inst, 2);
7472       } else if (Op.isDPPCtrl()) {
7473         Op.addImmOperands(Inst, 1);
7474       } else if (Op.isImm()) {
7475         // Handle optional arguments
7476         OptionalIdx[Op.getImmTy()] = I;
7477       } else {
7478         llvm_unreachable("Invalid operand type");
7479       }
7480     }
7481   }
7482 
7483   if (IsDPP8) {
7484     using namespace llvm::AMDGPU::DPP;
7485     Inst.addOperand(MCOperand::createImm(Fi? DPP8_FI_1 : DPP8_FI_0));
7486   } else {
7487     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppRowMask, 0xf);
7488     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBankMask, 0xf);
7489     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBoundCtrl);
7490     if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::fi) != -1) {
7491       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppFi);
7492     }
7493   }
7494 }
7495 
7496 //===----------------------------------------------------------------------===//
7497 // sdwa
7498 //===----------------------------------------------------------------------===//
7499 
7500 OperandMatchResultTy
7501 AMDGPUAsmParser::parseSDWASel(OperandVector &Operands, StringRef Prefix,
7502                               AMDGPUOperand::ImmTy Type) {
7503   using namespace llvm::AMDGPU::SDWA;
7504 
7505   SMLoc S = getLoc();
7506   StringRef Value;
7507   OperandMatchResultTy res;
7508 
7509   SMLoc StringLoc;
7510   res = parseStringWithPrefix(Prefix, Value, StringLoc);
7511   if (res != MatchOperand_Success) {
7512     return res;
7513   }
7514 
7515   int64_t Int;
7516   Int = StringSwitch<int64_t>(Value)
7517         .Case("BYTE_0", SdwaSel::BYTE_0)
7518         .Case("BYTE_1", SdwaSel::BYTE_1)
7519         .Case("BYTE_2", SdwaSel::BYTE_2)
7520         .Case("BYTE_3", SdwaSel::BYTE_3)
7521         .Case("WORD_0", SdwaSel::WORD_0)
7522         .Case("WORD_1", SdwaSel::WORD_1)
7523         .Case("DWORD", SdwaSel::DWORD)
7524         .Default(0xffffffff);
7525 
7526   if (Int == 0xffffffff) {
7527     Error(StringLoc, "invalid " + Twine(Prefix) + " value");
7528     return MatchOperand_ParseFail;
7529   }
7530 
7531   Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, Type));
7532   return MatchOperand_Success;
7533 }
7534 
7535 OperandMatchResultTy
7536 AMDGPUAsmParser::parseSDWADstUnused(OperandVector &Operands) {
7537   using namespace llvm::AMDGPU::SDWA;
7538 
7539   SMLoc S = getLoc();
7540   StringRef Value;
7541   OperandMatchResultTy res;
7542 
7543   SMLoc StringLoc;
7544   res = parseStringWithPrefix("dst_unused", Value, StringLoc);
7545   if (res != MatchOperand_Success) {
7546     return res;
7547   }
7548 
7549   int64_t Int;
7550   Int = StringSwitch<int64_t>(Value)
7551         .Case("UNUSED_PAD", DstUnused::UNUSED_PAD)
7552         .Case("UNUSED_SEXT", DstUnused::UNUSED_SEXT)
7553         .Case("UNUSED_PRESERVE", DstUnused::UNUSED_PRESERVE)
7554         .Default(0xffffffff);
7555 
7556   if (Int == 0xffffffff) {
7557     Error(StringLoc, "invalid dst_unused value");
7558     return MatchOperand_ParseFail;
7559   }
7560 
7561   Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, AMDGPUOperand::ImmTySdwaDstUnused));
7562   return MatchOperand_Success;
7563 }
7564 
7565 void AMDGPUAsmParser::cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands) {
7566   cvtSDWA(Inst, Operands, SIInstrFlags::VOP1);
7567 }
7568 
7569 void AMDGPUAsmParser::cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands) {
7570   cvtSDWA(Inst, Operands, SIInstrFlags::VOP2);
7571 }
7572 
7573 void AMDGPUAsmParser::cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands) {
7574   cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, true, true);
7575 }
7576 
7577 void AMDGPUAsmParser::cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands) {
7578   cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, false, true);
7579 }
7580 
7581 void AMDGPUAsmParser::cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands) {
7582   cvtSDWA(Inst, Operands, SIInstrFlags::VOPC, isVI());
7583 }
7584 
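// Convert parsed SDWA operands into MCInst operands. SkipDstVcc/SkipSrcVcc
// request dropping an explicit 'vcc' dst/src written in the source for VOP2b
// and VOP2e forms, where the MCInst does not carry an explicit vcc operand.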
7585 void AMDGPUAsmParser::cvtSDWA(MCInst &Inst, const OperandVector &Operands,
7586                               uint64_t BasicInstType,
7587                               bool SkipDstVcc,
7588                               bool SkipSrcVcc) {
7589   using namespace llvm::AMDGPU::SDWA;
7590 
7591   OptionalImmIndexMap OptionalIdx;
7592   bool SkipVcc = SkipDstVcc || SkipSrcVcc;
7593   bool SkippedVcc = false;
7594 
7595   unsigned I = 1;
7596   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
7597   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
7598     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
7599   }
7600 
7601   for (unsigned E = Operands.size(); I != E; ++I) {
7602     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
7603     if (SkipVcc && !SkippedVcc && Op.isReg() &&
7604         (Op.getReg() == AMDGPU::VCC || Op.getReg() == AMDGPU::VCC_LO)) {
      // VOP2b (v_add_u32, v_sub_u32 ...) sdwa uses the "vcc" token as dst.
      // Skip it if it is the 2nd (e.g. v_add_i32_sdwa v1, vcc, v2, v3)
      // or the 4th (v_addc_u32_sdwa v1, vcc, v2, v3, vcc) operand.
      // Skip VCC only if we didn't skip it on the previous iteration.
      // Note that src0 and src1 occupy 2 slots each because of modifiers.
7610       if (BasicInstType == SIInstrFlags::VOP2 &&
7611           ((SkipDstVcc && Inst.getNumOperands() == 1) ||
7612            (SkipSrcVcc && Inst.getNumOperands() == 5))) {
7613         SkippedVcc = true;
7614         continue;
7615       } else if (BasicInstType == SIInstrFlags::VOPC &&
7616                  Inst.getNumOperands() == 0) {
7617         SkippedVcc = true;
7618         continue;
7619       }
7620     }
7621     if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
7622       Op.addRegOrImmWithInputModsOperands(Inst, 2);
7623     } else if (Op.isImm()) {
7624       // Handle optional arguments
7625       OptionalIdx[Op.getImmTy()] = I;
7626     } else {
7627       llvm_unreachable("Invalid operand type");
7628     }
7629     SkippedVcc = false;
7630   }
7631 
7632   if (Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx10 &&
7633       Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx9 &&
7634       Inst.getOpcode() != AMDGPU::V_NOP_sdwa_vi) {
    // v_nop_sdwa_vi/gfx9/gfx10 have no optional sdwa arguments
7636     switch (BasicInstType) {
7637     case SIInstrFlags::VOP1:
7638       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
7639       if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) {
7640         addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
7641       }
7642       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD);
7643       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE);
7644       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
7645       break;
7646 
7647     case SIInstrFlags::VOP2:
7648       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
7649       if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) {
7650         addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
7651       }
7652       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD);
7653       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE);
7654       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
7655       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD);
7656       break;
7657 
7658     case SIInstrFlags::VOPC:
7659       if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::clamp) != -1)
7660         addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
7661       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
7662       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD);
7663       break;
7664 
7665     default:
7666       llvm_unreachable("Invalid instruction type. Only VOP1, VOP2 and VOPC allowed");
7667     }
7668   }
7669 
  // Special case v_mac_{f16, f32}:
  // they have a src2 register operand that is tied to the dst operand.
7672   if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
7673       Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi)  {
7674     auto it = Inst.begin();
7675     std::advance(
7676       it, AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::src2));
7677     Inst.insert(it, Inst.getOperand(0)); // src2 = dst
7678   }
7679 }
7680 
7681 //===----------------------------------------------------------------------===//
7682 // mAI
7683 //===----------------------------------------------------------------------===//
7684 
7685 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBLGP() const {
7686   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyBLGP);
7687 }
7688 
7689 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultCBSZ() const {
7690   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyCBSZ);
7691 }
7692 
7693 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultABID() const {
7694   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyABID);
7695 }
7696 
7697 /// Force static initialization.
7698 extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUAsmParser() {
7699   RegisterMCAsmParser<AMDGPUAsmParser> A(getTheAMDGPUTarget());
7700   RegisterMCAsmParser<AMDGPUAsmParser> B(getTheGCNTarget());
7701 }
7702 
7703 #define GET_REGISTER_MATCHER
7704 #define GET_MATCHER_IMPLEMENTATION
7705 #define GET_MNEMONIC_SPELL_CHECKER
7706 #define GET_MNEMONIC_CHECKER
7707 #include "AMDGPUGenAsmMatcher.inc"
7708 
// This function should be defined after the auto-generated include so that the
// MatchClassKind enum is defined.
7711 unsigned AMDGPUAsmParser::validateTargetOperandClass(MCParsedAsmOperand &Op,
7712                                                      unsigned Kind) {
  // Tokens like "glc" would be parsed as immediate operands in ParseOperand().
  // But MatchInstructionImpl() expects a token and fails to validate the
  // operand. This method checks if we were given an immediate operand but
  // expected to get the corresponding token.
7717   AMDGPUOperand &Operand = (AMDGPUOperand&)Op;
7718   switch (Kind) {
7719   case MCK_addr64:
7720     return Operand.isAddr64() ? Match_Success : Match_InvalidOperand;
7721   case MCK_gds:
7722     return Operand.isGDS() ? Match_Success : Match_InvalidOperand;
7723   case MCK_lds:
7724     return Operand.isLDS() ? Match_Success : Match_InvalidOperand;
7725   case MCK_glc:
7726     return Operand.isGLC() ? Match_Success : Match_InvalidOperand;
7727   case MCK_idxen:
7728     return Operand.isIdxen() ? Match_Success : Match_InvalidOperand;
7729   case MCK_offen:
7730     return Operand.isOffen() ? Match_Success : Match_InvalidOperand;
7731   case MCK_SSrcB32:
    // When operands have expression values, they will return true for isToken,
    // because it is not possible to distinguish between a token and an
    // expression at parse time. MatchInstructionImpl() will always try to
    // match an operand as a token when isToken returns true, and when the
    // name of the expression is not a valid token the match will fail,
    // so we need to handle it here.
7738     return Operand.isSSrcB32() ? Match_Success : Match_InvalidOperand;
7739   case MCK_SSrcF32:
7740     return Operand.isSSrcF32() ? Match_Success : Match_InvalidOperand;
7741   case MCK_SoppBrTarget:
7742     return Operand.isSoppBrTarget() ? Match_Success : Match_InvalidOperand;
7743   case MCK_VReg32OrOff:
7744     return Operand.isVReg32OrOff() ? Match_Success : Match_InvalidOperand;
7745   case MCK_InterpSlot:
7746     return Operand.isInterpSlot() ? Match_Success : Match_InvalidOperand;
7747   case MCK_Attr:
7748     return Operand.isInterpAttr() ? Match_Success : Match_InvalidOperand;
7749   case MCK_AttrChan:
7750     return Operand.isAttrChan() ? Match_Success : Match_InvalidOperand;
7751   case MCK_ImmSMEMOffset:
7752     return Operand.isSMEMOffset() ? Match_Success : Match_InvalidOperand;
7753   case MCK_SReg_64:
7754   case MCK_SReg_64_XEXEC:
7755     // Null is defined as a 32-bit register but
7756     // it should also be enabled with 64-bit operands.
7757     // The following code enables it for SReg_64 operands
7758     // used as source and destination. Remaining source
7759     // operands are handled in isInlinableImm.
7760     return Operand.isNull() ? Match_Success : Match_InvalidOperand;
7761   default:
7762     return Match_InvalidOperand;
7763   }
7764 }
7765 
7766 //===----------------------------------------------------------------------===//
7767 // endpgm
7768 //===----------------------------------------------------------------------===//
7769 
7770 OperandMatchResultTy AMDGPUAsmParser::parseEndpgmOp(OperandVector &Operands) {
7771   SMLoc S = getLoc();
7772   int64_t Imm = 0;
7773 
7774   if (!parseExpr(Imm)) {
7775     // The operand is optional, if not present default to 0
7776     Imm = 0;
7777   }
7778 
7779   if (!isUInt<16>(Imm)) {
7780     Error(S, "expected a 16-bit value");
7781     return MatchOperand_ParseFail;
7782   }
7783 
7784   Operands.push_back(
7785       AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyEndpgm));
7786   return MatchOperand_Success;
7787 }
7788 
7789 bool AMDGPUOperand::isEndpgm() const { return isImmTy(ImmTyEndpgm); }
7790