1 //===- AMDGPUAsmParser.cpp - Parse SI asm to MCInst instructions ----------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "AMDKernelCodeT.h"
10 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
11 #include "MCTargetDesc/AMDGPUTargetStreamer.h"
12 #include "SIDefines.h"
13 #include "SIInstrInfo.h"
14 #include "SIRegisterInfo.h"
15 #include "TargetInfo/AMDGPUTargetInfo.h"
16 #include "Utils/AMDGPUAsmUtils.h"
17 #include "Utils/AMDGPUBaseInfo.h"
18 #include "Utils/AMDKernelCodeTUtils.h"
19 #include "llvm/ADT/APFloat.h"
20 #include "llvm/ADT/SmallBitVector.h"
21 #include "llvm/ADT/StringSet.h"
22 #include "llvm/ADT/Twine.h"
23 #include "llvm/BinaryFormat/ELF.h"
24 #include "llvm/MC/MCAsmInfo.h"
25 #include "llvm/MC/MCContext.h"
26 #include "llvm/MC/MCExpr.h"
27 #include "llvm/MC/MCInst.h"
28 #include "llvm/MC/MCParser/MCAsmLexer.h"
29 #include "llvm/MC/MCParser/MCAsmParser.h"
30 #include "llvm/MC/MCParser/MCParsedAsmOperand.h"
31 #include "llvm/MC/MCParser/MCTargetAsmParser.h"
32 #include "llvm/MC/MCSymbol.h"
33 #include "llvm/MC/TargetRegistry.h"
34 #include "llvm/Support/AMDGPUMetadata.h"
35 #include "llvm/Support/AMDHSAKernelDescriptor.h"
36 #include "llvm/Support/Casting.h"
37 #include "llvm/Support/MachineValueType.h"
38 #include "llvm/Support/MathExtras.h"
39 #include "llvm/Support/TargetParser.h"
40 
41 using namespace llvm;
42 using namespace llvm::AMDGPU;
43 using namespace llvm::amdhsa;
44 
45 namespace {
46 
47 class AMDGPUAsmParser;
48 
49 enum RegisterKind { IS_UNKNOWN, IS_VGPR, IS_SGPR, IS_AGPR, IS_TTMP, IS_SPECIAL };
50 
51 //===----------------------------------------------------------------------===//
52 // Operand
53 //===----------------------------------------------------------------------===//
54 
55 class AMDGPUOperand : public MCParsedAsmOperand {
56   enum KindTy {
57     Token,
58     Immediate,
59     Register,
60     Expression
61   } Kind;
62 
63   SMLoc StartLoc, EndLoc;
64   const AMDGPUAsmParser *AsmParser;
65 
66 public:
67   AMDGPUOperand(KindTy Kind_, const AMDGPUAsmParser *AsmParser_)
68       : Kind(Kind_), AsmParser(AsmParser_) {}
69 
70   using Ptr = std::unique_ptr<AMDGPUOperand>;
71 
72   struct Modifiers {
73     bool Abs = false;
74     bool Neg = false;
75     bool Sext = false;
76 
77     bool hasFPModifiers() const { return Abs || Neg; }
78     bool hasIntModifiers() const { return Sext; }
79     bool hasModifiers() const { return hasFPModifiers() || hasIntModifiers(); }
80 
81     int64_t getFPModifiersOperand() const {
82       int64_t Operand = 0;
83       Operand |= Abs ? SISrcMods::ABS : 0u;
84       Operand |= Neg ? SISrcMods::NEG : 0u;
85       return Operand;
86     }
87 
88     int64_t getIntModifiersOperand() const {
89       int64_t Operand = 0;
90       Operand |= Sext ? SISrcMods::SEXT : 0u;
91       return Operand;
92     }
93 
94     int64_t getModifiersOperand() const {
95       assert(!(hasFPModifiers() && hasIntModifiers())
96            && "fp and int modifiers should not be used simultaneously");
97       if (hasFPModifiers()) {
98         return getFPModifiersOperand();
99       } else if (hasIntModifiers()) {
100         return getIntModifiersOperand();
101       } else {
102         return 0;
103       }
104     }
105 
106     friend raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods);
107   };
108 
109   enum ImmTy {
110     ImmTyNone,
111     ImmTyGDS,
112     ImmTyLDS,
113     ImmTyOffen,
114     ImmTyIdxen,
115     ImmTyAddr64,
116     ImmTyOffset,
117     ImmTyInstOffset,
118     ImmTyOffset0,
119     ImmTyOffset1,
120     ImmTyCPol,
121     ImmTySWZ,
122     ImmTyTFE,
123     ImmTyD16,
124     ImmTyClampSI,
125     ImmTyOModSI,
126     ImmTyDPP8,
127     ImmTyDppCtrl,
128     ImmTyDppRowMask,
129     ImmTyDppBankMask,
130     ImmTyDppBoundCtrl,
131     ImmTyDppFi,
132     ImmTySdwaDstSel,
133     ImmTySdwaSrc0Sel,
134     ImmTySdwaSrc1Sel,
135     ImmTySdwaDstUnused,
136     ImmTyDMask,
137     ImmTyDim,
138     ImmTyUNorm,
139     ImmTyDA,
140     ImmTyR128A16,
141     ImmTyA16,
142     ImmTyLWE,
143     ImmTyExpTgt,
144     ImmTyExpCompr,
145     ImmTyExpVM,
146     ImmTyFORMAT,
147     ImmTyHwreg,
148     ImmTyOff,
149     ImmTySendMsg,
150     ImmTyInterpSlot,
151     ImmTyInterpAttr,
152     ImmTyAttrChan,
153     ImmTyOpSel,
154     ImmTyOpSelHi,
155     ImmTyNegLo,
156     ImmTyNegHi,
157     ImmTySwizzle,
158     ImmTyGprIdxMode,
159     ImmTyHigh,
160     ImmTyBLGP,
161     ImmTyCBSZ,
162     ImmTyABID,
163     ImmTyEndpgm,
164   };
165 
166   enum ImmKindTy {
167     ImmKindTyNone,
168     ImmKindTyLiteral,
169     ImmKindTyConst,
170   };
171 
172 private:
173   struct TokOp {
174     const char *Data;
175     unsigned Length;
176   };
177 
178   struct ImmOp {
179     int64_t Val;
180     ImmTy Type;
181     bool IsFPImm;
182     mutable ImmKindTy Kind;
183     Modifiers Mods;
184   };
185 
186   struct RegOp {
187     unsigned RegNo;
188     Modifiers Mods;
189   };
190 
191   union {
192     TokOp Tok;
193     ImmOp Imm;
194     RegOp Reg;
195     const MCExpr *Expr;
196   };
197 
198 public:
199   bool isToken() const override {
200     if (Kind == Token)
201       return true;
202 
203     // When parsing operands, we can't always tell if something was meant to be
204     // a token, like 'gds', or an expression that references a global variable.
205     // In this case, we assume the string is an expression, and if we need to
206     // interpret is a token, then we treat the symbol name as the token.
207     return isSymbolRefExpr();
208   }
209 
210   bool isSymbolRefExpr() const {
211     return isExpr() && Expr && isa<MCSymbolRefExpr>(Expr);
212   }
213 
214   bool isImm() const override {
215     return Kind == Immediate;
216   }
217 
218   void setImmKindNone() const {
219     assert(isImm());
220     Imm.Kind = ImmKindTyNone;
221   }
222 
223   void setImmKindLiteral() const {
224     assert(isImm());
225     Imm.Kind = ImmKindTyLiteral;
226   }
227 
228   void setImmKindConst() const {
229     assert(isImm());
230     Imm.Kind = ImmKindTyConst;
231   }
232 
233   bool IsImmKindLiteral() const {
234     return isImm() && Imm.Kind == ImmKindTyLiteral;
235   }
236 
237   bool isImmKindConst() const {
238     return isImm() && Imm.Kind == ImmKindTyConst;
239   }
240 
241   bool isInlinableImm(MVT type) const;
242   bool isLiteralImm(MVT type) const;
243 
244   bool isRegKind() const {
245     return Kind == Register;
246   }
247 
248   bool isReg() const override {
249     return isRegKind() && !hasModifiers();
250   }
251 
252   bool isRegOrInline(unsigned RCID, MVT type) const {
253     return isRegClass(RCID) || isInlinableImm(type);
254   }
255 
256   bool isRegOrImmWithInputMods(unsigned RCID, MVT type) const {
257     return isRegOrInline(RCID, type) || isLiteralImm(type);
258   }
259 
260   bool isRegOrImmWithInt16InputMods() const {
261     return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i16);
262   }
263 
264   bool isRegOrImmWithInt32InputMods() const {
265     return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i32);
266   }
267 
268   bool isRegOrImmWithInt64InputMods() const {
269     return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::i64);
270   }
271 
272   bool isRegOrImmWithFP16InputMods() const {
273     return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f16);
274   }
275 
276   bool isRegOrImmWithFP32InputMods() const {
277     return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f32);
278   }
279 
280   bool isRegOrImmWithFP64InputMods() const {
281     return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::f64);
282   }
283 
284   bool isVReg() const {
285     return isRegClass(AMDGPU::VGPR_32RegClassID) ||
286            isRegClass(AMDGPU::VReg_64RegClassID) ||
287            isRegClass(AMDGPU::VReg_96RegClassID) ||
288            isRegClass(AMDGPU::VReg_128RegClassID) ||
289            isRegClass(AMDGPU::VReg_160RegClassID) ||
290            isRegClass(AMDGPU::VReg_192RegClassID) ||
291            isRegClass(AMDGPU::VReg_256RegClassID) ||
292            isRegClass(AMDGPU::VReg_512RegClassID) ||
293            isRegClass(AMDGPU::VReg_1024RegClassID);
294   }
295 
296   bool isVReg32() const {
297     return isRegClass(AMDGPU::VGPR_32RegClassID);
298   }
299 
300   bool isVReg32OrOff() const {
301     return isOff() || isVReg32();
302   }
303 
304   bool isNull() const {
305     return isRegKind() && getReg() == AMDGPU::SGPR_NULL;
306   }
307 
308   bool isVRegWithInputMods() const;
309 
310   bool isSDWAOperand(MVT type) const;
311   bool isSDWAFP16Operand() const;
312   bool isSDWAFP32Operand() const;
313   bool isSDWAInt16Operand() const;
314   bool isSDWAInt32Operand() const;
315 
316   bool isImmTy(ImmTy ImmT) const {
317     return isImm() && Imm.Type == ImmT;
318   }
319 
320   bool isImmModifier() const {
321     return isImm() && Imm.Type != ImmTyNone;
322   }
323 
324   bool isClampSI() const { return isImmTy(ImmTyClampSI); }
325   bool isOModSI() const { return isImmTy(ImmTyOModSI); }
326   bool isDMask() const { return isImmTy(ImmTyDMask); }
327   bool isDim() const { return isImmTy(ImmTyDim); }
328   bool isUNorm() const { return isImmTy(ImmTyUNorm); }
329   bool isDA() const { return isImmTy(ImmTyDA); }
330   bool isR128A16() const { return isImmTy(ImmTyR128A16); }
331   bool isGFX10A16() const { return isImmTy(ImmTyA16); }
332   bool isLWE() const { return isImmTy(ImmTyLWE); }
333   bool isOff() const { return isImmTy(ImmTyOff); }
334   bool isExpTgt() const { return isImmTy(ImmTyExpTgt); }
335   bool isExpVM() const { return isImmTy(ImmTyExpVM); }
336   bool isExpCompr() const { return isImmTy(ImmTyExpCompr); }
337   bool isOffen() const { return isImmTy(ImmTyOffen); }
338   bool isIdxen() const { return isImmTy(ImmTyIdxen); }
339   bool isAddr64() const { return isImmTy(ImmTyAddr64); }
340   bool isOffset() const { return isImmTy(ImmTyOffset) && isUInt<16>(getImm()); }
341   bool isOffset0() const { return isImmTy(ImmTyOffset0) && isUInt<8>(getImm()); }
342   bool isOffset1() const { return isImmTy(ImmTyOffset1) && isUInt<8>(getImm()); }
343 
344   bool isFlatOffset() const { return isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset); }
345   bool isGDS() const { return isImmTy(ImmTyGDS); }
346   bool isLDS() const { return isImmTy(ImmTyLDS); }
347   bool isCPol() const { return isImmTy(ImmTyCPol); }
348   bool isSWZ() const { return isImmTy(ImmTySWZ); }
349   bool isTFE() const { return isImmTy(ImmTyTFE); }
350   bool isD16() const { return isImmTy(ImmTyD16); }
351   bool isFORMAT() const { return isImmTy(ImmTyFORMAT) && isUInt<7>(getImm()); }
352   bool isBankMask() const { return isImmTy(ImmTyDppBankMask); }
353   bool isRowMask() const { return isImmTy(ImmTyDppRowMask); }
354   bool isBoundCtrl() const { return isImmTy(ImmTyDppBoundCtrl); }
355   bool isFI() const { return isImmTy(ImmTyDppFi); }
356   bool isSDWADstSel() const { return isImmTy(ImmTySdwaDstSel); }
357   bool isSDWASrc0Sel() const { return isImmTy(ImmTySdwaSrc0Sel); }
358   bool isSDWASrc1Sel() const { return isImmTy(ImmTySdwaSrc1Sel); }
359   bool isSDWADstUnused() const { return isImmTy(ImmTySdwaDstUnused); }
360   bool isInterpSlot() const { return isImmTy(ImmTyInterpSlot); }
361   bool isInterpAttr() const { return isImmTy(ImmTyInterpAttr); }
362   bool isAttrChan() const { return isImmTy(ImmTyAttrChan); }
363   bool isOpSel() const { return isImmTy(ImmTyOpSel); }
364   bool isOpSelHi() const { return isImmTy(ImmTyOpSelHi); }
365   bool isNegLo() const { return isImmTy(ImmTyNegLo); }
366   bool isNegHi() const { return isImmTy(ImmTyNegHi); }
367   bool isHigh() const { return isImmTy(ImmTyHigh); }
368 
369   bool isMod() const {
370     return isClampSI() || isOModSI();
371   }
372 
373   bool isRegOrImm() const {
374     return isReg() || isImm();
375   }
376 
377   bool isRegClass(unsigned RCID) const;
378 
379   bool isInlineValue() const;
380 
381   bool isRegOrInlineNoMods(unsigned RCID, MVT type) const {
382     return isRegOrInline(RCID, type) && !hasModifiers();
383   }
384 
385   bool isSCSrcB16() const {
386     return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i16);
387   }
388 
389   bool isSCSrcV2B16() const {
390     return isSCSrcB16();
391   }
392 
393   bool isSCSrcB32() const {
394     return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i32);
395   }
396 
397   bool isSCSrcB64() const {
398     return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::i64);
399   }
400 
401   bool isBoolReg() const;
402 
403   bool isSCSrcF16() const {
404     return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f16);
405   }
406 
407   bool isSCSrcV2F16() const {
408     return isSCSrcF16();
409   }
410 
411   bool isSCSrcF32() const {
412     return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f32);
413   }
414 
415   bool isSCSrcF64() const {
416     return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::f64);
417   }
418 
419   bool isSSrcB32() const {
420     return isSCSrcB32() || isLiteralImm(MVT::i32) || isExpr();
421   }
422 
423   bool isSSrcB16() const {
424     return isSCSrcB16() || isLiteralImm(MVT::i16);
425   }
426 
427   bool isSSrcV2B16() const {
428     llvm_unreachable("cannot happen");
429     return isSSrcB16();
430   }
431 
432   bool isSSrcB64() const {
433     // TODO: Find out how SALU supports extension of 32-bit literals to 64 bits.
434     // See isVSrc64().
435     return isSCSrcB64() || isLiteralImm(MVT::i64);
436   }
437 
438   bool isSSrcF32() const {
439     return isSCSrcB32() || isLiteralImm(MVT::f32) || isExpr();
440   }
441 
442   bool isSSrcF64() const {
443     return isSCSrcB64() || isLiteralImm(MVT::f64);
444   }
445 
446   bool isSSrcF16() const {
447     return isSCSrcB16() || isLiteralImm(MVT::f16);
448   }
449 
450   bool isSSrcV2F16() const {
451     llvm_unreachable("cannot happen");
452     return isSSrcF16();
453   }
454 
455   bool isSSrcV2FP32() const {
456     llvm_unreachable("cannot happen");
457     return isSSrcF32();
458   }
459 
460   bool isSCSrcV2FP32() const {
461     llvm_unreachable("cannot happen");
462     return isSCSrcF32();
463   }
464 
465   bool isSSrcV2INT32() const {
466     llvm_unreachable("cannot happen");
467     return isSSrcB32();
468   }
469 
470   bool isSCSrcV2INT32() const {
471     llvm_unreachable("cannot happen");
472     return isSCSrcB32();
473   }
474 
475   bool isSSrcOrLdsB32() const {
476     return isRegOrInlineNoMods(AMDGPU::SRegOrLds_32RegClassID, MVT::i32) ||
477            isLiteralImm(MVT::i32) || isExpr();
478   }
479 
480   bool isVCSrcB32() const {
481     return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i32);
482   }
483 
484   bool isVCSrcB64() const {
485     return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::i64);
486   }
487 
488   bool isVCSrcB16() const {
489     return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i16);
490   }
491 
492   bool isVCSrcV2B16() const {
493     return isVCSrcB16();
494   }
495 
496   bool isVCSrcF32() const {
497     return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f32);
498   }
499 
500   bool isVCSrcF64() const {
501     return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::f64);
502   }
503 
504   bool isVCSrcF16() const {
505     return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f16);
506   }
507 
508   bool isVCSrcV2F16() const {
509     return isVCSrcF16();
510   }
511 
512   bool isVSrcB32() const {
513     return isVCSrcF32() || isLiteralImm(MVT::i32) || isExpr();
514   }
515 
516   bool isVSrcB64() const {
517     return isVCSrcF64() || isLiteralImm(MVT::i64);
518   }
519 
520   bool isVSrcB16() const {
521     return isVCSrcB16() || isLiteralImm(MVT::i16);
522   }
523 
524   bool isVSrcV2B16() const {
525     return isVSrcB16() || isLiteralImm(MVT::v2i16);
526   }
527 
528   bool isVCSrcV2FP32() const {
529     return isVCSrcF64();
530   }
531 
532   bool isVSrcV2FP32() const {
533     return isVSrcF64() || isLiteralImm(MVT::v2f32);
534   }
535 
536   bool isVCSrcV2INT32() const {
537     return isVCSrcB64();
538   }
539 
540   bool isVSrcV2INT32() const {
541     return isVSrcB64() || isLiteralImm(MVT::v2i32);
542   }
543 
544   bool isVSrcF32() const {
545     return isVCSrcF32() || isLiteralImm(MVT::f32) || isExpr();
546   }
547 
548   bool isVSrcF64() const {
549     return isVCSrcF64() || isLiteralImm(MVT::f64);
550   }
551 
552   bool isVSrcF16() const {
553     return isVCSrcF16() || isLiteralImm(MVT::f16);
554   }
555 
556   bool isVSrcV2F16() const {
557     return isVSrcF16() || isLiteralImm(MVT::v2f16);
558   }
559 
560   bool isVISrcB32() const {
561     return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i32);
562   }
563 
564   bool isVISrcB16() const {
565     return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i16);
566   }
567 
568   bool isVISrcV2B16() const {
569     return isVISrcB16();
570   }
571 
572   bool isVISrcF32() const {
573     return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f32);
574   }
575 
576   bool isVISrcF16() const {
577     return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f16);
578   }
579 
580   bool isVISrcV2F16() const {
581     return isVISrcF16() || isVISrcB32();
582   }
583 
584   bool isVISrc_64B64() const {
585     return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i64);
586   }
587 
588   bool isVISrc_64F64() const {
589     return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f64);
590   }
591 
592   bool isVISrc_64V2FP32() const {
593     return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f32);
594   }
595 
596   bool isVISrc_64V2INT32() const {
597     return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i32);
598   }
599 
600   bool isVISrc_256B64() const {
601     return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i64);
602   }
603 
604   bool isVISrc_256F64() const {
605     return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f64);
606   }
607 
608   bool isVISrc_128B16() const {
609     return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i16);
610   }
611 
612   bool isVISrc_128V2B16() const {
613     return isVISrc_128B16();
614   }
615 
616   bool isVISrc_128B32() const {
617     return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i32);
618   }
619 
620   bool isVISrc_128F32() const {
621     return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f32);
622   }
623 
624   bool isVISrc_256V2FP32() const {
625     return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f32);
626   }
627 
628   bool isVISrc_256V2INT32() const {
629     return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i32);
630   }
631 
632   bool isVISrc_512B32() const {
633     return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i32);
634   }
635 
636   bool isVISrc_512B16() const {
637     return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i16);
638   }
639 
640   bool isVISrc_512V2B16() const {
641     return isVISrc_512B16();
642   }
643 
644   bool isVISrc_512F32() const {
645     return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f32);
646   }
647 
648   bool isVISrc_512F16() const {
649     return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f16);
650   }
651 
652   bool isVISrc_512V2F16() const {
653     return isVISrc_512F16() || isVISrc_512B32();
654   }
655 
656   bool isVISrc_1024B32() const {
657     return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i32);
658   }
659 
660   bool isVISrc_1024B16() const {
661     return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i16);
662   }
663 
664   bool isVISrc_1024V2B16() const {
665     return isVISrc_1024B16();
666   }
667 
668   bool isVISrc_1024F32() const {
669     return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f32);
670   }
671 
672   bool isVISrc_1024F16() const {
673     return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f16);
674   }
675 
676   bool isVISrc_1024V2F16() const {
677     return isVISrc_1024F16() || isVISrc_1024B32();
678   }
679 
680   bool isAISrcB32() const {
681     return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i32);
682   }
683 
684   bool isAISrcB16() const {
685     return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i16);
686   }
687 
688   bool isAISrcV2B16() const {
689     return isAISrcB16();
690   }
691 
692   bool isAISrcF32() const {
693     return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f32);
694   }
695 
696   bool isAISrcF16() const {
697     return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f16);
698   }
699 
700   bool isAISrcV2F16() const {
701     return isAISrcF16() || isAISrcB32();
702   }
703 
704   bool isAISrc_64B64() const {
705     return isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::i64);
706   }
707 
708   bool isAISrc_64F64() const {
709     return isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::f64);
710   }
711 
712   bool isAISrc_128B32() const {
713     return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i32);
714   }
715 
716   bool isAISrc_128B16() const {
717     return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i16);
718   }
719 
720   bool isAISrc_128V2B16() const {
721     return isAISrc_128B16();
722   }
723 
724   bool isAISrc_128F32() const {
725     return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f32);
726   }
727 
728   bool isAISrc_128F16() const {
729     return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f16);
730   }
731 
732   bool isAISrc_128V2F16() const {
733     return isAISrc_128F16() || isAISrc_128B32();
734   }
735 
736   bool isVISrc_128F16() const {
737     return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f16);
738   }
739 
740   bool isVISrc_128V2F16() const {
741     return isVISrc_128F16() || isVISrc_128B32();
742   }
743 
744   bool isAISrc_256B64() const {
745     return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::i64);
746   }
747 
748   bool isAISrc_256F64() const {
749     return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::f64);
750   }
751 
752   bool isAISrc_512B32() const {
753     return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i32);
754   }
755 
756   bool isAISrc_512B16() const {
757     return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i16);
758   }
759 
760   bool isAISrc_512V2B16() const {
761     return isAISrc_512B16();
762   }
763 
764   bool isAISrc_512F32() const {
765     return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f32);
766   }
767 
768   bool isAISrc_512F16() const {
769     return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f16);
770   }
771 
772   bool isAISrc_512V2F16() const {
773     return isAISrc_512F16() || isAISrc_512B32();
774   }
775 
776   bool isAISrc_1024B32() const {
777     return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i32);
778   }
779 
780   bool isAISrc_1024B16() const {
781     return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i16);
782   }
783 
784   bool isAISrc_1024V2B16() const {
785     return isAISrc_1024B16();
786   }
787 
788   bool isAISrc_1024F32() const {
789     return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f32);
790   }
791 
792   bool isAISrc_1024F16() const {
793     return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f16);
794   }
795 
796   bool isAISrc_1024V2F16() const {
797     return isAISrc_1024F16() || isAISrc_1024B32();
798   }
799 
800   bool isKImmFP32() const {
801     return isLiteralImm(MVT::f32);
802   }
803 
804   bool isKImmFP16() const {
805     return isLiteralImm(MVT::f16);
806   }
807 
808   bool isMem() const override {
809     return false;
810   }
811 
812   bool isExpr() const {
813     return Kind == Expression;
814   }
815 
816   bool isSoppBrTarget() const {
817     return isExpr() || isImm();
818   }
819 
820   bool isSWaitCnt() const;
821   bool isDepCtr() const;
822   bool isSDelayAlu() const;
823   bool isHwreg() const;
824   bool isSendMsg() const;
825   bool isSwizzle() const;
826   bool isSMRDOffset8() const;
827   bool isSMEMOffset() const;
828   bool isSMRDLiteralOffset() const;
829   bool isDPP8() const;
830   bool isDPPCtrl() const;
831   bool isBLGP() const;
832   bool isCBSZ() const;
833   bool isABID() const;
834   bool isGPRIdxMode() const;
835   bool isS16Imm() const;
836   bool isU16Imm() const;
837   bool isEndpgm() const;
838 
839   StringRef getExpressionAsToken() const {
840     assert(isExpr());
841     const MCSymbolRefExpr *S = cast<MCSymbolRefExpr>(Expr);
842     return S->getSymbol().getName();
843   }
844 
845   StringRef getToken() const {
846     assert(isToken());
847 
848     if (Kind == Expression)
849       return getExpressionAsToken();
850 
851     return StringRef(Tok.Data, Tok.Length);
852   }
853 
854   int64_t getImm() const {
855     assert(isImm());
856     return Imm.Val;
857   }
858 
859   void setImm(int64_t Val) {
860     assert(isImm());
861     Imm.Val = Val;
862   }
863 
864   ImmTy getImmTy() const {
865     assert(isImm());
866     return Imm.Type;
867   }
868 
869   unsigned getReg() const override {
870     assert(isRegKind());
871     return Reg.RegNo;
872   }
873 
874   SMLoc getStartLoc() const override {
875     return StartLoc;
876   }
877 
878   SMLoc getEndLoc() const override {
879     return EndLoc;
880   }
881 
882   SMRange getLocRange() const {
883     return SMRange(StartLoc, EndLoc);
884   }
885 
886   Modifiers getModifiers() const {
887     assert(isRegKind() || isImmTy(ImmTyNone));
888     return isRegKind() ? Reg.Mods : Imm.Mods;
889   }
890 
891   void setModifiers(Modifiers Mods) {
892     assert(isRegKind() || isImmTy(ImmTyNone));
893     if (isRegKind())
894       Reg.Mods = Mods;
895     else
896       Imm.Mods = Mods;
897   }
898 
899   bool hasModifiers() const {
900     return getModifiers().hasModifiers();
901   }
902 
903   bool hasFPModifiers() const {
904     return getModifiers().hasFPModifiers();
905   }
906 
907   bool hasIntModifiers() const {
908     return getModifiers().hasIntModifiers();
909   }
910 
911   uint64_t applyInputFPModifiers(uint64_t Val, unsigned Size) const;
912 
913   void addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers = true) const;
914 
915   void addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const;
916 
917   template <unsigned Bitwidth>
918   void addKImmFPOperands(MCInst &Inst, unsigned N) const;
919 
920   void addKImmFP16Operands(MCInst &Inst, unsigned N) const {
921     addKImmFPOperands<16>(Inst, N);
922   }
923 
924   void addKImmFP32Operands(MCInst &Inst, unsigned N) const {
925     addKImmFPOperands<32>(Inst, N);
926   }
927 
928   void addRegOperands(MCInst &Inst, unsigned N) const;
929 
930   void addBoolRegOperands(MCInst &Inst, unsigned N) const {
931     addRegOperands(Inst, N);
932   }
933 
934   void addRegOrImmOperands(MCInst &Inst, unsigned N) const {
935     if (isRegKind())
936       addRegOperands(Inst, N);
937     else if (isExpr())
938       Inst.addOperand(MCOperand::createExpr(Expr));
939     else
940       addImmOperands(Inst, N);
941   }
942 
943   void addRegOrImmWithInputModsOperands(MCInst &Inst, unsigned N) const {
944     Modifiers Mods = getModifiers();
945     Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
946     if (isRegKind()) {
947       addRegOperands(Inst, N);
948     } else {
949       addImmOperands(Inst, N, false);
950     }
951   }
952 
953   void addRegOrImmWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
954     assert(!hasIntModifiers());
955     addRegOrImmWithInputModsOperands(Inst, N);
956   }
957 
958   void addRegOrImmWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
959     assert(!hasFPModifiers());
960     addRegOrImmWithInputModsOperands(Inst, N);
961   }
962 
963   void addRegWithInputModsOperands(MCInst &Inst, unsigned N) const {
964     Modifiers Mods = getModifiers();
965     Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
966     assert(isRegKind());
967     addRegOperands(Inst, N);
968   }
969 
970   void addRegWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
971     assert(!hasIntModifiers());
972     addRegWithInputModsOperands(Inst, N);
973   }
974 
975   void addRegWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
976     assert(!hasFPModifiers());
977     addRegWithInputModsOperands(Inst, N);
978   }
979 
980   void addSoppBrTargetOperands(MCInst &Inst, unsigned N) const {
981     if (isImm())
982       addImmOperands(Inst, N);
983     else {
984       assert(isExpr());
985       Inst.addOperand(MCOperand::createExpr(Expr));
986     }
987   }
988 
989   static void printImmTy(raw_ostream& OS, ImmTy Type) {
990     switch (Type) {
991     case ImmTyNone: OS << "None"; break;
992     case ImmTyGDS: OS << "GDS"; break;
993     case ImmTyLDS: OS << "LDS"; break;
994     case ImmTyOffen: OS << "Offen"; break;
995     case ImmTyIdxen: OS << "Idxen"; break;
996     case ImmTyAddr64: OS << "Addr64"; break;
997     case ImmTyOffset: OS << "Offset"; break;
998     case ImmTyInstOffset: OS << "InstOffset"; break;
999     case ImmTyOffset0: OS << "Offset0"; break;
1000     case ImmTyOffset1: OS << "Offset1"; break;
1001     case ImmTyCPol: OS << "CPol"; break;
1002     case ImmTySWZ: OS << "SWZ"; break;
1003     case ImmTyTFE: OS << "TFE"; break;
1004     case ImmTyD16: OS << "D16"; break;
1005     case ImmTyFORMAT: OS << "FORMAT"; break;
1006     case ImmTyClampSI: OS << "ClampSI"; break;
1007     case ImmTyOModSI: OS << "OModSI"; break;
1008     case ImmTyDPP8: OS << "DPP8"; break;
1009     case ImmTyDppCtrl: OS << "DppCtrl"; break;
1010     case ImmTyDppRowMask: OS << "DppRowMask"; break;
1011     case ImmTyDppBankMask: OS << "DppBankMask"; break;
1012     case ImmTyDppBoundCtrl: OS << "DppBoundCtrl"; break;
1013     case ImmTyDppFi: OS << "FI"; break;
1014     case ImmTySdwaDstSel: OS << "SdwaDstSel"; break;
1015     case ImmTySdwaSrc0Sel: OS << "SdwaSrc0Sel"; break;
1016     case ImmTySdwaSrc1Sel: OS << "SdwaSrc1Sel"; break;
1017     case ImmTySdwaDstUnused: OS << "SdwaDstUnused"; break;
1018     case ImmTyDMask: OS << "DMask"; break;
1019     case ImmTyDim: OS << "Dim"; break;
1020     case ImmTyUNorm: OS << "UNorm"; break;
1021     case ImmTyDA: OS << "DA"; break;
1022     case ImmTyR128A16: OS << "R128A16"; break;
1023     case ImmTyA16: OS << "A16"; break;
1024     case ImmTyLWE: OS << "LWE"; break;
1025     case ImmTyOff: OS << "Off"; break;
1026     case ImmTyExpTgt: OS << "ExpTgt"; break;
1027     case ImmTyExpCompr: OS << "ExpCompr"; break;
1028     case ImmTyExpVM: OS << "ExpVM"; break;
1029     case ImmTyHwreg: OS << "Hwreg"; break;
1030     case ImmTySendMsg: OS << "SendMsg"; break;
1031     case ImmTyInterpSlot: OS << "InterpSlot"; break;
1032     case ImmTyInterpAttr: OS << "InterpAttr"; break;
1033     case ImmTyAttrChan: OS << "AttrChan"; break;
1034     case ImmTyOpSel: OS << "OpSel"; break;
1035     case ImmTyOpSelHi: OS << "OpSelHi"; break;
1036     case ImmTyNegLo: OS << "NegLo"; break;
1037     case ImmTyNegHi: OS << "NegHi"; break;
1038     case ImmTySwizzle: OS << "Swizzle"; break;
1039     case ImmTyGprIdxMode: OS << "GprIdxMode"; break;
1040     case ImmTyHigh: OS << "High"; break;
1041     case ImmTyBLGP: OS << "BLGP"; break;
1042     case ImmTyCBSZ: OS << "CBSZ"; break;
1043     case ImmTyABID: OS << "ABID"; break;
1044     case ImmTyEndpgm: OS << "Endpgm"; break;
1045     }
1046   }
1047 
1048   void print(raw_ostream &OS) const override {
1049     switch (Kind) {
1050     case Register:
1051       OS << "<register " << getReg() << " mods: " << Reg.Mods << '>';
1052       break;
1053     case Immediate:
1054       OS << '<' << getImm();
1055       if (getImmTy() != ImmTyNone) {
1056         OS << " type: "; printImmTy(OS, getImmTy());
1057       }
1058       OS << " mods: " << Imm.Mods << '>';
1059       break;
1060     case Token:
1061       OS << '\'' << getToken() << '\'';
1062       break;
1063     case Expression:
1064       OS << "<expr " << *Expr << '>';
1065       break;
1066     }
1067   }
1068 
1069   static AMDGPUOperand::Ptr CreateImm(const AMDGPUAsmParser *AsmParser,
1070                                       int64_t Val, SMLoc Loc,
1071                                       ImmTy Type = ImmTyNone,
1072                                       bool IsFPImm = false) {
1073     auto Op = std::make_unique<AMDGPUOperand>(Immediate, AsmParser);
1074     Op->Imm.Val = Val;
1075     Op->Imm.IsFPImm = IsFPImm;
1076     Op->Imm.Kind = ImmKindTyNone;
1077     Op->Imm.Type = Type;
1078     Op->Imm.Mods = Modifiers();
1079     Op->StartLoc = Loc;
1080     Op->EndLoc = Loc;
1081     return Op;
1082   }
1083 
1084   static AMDGPUOperand::Ptr CreateToken(const AMDGPUAsmParser *AsmParser,
1085                                         StringRef Str, SMLoc Loc,
1086                                         bool HasExplicitEncodingSize = true) {
1087     auto Res = std::make_unique<AMDGPUOperand>(Token, AsmParser);
1088     Res->Tok.Data = Str.data();
1089     Res->Tok.Length = Str.size();
1090     Res->StartLoc = Loc;
1091     Res->EndLoc = Loc;
1092     return Res;
1093   }
1094 
1095   static AMDGPUOperand::Ptr CreateReg(const AMDGPUAsmParser *AsmParser,
1096                                       unsigned RegNo, SMLoc S,
1097                                       SMLoc E) {
1098     auto Op = std::make_unique<AMDGPUOperand>(Register, AsmParser);
1099     Op->Reg.RegNo = RegNo;
1100     Op->Reg.Mods = Modifiers();
1101     Op->StartLoc = S;
1102     Op->EndLoc = E;
1103     return Op;
1104   }
1105 
1106   static AMDGPUOperand::Ptr CreateExpr(const AMDGPUAsmParser *AsmParser,
1107                                        const class MCExpr *Expr, SMLoc S) {
1108     auto Op = std::make_unique<AMDGPUOperand>(Expression, AsmParser);
1109     Op->Expr = Expr;
1110     Op->StartLoc = S;
1111     Op->EndLoc = S;
1112     return Op;
1113   }
1114 };
1115 
1116 raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods) {
1117   OS << "abs:" << Mods.Abs << " neg: " << Mods.Neg << " sext:" << Mods.Sext;
1118   return OS;
1119 }
1120 
1121 //===----------------------------------------------------------------------===//
1122 // AsmParser
1123 //===----------------------------------------------------------------------===//
1124 
1125 // Holds info related to the current kernel, e.g. count of SGPRs used.
1126 // Kernel scope begins at .amdgpu_hsa_kernel directive, ends at next
1127 // .amdgpu_hsa_kernel or at EOF.
1128 class KernelScopeInfo {
1129   int SgprIndexUnusedMin = -1;
1130   int VgprIndexUnusedMin = -1;
1131   int AgprIndexUnusedMin = -1;
1132   MCContext *Ctx = nullptr;
1133   MCSubtargetInfo const *MSTI = nullptr;
1134 
1135   void usesSgprAt(int i) {
1136     if (i >= SgprIndexUnusedMin) {
1137       SgprIndexUnusedMin = ++i;
1138       if (Ctx) {
1139         MCSymbol* const Sym =
1140           Ctx->getOrCreateSymbol(Twine(".kernel.sgpr_count"));
1141         Sym->setVariableValue(MCConstantExpr::create(SgprIndexUnusedMin, *Ctx));
1142       }
1143     }
1144   }
1145 
1146   void usesVgprAt(int i) {
1147     if (i >= VgprIndexUnusedMin) {
1148       VgprIndexUnusedMin = ++i;
1149       if (Ctx) {
1150         MCSymbol* const Sym =
1151           Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count"));
1152         int totalVGPR = getTotalNumVGPRs(isGFX90A(*MSTI), AgprIndexUnusedMin,
1153                                          VgprIndexUnusedMin);
1154         Sym->setVariableValue(MCConstantExpr::create(totalVGPR, *Ctx));
1155       }
1156     }
1157   }
1158 
1159   void usesAgprAt(int i) {
1160     // Instruction will error in AMDGPUAsmParser::MatchAndEmitInstruction
1161     if (!hasMAIInsts(*MSTI))
1162       return;
1163 
1164     if (i >= AgprIndexUnusedMin) {
1165       AgprIndexUnusedMin = ++i;
1166       if (Ctx) {
1167         MCSymbol* const Sym =
1168           Ctx->getOrCreateSymbol(Twine(".kernel.agpr_count"));
1169         Sym->setVariableValue(MCConstantExpr::create(AgprIndexUnusedMin, *Ctx));
1170 
1171         // Also update vgpr_count (dependent on agpr_count for gfx908/gfx90a)
1172         MCSymbol* const vSym =
1173           Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count"));
1174         int totalVGPR = getTotalNumVGPRs(isGFX90A(*MSTI), AgprIndexUnusedMin,
1175                                          VgprIndexUnusedMin);
1176         vSym->setVariableValue(MCConstantExpr::create(totalVGPR, *Ctx));
1177       }
1178     }
1179   }
1180 
1181 public:
1182   KernelScopeInfo() = default;
1183 
1184   void initialize(MCContext &Context) {
1185     Ctx = &Context;
1186     MSTI = Ctx->getSubtargetInfo();
1187 
1188     usesSgprAt(SgprIndexUnusedMin = -1);
1189     usesVgprAt(VgprIndexUnusedMin = -1);
1190     if (hasMAIInsts(*MSTI)) {
1191       usesAgprAt(AgprIndexUnusedMin = -1);
1192     }
1193   }
1194 
1195   void usesRegister(RegisterKind RegKind, unsigned DwordRegIndex,
1196                     unsigned RegWidth) {
1197     switch (RegKind) {
1198     case IS_SGPR:
1199       usesSgprAt(DwordRegIndex + divideCeil(RegWidth, 32) - 1);
1200       break;
1201     case IS_AGPR:
1202       usesAgprAt(DwordRegIndex + divideCeil(RegWidth, 32) - 1);
1203       break;
1204     case IS_VGPR:
1205       usesVgprAt(DwordRegIndex + divideCeil(RegWidth, 32) - 1);
1206       break;
1207     default:
1208       break;
1209     }
1210   }
1211 };
1212 
1213 class AMDGPUAsmParser : public MCTargetAsmParser {
1214   MCAsmParser &Parser;
1215 
1216   // Number of extra operands parsed after the first optional operand.
1217   // This may be necessary to skip hardcoded mandatory operands.
1218   static const unsigned MAX_OPR_LOOKAHEAD = 8;
1219 
1220   unsigned ForcedEncodingSize = 0;
1221   bool ForcedDPP = false;
1222   bool ForcedSDWA = false;
1223   KernelScopeInfo KernelScope;
1224   unsigned CPolSeen;
1225 
1226   /// @name Auto-generated Match Functions
1227   /// {
1228 
1229 #define GET_ASSEMBLER_HEADER
1230 #include "AMDGPUGenAsmMatcher.inc"
1231 
1232   /// }
1233 
1234 private:
1235   bool ParseAsAbsoluteExpression(uint32_t &Ret);
1236   bool OutOfRangeError(SMRange Range);
1237   /// Calculate VGPR/SGPR blocks required for given target, reserved
1238   /// registers, and user-specified NextFreeXGPR values.
1239   ///
1240   /// \param Features [in] Target features, used for bug corrections.
1241   /// \param VCCUsed [in] Whether VCC special SGPR is reserved.
1242   /// \param FlatScrUsed [in] Whether FLAT_SCRATCH special SGPR is reserved.
1243   /// \param XNACKUsed [in] Whether XNACK_MASK special SGPR is reserved.
1244   /// \param EnableWavefrontSize32 [in] Value of ENABLE_WAVEFRONT_SIZE32 kernel
1245   /// descriptor field, if valid.
1246   /// \param NextFreeVGPR [in] Max VGPR number referenced, plus one.
1247   /// \param VGPRRange [in] Token range, used for VGPR diagnostics.
1248   /// \param NextFreeSGPR [in] Max SGPR number referenced, plus one.
1249   /// \param SGPRRange [in] Token range, used for SGPR diagnostics.
1250   /// \param VGPRBlocks [out] Result VGPR block count.
1251   /// \param SGPRBlocks [out] Result SGPR block count.
1252   bool calculateGPRBlocks(const FeatureBitset &Features, bool VCCUsed,
1253                           bool FlatScrUsed, bool XNACKUsed,
1254                           Optional<bool> EnableWavefrontSize32, unsigned NextFreeVGPR,
1255                           SMRange VGPRRange, unsigned NextFreeSGPR,
1256                           SMRange SGPRRange, unsigned &VGPRBlocks,
1257                           unsigned &SGPRBlocks);
1258   bool ParseDirectiveAMDGCNTarget();
1259   bool ParseDirectiveAMDHSAKernel();
1260   bool ParseDirectiveMajorMinor(uint32_t &Major, uint32_t &Minor);
1261   bool ParseDirectiveHSACodeObjectVersion();
1262   bool ParseDirectiveHSACodeObjectISA();
1263   bool ParseAMDKernelCodeTValue(StringRef ID, amd_kernel_code_t &Header);
1264   bool ParseDirectiveAMDKernelCodeT();
1265   // TODO: Possibly make subtargetHasRegister const.
1266   bool subtargetHasRegister(const MCRegisterInfo &MRI, unsigned RegNo);
1267   bool ParseDirectiveAMDGPUHsaKernel();
1268 
1269   bool ParseDirectiveISAVersion();
1270   bool ParseDirectiveHSAMetadata();
1271   bool ParseDirectivePALMetadataBegin();
1272   bool ParseDirectivePALMetadata();
1273   bool ParseDirectiveAMDGPULDS();
1274 
1275   /// Common code to parse out a block of text (typically YAML) between start and
1276   /// end directives.
1277   bool ParseToEndDirective(const char *AssemblerDirectiveBegin,
1278                            const char *AssemblerDirectiveEnd,
1279                            std::string &CollectString);
1280 
1281   bool AddNextRegisterToList(unsigned& Reg, unsigned& RegWidth,
1282                              RegisterKind RegKind, unsigned Reg1, SMLoc Loc);
1283   bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
1284                            unsigned &RegNum, unsigned &RegWidth,
1285                            bool RestoreOnFailure = false);
1286   bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
1287                            unsigned &RegNum, unsigned &RegWidth,
1288                            SmallVectorImpl<AsmToken> &Tokens);
1289   unsigned ParseRegularReg(RegisterKind &RegKind, unsigned &RegNum,
1290                            unsigned &RegWidth,
1291                            SmallVectorImpl<AsmToken> &Tokens);
1292   unsigned ParseSpecialReg(RegisterKind &RegKind, unsigned &RegNum,
1293                            unsigned &RegWidth,
1294                            SmallVectorImpl<AsmToken> &Tokens);
1295   unsigned ParseRegList(RegisterKind &RegKind, unsigned &RegNum,
1296                         unsigned &RegWidth, SmallVectorImpl<AsmToken> &Tokens);
1297   bool ParseRegRange(unsigned& Num, unsigned& Width);
1298   unsigned getRegularReg(RegisterKind RegKind,
1299                          unsigned RegNum,
1300                          unsigned RegWidth,
1301                          SMLoc Loc);
1302 
1303   bool isRegister();
1304   bool isRegister(const AsmToken &Token, const AsmToken &NextToken) const;
1305   Optional<StringRef> getGprCountSymbolName(RegisterKind RegKind);
1306   void initializeGprCountSymbol(RegisterKind RegKind);
1307   bool updateGprCountSymbols(RegisterKind RegKind, unsigned DwordRegIndex,
1308                              unsigned RegWidth);
1309   void cvtMubufImpl(MCInst &Inst, const OperandVector &Operands,
1310                     bool IsAtomic, bool IsLds = false);
1311   void cvtDSImpl(MCInst &Inst, const OperandVector &Operands,
1312                  bool IsGdsHardcoded);
1313 
1314 public:
1315   enum AMDGPUMatchResultTy {
1316     Match_PreferE32 = FIRST_TARGET_MATCH_RESULT_TY
1317   };
1318   enum OperandMode {
1319     OperandMode_Default,
1320     OperandMode_NSA,
1321   };
1322 
1323   using OptionalImmIndexMap = std::map<AMDGPUOperand::ImmTy, unsigned>;
1324 
1325   AMDGPUAsmParser(const MCSubtargetInfo &STI, MCAsmParser &_Parser,
1326                const MCInstrInfo &MII,
1327                const MCTargetOptions &Options)
1328       : MCTargetAsmParser(Options, STI, MII), Parser(_Parser) {
1329     MCAsmParserExtension::Initialize(Parser);
1330 
1331     if (getFeatureBits().none()) {
1332       // Set default features.
1333       copySTI().ToggleFeature("southern-islands");
1334     }
1335 
1336     setAvailableFeatures(ComputeAvailableFeatures(getFeatureBits()));
1337 
1338     {
1339       // TODO: make those pre-defined variables read-only.
1340       // Currently there is none suitable machinery in the core llvm-mc for this.
1341       // MCSymbol::isRedefinable is intended for another purpose, and
1342       // AsmParser::parseDirectiveSet() cannot be specialized for specific target.
1343       AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
1344       MCContext &Ctx = getContext();
1345       if (ISA.Major >= 6 && isHsaAbiVersion3AndAbove(&getSTI())) {
1346         MCSymbol *Sym =
1347             Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_number"));
1348         Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
1349         Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_minor"));
1350         Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
1351         Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_stepping"));
1352         Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
1353       } else {
1354         MCSymbol *Sym =
1355             Ctx.getOrCreateSymbol(Twine(".option.machine_version_major"));
1356         Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
1357         Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_minor"));
1358         Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
1359         Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_stepping"));
1360         Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
1361       }
1362       if (ISA.Major >= 6 && isHsaAbiVersion3AndAbove(&getSTI())) {
1363         initializeGprCountSymbol(IS_VGPR);
1364         initializeGprCountSymbol(IS_SGPR);
1365       } else
1366         KernelScope.initialize(getContext());
1367     }
1368   }
1369 
1370   bool hasMIMG_R128() const {
1371     return AMDGPU::hasMIMG_R128(getSTI());
1372   }
1373 
1374   bool hasPackedD16() const {
1375     return AMDGPU::hasPackedD16(getSTI());
1376   }
1377 
1378   bool hasGFX10A16() const {
1379     return AMDGPU::hasGFX10A16(getSTI());
1380   }
1381 
1382   bool hasG16() const { return AMDGPU::hasG16(getSTI()); }
1383 
1384   bool isSI() const {
1385     return AMDGPU::isSI(getSTI());
1386   }
1387 
1388   bool isCI() const {
1389     return AMDGPU::isCI(getSTI());
1390   }
1391 
1392   bool isVI() const {
1393     return AMDGPU::isVI(getSTI());
1394   }
1395 
1396   bool isGFX9() const {
1397     return AMDGPU::isGFX9(getSTI());
1398   }
1399 
1400   // TODO: isGFX90A is also true for GFX940. We need to clean it.
1401   bool isGFX90A() const {
1402     return AMDGPU::isGFX90A(getSTI());
1403   }
1404 
1405   bool isGFX940() const {
1406     return AMDGPU::isGFX940(getSTI());
1407   }
1408 
1409   bool isGFX9Plus() const {
1410     return AMDGPU::isGFX9Plus(getSTI());
1411   }
1412 
1413   bool isGFX10() const {
1414     return AMDGPU::isGFX10(getSTI());
1415   }
1416 
1417   bool isGFX10Plus() const { return AMDGPU::isGFX10Plus(getSTI()); }
1418 
1419   bool isGFX11() const {
1420     return AMDGPU::isGFX11(getSTI());
1421   }
1422 
1423   bool isGFX11Plus() const {
1424     return AMDGPU::isGFX11Plus(getSTI());
1425   }
1426 
1427   bool isGFX10_BEncoding() const {
1428     return AMDGPU::isGFX10_BEncoding(getSTI());
1429   }
1430 
1431   bool hasInv2PiInlineImm() const {
1432     return getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm];
1433   }
1434 
1435   bool hasFlatOffsets() const {
1436     return getFeatureBits()[AMDGPU::FeatureFlatInstOffsets];
1437   }
1438 
1439   bool hasArchitectedFlatScratch() const {
1440     return getFeatureBits()[AMDGPU::FeatureArchitectedFlatScratch];
1441   }
1442 
1443   bool hasSGPR102_SGPR103() const {
1444     return !isVI() && !isGFX9();
1445   }
1446 
1447   bool hasSGPR104_SGPR105() const { return isGFX10Plus(); }
1448 
1449   bool hasIntClamp() const {
1450     return getFeatureBits()[AMDGPU::FeatureIntClamp];
1451   }
1452 
1453   AMDGPUTargetStreamer &getTargetStreamer() {
1454     MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer();
1455     return static_cast<AMDGPUTargetStreamer &>(TS);
1456   }
1457 
1458   const MCRegisterInfo *getMRI() const {
1459     // We need this const_cast because for some reason getContext() is not const
1460     // in MCAsmParser.
1461     return const_cast<AMDGPUAsmParser*>(this)->getContext().getRegisterInfo();
1462   }
1463 
1464   const MCInstrInfo *getMII() const {
1465     return &MII;
1466   }
1467 
1468   const FeatureBitset &getFeatureBits() const {
1469     return getSTI().getFeatureBits();
1470   }
1471 
1472   void setForcedEncodingSize(unsigned Size) { ForcedEncodingSize = Size; }
1473   void setForcedDPP(bool ForceDPP_) { ForcedDPP = ForceDPP_; }
1474   void setForcedSDWA(bool ForceSDWA_) { ForcedSDWA = ForceSDWA_; }
1475 
1476   unsigned getForcedEncodingSize() const { return ForcedEncodingSize; }
1477   bool isForcedVOP3() const { return ForcedEncodingSize == 64; }
1478   bool isForcedDPP() const { return ForcedDPP; }
1479   bool isForcedSDWA() const { return ForcedSDWA; }
1480   ArrayRef<unsigned> getMatchedVariants() const;
1481   StringRef getMatchedVariantName() const;
1482 
1483   std::unique_ptr<AMDGPUOperand> parseRegister(bool RestoreOnFailure = false);
1484   bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc,
1485                      bool RestoreOnFailure);
1486   bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override;
1487   OperandMatchResultTy tryParseRegister(unsigned &RegNo, SMLoc &StartLoc,
1488                                         SMLoc &EndLoc) override;
1489   unsigned checkTargetMatchPredicate(MCInst &Inst) override;
1490   unsigned validateTargetOperandClass(MCParsedAsmOperand &Op,
1491                                       unsigned Kind) override;
1492   bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
1493                                OperandVector &Operands, MCStreamer &Out,
1494                                uint64_t &ErrorInfo,
1495                                bool MatchingInlineAsm) override;
1496   bool ParseDirective(AsmToken DirectiveID) override;
1497   OperandMatchResultTy parseOperand(OperandVector &Operands, StringRef Mnemonic,
1498                                     OperandMode Mode = OperandMode_Default);
1499   StringRef parseMnemonicSuffix(StringRef Name);
1500   bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
1501                         SMLoc NameLoc, OperandVector &Operands) override;
1502   //bool ProcessInstruction(MCInst &Inst);
1503 
1504   OperandMatchResultTy parseIntWithPrefix(const char *Prefix, int64_t &Int);
1505 
1506   OperandMatchResultTy
1507   parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
1508                      AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
1509                      bool (*ConvertResult)(int64_t &) = nullptr);
1510 
1511   OperandMatchResultTy
1512   parseOperandArrayWithPrefix(const char *Prefix,
1513                               OperandVector &Operands,
1514                               AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
1515                               bool (*ConvertResult)(int64_t&) = nullptr);
1516 
1517   OperandMatchResultTy
1518   parseNamedBit(StringRef Name, OperandVector &Operands,
1519                 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone);
1520   OperandMatchResultTy parseCPol(OperandVector &Operands);
1521   OperandMatchResultTy parseStringWithPrefix(StringRef Prefix,
1522                                              StringRef &Value,
1523                                              SMLoc &StringLoc);
1524 
1525   bool isModifier();
1526   bool isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1527   bool isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1528   bool isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1529   bool isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const;
1530   bool parseSP3NegModifier();
1531   OperandMatchResultTy parseImm(OperandVector &Operands, bool HasSP3AbsModifier = false);
1532   OperandMatchResultTy parseReg(OperandVector &Operands);
1533   OperandMatchResultTy parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod = false);
1534   OperandMatchResultTy parseRegOrImmWithFPInputMods(OperandVector &Operands, bool AllowImm = true);
1535   OperandMatchResultTy parseRegOrImmWithIntInputMods(OperandVector &Operands, bool AllowImm = true);
1536   OperandMatchResultTy parseRegWithFPInputMods(OperandVector &Operands);
1537   OperandMatchResultTy parseRegWithIntInputMods(OperandVector &Operands);
1538   OperandMatchResultTy parseVReg32OrOff(OperandVector &Operands);
1539   OperandMatchResultTy parseDfmtNfmt(int64_t &Format);
1540   OperandMatchResultTy parseUfmt(int64_t &Format);
1541   OperandMatchResultTy parseSymbolicSplitFormat(StringRef FormatStr, SMLoc Loc, int64_t &Format);
1542   OperandMatchResultTy parseSymbolicUnifiedFormat(StringRef FormatStr, SMLoc Loc, int64_t &Format);
1543   OperandMatchResultTy parseFORMAT(OperandVector &Operands);
1544   OperandMatchResultTy parseSymbolicOrNumericFormat(int64_t &Format);
1545   OperandMatchResultTy parseNumericFormat(int64_t &Format);
1546   bool tryParseFmt(const char *Pref, int64_t MaxVal, int64_t &Val);
1547   bool matchDfmtNfmt(int64_t &Dfmt, int64_t &Nfmt, StringRef FormatStr, SMLoc Loc);
1548 
1549   void cvtDSOffset01(MCInst &Inst, const OperandVector &Operands);
1550   void cvtDS(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, false); }
1551   void cvtDSGds(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, true); }
1552   void cvtExp(MCInst &Inst, const OperandVector &Operands);
1553 
1554   bool parseCnt(int64_t &IntVal);
1555   OperandMatchResultTy parseSWaitCntOps(OperandVector &Operands);
1556 
1557   bool parseDepCtr(int64_t &IntVal, unsigned &Mask);
1558   void depCtrError(SMLoc Loc, int ErrorId, StringRef DepCtrName);
1559   OperandMatchResultTy parseDepCtrOps(OperandVector &Operands);
1560 
1561   bool parseDelay(int64_t &Delay);
1562   OperandMatchResultTy parseSDelayAluOps(OperandVector &Operands);
1563 
1564   OperandMatchResultTy parseHwreg(OperandVector &Operands);
1565 
1566 private:
1567   struct OperandInfoTy {
1568     SMLoc Loc;
1569     int64_t Id;
1570     bool IsSymbolic = false;
1571     bool IsDefined = false;
1572 
1573     OperandInfoTy(int64_t Id_) : Id(Id_) {}
1574   };
1575 
1576   bool parseSendMsgBody(OperandInfoTy &Msg, OperandInfoTy &Op, OperandInfoTy &Stream);
1577   bool validateSendMsg(const OperandInfoTy &Msg,
1578                        const OperandInfoTy &Op,
1579                        const OperandInfoTy &Stream);
1580 
1581   bool parseHwregBody(OperandInfoTy &HwReg,
1582                       OperandInfoTy &Offset,
1583                       OperandInfoTy &Width);
1584   bool validateHwreg(const OperandInfoTy &HwReg,
1585                      const OperandInfoTy &Offset,
1586                      const OperandInfoTy &Width);
1587 
1588   SMLoc getFlatOffsetLoc(const OperandVector &Operands) const;
1589   SMLoc getSMEMOffsetLoc(const OperandVector &Operands) const;
1590   SMLoc getBLGPLoc(const OperandVector &Operands) const;
1591 
1592   SMLoc getOperandLoc(std::function<bool(const AMDGPUOperand&)> Test,
1593                       const OperandVector &Operands) const;
1594   SMLoc getImmLoc(AMDGPUOperand::ImmTy Type, const OperandVector &Operands) const;
1595   SMLoc getRegLoc(unsigned Reg, const OperandVector &Operands) const;
1596   SMLoc getLitLoc(const OperandVector &Operands) const;
1597   SMLoc getConstLoc(const OperandVector &Operands) const;
1598 
1599   bool validateInstruction(const MCInst &Inst, const SMLoc &IDLoc, const OperandVector &Operands);
1600   bool validateFlatOffset(const MCInst &Inst, const OperandVector &Operands);
1601   bool validateSMEMOffset(const MCInst &Inst, const OperandVector &Operands);
1602   bool validateSOPLiteral(const MCInst &Inst) const;
1603   bool validateConstantBusLimitations(const MCInst &Inst, const OperandVector &Operands);
1604   bool validateEarlyClobberLimitations(const MCInst &Inst, const OperandVector &Operands);
1605   bool validateIntClampSupported(const MCInst &Inst);
1606   bool validateMIMGAtomicDMask(const MCInst &Inst);
1607   bool validateMIMGGatherDMask(const MCInst &Inst);
1608   bool validateMovrels(const MCInst &Inst, const OperandVector &Operands);
1609   Optional<StringRef> validateMIMGDataSize(const MCInst &Inst);
1610   bool validateMIMGAddrSize(const MCInst &Inst);
1611   bool validateMIMGD16(const MCInst &Inst);
1612   bool validateMIMGDim(const MCInst &Inst);
1613   bool validateMIMGMSAA(const MCInst &Inst);
1614   bool validateOpSel(const MCInst &Inst);
1615   bool validateDPP(const MCInst &Inst, const OperandVector &Operands);
1616   bool validateVccOperand(unsigned Reg) const;
1617   bool validateVOPLiteral(const MCInst &Inst, const OperandVector &Operands);
1618   bool validateMAIAccWrite(const MCInst &Inst, const OperandVector &Operands);
1619   bool validateMFMA(const MCInst &Inst, const OperandVector &Operands);
1620   bool validateAGPRLdSt(const MCInst &Inst) const;
1621   bool validateVGPRAlign(const MCInst &Inst) const;
1622   bool validateBLGP(const MCInst &Inst, const OperandVector &Operands);
1623   bool validateGWS(const MCInst &Inst, const OperandVector &Operands);
1624   bool validateDivScale(const MCInst &Inst);
1625   bool validateCoherencyBits(const MCInst &Inst, const OperandVector &Operands,
1626                              const SMLoc &IDLoc);
1627   Optional<StringRef> validateLdsDirect(const MCInst &Inst);
1628   unsigned getConstantBusLimit(unsigned Opcode) const;
1629   bool usesConstantBus(const MCInst &Inst, unsigned OpIdx);
1630   bool isInlineConstant(const MCInst &Inst, unsigned OpIdx) const;
1631   unsigned findImplicitSGPRReadInVOP(const MCInst &Inst) const;
1632 
1633   bool isSupportedMnemo(StringRef Mnemo,
1634                         const FeatureBitset &FBS);
1635   bool isSupportedMnemo(StringRef Mnemo,
1636                         const FeatureBitset &FBS,
1637                         ArrayRef<unsigned> Variants);
1638   bool checkUnsupportedInstruction(StringRef Name, const SMLoc &IDLoc);
1639 
1640   bool isId(const StringRef Id) const;
1641   bool isId(const AsmToken &Token, const StringRef Id) const;
1642   bool isToken(const AsmToken::TokenKind Kind) const;
1643   bool trySkipId(const StringRef Id);
1644   bool trySkipId(const StringRef Pref, const StringRef Id);
1645   bool trySkipId(const StringRef Id, const AsmToken::TokenKind Kind);
1646   bool trySkipToken(const AsmToken::TokenKind Kind);
1647   bool skipToken(const AsmToken::TokenKind Kind, const StringRef ErrMsg);
1648   bool parseString(StringRef &Val, const StringRef ErrMsg = "expected a string");
1649   bool parseId(StringRef &Val, const StringRef ErrMsg = "");
1650 
1651   void peekTokens(MutableArrayRef<AsmToken> Tokens);
1652   AsmToken::TokenKind getTokenKind() const;
1653   bool parseExpr(int64_t &Imm, StringRef Expected = "");
1654   bool parseExpr(OperandVector &Operands);
1655   StringRef getTokenStr() const;
1656   AsmToken peekToken();
1657   AsmToken getToken() const;
1658   SMLoc getLoc() const;
1659   void lex();
1660 
1661 public:
1662   void onBeginOfFile() override;
1663 
1664   OperandMatchResultTy parseOptionalOperand(OperandVector &Operands);
1665   OperandMatchResultTy parseOptionalOpr(OperandVector &Operands);
1666 
1667   OperandMatchResultTy parseExpTgt(OperandVector &Operands);
1668   OperandMatchResultTy parseSendMsgOp(OperandVector &Operands);
1669   OperandMatchResultTy parseInterpSlot(OperandVector &Operands);
1670   OperandMatchResultTy parseInterpAttr(OperandVector &Operands);
1671   OperandMatchResultTy parseSOppBrTarget(OperandVector &Operands);
1672   OperandMatchResultTy parseBoolReg(OperandVector &Operands);
1673 
1674   bool parseSwizzleOperand(int64_t &Op,
1675                            const unsigned MinVal,
1676                            const unsigned MaxVal,
1677                            const StringRef ErrMsg,
1678                            SMLoc &Loc);
1679   bool parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
1680                             const unsigned MinVal,
1681                             const unsigned MaxVal,
1682                             const StringRef ErrMsg);
1683   OperandMatchResultTy parseSwizzleOp(OperandVector &Operands);
1684   bool parseSwizzleOffset(int64_t &Imm);
1685   bool parseSwizzleMacro(int64_t &Imm);
1686   bool parseSwizzleQuadPerm(int64_t &Imm);
1687   bool parseSwizzleBitmaskPerm(int64_t &Imm);
1688   bool parseSwizzleBroadcast(int64_t &Imm);
1689   bool parseSwizzleSwap(int64_t &Imm);
1690   bool parseSwizzleReverse(int64_t &Imm);
1691 
1692   OperandMatchResultTy parseGPRIdxMode(OperandVector &Operands);
1693   int64_t parseGPRIdxMacro();
1694 
1695   void cvtMubuf(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false); }
1696   void cvtMubufAtomic(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true); }
1697   void cvtMubufLds(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, true); }
1698   void cvtMtbuf(MCInst &Inst, const OperandVector &Operands);
1699 
1700   AMDGPUOperand::Ptr defaultCPol() const;
1701 
1702   AMDGPUOperand::Ptr defaultSMRDOffset8() const;
1703   AMDGPUOperand::Ptr defaultSMEMOffset() const;
1704   AMDGPUOperand::Ptr defaultSMRDLiteralOffset() const;
1705   AMDGPUOperand::Ptr defaultFlatOffset() const;
1706 
1707   OperandMatchResultTy parseOModOperand(OperandVector &Operands);
1708 
1709   void cvtVOP3(MCInst &Inst, const OperandVector &Operands,
1710                OptionalImmIndexMap &OptionalIdx);
1711   void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands);
1712   void cvtVOP3(MCInst &Inst, const OperandVector &Operands);
1713   void cvtVOP3P(MCInst &Inst, const OperandVector &Operands);
1714   void cvtVOP3P(MCInst &Inst, const OperandVector &Operands,
1715                 OptionalImmIndexMap &OptionalIdx);
1716 
1717   void cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands);
1718 
1719   void cvtMIMG(MCInst &Inst, const OperandVector &Operands,
1720                bool IsAtomic = false);
1721   void cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands);
1722   void cvtIntersectRay(MCInst &Inst, const OperandVector &Operands);
1723 
1724   void cvtSMEMAtomic(MCInst &Inst, const OperandVector &Operands);
1725 
1726   bool parseDimId(unsigned &Encoding);
1727   OperandMatchResultTy parseDim(OperandVector &Operands);
1728   OperandMatchResultTy parseDPP8(OperandVector &Operands);
1729   OperandMatchResultTy parseDPPCtrl(OperandVector &Operands);
1730   bool isSupportedDPPCtrl(StringRef Ctrl, const OperandVector &Operands);
1731   int64_t parseDPPCtrlSel(StringRef Ctrl);
1732   int64_t parseDPPCtrlPerm();
1733   AMDGPUOperand::Ptr defaultRowMask() const;
1734   AMDGPUOperand::Ptr defaultBankMask() const;
1735   AMDGPUOperand::Ptr defaultBoundCtrl() const;
1736   AMDGPUOperand::Ptr defaultFI() const;
1737   void cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8 = false);
1738   void cvtDPP8(MCInst &Inst, const OperandVector &Operands) { cvtDPP(Inst, Operands, true); }
1739 
1740   OperandMatchResultTy parseSDWASel(OperandVector &Operands, StringRef Prefix,
1741                                     AMDGPUOperand::ImmTy Type);
1742   OperandMatchResultTy parseSDWADstUnused(OperandVector &Operands);
1743   void cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands);
1744   void cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands);
1745   void cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands);
1746   void cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands);
1747   void cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands);
1748   void cvtSDWA(MCInst &Inst, const OperandVector &Operands,
1749                uint64_t BasicInstType,
1750                bool SkipDstVcc = false,
1751                bool SkipSrcVcc = false);
1752 
1753   AMDGPUOperand::Ptr defaultBLGP() const;
1754   AMDGPUOperand::Ptr defaultCBSZ() const;
1755   AMDGPUOperand::Ptr defaultABID() const;
1756 
1757   OperandMatchResultTy parseEndpgmOp(OperandVector &Operands);
1758   AMDGPUOperand::Ptr defaultEndpgmImmOperands() const;
1759 };
1760 
1761 struct OptionalOperand {
1762   const char *Name;
1763   AMDGPUOperand::ImmTy Type;
1764   bool IsBit;
1765   bool (*ConvertResult)(int64_t&);
1766 };
1767 
1768 } // end anonymous namespace
1769 
1770 // May be called with integer type with equivalent bitwidth.
1771 static const fltSemantics *getFltSemantics(unsigned Size) {
1772   switch (Size) {
1773   case 4:
1774     return &APFloat::IEEEsingle();
1775   case 8:
1776     return &APFloat::IEEEdouble();
1777   case 2:
1778     return &APFloat::IEEEhalf();
1779   default:
1780     llvm_unreachable("unsupported fp type");
1781   }
1782 }
1783 
1784 static const fltSemantics *getFltSemantics(MVT VT) {
1785   return getFltSemantics(VT.getSizeInBits() / 8);
1786 }
1787 
1788 static const fltSemantics *getOpFltSemantics(uint8_t OperandType) {
1789   switch (OperandType) {
1790   case AMDGPU::OPERAND_REG_IMM_INT32:
1791   case AMDGPU::OPERAND_REG_IMM_FP32:
1792   case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED:
1793   case AMDGPU::OPERAND_REG_INLINE_C_INT32:
1794   case AMDGPU::OPERAND_REG_INLINE_C_FP32:
1795   case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
1796   case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
1797   case AMDGPU::OPERAND_REG_INLINE_C_V2FP32:
1798   case AMDGPU::OPERAND_REG_IMM_V2FP32:
1799   case AMDGPU::OPERAND_REG_INLINE_C_V2INT32:
1800   case AMDGPU::OPERAND_REG_IMM_V2INT32:
1801   case AMDGPU::OPERAND_KIMM32:
1802     return &APFloat::IEEEsingle();
1803   case AMDGPU::OPERAND_REG_IMM_INT64:
1804   case AMDGPU::OPERAND_REG_IMM_FP64:
1805   case AMDGPU::OPERAND_REG_INLINE_C_INT64:
1806   case AMDGPU::OPERAND_REG_INLINE_C_FP64:
1807   case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
1808     return &APFloat::IEEEdouble();
1809   case AMDGPU::OPERAND_REG_IMM_INT16:
1810   case AMDGPU::OPERAND_REG_IMM_FP16:
1811   case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED:
1812   case AMDGPU::OPERAND_REG_INLINE_C_INT16:
1813   case AMDGPU::OPERAND_REG_INLINE_C_FP16:
1814   case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
1815   case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
1816   case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
1817   case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
1818   case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
1819   case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
1820   case AMDGPU::OPERAND_REG_IMM_V2INT16:
1821   case AMDGPU::OPERAND_REG_IMM_V2FP16:
1822   case AMDGPU::OPERAND_KIMM16:
1823     return &APFloat::IEEEhalf();
1824   default:
1825     llvm_unreachable("unsupported fp type");
1826   }
1827 }
1828 
1829 //===----------------------------------------------------------------------===//
1830 // Operand
1831 //===----------------------------------------------------------------------===//
1832 
1833 static bool canLosslesslyConvertToFPType(APFloat &FPLiteral, MVT VT) {
1834   bool Lost;
1835 
1836   // Convert literal to single precision
1837   APFloat::opStatus Status = FPLiteral.convert(*getFltSemantics(VT),
1838                                                APFloat::rmNearestTiesToEven,
1839                                                &Lost);
1840   // We allow precision lost but not overflow or underflow
1841   if (Status != APFloat::opOK &&
1842       Lost &&
1843       ((Status & APFloat::opOverflow)  != 0 ||
1844        (Status & APFloat::opUnderflow) != 0)) {
1845     return false;
1846   }
1847 
1848   return true;
1849 }
1850 
1851 static bool isSafeTruncation(int64_t Val, unsigned Size) {
1852   return isUIntN(Size, Val) || isIntN(Size, Val);
1853 }
1854 
1855 static bool isInlineableLiteralOp16(int64_t Val, MVT VT, bool HasInv2Pi) {
1856   if (VT.getScalarType() == MVT::i16) {
1857     // FP immediate values are broken.
1858     return isInlinableIntLiteral(Val);
1859   }
1860 
1861   // f16/v2f16 operands work correctly for all values.
1862   return AMDGPU::isInlinableLiteral16(Val, HasInv2Pi);
1863 }
1864 
1865 bool AMDGPUOperand::isInlinableImm(MVT type) const {
1866 
1867   // This is a hack to enable named inline values like
1868   // shared_base with both 32-bit and 64-bit operands.
1869   // Note that these values are defined as
1870   // 32-bit operands only.
1871   if (isInlineValue()) {
1872     return true;
1873   }
1874 
1875   if (!isImmTy(ImmTyNone)) {
1876     // Only plain immediates are inlinable (e.g. "clamp" attribute is not)
1877     return false;
1878   }
1879   // TODO: We should avoid using host float here. It would be better to
1880   // check the float bit values which is what a few other places do.
1881   // We've had bot failures before due to weird NaN support on mips hosts.
1882 
1883   APInt Literal(64, Imm.Val);
1884 
1885   if (Imm.IsFPImm) { // We got fp literal token
1886     if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
1887       return AMDGPU::isInlinableLiteral64(Imm.Val,
1888                                           AsmParser->hasInv2PiInlineImm());
1889     }
1890 
1891     APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
1892     if (!canLosslesslyConvertToFPType(FPLiteral, type))
1893       return false;
1894 
1895     if (type.getScalarSizeInBits() == 16) {
1896       return isInlineableLiteralOp16(
1897         static_cast<int16_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
1898         type, AsmParser->hasInv2PiInlineImm());
1899     }
1900 
1901     // Check if single precision literal is inlinable
1902     return AMDGPU::isInlinableLiteral32(
1903       static_cast<int32_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
1904       AsmParser->hasInv2PiInlineImm());
1905   }
1906 
1907   // We got int literal token.
1908   if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
1909     return AMDGPU::isInlinableLiteral64(Imm.Val,
1910                                         AsmParser->hasInv2PiInlineImm());
1911   }
1912 
1913   if (!isSafeTruncation(Imm.Val, type.getScalarSizeInBits())) {
1914     return false;
1915   }
1916 
1917   if (type.getScalarSizeInBits() == 16) {
1918     return isInlineableLiteralOp16(
1919       static_cast<int16_t>(Literal.getLoBits(16).getSExtValue()),
1920       type, AsmParser->hasInv2PiInlineImm());
1921   }
1922 
1923   return AMDGPU::isInlinableLiteral32(
1924     static_cast<int32_t>(Literal.getLoBits(32).getZExtValue()),
1925     AsmParser->hasInv2PiInlineImm());
1926 }
1927 
1928 bool AMDGPUOperand::isLiteralImm(MVT type) const {
1929   // Check that this immediate can be added as literal
1930   if (!isImmTy(ImmTyNone)) {
1931     return false;
1932   }
1933 
1934   if (!Imm.IsFPImm) {
1935     // We got int literal token.
1936 
1937     if (type == MVT::f64 && hasFPModifiers()) {
1938       // Cannot apply fp modifiers to int literals preserving the same semantics
1939       // for VOP1/2/C and VOP3 because of integer truncation. To avoid ambiguity,
1940       // disable these cases.
1941       return false;
1942     }
1943 
1944     unsigned Size = type.getSizeInBits();
1945     if (Size == 64)
1946       Size = 32;
1947 
1948     // FIXME: 64-bit operands can zero extend, sign extend, or pad zeroes for FP
1949     // types.
1950     return isSafeTruncation(Imm.Val, Size);
1951   }
1952 
1953   // We got fp literal token
1954   if (type == MVT::f64) { // Expected 64-bit fp operand
1955     // We would set low 64-bits of literal to zeroes but we accept this literals
1956     return true;
1957   }
1958 
1959   if (type == MVT::i64) { // Expected 64-bit int operand
1960     // We don't allow fp literals in 64-bit integer instructions. It is
1961     // unclear how we should encode them.
1962     return false;
1963   }
1964 
1965   // We allow fp literals with f16x2 operands assuming that the specified
1966   // literal goes into the lower half and the upper half is zero. We also
1967   // require that the literal may be losslessly converted to f16.
1968   MVT ExpectedType = (type == MVT::v2f16)? MVT::f16 :
1969                      (type == MVT::v2i16)? MVT::i16 :
1970                      (type == MVT::v2f32)? MVT::f32 : type;
1971 
1972   APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
1973   return canLosslesslyConvertToFPType(FPLiteral, ExpectedType);
1974 }
1975 
1976 bool AMDGPUOperand::isRegClass(unsigned RCID) const {
1977   return isRegKind() && AsmParser->getMRI()->getRegClass(RCID).contains(getReg());
1978 }
1979 
1980 bool AMDGPUOperand::isVRegWithInputMods() const {
1981   return isRegClass(AMDGPU::VGPR_32RegClassID) ||
1982          // GFX90A allows DPP on 64-bit operands.
1983          (isRegClass(AMDGPU::VReg_64RegClassID) &&
1984           AsmParser->getFeatureBits()[AMDGPU::Feature64BitDPP]);
1985 }
1986 
1987 bool AMDGPUOperand::isSDWAOperand(MVT type) const {
1988   if (AsmParser->isVI())
1989     return isVReg32();
1990   else if (AsmParser->isGFX9Plus())
1991     return isRegClass(AMDGPU::VS_32RegClassID) || isInlinableImm(type);
1992   else
1993     return false;
1994 }
1995 
1996 bool AMDGPUOperand::isSDWAFP16Operand() const {
1997   return isSDWAOperand(MVT::f16);
1998 }
1999 
2000 bool AMDGPUOperand::isSDWAFP32Operand() const {
2001   return isSDWAOperand(MVT::f32);
2002 }
2003 
2004 bool AMDGPUOperand::isSDWAInt16Operand() const {
2005   return isSDWAOperand(MVT::i16);
2006 }
2007 
2008 bool AMDGPUOperand::isSDWAInt32Operand() const {
2009   return isSDWAOperand(MVT::i32);
2010 }
2011 
2012 bool AMDGPUOperand::isBoolReg() const {
2013   auto FB = AsmParser->getFeatureBits();
2014   return isReg() && ((FB[AMDGPU::FeatureWavefrontSize64] && isSCSrcB64()) ||
2015                      (FB[AMDGPU::FeatureWavefrontSize32] && isSCSrcB32()));
2016 }
2017 
2018 uint64_t AMDGPUOperand::applyInputFPModifiers(uint64_t Val, unsigned Size) const
2019 {
2020   assert(isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
2021   assert(Size == 2 || Size == 4 || Size == 8);
2022 
2023   const uint64_t FpSignMask = (1ULL << (Size * 8 - 1));
2024 
2025   if (Imm.Mods.Abs) {
2026     Val &= ~FpSignMask;
2027   }
2028   if (Imm.Mods.Neg) {
2029     Val ^= FpSignMask;
2030   }
2031 
2032   return Val;
2033 }
2034 
2035 void AMDGPUOperand::addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers) const {
2036   if (AMDGPU::isSISrcOperand(AsmParser->getMII()->get(Inst.getOpcode()),
2037                              Inst.getNumOperands())) {
2038     addLiteralImmOperand(Inst, Imm.Val,
2039                          ApplyModifiers &
2040                          isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
2041   } else {
2042     assert(!isImmTy(ImmTyNone) || !hasModifiers());
2043     Inst.addOperand(MCOperand::createImm(Imm.Val));
2044     setImmKindNone();
2045   }
2046 }
2047 
2048 void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const {
2049   const auto& InstDesc = AsmParser->getMII()->get(Inst.getOpcode());
2050   auto OpNum = Inst.getNumOperands();
2051   // Check that this operand accepts literals
2052   assert(AMDGPU::isSISrcOperand(InstDesc, OpNum));
2053 
2054   if (ApplyModifiers) {
2055     assert(AMDGPU::isSISrcFPOperand(InstDesc, OpNum));
2056     const unsigned Size = Imm.IsFPImm ? sizeof(double) : getOperandSize(InstDesc, OpNum);
2057     Val = applyInputFPModifiers(Val, Size);
2058   }
2059 
2060   APInt Literal(64, Val);
2061   uint8_t OpTy = InstDesc.OpInfo[OpNum].OperandType;
2062 
2063   if (Imm.IsFPImm) { // We got fp literal token
2064     switch (OpTy) {
2065     case AMDGPU::OPERAND_REG_IMM_INT64:
2066     case AMDGPU::OPERAND_REG_IMM_FP64:
2067     case AMDGPU::OPERAND_REG_INLINE_C_INT64:
2068     case AMDGPU::OPERAND_REG_INLINE_C_FP64:
2069     case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
2070       if (AMDGPU::isInlinableLiteral64(Literal.getZExtValue(),
2071                                        AsmParser->hasInv2PiInlineImm())) {
2072         Inst.addOperand(MCOperand::createImm(Literal.getZExtValue()));
2073         setImmKindConst();
2074         return;
2075       }
2076 
2077       // Non-inlineable
2078       if (AMDGPU::isSISrcFPOperand(InstDesc, OpNum)) { // Expected 64-bit fp operand
2079         // For fp operands we check if low 32 bits are zeros
2080         if (Literal.getLoBits(32) != 0) {
2081           const_cast<AMDGPUAsmParser *>(AsmParser)->Warning(Inst.getLoc(),
2082           "Can't encode literal as exact 64-bit floating-point operand. "
2083           "Low 32-bits will be set to zero");
2084         }
2085 
2086         Inst.addOperand(MCOperand::createImm(Literal.lshr(32).getZExtValue()));
2087         setImmKindLiteral();
2088         return;
2089       }
2090 
2091       // We don't allow fp literals in 64-bit integer instructions. It is
2092       // unclear how we should encode them. This case should be checked earlier
2093       // in predicate methods (isLiteralImm())
2094       llvm_unreachable("fp literal in 64-bit integer instruction.");
2095 
2096     case AMDGPU::OPERAND_REG_IMM_INT32:
2097     case AMDGPU::OPERAND_REG_IMM_FP32:
2098     case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED:
2099     case AMDGPU::OPERAND_REG_INLINE_C_INT32:
2100     case AMDGPU::OPERAND_REG_INLINE_C_FP32:
2101     case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
2102     case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
2103     case AMDGPU::OPERAND_REG_IMM_INT16:
2104     case AMDGPU::OPERAND_REG_IMM_FP16:
2105     case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED:
2106     case AMDGPU::OPERAND_REG_INLINE_C_INT16:
2107     case AMDGPU::OPERAND_REG_INLINE_C_FP16:
2108     case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
2109     case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
2110     case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
2111     case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
2112     case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
2113     case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
2114     case AMDGPU::OPERAND_REG_IMM_V2INT16:
2115     case AMDGPU::OPERAND_REG_IMM_V2FP16:
2116     case AMDGPU::OPERAND_REG_INLINE_C_V2FP32:
2117     case AMDGPU::OPERAND_REG_IMM_V2FP32:
2118     case AMDGPU::OPERAND_REG_INLINE_C_V2INT32:
2119     case AMDGPU::OPERAND_REG_IMM_V2INT32:
2120     case AMDGPU::OPERAND_KIMM32:
2121     case AMDGPU::OPERAND_KIMM16: {
2122       bool lost;
2123       APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
2124       // Convert literal to single precision
2125       FPLiteral.convert(*getOpFltSemantics(OpTy),
2126                         APFloat::rmNearestTiesToEven, &lost);
2127       // We allow precision lost but not overflow or underflow. This should be
2128       // checked earlier in isLiteralImm()
2129 
2130       uint64_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue();
2131       Inst.addOperand(MCOperand::createImm(ImmVal));
2132       setImmKindLiteral();
2133       return;
2134     }
2135     default:
2136       llvm_unreachable("invalid operand size");
2137     }
2138 
2139     return;
2140   }
2141 
2142   // We got int literal token.
2143   // Only sign extend inline immediates.
2144   switch (OpTy) {
2145   case AMDGPU::OPERAND_REG_IMM_INT32:
2146   case AMDGPU::OPERAND_REG_IMM_FP32:
2147   case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED:
2148   case AMDGPU::OPERAND_REG_INLINE_C_INT32:
2149   case AMDGPU::OPERAND_REG_INLINE_C_FP32:
2150   case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
2151   case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
2152   case AMDGPU::OPERAND_REG_IMM_V2INT16:
2153   case AMDGPU::OPERAND_REG_IMM_V2FP16:
2154   case AMDGPU::OPERAND_REG_IMM_V2FP32:
2155   case AMDGPU::OPERAND_REG_INLINE_C_V2FP32:
2156   case AMDGPU::OPERAND_REG_IMM_V2INT32:
2157   case AMDGPU::OPERAND_REG_INLINE_C_V2INT32:
2158     if (isSafeTruncation(Val, 32) &&
2159         AMDGPU::isInlinableLiteral32(static_cast<int32_t>(Val),
2160                                      AsmParser->hasInv2PiInlineImm())) {
2161       Inst.addOperand(MCOperand::createImm(Val));
2162       setImmKindConst();
2163       return;
2164     }
2165 
2166     Inst.addOperand(MCOperand::createImm(Val & 0xffffffff));
2167     setImmKindLiteral();
2168     return;
2169 
2170   case AMDGPU::OPERAND_REG_IMM_INT64:
2171   case AMDGPU::OPERAND_REG_IMM_FP64:
2172   case AMDGPU::OPERAND_REG_INLINE_C_INT64:
2173   case AMDGPU::OPERAND_REG_INLINE_C_FP64:
2174   case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
2175     if (AMDGPU::isInlinableLiteral64(Val, AsmParser->hasInv2PiInlineImm())) {
2176       Inst.addOperand(MCOperand::createImm(Val));
2177       setImmKindConst();
2178       return;
2179     }
2180 
2181     Inst.addOperand(MCOperand::createImm(Lo_32(Val)));
2182     setImmKindLiteral();
2183     return;
2184 
2185   case AMDGPU::OPERAND_REG_IMM_INT16:
2186   case AMDGPU::OPERAND_REG_IMM_FP16:
2187   case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED:
2188   case AMDGPU::OPERAND_REG_INLINE_C_INT16:
2189   case AMDGPU::OPERAND_REG_INLINE_C_FP16:
2190   case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
2191   case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
2192     if (isSafeTruncation(Val, 16) &&
2193         AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val),
2194                                      AsmParser->hasInv2PiInlineImm())) {
2195       Inst.addOperand(MCOperand::createImm(Val));
2196       setImmKindConst();
2197       return;
2198     }
2199 
2200     Inst.addOperand(MCOperand::createImm(Val & 0xffff));
2201     setImmKindLiteral();
2202     return;
2203 
2204   case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
2205   case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
2206   case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
2207   case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: {
2208     assert(isSafeTruncation(Val, 16));
2209     assert(AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val),
2210                                         AsmParser->hasInv2PiInlineImm()));
2211 
2212     Inst.addOperand(MCOperand::createImm(Val));
2213     return;
2214   }
2215   case AMDGPU::OPERAND_KIMM32:
2216     Inst.addOperand(MCOperand::createImm(Literal.getLoBits(32).getZExtValue()));
2217     setImmKindNone();
2218     return;
2219   case AMDGPU::OPERAND_KIMM16:
2220     Inst.addOperand(MCOperand::createImm(Literal.getLoBits(16).getZExtValue()));
2221     setImmKindNone();
2222     return;
2223   default:
2224     llvm_unreachable("invalid operand size");
2225   }
2226 }
2227 
2228 template <unsigned Bitwidth>
2229 void AMDGPUOperand::addKImmFPOperands(MCInst &Inst, unsigned N) const {
2230   APInt Literal(64, Imm.Val);
2231   setImmKindNone();
2232 
2233   if (!Imm.IsFPImm) {
2234     // We got int literal token.
2235     Inst.addOperand(MCOperand::createImm(Literal.getLoBits(Bitwidth).getZExtValue()));
2236     return;
2237   }
2238 
2239   bool Lost;
2240   APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
2241   FPLiteral.convert(*getFltSemantics(Bitwidth / 8),
2242                     APFloat::rmNearestTiesToEven, &Lost);
2243   Inst.addOperand(MCOperand::createImm(FPLiteral.bitcastToAPInt().getZExtValue()));
2244 }
2245 
2246 void AMDGPUOperand::addRegOperands(MCInst &Inst, unsigned N) const {
2247   Inst.addOperand(MCOperand::createReg(AMDGPU::getMCReg(getReg(), AsmParser->getSTI())));
2248 }
2249 
2250 static bool isInlineValue(unsigned Reg) {
2251   switch (Reg) {
2252   case AMDGPU::SRC_SHARED_BASE:
2253   case AMDGPU::SRC_SHARED_LIMIT:
2254   case AMDGPU::SRC_PRIVATE_BASE:
2255   case AMDGPU::SRC_PRIVATE_LIMIT:
2256   case AMDGPU::SRC_POPS_EXITING_WAVE_ID:
2257     return true;
2258   case AMDGPU::SRC_VCCZ:
2259   case AMDGPU::SRC_EXECZ:
2260   case AMDGPU::SRC_SCC:
2261     return true;
2262   case AMDGPU::SGPR_NULL:
2263     return true;
2264   default:
2265     return false;
2266   }
2267 }
2268 
2269 bool AMDGPUOperand::isInlineValue() const {
2270   return isRegKind() && ::isInlineValue(getReg());
2271 }
2272 
2273 //===----------------------------------------------------------------------===//
2274 // AsmParser
2275 //===----------------------------------------------------------------------===//
2276 
2277 static int getRegClass(RegisterKind Is, unsigned RegWidth) {
2278   if (Is == IS_VGPR) {
2279     switch (RegWidth) {
2280       default: return -1;
2281       case 32:
2282         return AMDGPU::VGPR_32RegClassID;
2283       case 64:
2284         return AMDGPU::VReg_64RegClassID;
2285       case 96:
2286         return AMDGPU::VReg_96RegClassID;
2287       case 128:
2288         return AMDGPU::VReg_128RegClassID;
2289       case 160:
2290         return AMDGPU::VReg_160RegClassID;
2291       case 192:
2292         return AMDGPU::VReg_192RegClassID;
2293       case 224:
2294         return AMDGPU::VReg_224RegClassID;
2295       case 256:
2296         return AMDGPU::VReg_256RegClassID;
2297       case 512:
2298         return AMDGPU::VReg_512RegClassID;
2299       case 1024:
2300         return AMDGPU::VReg_1024RegClassID;
2301     }
2302   } else if (Is == IS_TTMP) {
2303     switch (RegWidth) {
2304       default: return -1;
2305       case 32:
2306         return AMDGPU::TTMP_32RegClassID;
2307       case 64:
2308         return AMDGPU::TTMP_64RegClassID;
2309       case 128:
2310         return AMDGPU::TTMP_128RegClassID;
2311       case 256:
2312         return AMDGPU::TTMP_256RegClassID;
2313       case 512:
2314         return AMDGPU::TTMP_512RegClassID;
2315     }
2316   } else if (Is == IS_SGPR) {
2317     switch (RegWidth) {
2318       default: return -1;
2319       case 32:
2320         return AMDGPU::SGPR_32RegClassID;
2321       case 64:
2322         return AMDGPU::SGPR_64RegClassID;
2323       case 96:
2324         return AMDGPU::SGPR_96RegClassID;
2325       case 128:
2326         return AMDGPU::SGPR_128RegClassID;
2327       case 160:
2328         return AMDGPU::SGPR_160RegClassID;
2329       case 192:
2330         return AMDGPU::SGPR_192RegClassID;
2331       case 224:
2332         return AMDGPU::SGPR_224RegClassID;
2333       case 256:
2334         return AMDGPU::SGPR_256RegClassID;
2335       case 512:
2336         return AMDGPU::SGPR_512RegClassID;
2337     }
2338   } else if (Is == IS_AGPR) {
2339     switch (RegWidth) {
2340       default: return -1;
2341       case 32:
2342         return AMDGPU::AGPR_32RegClassID;
2343       case 64:
2344         return AMDGPU::AReg_64RegClassID;
2345       case 96:
2346         return AMDGPU::AReg_96RegClassID;
2347       case 128:
2348         return AMDGPU::AReg_128RegClassID;
2349       case 160:
2350         return AMDGPU::AReg_160RegClassID;
2351       case 192:
2352         return AMDGPU::AReg_192RegClassID;
2353       case 224:
2354         return AMDGPU::AReg_224RegClassID;
2355       case 256:
2356         return AMDGPU::AReg_256RegClassID;
2357       case 512:
2358         return AMDGPU::AReg_512RegClassID;
2359       case 1024:
2360         return AMDGPU::AReg_1024RegClassID;
2361     }
2362   }
2363   return -1;
2364 }
2365 
2366 static unsigned getSpecialRegForName(StringRef RegName) {
2367   return StringSwitch<unsigned>(RegName)
2368     .Case("exec", AMDGPU::EXEC)
2369     .Case("vcc", AMDGPU::VCC)
2370     .Case("flat_scratch", AMDGPU::FLAT_SCR)
2371     .Case("xnack_mask", AMDGPU::XNACK_MASK)
2372     .Case("shared_base", AMDGPU::SRC_SHARED_BASE)
2373     .Case("src_shared_base", AMDGPU::SRC_SHARED_BASE)
2374     .Case("shared_limit", AMDGPU::SRC_SHARED_LIMIT)
2375     .Case("src_shared_limit", AMDGPU::SRC_SHARED_LIMIT)
2376     .Case("private_base", AMDGPU::SRC_PRIVATE_BASE)
2377     .Case("src_private_base", AMDGPU::SRC_PRIVATE_BASE)
2378     .Case("private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
2379     .Case("src_private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
2380     .Case("pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
2381     .Case("src_pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
2382     .Case("lds_direct", AMDGPU::LDS_DIRECT)
2383     .Case("src_lds_direct", AMDGPU::LDS_DIRECT)
2384     .Case("m0", AMDGPU::M0)
2385     .Case("vccz", AMDGPU::SRC_VCCZ)
2386     .Case("src_vccz", AMDGPU::SRC_VCCZ)
2387     .Case("execz", AMDGPU::SRC_EXECZ)
2388     .Case("src_execz", AMDGPU::SRC_EXECZ)
2389     .Case("scc", AMDGPU::SRC_SCC)
2390     .Case("src_scc", AMDGPU::SRC_SCC)
2391     .Case("tba", AMDGPU::TBA)
2392     .Case("tma", AMDGPU::TMA)
2393     .Case("flat_scratch_lo", AMDGPU::FLAT_SCR_LO)
2394     .Case("flat_scratch_hi", AMDGPU::FLAT_SCR_HI)
2395     .Case("xnack_mask_lo", AMDGPU::XNACK_MASK_LO)
2396     .Case("xnack_mask_hi", AMDGPU::XNACK_MASK_HI)
2397     .Case("vcc_lo", AMDGPU::VCC_LO)
2398     .Case("vcc_hi", AMDGPU::VCC_HI)
2399     .Case("exec_lo", AMDGPU::EXEC_LO)
2400     .Case("exec_hi", AMDGPU::EXEC_HI)
2401     .Case("tma_lo", AMDGPU::TMA_LO)
2402     .Case("tma_hi", AMDGPU::TMA_HI)
2403     .Case("tba_lo", AMDGPU::TBA_LO)
2404     .Case("tba_hi", AMDGPU::TBA_HI)
2405     .Case("pc", AMDGPU::PC_REG)
2406     .Case("null", AMDGPU::SGPR_NULL)
2407     .Default(AMDGPU::NoRegister);
2408 }
2409 
2410 bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
2411                                     SMLoc &EndLoc, bool RestoreOnFailure) {
2412   auto R = parseRegister();
2413   if (!R) return true;
2414   assert(R->isReg());
2415   RegNo = R->getReg();
2416   StartLoc = R->getStartLoc();
2417   EndLoc = R->getEndLoc();
2418   return false;
2419 }
2420 
2421 bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
2422                                     SMLoc &EndLoc) {
2423   return ParseRegister(RegNo, StartLoc, EndLoc, /*RestoreOnFailure=*/false);
2424 }
2425 
2426 OperandMatchResultTy AMDGPUAsmParser::tryParseRegister(unsigned &RegNo,
2427                                                        SMLoc &StartLoc,
2428                                                        SMLoc &EndLoc) {
2429   bool Result =
2430       ParseRegister(RegNo, StartLoc, EndLoc, /*RestoreOnFailure=*/true);
2431   bool PendingErrors = getParser().hasPendingError();
2432   getParser().clearPendingErrors();
2433   if (PendingErrors)
2434     return MatchOperand_ParseFail;
2435   if (Result)
2436     return MatchOperand_NoMatch;
2437   return MatchOperand_Success;
2438 }
2439 
2440 bool AMDGPUAsmParser::AddNextRegisterToList(unsigned &Reg, unsigned &RegWidth,
2441                                             RegisterKind RegKind, unsigned Reg1,
2442                                             SMLoc Loc) {
2443   switch (RegKind) {
2444   case IS_SPECIAL:
2445     if (Reg == AMDGPU::EXEC_LO && Reg1 == AMDGPU::EXEC_HI) {
2446       Reg = AMDGPU::EXEC;
2447       RegWidth = 64;
2448       return true;
2449     }
2450     if (Reg == AMDGPU::FLAT_SCR_LO && Reg1 == AMDGPU::FLAT_SCR_HI) {
2451       Reg = AMDGPU::FLAT_SCR;
2452       RegWidth = 64;
2453       return true;
2454     }
2455     if (Reg == AMDGPU::XNACK_MASK_LO && Reg1 == AMDGPU::XNACK_MASK_HI) {
2456       Reg = AMDGPU::XNACK_MASK;
2457       RegWidth = 64;
2458       return true;
2459     }
2460     if (Reg == AMDGPU::VCC_LO && Reg1 == AMDGPU::VCC_HI) {
2461       Reg = AMDGPU::VCC;
2462       RegWidth = 64;
2463       return true;
2464     }
2465     if (Reg == AMDGPU::TBA_LO && Reg1 == AMDGPU::TBA_HI) {
2466       Reg = AMDGPU::TBA;
2467       RegWidth = 64;
2468       return true;
2469     }
2470     if (Reg == AMDGPU::TMA_LO && Reg1 == AMDGPU::TMA_HI) {
2471       Reg = AMDGPU::TMA;
2472       RegWidth = 64;
2473       return true;
2474     }
2475     Error(Loc, "register does not fit in the list");
2476     return false;
2477   case IS_VGPR:
2478   case IS_SGPR:
2479   case IS_AGPR:
2480   case IS_TTMP:
2481     if (Reg1 != Reg + RegWidth / 32) {
2482       Error(Loc, "registers in a list must have consecutive indices");
2483       return false;
2484     }
2485     RegWidth += 32;
2486     return true;
2487   default:
2488     llvm_unreachable("unexpected register kind");
2489   }
2490 }
2491 
2492 struct RegInfo {
2493   StringLiteral Name;
2494   RegisterKind Kind;
2495 };
2496 
2497 static constexpr RegInfo RegularRegisters[] = {
2498   {{"v"},    IS_VGPR},
2499   {{"s"},    IS_SGPR},
2500   {{"ttmp"}, IS_TTMP},
2501   {{"acc"},  IS_AGPR},
2502   {{"a"},    IS_AGPR},
2503 };
2504 
2505 static bool isRegularReg(RegisterKind Kind) {
2506   return Kind == IS_VGPR ||
2507          Kind == IS_SGPR ||
2508          Kind == IS_TTMP ||
2509          Kind == IS_AGPR;
2510 }
2511 
2512 static const RegInfo* getRegularRegInfo(StringRef Str) {
2513   for (const RegInfo &Reg : RegularRegisters)
2514     if (Str.startswith(Reg.Name))
2515       return &Reg;
2516   return nullptr;
2517 }
2518 
2519 static bool getRegNum(StringRef Str, unsigned& Num) {
2520   return !Str.getAsInteger(10, Num);
2521 }
2522 
2523 bool
2524 AMDGPUAsmParser::isRegister(const AsmToken &Token,
2525                             const AsmToken &NextToken) const {
2526 
2527   // A list of consecutive registers: [s0,s1,s2,s3]
2528   if (Token.is(AsmToken::LBrac))
2529     return true;
2530 
2531   if (!Token.is(AsmToken::Identifier))
2532     return false;
2533 
2534   // A single register like s0 or a range of registers like s[0:1]
2535 
2536   StringRef Str = Token.getString();
2537   const RegInfo *Reg = getRegularRegInfo(Str);
2538   if (Reg) {
2539     StringRef RegName = Reg->Name;
2540     StringRef RegSuffix = Str.substr(RegName.size());
2541     if (!RegSuffix.empty()) {
2542       unsigned Num;
2543       // A single register with an index: rXX
2544       if (getRegNum(RegSuffix, Num))
2545         return true;
2546     } else {
2547       // A range of registers: r[XX:YY].
2548       if (NextToken.is(AsmToken::LBrac))
2549         return true;
2550     }
2551   }
2552 
2553   return getSpecialRegForName(Str) != AMDGPU::NoRegister;
2554 }
2555 
2556 bool
2557 AMDGPUAsmParser::isRegister()
2558 {
2559   return isRegister(getToken(), peekToken());
2560 }
2561 
2562 unsigned
2563 AMDGPUAsmParser::getRegularReg(RegisterKind RegKind,
2564                                unsigned RegNum,
2565                                unsigned RegWidth,
2566                                SMLoc Loc) {
2567 
2568   assert(isRegularReg(RegKind));
2569 
2570   unsigned AlignSize = 1;
2571   if (RegKind == IS_SGPR || RegKind == IS_TTMP) {
2572     // SGPR and TTMP registers must be aligned.
2573     // Max required alignment is 4 dwords.
2574     AlignSize = std::min(RegWidth / 32, 4u);
2575   }
2576 
2577   if (RegNum % AlignSize != 0) {
2578     Error(Loc, "invalid register alignment");
2579     return AMDGPU::NoRegister;
2580   }
2581 
2582   unsigned RegIdx = RegNum / AlignSize;
2583   int RCID = getRegClass(RegKind, RegWidth);
2584   if (RCID == -1) {
2585     Error(Loc, "invalid or unsupported register size");
2586     return AMDGPU::NoRegister;
2587   }
2588 
2589   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
2590   const MCRegisterClass RC = TRI->getRegClass(RCID);
2591   if (RegIdx >= RC.getNumRegs()) {
2592     Error(Loc, "register index is out of range");
2593     return AMDGPU::NoRegister;
2594   }
2595 
2596   return RC.getRegister(RegIdx);
2597 }
2598 
2599 bool AMDGPUAsmParser::ParseRegRange(unsigned &Num, unsigned &RegWidth) {
2600   int64_t RegLo, RegHi;
2601   if (!skipToken(AsmToken::LBrac, "missing register index"))
2602     return false;
2603 
2604   SMLoc FirstIdxLoc = getLoc();
2605   SMLoc SecondIdxLoc;
2606 
2607   if (!parseExpr(RegLo))
2608     return false;
2609 
2610   if (trySkipToken(AsmToken::Colon)) {
2611     SecondIdxLoc = getLoc();
2612     if (!parseExpr(RegHi))
2613       return false;
2614   } else {
2615     RegHi = RegLo;
2616   }
2617 
2618   if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
2619     return false;
2620 
2621   if (!isUInt<32>(RegLo)) {
2622     Error(FirstIdxLoc, "invalid register index");
2623     return false;
2624   }
2625 
2626   if (!isUInt<32>(RegHi)) {
2627     Error(SecondIdxLoc, "invalid register index");
2628     return false;
2629   }
2630 
2631   if (RegLo > RegHi) {
2632     Error(FirstIdxLoc, "first register index should not exceed second index");
2633     return false;
2634   }
2635 
2636   Num = static_cast<unsigned>(RegLo);
2637   RegWidth = 32 * ((RegHi - RegLo) + 1);
2638   return true;
2639 }
2640 
2641 unsigned AMDGPUAsmParser::ParseSpecialReg(RegisterKind &RegKind,
2642                                           unsigned &RegNum, unsigned &RegWidth,
2643                                           SmallVectorImpl<AsmToken> &Tokens) {
2644   assert(isToken(AsmToken::Identifier));
2645   unsigned Reg = getSpecialRegForName(getTokenStr());
2646   if (Reg) {
2647     RegNum = 0;
2648     RegWidth = 32;
2649     RegKind = IS_SPECIAL;
2650     Tokens.push_back(getToken());
2651     lex(); // skip register name
2652   }
2653   return Reg;
2654 }
2655 
2656 unsigned AMDGPUAsmParser::ParseRegularReg(RegisterKind &RegKind,
2657                                           unsigned &RegNum, unsigned &RegWidth,
2658                                           SmallVectorImpl<AsmToken> &Tokens) {
2659   assert(isToken(AsmToken::Identifier));
2660   StringRef RegName = getTokenStr();
2661   auto Loc = getLoc();
2662 
2663   const RegInfo *RI = getRegularRegInfo(RegName);
2664   if (!RI) {
2665     Error(Loc, "invalid register name");
2666     return AMDGPU::NoRegister;
2667   }
2668 
2669   Tokens.push_back(getToken());
2670   lex(); // skip register name
2671 
2672   RegKind = RI->Kind;
2673   StringRef RegSuffix = RegName.substr(RI->Name.size());
2674   if (!RegSuffix.empty()) {
2675     // Single 32-bit register: vXX.
2676     if (!getRegNum(RegSuffix, RegNum)) {
2677       Error(Loc, "invalid register index");
2678       return AMDGPU::NoRegister;
2679     }
2680     RegWidth = 32;
2681   } else {
2682     // Range of registers: v[XX:YY]. ":YY" is optional.
2683     if (!ParseRegRange(RegNum, RegWidth))
2684       return AMDGPU::NoRegister;
2685   }
2686 
2687   return getRegularReg(RegKind, RegNum, RegWidth, Loc);
2688 }
2689 
2690 unsigned AMDGPUAsmParser::ParseRegList(RegisterKind &RegKind, unsigned &RegNum,
2691                                        unsigned &RegWidth,
2692                                        SmallVectorImpl<AsmToken> &Tokens) {
2693   unsigned Reg = AMDGPU::NoRegister;
2694   auto ListLoc = getLoc();
2695 
2696   if (!skipToken(AsmToken::LBrac,
2697                  "expected a register or a list of registers")) {
2698     return AMDGPU::NoRegister;
2699   }
2700 
2701   // List of consecutive registers, e.g.: [s0,s1,s2,s3]
2702 
2703   auto Loc = getLoc();
2704   if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth))
2705     return AMDGPU::NoRegister;
2706   if (RegWidth != 32) {
2707     Error(Loc, "expected a single 32-bit register");
2708     return AMDGPU::NoRegister;
2709   }
2710 
2711   for (; trySkipToken(AsmToken::Comma); ) {
2712     RegisterKind NextRegKind;
2713     unsigned NextReg, NextRegNum, NextRegWidth;
2714     Loc = getLoc();
2715 
2716     if (!ParseAMDGPURegister(NextRegKind, NextReg,
2717                              NextRegNum, NextRegWidth,
2718                              Tokens)) {
2719       return AMDGPU::NoRegister;
2720     }
2721     if (NextRegWidth != 32) {
2722       Error(Loc, "expected a single 32-bit register");
2723       return AMDGPU::NoRegister;
2724     }
2725     if (NextRegKind != RegKind) {
2726       Error(Loc, "registers in a list must be of the same kind");
2727       return AMDGPU::NoRegister;
2728     }
2729     if (!AddNextRegisterToList(Reg, RegWidth, RegKind, NextReg, Loc))
2730       return AMDGPU::NoRegister;
2731   }
2732 
2733   if (!skipToken(AsmToken::RBrac,
2734                  "expected a comma or a closing square bracket")) {
2735     return AMDGPU::NoRegister;
2736   }
2737 
2738   if (isRegularReg(RegKind))
2739     Reg = getRegularReg(RegKind, RegNum, RegWidth, ListLoc);
2740 
2741   return Reg;
2742 }
2743 
2744 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
2745                                           unsigned &RegNum, unsigned &RegWidth,
2746                                           SmallVectorImpl<AsmToken> &Tokens) {
2747   auto Loc = getLoc();
2748   Reg = AMDGPU::NoRegister;
2749 
2750   if (isToken(AsmToken::Identifier)) {
2751     Reg = ParseSpecialReg(RegKind, RegNum, RegWidth, Tokens);
2752     if (Reg == AMDGPU::NoRegister)
2753       Reg = ParseRegularReg(RegKind, RegNum, RegWidth, Tokens);
2754   } else {
2755     Reg = ParseRegList(RegKind, RegNum, RegWidth, Tokens);
2756   }
2757 
2758   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
2759   if (Reg == AMDGPU::NoRegister) {
2760     assert(Parser.hasPendingError());
2761     return false;
2762   }
2763 
2764   if (!subtargetHasRegister(*TRI, Reg)) {
2765     if (Reg == AMDGPU::SGPR_NULL) {
2766       Error(Loc, "'null' operand is not supported on this GPU");
2767     } else {
2768       Error(Loc, "register not available on this GPU");
2769     }
2770     return false;
2771   }
2772 
2773   return true;
2774 }
2775 
2776 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
2777                                           unsigned &RegNum, unsigned &RegWidth,
2778                                           bool RestoreOnFailure /*=false*/) {
2779   Reg = AMDGPU::NoRegister;
2780 
2781   SmallVector<AsmToken, 1> Tokens;
2782   if (ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, Tokens)) {
2783     if (RestoreOnFailure) {
2784       while (!Tokens.empty()) {
2785         getLexer().UnLex(Tokens.pop_back_val());
2786       }
2787     }
2788     return true;
2789   }
2790   return false;
2791 }
2792 
2793 Optional<StringRef>
2794 AMDGPUAsmParser::getGprCountSymbolName(RegisterKind RegKind) {
2795   switch (RegKind) {
2796   case IS_VGPR:
2797     return StringRef(".amdgcn.next_free_vgpr");
2798   case IS_SGPR:
2799     return StringRef(".amdgcn.next_free_sgpr");
2800   default:
2801     return None;
2802   }
2803 }
2804 
2805 void AMDGPUAsmParser::initializeGprCountSymbol(RegisterKind RegKind) {
2806   auto SymbolName = getGprCountSymbolName(RegKind);
2807   assert(SymbolName && "initializing invalid register kind");
2808   MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);
2809   Sym->setVariableValue(MCConstantExpr::create(0, getContext()));
2810 }
2811 
2812 bool AMDGPUAsmParser::updateGprCountSymbols(RegisterKind RegKind,
2813                                             unsigned DwordRegIndex,
2814                                             unsigned RegWidth) {
2815   // Symbols are only defined for GCN targets
2816   if (AMDGPU::getIsaVersion(getSTI().getCPU()).Major < 6)
2817     return true;
2818 
2819   auto SymbolName = getGprCountSymbolName(RegKind);
2820   if (!SymbolName)
2821     return true;
2822   MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);
2823 
2824   int64_t NewMax = DwordRegIndex + divideCeil(RegWidth, 32) - 1;
2825   int64_t OldCount;
2826 
2827   if (!Sym->isVariable())
2828     return !Error(getLoc(),
2829                   ".amdgcn.next_free_{v,s}gpr symbols must be variable");
2830   if (!Sym->getVariableValue(false)->evaluateAsAbsolute(OldCount))
2831     return !Error(
2832         getLoc(),
2833         ".amdgcn.next_free_{v,s}gpr symbols must be absolute expressions");
2834 
2835   if (OldCount <= NewMax)
2836     Sym->setVariableValue(MCConstantExpr::create(NewMax + 1, getContext()));
2837 
2838   return true;
2839 }
2840 
2841 std::unique_ptr<AMDGPUOperand>
2842 AMDGPUAsmParser::parseRegister(bool RestoreOnFailure) {
2843   const auto &Tok = getToken();
2844   SMLoc StartLoc = Tok.getLoc();
2845   SMLoc EndLoc = Tok.getEndLoc();
2846   RegisterKind RegKind;
2847   unsigned Reg, RegNum, RegWidth;
2848 
2849   if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth)) {
2850     return nullptr;
2851   }
2852   if (isHsaAbiVersion3AndAbove(&getSTI())) {
2853     if (!updateGprCountSymbols(RegKind, RegNum, RegWidth))
2854       return nullptr;
2855   } else
2856     KernelScope.usesRegister(RegKind, RegNum, RegWidth);
2857   return AMDGPUOperand::CreateReg(this, Reg, StartLoc, EndLoc);
2858 }
2859 
2860 OperandMatchResultTy
2861 AMDGPUAsmParser::parseImm(OperandVector &Operands, bool HasSP3AbsModifier) {
2862   // TODO: add syntactic sugar for 1/(2*PI)
2863 
2864   assert(!isRegister());
2865   assert(!isModifier());
2866 
2867   const auto& Tok = getToken();
2868   const auto& NextTok = peekToken();
2869   bool IsReal = Tok.is(AsmToken::Real);
2870   SMLoc S = getLoc();
2871   bool Negate = false;
2872 
2873   if (!IsReal && Tok.is(AsmToken::Minus) && NextTok.is(AsmToken::Real)) {
2874     lex();
2875     IsReal = true;
2876     Negate = true;
2877   }
2878 
2879   if (IsReal) {
2880     // Floating-point expressions are not supported.
2881     // Can only allow floating-point literals with an
2882     // optional sign.
2883 
2884     StringRef Num = getTokenStr();
2885     lex();
2886 
2887     APFloat RealVal(APFloat::IEEEdouble());
2888     auto roundMode = APFloat::rmNearestTiesToEven;
2889     if (errorToBool(RealVal.convertFromString(Num, roundMode).takeError())) {
2890       return MatchOperand_ParseFail;
2891     }
2892     if (Negate)
2893       RealVal.changeSign();
2894 
2895     Operands.push_back(
2896       AMDGPUOperand::CreateImm(this, RealVal.bitcastToAPInt().getZExtValue(), S,
2897                                AMDGPUOperand::ImmTyNone, true));
2898 
2899     return MatchOperand_Success;
2900 
2901   } else {
2902     int64_t IntVal;
2903     const MCExpr *Expr;
2904     SMLoc S = getLoc();
2905 
2906     if (HasSP3AbsModifier) {
2907       // This is a workaround for handling expressions
2908       // as arguments of SP3 'abs' modifier, for example:
2909       //     |1.0|
2910       //     |-1|
2911       //     |1+x|
2912       // This syntax is not compatible with syntax of standard
2913       // MC expressions (due to the trailing '|').
2914       SMLoc EndLoc;
2915       if (getParser().parsePrimaryExpr(Expr, EndLoc, nullptr))
2916         return MatchOperand_ParseFail;
2917     } else {
2918       if (Parser.parseExpression(Expr))
2919         return MatchOperand_ParseFail;
2920     }
2921 
2922     if (Expr->evaluateAsAbsolute(IntVal)) {
2923       Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S));
2924     } else {
2925       Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S));
2926     }
2927 
2928     return MatchOperand_Success;
2929   }
2930 
2931   return MatchOperand_NoMatch;
2932 }
2933 
2934 OperandMatchResultTy
2935 AMDGPUAsmParser::parseReg(OperandVector &Operands) {
2936   if (!isRegister())
2937     return MatchOperand_NoMatch;
2938 
2939   if (auto R = parseRegister()) {
2940     assert(R->isReg());
2941     Operands.push_back(std::move(R));
2942     return MatchOperand_Success;
2943   }
2944   return MatchOperand_ParseFail;
2945 }
2946 
2947 OperandMatchResultTy
2948 AMDGPUAsmParser::parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod) {
2949   auto res = parseReg(Operands);
2950   if (res != MatchOperand_NoMatch) {
2951     return res;
2952   } else if (isModifier()) {
2953     return MatchOperand_NoMatch;
2954   } else {
2955     return parseImm(Operands, HasSP3AbsMod);
2956   }
2957 }
2958 
2959 bool
2960 AMDGPUAsmParser::isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
2961   if (Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::LParen)) {
2962     const auto &str = Token.getString();
2963     return str == "abs" || str == "neg" || str == "sext";
2964   }
2965   return false;
2966 }
2967 
2968 bool
2969 AMDGPUAsmParser::isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const {
2970   return Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::Colon);
2971 }
2972 
2973 bool
2974 AMDGPUAsmParser::isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
2975   return isNamedOperandModifier(Token, NextToken) || Token.is(AsmToken::Pipe);
2976 }
2977 
2978 bool
2979 AMDGPUAsmParser::isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
2980   return isRegister(Token, NextToken) || isOperandModifier(Token, NextToken);
2981 }
2982 
2983 // Check if this is an operand modifier or an opcode modifier
2984 // which may look like an expression but it is not. We should
2985 // avoid parsing these modifiers as expressions. Currently
2986 // recognized sequences are:
2987 //   |...|
2988 //   abs(...)
2989 //   neg(...)
2990 //   sext(...)
2991 //   -reg
2992 //   -|...|
2993 //   -abs(...)
2994 //   name:...
2995 // Note that simple opcode modifiers like 'gds' may be parsed as
2996 // expressions; this is a special case. See getExpressionAsToken.
2997 //
2998 bool
2999 AMDGPUAsmParser::isModifier() {
3000 
3001   AsmToken Tok = getToken();
3002   AsmToken NextToken[2];
3003   peekTokens(NextToken);
3004 
3005   return isOperandModifier(Tok, NextToken[0]) ||
3006          (Tok.is(AsmToken::Minus) && isRegOrOperandModifier(NextToken[0], NextToken[1])) ||
3007          isOpcodeModifierWithVal(Tok, NextToken[0]);
3008 }
3009 
3010 // Check if the current token is an SP3 'neg' modifier.
3011 // Currently this modifier is allowed in the following context:
3012 //
3013 // 1. Before a register, e.g. "-v0", "-v[...]" or "-[v0,v1]".
3014 // 2. Before an 'abs' modifier: -abs(...)
3015 // 3. Before an SP3 'abs' modifier: -|...|
3016 //
3017 // In all other cases "-" is handled as a part
3018 // of an expression that follows the sign.
3019 //
3020 // Note: When "-" is followed by an integer literal,
3021 // this is interpreted as integer negation rather
3022 // than a floating-point NEG modifier applied to N.
3023 // Beside being contr-intuitive, such use of floating-point
3024 // NEG modifier would have resulted in different meaning
3025 // of integer literals used with VOP1/2/C and VOP3,
3026 // for example:
3027 //    v_exp_f32_e32 v5, -1 // VOP1: src0 = 0xFFFFFFFF
3028 //    v_exp_f32_e64 v5, -1 // VOP3: src0 = 0x80000001
3029 // Negative fp literals with preceding "-" are
3030 // handled likewise for uniformity
3031 //
3032 bool
3033 AMDGPUAsmParser::parseSP3NegModifier() {
3034 
3035   AsmToken NextToken[2];
3036   peekTokens(NextToken);
3037 
3038   if (isToken(AsmToken::Minus) &&
3039       (isRegister(NextToken[0], NextToken[1]) ||
3040        NextToken[0].is(AsmToken::Pipe) ||
3041        isId(NextToken[0], "abs"))) {
3042     lex();
3043     return true;
3044   }
3045 
3046   return false;
3047 }
3048 
3049 OperandMatchResultTy
3050 AMDGPUAsmParser::parseRegOrImmWithFPInputMods(OperandVector &Operands,
3051                                               bool AllowImm) {
3052   bool Neg, SP3Neg;
3053   bool Abs, SP3Abs;
3054   SMLoc Loc;
3055 
3056   // Disable ambiguous constructs like '--1' etc. Should use neg(-1) instead.
3057   if (isToken(AsmToken::Minus) && peekToken().is(AsmToken::Minus)) {
3058     Error(getLoc(), "invalid syntax, expected 'neg' modifier");
3059     return MatchOperand_ParseFail;
3060   }
3061 
3062   SP3Neg = parseSP3NegModifier();
3063 
3064   Loc = getLoc();
3065   Neg = trySkipId("neg");
3066   if (Neg && SP3Neg) {
3067     Error(Loc, "expected register or immediate");
3068     return MatchOperand_ParseFail;
3069   }
3070   if (Neg && !skipToken(AsmToken::LParen, "expected left paren after neg"))
3071     return MatchOperand_ParseFail;
3072 
3073   Abs = trySkipId("abs");
3074   if (Abs && !skipToken(AsmToken::LParen, "expected left paren after abs"))
3075     return MatchOperand_ParseFail;
3076 
3077   Loc = getLoc();
3078   SP3Abs = trySkipToken(AsmToken::Pipe);
3079   if (Abs && SP3Abs) {
3080     Error(Loc, "expected register or immediate");
3081     return MatchOperand_ParseFail;
3082   }
3083 
3084   OperandMatchResultTy Res;
3085   if (AllowImm) {
3086     Res = parseRegOrImm(Operands, SP3Abs);
3087   } else {
3088     Res = parseReg(Operands);
3089   }
3090   if (Res != MatchOperand_Success) {
3091     return (SP3Neg || Neg || SP3Abs || Abs)? MatchOperand_ParseFail : Res;
3092   }
3093 
3094   if (SP3Abs && !skipToken(AsmToken::Pipe, "expected vertical bar"))
3095     return MatchOperand_ParseFail;
3096   if (Abs && !skipToken(AsmToken::RParen, "expected closing parentheses"))
3097     return MatchOperand_ParseFail;
3098   if (Neg && !skipToken(AsmToken::RParen, "expected closing parentheses"))
3099     return MatchOperand_ParseFail;
3100 
3101   AMDGPUOperand::Modifiers Mods;
3102   Mods.Abs = Abs || SP3Abs;
3103   Mods.Neg = Neg || SP3Neg;
3104 
3105   if (Mods.hasFPModifiers()) {
3106     AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
3107     if (Op.isExpr()) {
3108       Error(Op.getStartLoc(), "expected an absolute expression");
3109       return MatchOperand_ParseFail;
3110     }
3111     Op.setModifiers(Mods);
3112   }
3113   return MatchOperand_Success;
3114 }
3115 
3116 OperandMatchResultTy
3117 AMDGPUAsmParser::parseRegOrImmWithIntInputMods(OperandVector &Operands,
3118                                                bool AllowImm) {
3119   bool Sext = trySkipId("sext");
3120   if (Sext && !skipToken(AsmToken::LParen, "expected left paren after sext"))
3121     return MatchOperand_ParseFail;
3122 
3123   OperandMatchResultTy Res;
3124   if (AllowImm) {
3125     Res = parseRegOrImm(Operands);
3126   } else {
3127     Res = parseReg(Operands);
3128   }
3129   if (Res != MatchOperand_Success) {
3130     return Sext? MatchOperand_ParseFail : Res;
3131   }
3132 
3133   if (Sext && !skipToken(AsmToken::RParen, "expected closing parentheses"))
3134     return MatchOperand_ParseFail;
3135 
3136   AMDGPUOperand::Modifiers Mods;
3137   Mods.Sext = Sext;
3138 
3139   if (Mods.hasIntModifiers()) {
3140     AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
3141     if (Op.isExpr()) {
3142       Error(Op.getStartLoc(), "expected an absolute expression");
3143       return MatchOperand_ParseFail;
3144     }
3145     Op.setModifiers(Mods);
3146   }
3147 
3148   return MatchOperand_Success;
3149 }
3150 
3151 OperandMatchResultTy
3152 AMDGPUAsmParser::parseRegWithFPInputMods(OperandVector &Operands) {
3153   return parseRegOrImmWithFPInputMods(Operands, false);
3154 }
3155 
3156 OperandMatchResultTy
3157 AMDGPUAsmParser::parseRegWithIntInputMods(OperandVector &Operands) {
3158   return parseRegOrImmWithIntInputMods(Operands, false);
3159 }
3160 
3161 OperandMatchResultTy AMDGPUAsmParser::parseVReg32OrOff(OperandVector &Operands) {
3162   auto Loc = getLoc();
3163   if (trySkipId("off")) {
3164     Operands.push_back(AMDGPUOperand::CreateImm(this, 0, Loc,
3165                                                 AMDGPUOperand::ImmTyOff, false));
3166     return MatchOperand_Success;
3167   }
3168 
3169   if (!isRegister())
3170     return MatchOperand_NoMatch;
3171 
3172   std::unique_ptr<AMDGPUOperand> Reg = parseRegister();
3173   if (Reg) {
3174     Operands.push_back(std::move(Reg));
3175     return MatchOperand_Success;
3176   }
3177 
3178   return MatchOperand_ParseFail;
3179 
3180 }
3181 
3182 unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) {
3183   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
3184 
3185   if ((getForcedEncodingSize() == 32 && (TSFlags & SIInstrFlags::VOP3)) ||
3186       (getForcedEncodingSize() == 64 && !(TSFlags & SIInstrFlags::VOP3)) ||
3187       (isForcedDPP() && !(TSFlags & SIInstrFlags::DPP)) ||
3188       (isForcedSDWA() && !(TSFlags & SIInstrFlags::SDWA)) )
3189     return Match_InvalidOperand;
3190 
3191   if ((TSFlags & SIInstrFlags::VOP3) &&
3192       (TSFlags & SIInstrFlags::VOPAsmPrefer32Bit) &&
3193       getForcedEncodingSize() != 64)
3194     return Match_PreferE32;
3195 
3196   if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
3197       Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) {
3198     // v_mac_f32/16 allow only dst_sel == DWORD;
3199     auto OpNum =
3200         AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::dst_sel);
3201     const auto &Op = Inst.getOperand(OpNum);
3202     if (!Op.isImm() || Op.getImm() != AMDGPU::SDWA::SdwaSel::DWORD) {
3203       return Match_InvalidOperand;
3204     }
3205   }
3206 
3207   return Match_Success;
3208 }
3209 
3210 static ArrayRef<unsigned> getAllVariants() {
3211   static const unsigned Variants[] = {
3212     AMDGPUAsmVariants::DEFAULT, AMDGPUAsmVariants::VOP3,
3213     AMDGPUAsmVariants::SDWA, AMDGPUAsmVariants::SDWA9, AMDGPUAsmVariants::DPP
3214   };
3215 
3216   return makeArrayRef(Variants);
3217 }
3218 
3219 // What asm variants we should check
3220 ArrayRef<unsigned> AMDGPUAsmParser::getMatchedVariants() const {
3221   if (getForcedEncodingSize() == 32) {
3222     static const unsigned Variants[] = {AMDGPUAsmVariants::DEFAULT};
3223     return makeArrayRef(Variants);
3224   }
3225 
3226   if (isForcedVOP3()) {
3227     static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3};
3228     return makeArrayRef(Variants);
3229   }
3230 
3231   if (isForcedSDWA()) {
3232     static const unsigned Variants[] = {AMDGPUAsmVariants::SDWA,
3233                                         AMDGPUAsmVariants::SDWA9};
3234     return makeArrayRef(Variants);
3235   }
3236 
3237   if (isForcedDPP()) {
3238     static const unsigned Variants[] = {AMDGPUAsmVariants::DPP};
3239     return makeArrayRef(Variants);
3240   }
3241 
3242   return getAllVariants();
3243 }
3244 
3245 StringRef AMDGPUAsmParser::getMatchedVariantName() const {
3246   if (getForcedEncodingSize() == 32)
3247     return "e32";
3248 
3249   if (isForcedVOP3())
3250     return "e64";
3251 
3252   if (isForcedSDWA())
3253     return "sdwa";
3254 
3255   if (isForcedDPP())
3256     return "dpp";
3257 
3258   return "";
3259 }
3260 
3261 unsigned AMDGPUAsmParser::findImplicitSGPRReadInVOP(const MCInst &Inst) const {
3262   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
3263   const unsigned Num = Desc.getNumImplicitUses();
3264   for (unsigned i = 0; i < Num; ++i) {
3265     unsigned Reg = Desc.ImplicitUses[i];
3266     switch (Reg) {
3267     case AMDGPU::FLAT_SCR:
3268     case AMDGPU::VCC:
3269     case AMDGPU::VCC_LO:
3270     case AMDGPU::VCC_HI:
3271     case AMDGPU::M0:
3272       return Reg;
3273     default:
3274       break;
3275     }
3276   }
3277   return AMDGPU::NoRegister;
3278 }
3279 
3280 // NB: This code is correct only when used to check constant
3281 // bus limitations because GFX7 support no f16 inline constants.
3282 // Note that there are no cases when a GFX7 opcode violates
3283 // constant bus limitations due to the use of an f16 constant.
3284 bool AMDGPUAsmParser::isInlineConstant(const MCInst &Inst,
3285                                        unsigned OpIdx) const {
3286   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
3287 
3288   if (!AMDGPU::isSISrcOperand(Desc, OpIdx)) {
3289     return false;
3290   }
3291 
3292   const MCOperand &MO = Inst.getOperand(OpIdx);
3293 
3294   int64_t Val = MO.getImm();
3295   auto OpSize = AMDGPU::getOperandSize(Desc, OpIdx);
3296 
3297   switch (OpSize) { // expected operand size
3298   case 8:
3299     return AMDGPU::isInlinableLiteral64(Val, hasInv2PiInlineImm());
3300   case 4:
3301     return AMDGPU::isInlinableLiteral32(Val, hasInv2PiInlineImm());
3302   case 2: {
3303     const unsigned OperandType = Desc.OpInfo[OpIdx].OperandType;
3304     if (OperandType == AMDGPU::OPERAND_REG_IMM_INT16 ||
3305         OperandType == AMDGPU::OPERAND_REG_INLINE_C_INT16 ||
3306         OperandType == AMDGPU::OPERAND_REG_INLINE_AC_INT16)
3307       return AMDGPU::isInlinableIntLiteral(Val);
3308 
3309     if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2INT16 ||
3310         OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2INT16 ||
3311         OperandType == AMDGPU::OPERAND_REG_IMM_V2INT16)
3312       return AMDGPU::isInlinableIntLiteralV216(Val);
3313 
3314     if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2FP16 ||
3315         OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2FP16 ||
3316         OperandType == AMDGPU::OPERAND_REG_IMM_V2FP16)
3317       return AMDGPU::isInlinableLiteralV216(Val, hasInv2PiInlineImm());
3318 
3319     return AMDGPU::isInlinableLiteral16(Val, hasInv2PiInlineImm());
3320   }
3321   default:
3322     llvm_unreachable("invalid operand size");
3323   }
3324 }
3325 
3326 unsigned AMDGPUAsmParser::getConstantBusLimit(unsigned Opcode) const {
3327   if (!isGFX10Plus())
3328     return 1;
3329 
3330   switch (Opcode) {
3331   // 64-bit shift instructions can use only one scalar value input
3332   case AMDGPU::V_LSHLREV_B64_e64:
3333   case AMDGPU::V_LSHLREV_B64_gfx10:
3334   case AMDGPU::V_LSHRREV_B64_e64:
3335   case AMDGPU::V_LSHRREV_B64_gfx10:
3336   case AMDGPU::V_ASHRREV_I64_e64:
3337   case AMDGPU::V_ASHRREV_I64_gfx10:
3338   case AMDGPU::V_LSHL_B64_e64:
3339   case AMDGPU::V_LSHR_B64_e64:
3340   case AMDGPU::V_ASHR_I64_e64:
3341     return 1;
3342   default:
3343     return 2;
3344   }
3345 }
3346 
3347 bool AMDGPUAsmParser::usesConstantBus(const MCInst &Inst, unsigned OpIdx) {
3348   const MCOperand &MO = Inst.getOperand(OpIdx);
3349   if (MO.isImm()) {
3350     return !isInlineConstant(Inst, OpIdx);
3351   } else if (MO.isReg()) {
3352     auto Reg = MO.getReg();
3353     const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3354     auto PReg = mc2PseudoReg(Reg);
3355     return isSGPR(PReg, TRI) && PReg != SGPR_NULL;
3356   } else {
3357     return true;
3358   }
3359 }
3360 
3361 bool
3362 AMDGPUAsmParser::validateConstantBusLimitations(const MCInst &Inst,
3363                                                 const OperandVector &Operands) {
3364   const unsigned Opcode = Inst.getOpcode();
3365   const MCInstrDesc &Desc = MII.get(Opcode);
3366   unsigned LastSGPR = AMDGPU::NoRegister;
3367   unsigned ConstantBusUseCount = 0;
3368   unsigned NumLiterals = 0;
3369   unsigned LiteralSize;
3370 
3371   if (Desc.TSFlags &
3372       (SIInstrFlags::VOPC |
3373        SIInstrFlags::VOP1 | SIInstrFlags::VOP2 |
3374        SIInstrFlags::VOP3 | SIInstrFlags::VOP3P |
3375        SIInstrFlags::SDWA)) {
3376     // Check special imm operands (used by madmk, etc)
3377     if (AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm) != -1) {
3378       ++NumLiterals;
3379       LiteralSize = 4;
3380     }
3381 
3382     SmallDenseSet<unsigned> SGPRsUsed;
3383     unsigned SGPRUsed = findImplicitSGPRReadInVOP(Inst);
3384     if (SGPRUsed != AMDGPU::NoRegister) {
3385       SGPRsUsed.insert(SGPRUsed);
3386       ++ConstantBusUseCount;
3387     }
3388 
3389     const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
3390     const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
3391     const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
3392 
3393     const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx };
3394 
3395     for (int OpIdx : OpIndices) {
3396       if (OpIdx == -1) break;
3397 
3398       const MCOperand &MO = Inst.getOperand(OpIdx);
3399       if (usesConstantBus(Inst, OpIdx)) {
3400         if (MO.isReg()) {
3401           LastSGPR = mc2PseudoReg(MO.getReg());
3402           // Pairs of registers with a partial intersections like these
3403           //   s0, s[0:1]
3404           //   flat_scratch_lo, flat_scratch
3405           //   flat_scratch_lo, flat_scratch_hi
3406           // are theoretically valid but they are disabled anyway.
3407           // Note that this code mimics SIInstrInfo::verifyInstruction
3408           if (!SGPRsUsed.count(LastSGPR)) {
3409             SGPRsUsed.insert(LastSGPR);
3410             ++ConstantBusUseCount;
3411           }
3412         } else { // Expression or a literal
3413 
3414           if (Desc.OpInfo[OpIdx].OperandType == MCOI::OPERAND_IMMEDIATE)
3415             continue; // special operand like VINTERP attr_chan
3416 
3417           // An instruction may use only one literal.
3418           // This has been validated on the previous step.
3419           // See validateVOPLiteral.
3420           // This literal may be used as more than one operand.
3421           // If all these operands are of the same size,
3422           // this literal counts as one scalar value.
3423           // Otherwise it counts as 2 scalar values.
3424           // See "GFX10 Shader Programming", section 3.6.2.3.
3425 
3426           unsigned Size = AMDGPU::getOperandSize(Desc, OpIdx);
3427           if (Size < 4) Size = 4;
3428 
3429           if (NumLiterals == 0) {
3430             NumLiterals = 1;
3431             LiteralSize = Size;
3432           } else if (LiteralSize != Size) {
3433             NumLiterals = 2;
3434           }
3435         }
3436       }
3437     }
3438   }
3439   ConstantBusUseCount += NumLiterals;
3440 
3441   if (ConstantBusUseCount <= getConstantBusLimit(Opcode))
3442     return true;
3443 
3444   SMLoc LitLoc = getLitLoc(Operands);
3445   SMLoc RegLoc = getRegLoc(LastSGPR, Operands);
3446   SMLoc Loc = (LitLoc.getPointer() < RegLoc.getPointer()) ? RegLoc : LitLoc;
3447   Error(Loc, "invalid operand (violates constant bus restrictions)");
3448   return false;
3449 }
3450 
3451 bool
3452 AMDGPUAsmParser::validateEarlyClobberLimitations(const MCInst &Inst,
3453                                                  const OperandVector &Operands) {
3454   const unsigned Opcode = Inst.getOpcode();
3455   const MCInstrDesc &Desc = MII.get(Opcode);
3456 
3457   const int DstIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vdst);
3458   if (DstIdx == -1 ||
3459       Desc.getOperandConstraint(DstIdx, MCOI::EARLY_CLOBBER) == -1) {
3460     return true;
3461   }
3462 
3463   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3464 
3465   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
3466   const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
3467   const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
3468 
3469   assert(DstIdx != -1);
3470   const MCOperand &Dst = Inst.getOperand(DstIdx);
3471   assert(Dst.isReg());
3472 
3473   const int SrcIndices[] = { Src0Idx, Src1Idx, Src2Idx };
3474 
3475   for (int SrcIdx : SrcIndices) {
3476     if (SrcIdx == -1) break;
3477     const MCOperand &Src = Inst.getOperand(SrcIdx);
3478     if (Src.isReg()) {
3479       if (TRI->regsOverlap(Dst.getReg(), Src.getReg())) {
3480         const unsigned SrcReg = mc2PseudoReg(Src.getReg());
3481         Error(getRegLoc(SrcReg, Operands),
3482           "destination must be different than all sources");
3483         return false;
3484       }
3485     }
3486   }
3487 
3488   return true;
3489 }
3490 
3491 bool AMDGPUAsmParser::validateIntClampSupported(const MCInst &Inst) {
3492 
3493   const unsigned Opc = Inst.getOpcode();
3494   const MCInstrDesc &Desc = MII.get(Opc);
3495 
3496   if ((Desc.TSFlags & SIInstrFlags::IntClamp) != 0 && !hasIntClamp()) {
3497     int ClampIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp);
3498     assert(ClampIdx != -1);
3499     return Inst.getOperand(ClampIdx).getImm() == 0;
3500   }
3501 
3502   return true;
3503 }
3504 
3505 Optional<StringRef> AMDGPUAsmParser::validateMIMGDataSize(const MCInst &Inst) {
3506 
3507   const unsigned Opc = Inst.getOpcode();
3508   const MCInstrDesc &Desc = MII.get(Opc);
3509 
3510   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3511     return None;
3512 
3513   int VDataIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdata);
3514   int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
3515   int TFEIdx   = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::tfe);
3516 
3517   assert(VDataIdx != -1);
3518 
3519   if (DMaskIdx == -1 || TFEIdx == -1) // intersect_ray
3520     return None;
3521 
3522   unsigned VDataSize = AMDGPU::getRegOperandSize(getMRI(), Desc, VDataIdx);
3523   unsigned TFESize = (TFEIdx != -1 && Inst.getOperand(TFEIdx).getImm()) ? 1 : 0;
3524   unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
3525   if (DMask == 0)
3526     DMask = 1;
3527 
3528   bool isPackedD16 = false;
3529   unsigned DataSize =
3530     (Desc.TSFlags & SIInstrFlags::Gather4) ? 4 : countPopulation(DMask);
3531   if (hasPackedD16()) {
3532     int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
3533     isPackedD16 = D16Idx >= 0;
3534     if (isPackedD16 && Inst.getOperand(D16Idx).getImm())
3535       DataSize = (DataSize + 1) / 2;
3536   }
3537 
3538   if ((VDataSize / 4) == DataSize + TFESize)
3539     return None;
3540 
3541   return StringRef(isPackedD16
3542                        ? "image data size does not match dmask, d16 and tfe"
3543                        : "image data size does not match dmask and tfe");
3544 }
3545 
3546 bool AMDGPUAsmParser::validateMIMGAddrSize(const MCInst &Inst) {
3547   const unsigned Opc = Inst.getOpcode();
3548   const MCInstrDesc &Desc = MII.get(Opc);
3549 
3550   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0 || !isGFX10Plus())
3551     return true;
3552 
3553   const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc);
3554 
3555   const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
3556       AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode);
3557   int VAddr0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr0);
3558   int SrsrcIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::srsrc);
3559   int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
3560   int A16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::a16);
3561 
3562   assert(VAddr0Idx != -1);
3563   assert(SrsrcIdx != -1);
3564   assert(SrsrcIdx > VAddr0Idx);
3565 
3566   if (DimIdx == -1)
3567     return true; // intersect_ray
3568 
3569   unsigned Dim = Inst.getOperand(DimIdx).getImm();
3570   const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim);
3571   bool IsNSA = SrsrcIdx - VAddr0Idx > 1;
3572   unsigned ActualAddrSize =
3573       IsNSA ? SrsrcIdx - VAddr0Idx
3574             : AMDGPU::getRegOperandSize(getMRI(), Desc, VAddr0Idx) / 4;
3575   bool IsA16 = (A16Idx != -1 && Inst.getOperand(A16Idx).getImm());
3576 
3577   unsigned ExpectedAddrSize =
3578       AMDGPU::getAddrSizeMIMGOp(BaseOpcode, DimInfo, IsA16, hasG16());
3579 
3580   if (!IsNSA) {
3581     if (ExpectedAddrSize > 8)
3582       ExpectedAddrSize = 16;
3583 
3584     // Allow oversized 8 VGPR vaddr when only 5/6/7 VGPRs are required.
3585     // This provides backward compatibility for assembly created
3586     // before 160b/192b/224b types were directly supported.
3587     if (ActualAddrSize == 8 && (ExpectedAddrSize >= 5 && ExpectedAddrSize <= 7))
3588       return true;
3589   }
3590 
3591   return ActualAddrSize == ExpectedAddrSize;
3592 }
3593 
3594 bool AMDGPUAsmParser::validateMIMGAtomicDMask(const MCInst &Inst) {
3595 
3596   const unsigned Opc = Inst.getOpcode();
3597   const MCInstrDesc &Desc = MII.get(Opc);
3598 
3599   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3600     return true;
3601   if (!Desc.mayLoad() || !Desc.mayStore())
3602     return true; // Not atomic
3603 
3604   int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
3605   unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
3606 
3607   // This is an incomplete check because image_atomic_cmpswap
3608   // may only use 0x3 and 0xf while other atomic operations
3609   // may use 0x1 and 0x3. However these limitations are
3610   // verified when we check that dmask matches dst size.
3611   return DMask == 0x1 || DMask == 0x3 || DMask == 0xf;
3612 }
3613 
3614 bool AMDGPUAsmParser::validateMIMGGatherDMask(const MCInst &Inst) {
3615 
3616   const unsigned Opc = Inst.getOpcode();
3617   const MCInstrDesc &Desc = MII.get(Opc);
3618 
3619   if ((Desc.TSFlags & SIInstrFlags::Gather4) == 0)
3620     return true;
3621 
3622   int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
3623   unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
3624 
3625   // GATHER4 instructions use dmask in a different fashion compared to
3626   // other MIMG instructions. The only useful DMASK values are
3627   // 1=red, 2=green, 4=blue, 8=alpha. (e.g. 1 returns
3628   // (red,red,red,red) etc.) The ISA document doesn't mention
3629   // this.
3630   return DMask == 0x1 || DMask == 0x2 || DMask == 0x4 || DMask == 0x8;
3631 }
3632 
3633 bool AMDGPUAsmParser::validateMIMGMSAA(const MCInst &Inst) {
3634   const unsigned Opc = Inst.getOpcode();
3635   const MCInstrDesc &Desc = MII.get(Opc);
3636 
3637   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3638     return true;
3639 
3640   const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc);
3641   const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
3642       AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode);
3643 
3644   if (!BaseOpcode->MSAA)
3645     return true;
3646 
3647   int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
3648   assert(DimIdx != -1);
3649 
3650   unsigned Dim = Inst.getOperand(DimIdx).getImm();
3651   const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim);
3652 
3653   return DimInfo->MSAA;
3654 }
3655 
3656 static bool IsMovrelsSDWAOpcode(const unsigned Opcode)
3657 {
3658   switch (Opcode) {
3659   case AMDGPU::V_MOVRELS_B32_sdwa_gfx10:
3660   case AMDGPU::V_MOVRELSD_B32_sdwa_gfx10:
3661   case AMDGPU::V_MOVRELSD_2_B32_sdwa_gfx10:
3662     return true;
3663   default:
3664     return false;
3665   }
3666 }
3667 
3668 // movrels* opcodes should only allow VGPRS as src0.
3669 // This is specified in .td description for vop1/vop3,
3670 // but sdwa is handled differently. See isSDWAOperand.
3671 bool AMDGPUAsmParser::validateMovrels(const MCInst &Inst,
3672                                       const OperandVector &Operands) {
3673 
3674   const unsigned Opc = Inst.getOpcode();
3675   const MCInstrDesc &Desc = MII.get(Opc);
3676 
3677   if ((Desc.TSFlags & SIInstrFlags::SDWA) == 0 || !IsMovrelsSDWAOpcode(Opc))
3678     return true;
3679 
3680   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
3681   assert(Src0Idx != -1);
3682 
3683   SMLoc ErrLoc;
3684   const MCOperand &Src0 = Inst.getOperand(Src0Idx);
3685   if (Src0.isReg()) {
3686     auto Reg = mc2PseudoReg(Src0.getReg());
3687     const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3688     if (!isSGPR(Reg, TRI))
3689       return true;
3690     ErrLoc = getRegLoc(Reg, Operands);
3691   } else {
3692     ErrLoc = getConstLoc(Operands);
3693   }
3694 
3695   Error(ErrLoc, "source operand must be a VGPR");
3696   return false;
3697 }
3698 
3699 bool AMDGPUAsmParser::validateMAIAccWrite(const MCInst &Inst,
3700                                           const OperandVector &Operands) {
3701 
3702   const unsigned Opc = Inst.getOpcode();
3703 
3704   if (Opc != AMDGPU::V_ACCVGPR_WRITE_B32_vi)
3705     return true;
3706 
3707   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
3708   assert(Src0Idx != -1);
3709 
3710   const MCOperand &Src0 = Inst.getOperand(Src0Idx);
3711   if (!Src0.isReg())
3712     return true;
3713 
3714   auto Reg = mc2PseudoReg(Src0.getReg());
3715   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3716   if (!isGFX90A() && isSGPR(Reg, TRI)) {
3717     Error(getRegLoc(Reg, Operands),
3718           "source operand must be either a VGPR or an inline constant");
3719     return false;
3720   }
3721 
3722   return true;
3723 }
3724 
3725 bool AMDGPUAsmParser::validateMFMA(const MCInst &Inst,
3726                                    const OperandVector &Operands) {
3727   const unsigned Opc = Inst.getOpcode();
3728   const MCInstrDesc &Desc = MII.get(Opc);
3729 
3730   if ((Desc.TSFlags & SIInstrFlags::IsMAI) == 0)
3731     return true;
3732 
3733   const int Src2Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2);
3734   if (Src2Idx == -1)
3735     return true;
3736 
3737   const MCOperand &Src2 = Inst.getOperand(Src2Idx);
3738   if (!Src2.isReg())
3739     return true;
3740 
3741   MCRegister Src2Reg = Src2.getReg();
3742   MCRegister DstReg = Inst.getOperand(0).getReg();
3743   if (Src2Reg == DstReg)
3744     return true;
3745 
3746   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3747   if (TRI->getRegClass(Desc.OpInfo[0].RegClass).getSizeInBits() <= 128)
3748     return true;
3749 
3750   if (TRI->regsOverlap(Src2Reg, DstReg)) {
3751     Error(getRegLoc(mc2PseudoReg(Src2Reg), Operands),
3752           "source 2 operand must not partially overlap with dst");
3753     return false;
3754   }
3755 
3756   return true;
3757 }
3758 
3759 bool AMDGPUAsmParser::validateDivScale(const MCInst &Inst) {
3760   switch (Inst.getOpcode()) {
3761   default:
3762     return true;
3763   case V_DIV_SCALE_F32_gfx6_gfx7:
3764   case V_DIV_SCALE_F32_vi:
3765   case V_DIV_SCALE_F32_gfx10:
3766   case V_DIV_SCALE_F64_gfx6_gfx7:
3767   case V_DIV_SCALE_F64_vi:
3768   case V_DIV_SCALE_F64_gfx10:
3769     break;
3770   }
3771 
3772   // TODO: Check that src0 = src1 or src2.
3773 
3774   for (auto Name : {AMDGPU::OpName::src0_modifiers,
3775                     AMDGPU::OpName::src2_modifiers,
3776                     AMDGPU::OpName::src2_modifiers}) {
3777     if (Inst.getOperand(AMDGPU::getNamedOperandIdx(Inst.getOpcode(), Name))
3778             .getImm() &
3779         SISrcMods::ABS) {
3780       return false;
3781     }
3782   }
3783 
3784   return true;
3785 }
3786 
3787 bool AMDGPUAsmParser::validateMIMGD16(const MCInst &Inst) {
3788 
3789   const unsigned Opc = Inst.getOpcode();
3790   const MCInstrDesc &Desc = MII.get(Opc);
3791 
3792   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3793     return true;
3794 
3795   int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
3796   if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm()) {
3797     if (isCI() || isSI())
3798       return false;
3799   }
3800 
3801   return true;
3802 }
3803 
3804 bool AMDGPUAsmParser::validateMIMGDim(const MCInst &Inst) {
3805   const unsigned Opc = Inst.getOpcode();
3806   const MCInstrDesc &Desc = MII.get(Opc);
3807 
3808   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3809     return true;
3810 
3811   int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
3812   if (DimIdx < 0)
3813     return true;
3814 
3815   long Imm = Inst.getOperand(DimIdx).getImm();
3816   if (Imm < 0 || Imm >= 8)
3817     return false;
3818 
3819   return true;
3820 }
3821 
3822 static bool IsRevOpcode(const unsigned Opcode)
3823 {
3824   switch (Opcode) {
3825   case AMDGPU::V_SUBREV_F32_e32:
3826   case AMDGPU::V_SUBREV_F32_e64:
3827   case AMDGPU::V_SUBREV_F32_e32_gfx10:
3828   case AMDGPU::V_SUBREV_F32_e32_gfx6_gfx7:
3829   case AMDGPU::V_SUBREV_F32_e32_vi:
3830   case AMDGPU::V_SUBREV_F32_e64_gfx10:
3831   case AMDGPU::V_SUBREV_F32_e64_gfx6_gfx7:
3832   case AMDGPU::V_SUBREV_F32_e64_vi:
3833 
3834   case AMDGPU::V_SUBREV_CO_U32_e32:
3835   case AMDGPU::V_SUBREV_CO_U32_e64:
3836   case AMDGPU::V_SUBREV_I32_e32_gfx6_gfx7:
3837   case AMDGPU::V_SUBREV_I32_e64_gfx6_gfx7:
3838 
3839   case AMDGPU::V_SUBBREV_U32_e32:
3840   case AMDGPU::V_SUBBREV_U32_e64:
3841   case AMDGPU::V_SUBBREV_U32_e32_gfx6_gfx7:
3842   case AMDGPU::V_SUBBREV_U32_e32_vi:
3843   case AMDGPU::V_SUBBREV_U32_e64_gfx6_gfx7:
3844   case AMDGPU::V_SUBBREV_U32_e64_vi:
3845 
3846   case AMDGPU::V_SUBREV_U32_e32:
3847   case AMDGPU::V_SUBREV_U32_e64:
3848   case AMDGPU::V_SUBREV_U32_e32_gfx9:
3849   case AMDGPU::V_SUBREV_U32_e32_vi:
3850   case AMDGPU::V_SUBREV_U32_e64_gfx9:
3851   case AMDGPU::V_SUBREV_U32_e64_vi:
3852 
3853   case AMDGPU::V_SUBREV_F16_e32:
3854   case AMDGPU::V_SUBREV_F16_e64:
3855   case AMDGPU::V_SUBREV_F16_e32_gfx10:
3856   case AMDGPU::V_SUBREV_F16_e32_vi:
3857   case AMDGPU::V_SUBREV_F16_e64_gfx10:
3858   case AMDGPU::V_SUBREV_F16_e64_vi:
3859 
3860   case AMDGPU::V_SUBREV_U16_e32:
3861   case AMDGPU::V_SUBREV_U16_e64:
3862   case AMDGPU::V_SUBREV_U16_e32_vi:
3863   case AMDGPU::V_SUBREV_U16_e64_vi:
3864 
3865   case AMDGPU::V_SUBREV_CO_U32_e32_gfx9:
3866   case AMDGPU::V_SUBREV_CO_U32_e64_gfx10:
3867   case AMDGPU::V_SUBREV_CO_U32_e64_gfx9:
3868 
3869   case AMDGPU::V_SUBBREV_CO_U32_e32_gfx9:
3870   case AMDGPU::V_SUBBREV_CO_U32_e64_gfx9:
3871 
3872   case AMDGPU::V_SUBREV_NC_U32_e32_gfx10:
3873   case AMDGPU::V_SUBREV_NC_U32_e64_gfx10:
3874 
3875   case AMDGPU::V_SUBREV_CO_CI_U32_e32_gfx10:
3876   case AMDGPU::V_SUBREV_CO_CI_U32_e64_gfx10:
3877 
3878   case AMDGPU::V_LSHRREV_B32_e32:
3879   case AMDGPU::V_LSHRREV_B32_e64:
3880   case AMDGPU::V_LSHRREV_B32_e32_gfx6_gfx7:
3881   case AMDGPU::V_LSHRREV_B32_e64_gfx6_gfx7:
3882   case AMDGPU::V_LSHRREV_B32_e32_vi:
3883   case AMDGPU::V_LSHRREV_B32_e64_vi:
3884   case AMDGPU::V_LSHRREV_B32_e32_gfx10:
3885   case AMDGPU::V_LSHRREV_B32_e64_gfx10:
3886 
3887   case AMDGPU::V_ASHRREV_I32_e32:
3888   case AMDGPU::V_ASHRREV_I32_e64:
3889   case AMDGPU::V_ASHRREV_I32_e32_gfx10:
3890   case AMDGPU::V_ASHRREV_I32_e32_gfx6_gfx7:
3891   case AMDGPU::V_ASHRREV_I32_e32_vi:
3892   case AMDGPU::V_ASHRREV_I32_e64_gfx10:
3893   case AMDGPU::V_ASHRREV_I32_e64_gfx6_gfx7:
3894   case AMDGPU::V_ASHRREV_I32_e64_vi:
3895 
3896   case AMDGPU::V_LSHLREV_B32_e32:
3897   case AMDGPU::V_LSHLREV_B32_e64:
3898   case AMDGPU::V_LSHLREV_B32_e32_gfx10:
3899   case AMDGPU::V_LSHLREV_B32_e32_gfx6_gfx7:
3900   case AMDGPU::V_LSHLREV_B32_e32_vi:
3901   case AMDGPU::V_LSHLREV_B32_e64_gfx10:
3902   case AMDGPU::V_LSHLREV_B32_e64_gfx6_gfx7:
3903   case AMDGPU::V_LSHLREV_B32_e64_vi:
3904 
3905   case AMDGPU::V_LSHLREV_B16_e32:
3906   case AMDGPU::V_LSHLREV_B16_e64:
3907   case AMDGPU::V_LSHLREV_B16_e32_vi:
3908   case AMDGPU::V_LSHLREV_B16_e64_vi:
3909   case AMDGPU::V_LSHLREV_B16_gfx10:
3910 
3911   case AMDGPU::V_LSHRREV_B16_e32:
3912   case AMDGPU::V_LSHRREV_B16_e64:
3913   case AMDGPU::V_LSHRREV_B16_e32_vi:
3914   case AMDGPU::V_LSHRREV_B16_e64_vi:
3915   case AMDGPU::V_LSHRREV_B16_gfx10:
3916 
3917   case AMDGPU::V_ASHRREV_I16_e32:
3918   case AMDGPU::V_ASHRREV_I16_e64:
3919   case AMDGPU::V_ASHRREV_I16_e32_vi:
3920   case AMDGPU::V_ASHRREV_I16_e64_vi:
3921   case AMDGPU::V_ASHRREV_I16_gfx10:
3922 
3923   case AMDGPU::V_LSHLREV_B64_e64:
3924   case AMDGPU::V_LSHLREV_B64_gfx10:
3925   case AMDGPU::V_LSHLREV_B64_vi:
3926 
3927   case AMDGPU::V_LSHRREV_B64_e64:
3928   case AMDGPU::V_LSHRREV_B64_gfx10:
3929   case AMDGPU::V_LSHRREV_B64_vi:
3930 
3931   case AMDGPU::V_ASHRREV_I64_e64:
3932   case AMDGPU::V_ASHRREV_I64_gfx10:
3933   case AMDGPU::V_ASHRREV_I64_vi:
3934 
3935   case AMDGPU::V_PK_LSHLREV_B16:
3936   case AMDGPU::V_PK_LSHLREV_B16_gfx10:
3937   case AMDGPU::V_PK_LSHLREV_B16_vi:
3938 
3939   case AMDGPU::V_PK_LSHRREV_B16:
3940   case AMDGPU::V_PK_LSHRREV_B16_gfx10:
3941   case AMDGPU::V_PK_LSHRREV_B16_vi:
3942   case AMDGPU::V_PK_ASHRREV_I16:
3943   case AMDGPU::V_PK_ASHRREV_I16_gfx10:
3944   case AMDGPU::V_PK_ASHRREV_I16_vi:
3945     return true;
3946   default:
3947     return false;
3948   }
3949 }
3950 
3951 Optional<StringRef> AMDGPUAsmParser::validateLdsDirect(const MCInst &Inst) {
3952 
3953   using namespace SIInstrFlags;
3954   const unsigned Opcode = Inst.getOpcode();
3955   const MCInstrDesc &Desc = MII.get(Opcode);
3956 
3957   // lds_direct register is defined so that it can be used
3958   // with 9-bit operands only. Ignore encodings which do not accept these.
3959   const auto Enc = VOP1 | VOP2 | VOP3 | VOPC | VOP3P | SIInstrFlags::SDWA;
3960   if ((Desc.TSFlags & Enc) == 0)
3961     return None;
3962 
3963   for (auto SrcName : {OpName::src0, OpName::src1, OpName::src2}) {
3964     auto SrcIdx = getNamedOperandIdx(Opcode, SrcName);
3965     if (SrcIdx == -1)
3966       break;
3967     const auto &Src = Inst.getOperand(SrcIdx);
3968     if (Src.isReg() && Src.getReg() == LDS_DIRECT) {
3969 
3970       if (isGFX90A())
3971         return StringRef("lds_direct is not supported on this GPU");
3972 
3973       if (IsRevOpcode(Opcode) || (Desc.TSFlags & SIInstrFlags::SDWA))
3974         return StringRef("lds_direct cannot be used with this instruction");
3975 
3976       if (SrcName != OpName::src0)
3977         return StringRef("lds_direct may be used as src0 only");
3978     }
3979   }
3980 
3981   return None;
3982 }
3983 
3984 SMLoc AMDGPUAsmParser::getFlatOffsetLoc(const OperandVector &Operands) const {
3985   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
3986     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
3987     if (Op.isFlatOffset())
3988       return Op.getStartLoc();
3989   }
3990   return getLoc();
3991 }
3992 
3993 bool AMDGPUAsmParser::validateFlatOffset(const MCInst &Inst,
3994                                          const OperandVector &Operands) {
3995   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
3996   if ((TSFlags & SIInstrFlags::FLAT) == 0)
3997     return true;
3998 
3999   auto Opcode = Inst.getOpcode();
4000   auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
4001   assert(OpNum != -1);
4002 
4003   const auto &Op = Inst.getOperand(OpNum);
4004   if (!hasFlatOffsets() && Op.getImm() != 0) {
4005     Error(getFlatOffsetLoc(Operands),
4006           "flat offset modifier is not supported on this GPU");
4007     return false;
4008   }
4009 
4010   // For FLAT segment the offset must be positive;
4011   // MSB is ignored and forced to zero.
4012   if (TSFlags & (SIInstrFlags::FlatGlobal | SIInstrFlags::FlatScratch)) {
4013     unsigned OffsetSize = AMDGPU::getNumFlatOffsetBits(getSTI(), true);
4014     if (!isIntN(OffsetSize, Op.getImm())) {
4015       Error(getFlatOffsetLoc(Operands),
4016             Twine("expected a ") + Twine(OffsetSize) + "-bit signed offset");
4017       return false;
4018     }
4019   } else {
4020     unsigned OffsetSize = AMDGPU::getNumFlatOffsetBits(getSTI(), false);
4021     if (!isUIntN(OffsetSize, Op.getImm())) {
4022       Error(getFlatOffsetLoc(Operands),
4023             Twine("expected a ") + Twine(OffsetSize) + "-bit unsigned offset");
4024       return false;
4025     }
4026   }
4027 
4028   return true;
4029 }
4030 
4031 SMLoc AMDGPUAsmParser::getSMEMOffsetLoc(const OperandVector &Operands) const {
4032   // Start with second operand because SMEM Offset cannot be dst or src0.
4033   for (unsigned i = 2, e = Operands.size(); i != e; ++i) {
4034     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
4035     if (Op.isSMEMOffset())
4036       return Op.getStartLoc();
4037   }
4038   return getLoc();
4039 }
4040 
4041 bool AMDGPUAsmParser::validateSMEMOffset(const MCInst &Inst,
4042                                          const OperandVector &Operands) {
4043   if (isCI() || isSI())
4044     return true;
4045 
4046   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4047   if ((TSFlags & SIInstrFlags::SMRD) == 0)
4048     return true;
4049 
4050   auto Opcode = Inst.getOpcode();
4051   auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
4052   if (OpNum == -1)
4053     return true;
4054 
4055   const auto &Op = Inst.getOperand(OpNum);
4056   if (!Op.isImm())
4057     return true;
4058 
4059   uint64_t Offset = Op.getImm();
4060   bool IsBuffer = AMDGPU::getSMEMIsBuffer(Opcode);
4061   if (AMDGPU::isLegalSMRDEncodedUnsignedOffset(getSTI(), Offset) ||
4062       AMDGPU::isLegalSMRDEncodedSignedOffset(getSTI(), Offset, IsBuffer))
4063     return true;
4064 
4065   Error(getSMEMOffsetLoc(Operands),
4066         (isVI() || IsBuffer) ? "expected a 20-bit unsigned offset" :
4067                                "expected a 21-bit signed offset");
4068 
4069   return false;
4070 }
4071 
4072 bool AMDGPUAsmParser::validateSOPLiteral(const MCInst &Inst) const {
4073   unsigned Opcode = Inst.getOpcode();
4074   const MCInstrDesc &Desc = MII.get(Opcode);
4075   if (!(Desc.TSFlags & (SIInstrFlags::SOP2 | SIInstrFlags::SOPC)))
4076     return true;
4077 
4078   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
4079   const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
4080 
4081   const int OpIndices[] = { Src0Idx, Src1Idx };
4082 
4083   unsigned NumExprs = 0;
4084   unsigned NumLiterals = 0;
4085   uint32_t LiteralValue;
4086 
4087   for (int OpIdx : OpIndices) {
4088     if (OpIdx == -1) break;
4089 
4090     const MCOperand &MO = Inst.getOperand(OpIdx);
4091     // Exclude special imm operands (like that used by s_set_gpr_idx_on)
4092     if (AMDGPU::isSISrcOperand(Desc, OpIdx)) {
4093       if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) {
4094         uint32_t Value = static_cast<uint32_t>(MO.getImm());
4095         if (NumLiterals == 0 || LiteralValue != Value) {
4096           LiteralValue = Value;
4097           ++NumLiterals;
4098         }
4099       } else if (MO.isExpr()) {
4100         ++NumExprs;
4101       }
4102     }
4103   }
4104 
4105   return NumLiterals + NumExprs <= 1;
4106 }
4107 
4108 bool AMDGPUAsmParser::validateOpSel(const MCInst &Inst) {
4109   const unsigned Opc = Inst.getOpcode();
4110   if (Opc == AMDGPU::V_PERMLANE16_B32_gfx10 ||
4111       Opc == AMDGPU::V_PERMLANEX16_B32_gfx10) {
4112     int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
4113     unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
4114 
4115     if (OpSel & ~3)
4116       return false;
4117   }
4118 
4119   if (isGFX940() && (MII.get(Opc).TSFlags & SIInstrFlags::IsDOT)) {
4120     int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
4121     if (OpSelIdx != -1) {
4122       if (Inst.getOperand(OpSelIdx).getImm() != 0)
4123         return false;
4124     }
4125     int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi);
4126     if (OpSelHiIdx != -1) {
4127       if (Inst.getOperand(OpSelHiIdx).getImm() != -1)
4128         return false;
4129     }
4130   }
4131 
4132   return true;
4133 }
4134 
4135 bool AMDGPUAsmParser::validateDPP(const MCInst &Inst,
4136                                   const OperandVector &Operands) {
4137   const unsigned Opc = Inst.getOpcode();
4138   int DppCtrlIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dpp_ctrl);
4139   if (DppCtrlIdx < 0)
4140     return true;
4141   unsigned DppCtrl = Inst.getOperand(DppCtrlIdx).getImm();
4142 
4143   if (!AMDGPU::isLegal64BitDPPControl(DppCtrl)) {
4144     // DPP64 is supported for row_newbcast only.
4145     int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
4146     if (Src0Idx >= 0 &&
4147         getMRI()->getSubReg(Inst.getOperand(Src0Idx).getReg(), AMDGPU::sub1)) {
4148       SMLoc S = getImmLoc(AMDGPUOperand::ImmTyDppCtrl, Operands);
4149       Error(S, "64 bit dpp only supports row_newbcast");
4150       return false;
4151     }
4152   }
4153 
4154   return true;
4155 }
4156 
4157 // Check if VCC register matches wavefront size
4158 bool AMDGPUAsmParser::validateVccOperand(unsigned Reg) const {
4159   auto FB = getFeatureBits();
4160   return (FB[AMDGPU::FeatureWavefrontSize64] && Reg == AMDGPU::VCC) ||
4161     (FB[AMDGPU::FeatureWavefrontSize32] && Reg == AMDGPU::VCC_LO);
4162 }
4163 
4164 // One unique literal can be used. VOP3 literal is only allowed in GFX10+
4165 bool AMDGPUAsmParser::validateVOPLiteral(const MCInst &Inst,
4166                                          const OperandVector &Operands) {
4167   unsigned Opcode = Inst.getOpcode();
4168   const MCInstrDesc &Desc = MII.get(Opcode);
4169   const int ImmIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm);
4170   if (!(Desc.TSFlags & (SIInstrFlags::VOP3 | SIInstrFlags::VOP3P)) &&
4171       ImmIdx == -1)
4172     return true;
4173 
4174   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
4175   const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
4176   const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
4177 
4178   const int OpIndices[] = {Src0Idx, Src1Idx, Src2Idx, ImmIdx};
4179 
4180   unsigned NumExprs = 0;
4181   unsigned NumLiterals = 0;
4182   uint32_t LiteralValue;
4183 
4184   for (int OpIdx : OpIndices) {
4185     if (OpIdx == -1)
4186       continue;
4187 
4188     const MCOperand &MO = Inst.getOperand(OpIdx);
4189     if (!MO.isImm() && !MO.isExpr())
4190       continue;
4191     if (!AMDGPU::isSISrcOperand(Desc, OpIdx))
4192       continue;
4193 
4194     if (OpIdx == Src2Idx && (Desc.TSFlags & SIInstrFlags::IsMAI) &&
4195         getFeatureBits()[AMDGPU::FeatureMFMAInlineLiteralBug]) {
4196       Error(getConstLoc(Operands),
4197             "inline constants are not allowed for this operand");
4198       return false;
4199     }
4200 
4201     if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) {
4202       uint32_t Value = static_cast<uint32_t>(MO.getImm());
4203       if (NumLiterals == 0 || LiteralValue != Value) {
4204         LiteralValue = Value;
4205         ++NumLiterals;
4206       }
4207     } else if (MO.isExpr()) {
4208       ++NumExprs;
4209     }
4210   }
4211   NumLiterals += NumExprs;
4212 
4213   if (!NumLiterals)
4214     return true;
4215 
4216   if (ImmIdx == -1 && !getFeatureBits()[AMDGPU::FeatureVOP3Literal]) {
4217     Error(getLitLoc(Operands), "literal operands are not supported");
4218     return false;
4219   }
4220 
4221   if (NumLiterals > 1) {
4222     Error(getLitLoc(Operands), "only one literal operand is allowed");
4223     return false;
4224   }
4225 
4226   return true;
4227 }
4228 
4229 // Returns -1 if not a register, 0 if VGPR and 1 if AGPR.
4230 static int IsAGPROperand(const MCInst &Inst, uint16_t NameIdx,
4231                          const MCRegisterInfo *MRI) {
4232   int OpIdx = AMDGPU::getNamedOperandIdx(Inst.getOpcode(), NameIdx);
4233   if (OpIdx < 0)
4234     return -1;
4235 
4236   const MCOperand &Op = Inst.getOperand(OpIdx);
4237   if (!Op.isReg())
4238     return -1;
4239 
4240   unsigned Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0);
4241   auto Reg = Sub ? Sub : Op.getReg();
4242   const MCRegisterClass &AGPR32 = MRI->getRegClass(AMDGPU::AGPR_32RegClassID);
4243   return AGPR32.contains(Reg) ? 1 : 0;
4244 }
4245 
4246 bool AMDGPUAsmParser::validateAGPRLdSt(const MCInst &Inst) const {
4247   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4248   if ((TSFlags & (SIInstrFlags::FLAT | SIInstrFlags::MUBUF |
4249                   SIInstrFlags::MTBUF | SIInstrFlags::MIMG |
4250                   SIInstrFlags::DS)) == 0)
4251     return true;
4252 
4253   uint16_t DataNameIdx = (TSFlags & SIInstrFlags::DS) ? AMDGPU::OpName::data0
4254                                                       : AMDGPU::OpName::vdata;
4255 
4256   const MCRegisterInfo *MRI = getMRI();
4257   int DstAreg = IsAGPROperand(Inst, AMDGPU::OpName::vdst, MRI);
4258   int DataAreg = IsAGPROperand(Inst, DataNameIdx, MRI);
4259 
4260   if ((TSFlags & SIInstrFlags::DS) && DataAreg >= 0) {
4261     int Data2Areg = IsAGPROperand(Inst, AMDGPU::OpName::data1, MRI);
4262     if (Data2Areg >= 0 && Data2Areg != DataAreg)
4263       return false;
4264   }
4265 
4266   auto FB = getFeatureBits();
4267   if (FB[AMDGPU::FeatureGFX90AInsts]) {
4268     if (DataAreg < 0 || DstAreg < 0)
4269       return true;
4270     return DstAreg == DataAreg;
4271   }
4272 
4273   return DstAreg < 1 && DataAreg < 1;
4274 }
4275 
4276 bool AMDGPUAsmParser::validateVGPRAlign(const MCInst &Inst) const {
4277   auto FB = getFeatureBits();
4278   if (!FB[AMDGPU::FeatureGFX90AInsts])
4279     return true;
4280 
4281   const MCRegisterInfo *MRI = getMRI();
4282   const MCRegisterClass &VGPR32 = MRI->getRegClass(AMDGPU::VGPR_32RegClassID);
4283   const MCRegisterClass &AGPR32 = MRI->getRegClass(AMDGPU::AGPR_32RegClassID);
4284   for (unsigned I = 0, E = Inst.getNumOperands(); I != E; ++I) {
4285     const MCOperand &Op = Inst.getOperand(I);
4286     if (!Op.isReg())
4287       continue;
4288 
4289     unsigned Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0);
4290     if (!Sub)
4291       continue;
4292 
4293     if (VGPR32.contains(Sub) && ((Sub - AMDGPU::VGPR0) & 1))
4294       return false;
4295     if (AGPR32.contains(Sub) && ((Sub - AMDGPU::AGPR0) & 1))
4296       return false;
4297   }
4298 
4299   return true;
4300 }
4301 
4302 SMLoc AMDGPUAsmParser::getBLGPLoc(const OperandVector &Operands) const {
4303   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
4304     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
4305     if (Op.isBLGP())
4306       return Op.getStartLoc();
4307   }
4308   return SMLoc();
4309 }
4310 
4311 bool AMDGPUAsmParser::validateBLGP(const MCInst &Inst,
4312                                    const OperandVector &Operands) {
4313   unsigned Opc = Inst.getOpcode();
4314   int BlgpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::blgp);
4315   if (BlgpIdx == -1)
4316     return true;
4317   SMLoc BLGPLoc = getBLGPLoc(Operands);
4318   if (!BLGPLoc.isValid())
4319     return true;
4320   bool IsNeg = StringRef(BLGPLoc.getPointer()).startswith("neg:");
4321   auto FB = getFeatureBits();
4322   bool UsesNeg = false;
4323   if (FB[AMDGPU::FeatureGFX940Insts]) {
4324     switch (Opc) {
4325     case AMDGPU::V_MFMA_F64_16X16X4F64_gfx940_acd:
4326     case AMDGPU::V_MFMA_F64_16X16X4F64_gfx940_vcd:
4327     case AMDGPU::V_MFMA_F64_4X4X4F64_gfx940_acd:
4328     case AMDGPU::V_MFMA_F64_4X4X4F64_gfx940_vcd:
4329       UsesNeg = true;
4330     }
4331   }
4332 
4333   if (IsNeg == UsesNeg)
4334     return true;
4335 
4336   Error(BLGPLoc,
4337         UsesNeg ? "invalid modifier: blgp is not supported"
4338                 : "invalid modifier: neg is not supported");
4339 
4340   return false;
4341 }
4342 
4343 // gfx90a has an undocumented limitation:
4344 // DS_GWS opcodes must use even aligned registers.
4345 bool AMDGPUAsmParser::validateGWS(const MCInst &Inst,
4346                                   const OperandVector &Operands) {
4347   if (!getFeatureBits()[AMDGPU::FeatureGFX90AInsts])
4348     return true;
4349 
4350   int Opc = Inst.getOpcode();
4351   if (Opc != AMDGPU::DS_GWS_INIT_vi && Opc != AMDGPU::DS_GWS_BARRIER_vi &&
4352       Opc != AMDGPU::DS_GWS_SEMA_BR_vi)
4353     return true;
4354 
4355   const MCRegisterInfo *MRI = getMRI();
4356   const MCRegisterClass &VGPR32 = MRI->getRegClass(AMDGPU::VGPR_32RegClassID);
4357   int Data0Pos =
4358       AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::data0);
4359   assert(Data0Pos != -1);
4360   auto Reg = Inst.getOperand(Data0Pos).getReg();
4361   auto RegIdx = Reg - (VGPR32.contains(Reg) ? AMDGPU::VGPR0 : AMDGPU::AGPR0);
4362   if (RegIdx & 1) {
4363     SMLoc RegLoc = getRegLoc(Reg, Operands);
4364     Error(RegLoc, "vgpr must be even aligned");
4365     return false;
4366   }
4367 
4368   return true;
4369 }
4370 
4371 bool AMDGPUAsmParser::validateCoherencyBits(const MCInst &Inst,
4372                                             const OperandVector &Operands,
4373                                             const SMLoc &IDLoc) {
4374   int CPolPos = AMDGPU::getNamedOperandIdx(Inst.getOpcode(),
4375                                            AMDGPU::OpName::cpol);
4376   if (CPolPos == -1)
4377     return true;
4378 
4379   unsigned CPol = Inst.getOperand(CPolPos).getImm();
4380 
4381   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4382   if ((TSFlags & (SIInstrFlags::SMRD)) &&
4383       (CPol & ~(AMDGPU::CPol::GLC | AMDGPU::CPol::DLC))) {
4384     Error(IDLoc, "invalid cache policy for SMRD instruction");
4385     return false;
4386   }
4387 
4388   if (isGFX90A() && !isGFX940() && (CPol & CPol::SCC)) {
4389     SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
4390     StringRef CStr(S.getPointer());
4391     S = SMLoc::getFromPointer(&CStr.data()[CStr.find("scc")]);
4392     Error(S, "scc is not supported on this GPU");
4393     return false;
4394   }
4395 
4396   if (!(TSFlags & (SIInstrFlags::IsAtomicNoRet | SIInstrFlags::IsAtomicRet)))
4397     return true;
4398 
4399   if (TSFlags & SIInstrFlags::IsAtomicRet) {
4400     if (!(TSFlags & SIInstrFlags::MIMG) && !(CPol & CPol::GLC)) {
4401       Error(IDLoc, isGFX940() ? "instruction must use sc0"
4402                               : "instruction must use glc");
4403       return false;
4404     }
4405   } else {
4406     if (CPol & CPol::GLC) {
4407       SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
4408       StringRef CStr(S.getPointer());
4409       S = SMLoc::getFromPointer(
4410           &CStr.data()[CStr.find(isGFX940() ? "sc0" : "glc")]);
4411       Error(S, isGFX940() ? "instruction must not use sc0"
4412                           : "instruction must not use glc");
4413       return false;
4414     }
4415   }
4416 
4417   return true;
4418 }
4419 
4420 bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst,
4421                                           const SMLoc &IDLoc,
4422                                           const OperandVector &Operands) {
4423   if (auto ErrMsg = validateLdsDirect(Inst)) {
4424     Error(getRegLoc(LDS_DIRECT, Operands), *ErrMsg);
4425     return false;
4426   }
4427   if (!validateSOPLiteral(Inst)) {
4428     Error(getLitLoc(Operands),
4429       "only one literal operand is allowed");
4430     return false;
4431   }
4432   if (!validateVOPLiteral(Inst, Operands)) {
4433     return false;
4434   }
4435   if (!validateConstantBusLimitations(Inst, Operands)) {
4436     return false;
4437   }
4438   if (!validateEarlyClobberLimitations(Inst, Operands)) {
4439     return false;
4440   }
4441   if (!validateIntClampSupported(Inst)) {
4442     Error(getImmLoc(AMDGPUOperand::ImmTyClampSI, Operands),
4443       "integer clamping is not supported on this GPU");
4444     return false;
4445   }
4446   if (!validateOpSel(Inst)) {
4447     Error(getImmLoc(AMDGPUOperand::ImmTyOpSel, Operands),
4448       "invalid op_sel operand");
4449     return false;
4450   }
4451   if (!validateDPP(Inst, Operands)) {
4452     return false;
4453   }
4454   // For MUBUF/MTBUF d16 is a part of opcode, so there is nothing to validate.
4455   if (!validateMIMGD16(Inst)) {
4456     Error(getImmLoc(AMDGPUOperand::ImmTyD16, Operands),
4457       "d16 modifier is not supported on this GPU");
4458     return false;
4459   }
4460   if (!validateMIMGDim(Inst)) {
4461     Error(IDLoc, "dim modifier is required on this GPU");
4462     return false;
4463   }
4464   if (!validateMIMGMSAA(Inst)) {
4465     Error(getImmLoc(AMDGPUOperand::ImmTyDim, Operands),
4466           "invalid dim; must be MSAA type");
4467     return false;
4468   }
4469   if (auto ErrMsg = validateMIMGDataSize(Inst)) {
4470     Error(IDLoc, *ErrMsg);
4471     return false;
4472   }
4473   if (!validateMIMGAddrSize(Inst)) {
4474     Error(IDLoc,
4475       "image address size does not match dim and a16");
4476     return false;
4477   }
4478   if (!validateMIMGAtomicDMask(Inst)) {
4479     Error(getImmLoc(AMDGPUOperand::ImmTyDMask, Operands),
4480       "invalid atomic image dmask");
4481     return false;
4482   }
4483   if (!validateMIMGGatherDMask(Inst)) {
4484     Error(getImmLoc(AMDGPUOperand::ImmTyDMask, Operands),
4485       "invalid image_gather dmask: only one bit must be set");
4486     return false;
4487   }
4488   if (!validateMovrels(Inst, Operands)) {
4489     return false;
4490   }
4491   if (!validateFlatOffset(Inst, Operands)) {
4492     return false;
4493   }
4494   if (!validateSMEMOffset(Inst, Operands)) {
4495     return false;
4496   }
4497   if (!validateMAIAccWrite(Inst, Operands)) {
4498     return false;
4499   }
4500   if (!validateMFMA(Inst, Operands)) {
4501     return false;
4502   }
4503   if (!validateCoherencyBits(Inst, Operands, IDLoc)) {
4504     return false;
4505   }
4506 
4507   if (!validateAGPRLdSt(Inst)) {
4508     Error(IDLoc, getFeatureBits()[AMDGPU::FeatureGFX90AInsts]
4509     ? "invalid register class: data and dst should be all VGPR or AGPR"
4510     : "invalid register class: agpr loads and stores not supported on this GPU"
4511     );
4512     return false;
4513   }
4514   if (!validateVGPRAlign(Inst)) {
4515     Error(IDLoc,
4516       "invalid register class: vgpr tuples must be 64 bit aligned");
4517     return false;
4518   }
4519   if (!validateGWS(Inst, Operands)) {
4520     return false;
4521   }
4522 
4523   if (!validateBLGP(Inst, Operands)) {
4524     return false;
4525   }
4526 
4527   if (!validateDivScale(Inst)) {
4528     Error(IDLoc, "ABS not allowed in VOP3B instructions");
4529     return false;
4530   }
4531   if (!validateCoherencyBits(Inst, Operands, IDLoc)) {
4532     return false;
4533   }
4534 
4535   return true;
4536 }
4537 
4538 static std::string AMDGPUMnemonicSpellCheck(StringRef S,
4539                                             const FeatureBitset &FBS,
4540                                             unsigned VariantID = 0);
4541 
4542 static bool AMDGPUCheckMnemonic(StringRef Mnemonic,
4543                                 const FeatureBitset &AvailableFeatures,
4544                                 unsigned VariantID);
4545 
4546 bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo,
4547                                        const FeatureBitset &FBS) {
4548   return isSupportedMnemo(Mnemo, FBS, getAllVariants());
4549 }
4550 
4551 bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo,
4552                                        const FeatureBitset &FBS,
4553                                        ArrayRef<unsigned> Variants) {
4554   for (auto Variant : Variants) {
4555     if (AMDGPUCheckMnemonic(Mnemo, FBS, Variant))
4556       return true;
4557   }
4558 
4559   return false;
4560 }
4561 
4562 bool AMDGPUAsmParser::checkUnsupportedInstruction(StringRef Mnemo,
4563                                                   const SMLoc &IDLoc) {
4564   FeatureBitset FBS = ComputeAvailableFeatures(getSTI().getFeatureBits());
4565 
4566   // Check if requested instruction variant is supported.
4567   if (isSupportedMnemo(Mnemo, FBS, getMatchedVariants()))
4568     return false;
4569 
4570   // This instruction is not supported.
4571   // Clear any other pending errors because they are no longer relevant.
4572   getParser().clearPendingErrors();
4573 
4574   // Requested instruction variant is not supported.
4575   // Check if any other variants are supported.
4576   StringRef VariantName = getMatchedVariantName();
4577   if (!VariantName.empty() && isSupportedMnemo(Mnemo, FBS)) {
4578     return Error(IDLoc,
4579                  Twine(VariantName,
4580                        " variant of this instruction is not supported"));
4581   }
4582 
4583   // Finally check if this instruction is supported on any other GPU.
4584   if (isSupportedMnemo(Mnemo, FeatureBitset().set())) {
4585     return Error(IDLoc, "instruction not supported on this GPU");
4586   }
4587 
4588   // Instruction not supported on any GPU. Probably a typo.
4589   std::string Suggestion = AMDGPUMnemonicSpellCheck(Mnemo, FBS);
4590   return Error(IDLoc, "invalid instruction" + Suggestion);
4591 }
4592 
4593 bool AMDGPUAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
4594                                               OperandVector &Operands,
4595                                               MCStreamer &Out,
4596                                               uint64_t &ErrorInfo,
4597                                               bool MatchingInlineAsm) {
4598   MCInst Inst;
4599   unsigned Result = Match_Success;
4600   for (auto Variant : getMatchedVariants()) {
4601     uint64_t EI;
4602     auto R = MatchInstructionImpl(Operands, Inst, EI, MatchingInlineAsm,
4603                                   Variant);
4604     // We order match statuses from least to most specific. We use most specific
4605     // status as resulting
4606     // Match_MnemonicFail < Match_InvalidOperand < Match_MissingFeature < Match_PreferE32
4607     if ((R == Match_Success) ||
4608         (R == Match_PreferE32) ||
4609         (R == Match_MissingFeature && Result != Match_PreferE32) ||
4610         (R == Match_InvalidOperand && Result != Match_MissingFeature
4611                                    && Result != Match_PreferE32) ||
4612         (R == Match_MnemonicFail   && Result != Match_InvalidOperand
4613                                    && Result != Match_MissingFeature
4614                                    && Result != Match_PreferE32)) {
4615       Result = R;
4616       ErrorInfo = EI;
4617     }
4618     if (R == Match_Success)
4619       break;
4620   }
4621 
4622   if (Result == Match_Success) {
4623     if (!validateInstruction(Inst, IDLoc, Operands)) {
4624       return true;
4625     }
4626     Inst.setLoc(IDLoc);
4627     Out.emitInstruction(Inst, getSTI());
4628     return false;
4629   }
4630 
4631   StringRef Mnemo = ((AMDGPUOperand &)*Operands[0]).getToken();
4632   if (checkUnsupportedInstruction(Mnemo, IDLoc)) {
4633     return true;
4634   }
4635 
4636   switch (Result) {
4637   default: break;
4638   case Match_MissingFeature:
4639     // It has been verified that the specified instruction
4640     // mnemonic is valid. A match was found but it requires
4641     // features which are not supported on this GPU.
4642     return Error(IDLoc, "operands are not valid for this GPU or mode");
4643 
4644   case Match_InvalidOperand: {
4645     SMLoc ErrorLoc = IDLoc;
4646     if (ErrorInfo != ~0ULL) {
4647       if (ErrorInfo >= Operands.size()) {
4648         return Error(IDLoc, "too few operands for instruction");
4649       }
4650       ErrorLoc = ((AMDGPUOperand &)*Operands[ErrorInfo]).getStartLoc();
4651       if (ErrorLoc == SMLoc())
4652         ErrorLoc = IDLoc;
4653     }
4654     return Error(ErrorLoc, "invalid operand for instruction");
4655   }
4656 
4657   case Match_PreferE32:
4658     return Error(IDLoc, "internal error: instruction without _e64 suffix "
4659                         "should be encoded as e32");
4660   case Match_MnemonicFail:
4661     llvm_unreachable("Invalid instructions should have been handled already");
4662   }
4663   llvm_unreachable("Implement any new match types added!");
4664 }
4665 
4666 bool AMDGPUAsmParser::ParseAsAbsoluteExpression(uint32_t &Ret) {
4667   int64_t Tmp = -1;
4668   if (!isToken(AsmToken::Integer) && !isToken(AsmToken::Identifier)) {
4669     return true;
4670   }
4671   if (getParser().parseAbsoluteExpression(Tmp)) {
4672     return true;
4673   }
4674   Ret = static_cast<uint32_t>(Tmp);
4675   return false;
4676 }
4677 
4678 bool AMDGPUAsmParser::ParseDirectiveMajorMinor(uint32_t &Major,
4679                                                uint32_t &Minor) {
4680   if (ParseAsAbsoluteExpression(Major))
4681     return TokError("invalid major version");
4682 
4683   if (!trySkipToken(AsmToken::Comma))
4684     return TokError("minor version number required, comma expected");
4685 
4686   if (ParseAsAbsoluteExpression(Minor))
4687     return TokError("invalid minor version");
4688 
4689   return false;
4690 }
4691 
4692 bool AMDGPUAsmParser::ParseDirectiveAMDGCNTarget() {
4693   if (getSTI().getTargetTriple().getArch() != Triple::amdgcn)
4694     return TokError("directive only supported for amdgcn architecture");
4695 
4696   std::string TargetIDDirective;
4697   SMLoc TargetStart = getTok().getLoc();
4698   if (getParser().parseEscapedString(TargetIDDirective))
4699     return true;
4700 
4701   SMRange TargetRange = SMRange(TargetStart, getTok().getLoc());
4702   if (getTargetStreamer().getTargetID()->toString() != TargetIDDirective)
4703     return getParser().Error(TargetRange.Start,
4704         (Twine(".amdgcn_target directive's target id ") +
4705          Twine(TargetIDDirective) +
4706          Twine(" does not match the specified target id ") +
4707          Twine(getTargetStreamer().getTargetID()->toString())).str());
4708 
4709   return false;
4710 }
4711 
4712 bool AMDGPUAsmParser::OutOfRangeError(SMRange Range) {
4713   return Error(Range.Start, "value out of range", Range);
4714 }
4715 
4716 bool AMDGPUAsmParser::calculateGPRBlocks(
4717     const FeatureBitset &Features, bool VCCUsed, bool FlatScrUsed,
4718     bool XNACKUsed, Optional<bool> EnableWavefrontSize32, unsigned NextFreeVGPR,
4719     SMRange VGPRRange, unsigned NextFreeSGPR, SMRange SGPRRange,
4720     unsigned &VGPRBlocks, unsigned &SGPRBlocks) {
4721   // TODO(scott.linder): These calculations are duplicated from
4722   // AMDGPUAsmPrinter::getSIProgramInfo and could be unified.
4723   IsaVersion Version = getIsaVersion(getSTI().getCPU());
4724 
4725   unsigned NumVGPRs = NextFreeVGPR;
4726   unsigned NumSGPRs = NextFreeSGPR;
4727 
4728   if (Version.Major >= 10)
4729     NumSGPRs = 0;
4730   else {
4731     unsigned MaxAddressableNumSGPRs =
4732         IsaInfo::getAddressableNumSGPRs(&getSTI());
4733 
4734     if (Version.Major >= 8 && !Features.test(FeatureSGPRInitBug) &&
4735         NumSGPRs > MaxAddressableNumSGPRs)
4736       return OutOfRangeError(SGPRRange);
4737 
4738     NumSGPRs +=
4739         IsaInfo::getNumExtraSGPRs(&getSTI(), VCCUsed, FlatScrUsed, XNACKUsed);
4740 
4741     if ((Version.Major <= 7 || Features.test(FeatureSGPRInitBug)) &&
4742         NumSGPRs > MaxAddressableNumSGPRs)
4743       return OutOfRangeError(SGPRRange);
4744 
4745     if (Features.test(FeatureSGPRInitBug))
4746       NumSGPRs = IsaInfo::FIXED_NUM_SGPRS_FOR_INIT_BUG;
4747   }
4748 
4749   VGPRBlocks =
4750       IsaInfo::getNumVGPRBlocks(&getSTI(), NumVGPRs, EnableWavefrontSize32);
4751   SGPRBlocks = IsaInfo::getNumSGPRBlocks(&getSTI(), NumSGPRs);
4752 
4753   return false;
4754 }
4755 
4756 bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
4757   if (getSTI().getTargetTriple().getArch() != Triple::amdgcn)
4758     return TokError("directive only supported for amdgcn architecture");
4759 
4760   if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA)
4761     return TokError("directive only supported for amdhsa OS");
4762 
4763   StringRef KernelName;
4764   if (getParser().parseIdentifier(KernelName))
4765     return true;
4766 
4767   kernel_descriptor_t KD = getDefaultAmdhsaKernelDescriptor(&getSTI());
4768 
4769   StringSet<> Seen;
4770 
4771   IsaVersion IVersion = getIsaVersion(getSTI().getCPU());
4772 
4773   SMRange VGPRRange;
4774   uint64_t NextFreeVGPR = 0;
4775   uint64_t AccumOffset = 0;
4776   uint64_t SharedVGPRCount = 0;
4777   SMRange SGPRRange;
4778   uint64_t NextFreeSGPR = 0;
4779 
4780   // Count the number of user SGPRs implied from the enabled feature bits.
4781   unsigned ImpliedUserSGPRCount = 0;
4782 
4783   // Track if the asm explicitly contains the directive for the user SGPR
4784   // count.
4785   Optional<unsigned> ExplicitUserSGPRCount;
4786   bool ReserveVCC = true;
4787   bool ReserveFlatScr = true;
4788   Optional<bool> EnableWavefrontSize32;
4789 
4790   while (true) {
4791     while (trySkipToken(AsmToken::EndOfStatement));
4792 
4793     StringRef ID;
4794     SMRange IDRange = getTok().getLocRange();
4795     if (!parseId(ID, "expected .amdhsa_ directive or .end_amdhsa_kernel"))
4796       return true;
4797 
4798     if (ID == ".end_amdhsa_kernel")
4799       break;
4800 
4801     if (Seen.find(ID) != Seen.end())
4802       return TokError(".amdhsa_ directives cannot be repeated");
4803     Seen.insert(ID);
4804 
4805     SMLoc ValStart = getLoc();
4806     int64_t IVal;
4807     if (getParser().parseAbsoluteExpression(IVal))
4808       return true;
4809     SMLoc ValEnd = getLoc();
4810     SMRange ValRange = SMRange(ValStart, ValEnd);
4811 
4812     if (IVal < 0)
4813       return OutOfRangeError(ValRange);
4814 
4815     uint64_t Val = IVal;
4816 
4817 #define PARSE_BITS_ENTRY(FIELD, ENTRY, VALUE, RANGE)                           \
4818   if (!isUInt<ENTRY##_WIDTH>(VALUE))                                           \
4819     return OutOfRangeError(RANGE);                                             \
4820   AMDHSA_BITS_SET(FIELD, ENTRY, VALUE);
4821 
4822     if (ID == ".amdhsa_group_segment_fixed_size") {
4823       if (!isUInt<sizeof(KD.group_segment_fixed_size) * CHAR_BIT>(Val))
4824         return OutOfRangeError(ValRange);
4825       KD.group_segment_fixed_size = Val;
4826     } else if (ID == ".amdhsa_private_segment_fixed_size") {
4827       if (!isUInt<sizeof(KD.private_segment_fixed_size) * CHAR_BIT>(Val))
4828         return OutOfRangeError(ValRange);
4829       KD.private_segment_fixed_size = Val;
4830     } else if (ID == ".amdhsa_kernarg_size") {
4831       if (!isUInt<sizeof(KD.kernarg_size) * CHAR_BIT>(Val))
4832         return OutOfRangeError(ValRange);
4833       KD.kernarg_size = Val;
4834     } else if (ID == ".amdhsa_user_sgpr_count") {
4835       ExplicitUserSGPRCount = Val;
4836     } else if (ID == ".amdhsa_user_sgpr_private_segment_buffer") {
4837       if (hasArchitectedFlatScratch())
4838         return Error(IDRange.Start,
4839                      "directive is not supported with architected flat scratch",
4840                      IDRange);
4841       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4842                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER,
4843                        Val, ValRange);
4844       if (Val)
4845         ImpliedUserSGPRCount += 4;
4846     } else if (ID == ".amdhsa_user_sgpr_dispatch_ptr") {
4847       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4848                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR, Val,
4849                        ValRange);
4850       if (Val)
4851         ImpliedUserSGPRCount += 2;
4852     } else if (ID == ".amdhsa_user_sgpr_queue_ptr") {
4853       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4854                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR, Val,
4855                        ValRange);
4856       if (Val)
4857         ImpliedUserSGPRCount += 2;
4858     } else if (ID == ".amdhsa_user_sgpr_kernarg_segment_ptr") {
4859       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4860                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR,
4861                        Val, ValRange);
4862       if (Val)
4863         ImpliedUserSGPRCount += 2;
4864     } else if (ID == ".amdhsa_user_sgpr_dispatch_id") {
4865       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4866                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID, Val,
4867                        ValRange);
4868       if (Val)
4869         ImpliedUserSGPRCount += 2;
4870     } else if (ID == ".amdhsa_user_sgpr_flat_scratch_init") {
4871       if (hasArchitectedFlatScratch())
4872         return Error(IDRange.Start,
4873                      "directive is not supported with architected flat scratch",
4874                      IDRange);
4875       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4876                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT, Val,
4877                        ValRange);
4878       if (Val)
4879         ImpliedUserSGPRCount += 2;
4880     } else if (ID == ".amdhsa_user_sgpr_private_segment_size") {
4881       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4882                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE,
4883                        Val, ValRange);
4884       if (Val)
4885         ImpliedUserSGPRCount += 1;
4886     } else if (ID == ".amdhsa_wavefront_size32") {
4887       if (IVersion.Major < 10)
4888         return Error(IDRange.Start, "directive requires gfx10+", IDRange);
4889       EnableWavefrontSize32 = Val;
4890       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4891                        KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32,
4892                        Val, ValRange);
4893     } else if (ID == ".amdhsa_system_sgpr_private_segment_wavefront_offset") {
4894       if (hasArchitectedFlatScratch())
4895         return Error(IDRange.Start,
4896                      "directive is not supported with architected flat scratch",
4897                      IDRange);
4898       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4899                        COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, Val, ValRange);
4900     } else if (ID == ".amdhsa_enable_private_segment") {
4901       if (!hasArchitectedFlatScratch())
4902         return Error(
4903             IDRange.Start,
4904             "directive is not supported without architected flat scratch",
4905             IDRange);
4906       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4907                        COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, Val, ValRange);
4908     } else if (ID == ".amdhsa_system_sgpr_workgroup_id_x") {
4909       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4910                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, Val,
4911                        ValRange);
4912     } else if (ID == ".amdhsa_system_sgpr_workgroup_id_y") {
4913       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4914                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y, Val,
4915                        ValRange);
4916     } else if (ID == ".amdhsa_system_sgpr_workgroup_id_z") {
4917       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4918                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z, Val,
4919                        ValRange);
4920     } else if (ID == ".amdhsa_system_sgpr_workgroup_info") {
4921       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4922                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO, Val,
4923                        ValRange);
4924     } else if (ID == ".amdhsa_system_vgpr_workitem_id") {
4925       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4926                        COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID, Val,
4927                        ValRange);
4928     } else if (ID == ".amdhsa_next_free_vgpr") {
4929       VGPRRange = ValRange;
4930       NextFreeVGPR = Val;
4931     } else if (ID == ".amdhsa_next_free_sgpr") {
4932       SGPRRange = ValRange;
4933       NextFreeSGPR = Val;
4934     } else if (ID == ".amdhsa_accum_offset") {
4935       if (!isGFX90A())
4936         return Error(IDRange.Start, "directive requires gfx90a+", IDRange);
4937       AccumOffset = Val;
4938     } else if (ID == ".amdhsa_reserve_vcc") {
4939       if (!isUInt<1>(Val))
4940         return OutOfRangeError(ValRange);
4941       ReserveVCC = Val;
4942     } else if (ID == ".amdhsa_reserve_flat_scratch") {
4943       if (IVersion.Major < 7)
4944         return Error(IDRange.Start, "directive requires gfx7+", IDRange);
4945       if (hasArchitectedFlatScratch())
4946         return Error(IDRange.Start,
4947                      "directive is not supported with architected flat scratch",
4948                      IDRange);
4949       if (!isUInt<1>(Val))
4950         return OutOfRangeError(ValRange);
4951       ReserveFlatScr = Val;
4952     } else if (ID == ".amdhsa_reserve_xnack_mask") {
4953       if (IVersion.Major < 8)
4954         return Error(IDRange.Start, "directive requires gfx8+", IDRange);
4955       if (!isUInt<1>(Val))
4956         return OutOfRangeError(ValRange);
4957       if (Val != getTargetStreamer().getTargetID()->isXnackOnOrAny())
4958         return getParser().Error(IDRange.Start, ".amdhsa_reserve_xnack_mask does not match target id",
4959                                  IDRange);
4960     } else if (ID == ".amdhsa_float_round_mode_32") {
4961       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
4962                        COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32, Val, ValRange);
4963     } else if (ID == ".amdhsa_float_round_mode_16_64") {
4964       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
4965                        COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64, Val, ValRange);
4966     } else if (ID == ".amdhsa_float_denorm_mode_32") {
4967       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
4968                        COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32, Val, ValRange);
4969     } else if (ID == ".amdhsa_float_denorm_mode_16_64") {
4970       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
4971                        COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64, Val,
4972                        ValRange);
4973     } else if (ID == ".amdhsa_dx10_clamp") {
4974       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
4975                        COMPUTE_PGM_RSRC1_ENABLE_DX10_CLAMP, Val, ValRange);
4976     } else if (ID == ".amdhsa_ieee_mode") {
4977       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_ENABLE_IEEE_MODE,
4978                        Val, ValRange);
4979     } else if (ID == ".amdhsa_fp16_overflow") {
4980       if (IVersion.Major < 9)
4981         return Error(IDRange.Start, "directive requires gfx9+", IDRange);
4982       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FP16_OVFL, Val,
4983                        ValRange);
4984     } else if (ID == ".amdhsa_tg_split") {
4985       if (!isGFX90A())
4986         return Error(IDRange.Start, "directive requires gfx90a+", IDRange);
4987       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc3, COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT, Val,
4988                        ValRange);
4989     } else if (ID == ".amdhsa_workgroup_processor_mode") {
4990       if (IVersion.Major < 10)
4991         return Error(IDRange.Start, "directive requires gfx10+", IDRange);
4992       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_WGP_MODE, Val,
4993                        ValRange);
4994     } else if (ID == ".amdhsa_memory_ordered") {
4995       if (IVersion.Major < 10)
4996         return Error(IDRange.Start, "directive requires gfx10+", IDRange);
4997       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_MEM_ORDERED, Val,
4998                        ValRange);
4999     } else if (ID == ".amdhsa_forward_progress") {
5000       if (IVersion.Major < 10)
5001         return Error(IDRange.Start, "directive requires gfx10+", IDRange);
5002       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FWD_PROGRESS, Val,
5003                        ValRange);
5004     } else if (ID == ".amdhsa_shared_vgpr_count") {
5005       if (IVersion.Major < 10)
5006         return Error(IDRange.Start, "directive requires gfx10+", IDRange);
5007       SharedVGPRCount = Val;
5008       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc3,
5009                        COMPUTE_PGM_RSRC3_GFX10_SHARED_VGPR_COUNT, Val,
5010                        ValRange);
5011     } else if (ID == ".amdhsa_exception_fp_ieee_invalid_op") {
5012       PARSE_BITS_ENTRY(
5013           KD.compute_pgm_rsrc2,
5014           COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION, Val,
5015           ValRange);
5016     } else if (ID == ".amdhsa_exception_fp_denorm_src") {
5017       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5018                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE,
5019                        Val, ValRange);
5020     } else if (ID == ".amdhsa_exception_fp_ieee_div_zero") {
5021       PARSE_BITS_ENTRY(
5022           KD.compute_pgm_rsrc2,
5023           COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO, Val,
5024           ValRange);
5025     } else if (ID == ".amdhsa_exception_fp_ieee_overflow") {
5026       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5027                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW,
5028                        Val, ValRange);
5029     } else if (ID == ".amdhsa_exception_fp_ieee_underflow") {
5030       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5031                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW,
5032                        Val, ValRange);
5033     } else if (ID == ".amdhsa_exception_fp_ieee_inexact") {
5034       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5035                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT,
5036                        Val, ValRange);
5037     } else if (ID == ".amdhsa_exception_int_div_zero") {
5038       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5039                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO,
5040                        Val, ValRange);
5041     } else {
5042       return Error(IDRange.Start, "unknown .amdhsa_kernel directive", IDRange);
5043     }
5044 
5045 #undef PARSE_BITS_ENTRY
5046   }
5047 
5048   if (Seen.find(".amdhsa_next_free_vgpr") == Seen.end())
5049     return TokError(".amdhsa_next_free_vgpr directive is required");
5050 
5051   if (Seen.find(".amdhsa_next_free_sgpr") == Seen.end())
5052     return TokError(".amdhsa_next_free_sgpr directive is required");
5053 
5054   unsigned VGPRBlocks;
5055   unsigned SGPRBlocks;
5056   if (calculateGPRBlocks(getFeatureBits(), ReserveVCC, ReserveFlatScr,
5057                          getTargetStreamer().getTargetID()->isXnackOnOrAny(),
5058                          EnableWavefrontSize32, NextFreeVGPR,
5059                          VGPRRange, NextFreeSGPR, SGPRRange, VGPRBlocks,
5060                          SGPRBlocks))
5061     return true;
5062 
5063   if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_WIDTH>(
5064           VGPRBlocks))
5065     return OutOfRangeError(VGPRRange);
5066   AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
5067                   COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT, VGPRBlocks);
5068 
5069   if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_WIDTH>(
5070           SGPRBlocks))
5071     return OutOfRangeError(SGPRRange);
5072   AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
5073                   COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT,
5074                   SGPRBlocks);
5075 
5076   if (ExplicitUserSGPRCount && ImpliedUserSGPRCount > *ExplicitUserSGPRCount)
5077     return TokError("amdgpu_user_sgpr_count smaller than than implied by "
5078                     "enabled user SGPRs");
5079 
5080   unsigned UserSGPRCount =
5081       ExplicitUserSGPRCount ? *ExplicitUserSGPRCount : ImpliedUserSGPRCount;
5082 
5083   if (!isUInt<COMPUTE_PGM_RSRC2_USER_SGPR_COUNT_WIDTH>(UserSGPRCount))
5084     return TokError("too many user SGPRs enabled");
5085   AMDHSA_BITS_SET(KD.compute_pgm_rsrc2, COMPUTE_PGM_RSRC2_USER_SGPR_COUNT,
5086                   UserSGPRCount);
5087 
5088   if (isGFX90A()) {
5089     if (Seen.find(".amdhsa_accum_offset") == Seen.end())
5090       return TokError(".amdhsa_accum_offset directive is required");
5091     if (AccumOffset < 4 || AccumOffset > 256 || (AccumOffset & 3))
5092       return TokError("accum_offset should be in range [4..256] in "
5093                       "increments of 4");
5094     if (AccumOffset > alignTo(std::max((uint64_t)1, NextFreeVGPR), 4))
5095       return TokError("accum_offset exceeds total VGPR allocation");
5096     AMDHSA_BITS_SET(KD.compute_pgm_rsrc3, COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET,
5097                     (AccumOffset / 4 - 1));
5098   }
5099 
5100   if (IVersion.Major == 10) {
5101     // SharedVGPRCount < 16 checked by PARSE_ENTRY_BITS
5102     if (SharedVGPRCount && EnableWavefrontSize32) {
5103       return TokError("shared_vgpr_count directive not valid on "
5104                       "wavefront size 32");
5105     }
5106     if (SharedVGPRCount * 2 + VGPRBlocks > 63) {
5107       return TokError("shared_vgpr_count*2 + "
5108                       "compute_pgm_rsrc1.GRANULATED_WORKITEM_VGPR_COUNT cannot "
5109                       "exceed 63\n");
5110     }
5111   }
5112 
5113   getTargetStreamer().EmitAmdhsaKernelDescriptor(
5114       getSTI(), KernelName, KD, NextFreeVGPR, NextFreeSGPR, ReserveVCC,
5115       ReserveFlatScr);
5116   return false;
5117 }
5118 
5119 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectVersion() {
5120   uint32_t Major;
5121   uint32_t Minor;
5122 
5123   if (ParseDirectiveMajorMinor(Major, Minor))
5124     return true;
5125 
5126   getTargetStreamer().EmitDirectiveHSACodeObjectVersion(Major, Minor);
5127   return false;
5128 }
5129 
5130 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectISA() {
5131   uint32_t Major;
5132   uint32_t Minor;
5133   uint32_t Stepping;
5134   StringRef VendorName;
5135   StringRef ArchName;
5136 
5137   // If this directive has no arguments, then use the ISA version for the
5138   // targeted GPU.
5139   if (isToken(AsmToken::EndOfStatement)) {
5140     AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
5141     getTargetStreamer().EmitDirectiveHSACodeObjectISAV2(ISA.Major, ISA.Minor,
5142                                                         ISA.Stepping,
5143                                                         "AMD", "AMDGPU");
5144     return false;
5145   }
5146 
5147   if (ParseDirectiveMajorMinor(Major, Minor))
5148     return true;
5149 
5150   if (!trySkipToken(AsmToken::Comma))
5151     return TokError("stepping version number required, comma expected");
5152 
5153   if (ParseAsAbsoluteExpression(Stepping))
5154     return TokError("invalid stepping version");
5155 
5156   if (!trySkipToken(AsmToken::Comma))
5157     return TokError("vendor name required, comma expected");
5158 
5159   if (!parseString(VendorName, "invalid vendor name"))
5160     return true;
5161 
5162   if (!trySkipToken(AsmToken::Comma))
5163     return TokError("arch name required, comma expected");
5164 
5165   if (!parseString(ArchName, "invalid arch name"))
5166     return true;
5167 
5168   getTargetStreamer().EmitDirectiveHSACodeObjectISAV2(Major, Minor, Stepping,
5169                                                       VendorName, ArchName);
5170   return false;
5171 }
5172 
5173 bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID,
5174                                                amd_kernel_code_t &Header) {
5175   // max_scratch_backing_memory_byte_size is deprecated. Ignore it while parsing
5176   // assembly for backwards compatibility.
5177   if (ID == "max_scratch_backing_memory_byte_size") {
5178     Parser.eatToEndOfStatement();
5179     return false;
5180   }
5181 
5182   SmallString<40> ErrStr;
5183   raw_svector_ostream Err(ErrStr);
5184   if (!parseAmdKernelCodeField(ID, getParser(), Header, Err)) {
5185     return TokError(Err.str());
5186   }
5187   Lex();
5188 
5189   if (ID == "enable_wavefront_size32") {
5190     if (Header.code_properties & AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32) {
5191       if (!isGFX10Plus())
5192         return TokError("enable_wavefront_size32=1 is only allowed on GFX10+");
5193       if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32])
5194         return TokError("enable_wavefront_size32=1 requires +WavefrontSize32");
5195     } else {
5196       if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64])
5197         return TokError("enable_wavefront_size32=0 requires +WavefrontSize64");
5198     }
5199   }
5200 
5201   if (ID == "wavefront_size") {
5202     if (Header.wavefront_size == 5) {
5203       if (!isGFX10Plus())
5204         return TokError("wavefront_size=5 is only allowed on GFX10+");
5205       if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32])
5206         return TokError("wavefront_size=5 requires +WavefrontSize32");
5207     } else if (Header.wavefront_size == 6) {
5208       if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64])
5209         return TokError("wavefront_size=6 requires +WavefrontSize64");
5210     }
5211   }
5212 
5213   if (ID == "enable_wgp_mode") {
5214     if (G_00B848_WGP_MODE(Header.compute_pgm_resource_registers) &&
5215         !isGFX10Plus())
5216       return TokError("enable_wgp_mode=1 is only allowed on GFX10+");
5217   }
5218 
5219   if (ID == "enable_mem_ordered") {
5220     if (G_00B848_MEM_ORDERED(Header.compute_pgm_resource_registers) &&
5221         !isGFX10Plus())
5222       return TokError("enable_mem_ordered=1 is only allowed on GFX10+");
5223   }
5224 
5225   if (ID == "enable_fwd_progress") {
5226     if (G_00B848_FWD_PROGRESS(Header.compute_pgm_resource_registers) &&
5227         !isGFX10Plus())
5228       return TokError("enable_fwd_progress=1 is only allowed on GFX10+");
5229   }
5230 
5231   return false;
5232 }
5233 
5234 bool AMDGPUAsmParser::ParseDirectiveAMDKernelCodeT() {
5235   amd_kernel_code_t Header;
5236   AMDGPU::initDefaultAMDKernelCodeT(Header, &getSTI());
5237 
5238   while (true) {
5239     // Lex EndOfStatement.  This is in a while loop, because lexing a comment
5240     // will set the current token to EndOfStatement.
5241     while(trySkipToken(AsmToken::EndOfStatement));
5242 
5243     StringRef ID;
5244     if (!parseId(ID, "expected value identifier or .end_amd_kernel_code_t"))
5245       return true;
5246 
5247     if (ID == ".end_amd_kernel_code_t")
5248       break;
5249 
5250     if (ParseAMDKernelCodeTValue(ID, Header))
5251       return true;
5252   }
5253 
5254   getTargetStreamer().EmitAMDKernelCodeT(Header);
5255 
5256   return false;
5257 }
5258 
5259 bool AMDGPUAsmParser::ParseDirectiveAMDGPUHsaKernel() {
5260   StringRef KernelName;
5261   if (!parseId(KernelName, "expected symbol name"))
5262     return true;
5263 
5264   getTargetStreamer().EmitAMDGPUSymbolType(KernelName,
5265                                            ELF::STT_AMDGPU_HSA_KERNEL);
5266 
5267   KernelScope.initialize(getContext());
5268   return false;
5269 }
5270 
5271 bool AMDGPUAsmParser::ParseDirectiveISAVersion() {
5272   if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) {
5273     return Error(getLoc(),
5274                  ".amd_amdgpu_isa directive is not available on non-amdgcn "
5275                  "architectures");
5276   }
5277 
5278   auto TargetIDDirective = getLexer().getTok().getStringContents();
5279   if (getTargetStreamer().getTargetID()->toString() != TargetIDDirective)
5280     return Error(getParser().getTok().getLoc(), "target id must match options");
5281 
5282   getTargetStreamer().EmitISAVersion();
5283   Lex();
5284 
5285   return false;
5286 }
5287 
5288 bool AMDGPUAsmParser::ParseDirectiveHSAMetadata() {
5289   const char *AssemblerDirectiveBegin;
5290   const char *AssemblerDirectiveEnd;
5291   std::tie(AssemblerDirectiveBegin, AssemblerDirectiveEnd) =
5292       isHsaAbiVersion3AndAbove(&getSTI())
5293           ? std::make_tuple(HSAMD::V3::AssemblerDirectiveBegin,
5294                             HSAMD::V3::AssemblerDirectiveEnd)
5295           : std::make_tuple(HSAMD::AssemblerDirectiveBegin,
5296                             HSAMD::AssemblerDirectiveEnd);
5297 
5298   if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA) {
5299     return Error(getLoc(),
5300                  (Twine(AssemblerDirectiveBegin) + Twine(" directive is "
5301                  "not available on non-amdhsa OSes")).str());
5302   }
5303 
5304   std::string HSAMetadataString;
5305   if (ParseToEndDirective(AssemblerDirectiveBegin, AssemblerDirectiveEnd,
5306                           HSAMetadataString))
5307     return true;
5308 
5309   if (isHsaAbiVersion3AndAbove(&getSTI())) {
5310     if (!getTargetStreamer().EmitHSAMetadataV3(HSAMetadataString))
5311       return Error(getLoc(), "invalid HSA metadata");
5312   } else {
5313     if (!getTargetStreamer().EmitHSAMetadataV2(HSAMetadataString))
5314       return Error(getLoc(), "invalid HSA metadata");
5315   }
5316 
5317   return false;
5318 }
5319 
5320 /// Common code to parse out a block of text (typically YAML) between start and
5321 /// end directives.
5322 bool AMDGPUAsmParser::ParseToEndDirective(const char *AssemblerDirectiveBegin,
5323                                           const char *AssemblerDirectiveEnd,
5324                                           std::string &CollectString) {
5325 
5326   raw_string_ostream CollectStream(CollectString);
5327 
5328   getLexer().setSkipSpace(false);
5329 
5330   bool FoundEnd = false;
5331   while (!isToken(AsmToken::Eof)) {
5332     while (isToken(AsmToken::Space)) {
5333       CollectStream << getTokenStr();
5334       Lex();
5335     }
5336 
5337     if (trySkipId(AssemblerDirectiveEnd)) {
5338       FoundEnd = true;
5339       break;
5340     }
5341 
5342     CollectStream << Parser.parseStringToEndOfStatement()
5343                   << getContext().getAsmInfo()->getSeparatorString();
5344 
5345     Parser.eatToEndOfStatement();
5346   }
5347 
5348   getLexer().setSkipSpace(true);
5349 
5350   if (isToken(AsmToken::Eof) && !FoundEnd) {
5351     return TokError(Twine("expected directive ") +
5352                     Twine(AssemblerDirectiveEnd) + Twine(" not found"));
5353   }
5354 
5355   CollectStream.flush();
5356   return false;
5357 }
5358 
5359 /// Parse the assembler directive for new MsgPack-format PAL metadata.
5360 bool AMDGPUAsmParser::ParseDirectivePALMetadataBegin() {
5361   std::string String;
5362   if (ParseToEndDirective(AMDGPU::PALMD::AssemblerDirectiveBegin,
5363                           AMDGPU::PALMD::AssemblerDirectiveEnd, String))
5364     return true;
5365 
5366   auto PALMetadata = getTargetStreamer().getPALMetadata();
5367   if (!PALMetadata->setFromString(String))
5368     return Error(getLoc(), "invalid PAL metadata");
5369   return false;
5370 }
5371 
5372 /// Parse the assembler directive for old linear-format PAL metadata.
5373 bool AMDGPUAsmParser::ParseDirectivePALMetadata() {
5374   if (getSTI().getTargetTriple().getOS() != Triple::AMDPAL) {
5375     return Error(getLoc(),
5376                  (Twine(PALMD::AssemblerDirective) + Twine(" directive is "
5377                  "not available on non-amdpal OSes")).str());
5378   }
5379 
5380   auto PALMetadata = getTargetStreamer().getPALMetadata();
5381   PALMetadata->setLegacy();
5382   for (;;) {
5383     uint32_t Key, Value;
5384     if (ParseAsAbsoluteExpression(Key)) {
5385       return TokError(Twine("invalid value in ") +
5386                       Twine(PALMD::AssemblerDirective));
5387     }
5388     if (!trySkipToken(AsmToken::Comma)) {
5389       return TokError(Twine("expected an even number of values in ") +
5390                       Twine(PALMD::AssemblerDirective));
5391     }
5392     if (ParseAsAbsoluteExpression(Value)) {
5393       return TokError(Twine("invalid value in ") +
5394                       Twine(PALMD::AssemblerDirective));
5395     }
5396     PALMetadata->setRegister(Key, Value);
5397     if (!trySkipToken(AsmToken::Comma))
5398       break;
5399   }
5400   return false;
5401 }
5402 
5403 /// ParseDirectiveAMDGPULDS
5404 ///  ::= .amdgpu_lds identifier ',' size_expression [',' align_expression]
5405 bool AMDGPUAsmParser::ParseDirectiveAMDGPULDS() {
5406   if (getParser().checkForValidSection())
5407     return true;
5408 
5409   StringRef Name;
5410   SMLoc NameLoc = getLoc();
5411   if (getParser().parseIdentifier(Name))
5412     return TokError("expected identifier in directive");
5413 
5414   MCSymbol *Symbol = getContext().getOrCreateSymbol(Name);
5415   if (parseToken(AsmToken::Comma, "expected ','"))
5416     return true;
5417 
5418   unsigned LocalMemorySize = AMDGPU::IsaInfo::getLocalMemorySize(&getSTI());
5419 
5420   int64_t Size;
5421   SMLoc SizeLoc = getLoc();
5422   if (getParser().parseAbsoluteExpression(Size))
5423     return true;
5424   if (Size < 0)
5425     return Error(SizeLoc, "size must be non-negative");
5426   if (Size > LocalMemorySize)
5427     return Error(SizeLoc, "size is too large");
5428 
5429   int64_t Alignment = 4;
5430   if (trySkipToken(AsmToken::Comma)) {
5431     SMLoc AlignLoc = getLoc();
5432     if (getParser().parseAbsoluteExpression(Alignment))
5433       return true;
5434     if (Alignment < 0 || !isPowerOf2_64(Alignment))
5435       return Error(AlignLoc, "alignment must be a power of two");
5436 
5437     // Alignment larger than the size of LDS is possible in theory, as long
5438     // as the linker manages to place to symbol at address 0, but we do want
5439     // to make sure the alignment fits nicely into a 32-bit integer.
5440     if (Alignment >= 1u << 31)
5441       return Error(AlignLoc, "alignment is too large");
5442   }
5443 
5444   if (parseToken(AsmToken::EndOfStatement,
5445                  "unexpected token in '.amdgpu_lds' directive"))
5446     return true;
5447 
5448   Symbol->redefineIfPossible();
5449   if (!Symbol->isUndefined())
5450     return Error(NameLoc, "invalid symbol redefinition");
5451 
5452   getTargetStreamer().emitAMDGPULDS(Symbol, Size, Align(Alignment));
5453   return false;
5454 }
5455 
5456 bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) {
5457   StringRef IDVal = DirectiveID.getString();
5458 
5459   if (isHsaAbiVersion3AndAbove(&getSTI())) {
5460     if (IDVal == ".amdhsa_kernel")
5461      return ParseDirectiveAMDHSAKernel();
5462 
5463     // TODO: Restructure/combine with PAL metadata directive.
5464     if (IDVal == AMDGPU::HSAMD::V3::AssemblerDirectiveBegin)
5465       return ParseDirectiveHSAMetadata();
5466   } else {
5467     if (IDVal == ".hsa_code_object_version")
5468       return ParseDirectiveHSACodeObjectVersion();
5469 
5470     if (IDVal == ".hsa_code_object_isa")
5471       return ParseDirectiveHSACodeObjectISA();
5472 
5473     if (IDVal == ".amd_kernel_code_t")
5474       return ParseDirectiveAMDKernelCodeT();
5475 
5476     if (IDVal == ".amdgpu_hsa_kernel")
5477       return ParseDirectiveAMDGPUHsaKernel();
5478 
5479     if (IDVal == ".amd_amdgpu_isa")
5480       return ParseDirectiveISAVersion();
5481 
5482     if (IDVal == AMDGPU::HSAMD::AssemblerDirectiveBegin)
5483       return ParseDirectiveHSAMetadata();
5484   }
5485 
5486   if (IDVal == ".amdgcn_target")
5487     return ParseDirectiveAMDGCNTarget();
5488 
5489   if (IDVal == ".amdgpu_lds")
5490     return ParseDirectiveAMDGPULDS();
5491 
5492   if (IDVal == PALMD::AssemblerDirectiveBegin)
5493     return ParseDirectivePALMetadataBegin();
5494 
5495   if (IDVal == PALMD::AssemblerDirective)
5496     return ParseDirectivePALMetadata();
5497 
5498   return true;
5499 }
5500 
5501 bool AMDGPUAsmParser::subtargetHasRegister(const MCRegisterInfo &MRI,
5502                                            unsigned RegNo) {
5503 
5504   if (MRI.regsOverlap(AMDGPU::TTMP12_TTMP13_TTMP14_TTMP15, RegNo))
5505     return isGFX9Plus();
5506 
5507   // GFX10 has 2 more SGPRs 104 and 105.
5508   if (MRI.regsOverlap(AMDGPU::SGPR104_SGPR105, RegNo))
5509     return hasSGPR104_SGPR105();
5510 
5511   switch (RegNo) {
5512   case AMDGPU::SRC_SHARED_BASE:
5513   case AMDGPU::SRC_SHARED_LIMIT:
5514   case AMDGPU::SRC_PRIVATE_BASE:
5515   case AMDGPU::SRC_PRIVATE_LIMIT:
5516   case AMDGPU::SRC_POPS_EXITING_WAVE_ID:
5517     return isGFX9Plus();
5518   case AMDGPU::TBA:
5519   case AMDGPU::TBA_LO:
5520   case AMDGPU::TBA_HI:
5521   case AMDGPU::TMA:
5522   case AMDGPU::TMA_LO:
5523   case AMDGPU::TMA_HI:
5524     return !isGFX9Plus();
5525   case AMDGPU::XNACK_MASK:
5526   case AMDGPU::XNACK_MASK_LO:
5527   case AMDGPU::XNACK_MASK_HI:
5528     return (isVI() || isGFX9()) && getTargetStreamer().getTargetID()->isXnackSupported();
5529   case AMDGPU::SGPR_NULL:
5530     return isGFX10Plus();
5531   default:
5532     break;
5533   }
5534 
5535   if (isCI())
5536     return true;
5537 
5538   if (isSI() || isGFX10Plus()) {
5539     // No flat_scr on SI.
5540     // On GFX10 flat scratch is not a valid register operand and can only be
5541     // accessed with s_setreg/s_getreg.
5542     switch (RegNo) {
5543     case AMDGPU::FLAT_SCR:
5544     case AMDGPU::FLAT_SCR_LO:
5545     case AMDGPU::FLAT_SCR_HI:
5546       return false;
5547     default:
5548       return true;
5549     }
5550   }
5551 
5552   // VI only has 102 SGPRs, so make sure we aren't trying to use the 2 more that
5553   // SI/CI have.
5554   if (MRI.regsOverlap(AMDGPU::SGPR102_SGPR103, RegNo))
5555     return hasSGPR102_SGPR103();
5556 
5557   return true;
5558 }
5559 
5560 OperandMatchResultTy
5561 AMDGPUAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic,
5562                               OperandMode Mode) {
5563   // Try to parse with a custom parser
5564   OperandMatchResultTy ResTy = MatchOperandParserImpl(Operands, Mnemonic);
5565 
5566   // If we successfully parsed the operand or if there as an error parsing,
5567   // we are done.
5568   //
5569   // If we are parsing after we reach EndOfStatement then this means we
5570   // are appending default values to the Operands list.  This is only done
5571   // by custom parser, so we shouldn't continue on to the generic parsing.
5572   if (ResTy == MatchOperand_Success || ResTy == MatchOperand_ParseFail ||
5573       isToken(AsmToken::EndOfStatement))
5574     return ResTy;
5575 
5576   SMLoc RBraceLoc;
5577   SMLoc LBraceLoc = getLoc();
5578   if (Mode == OperandMode_NSA && trySkipToken(AsmToken::LBrac)) {
5579     unsigned Prefix = Operands.size();
5580 
5581     for (;;) {
5582       auto Loc = getLoc();
5583       ResTy = parseReg(Operands);
5584       if (ResTy == MatchOperand_NoMatch)
5585         Error(Loc, "expected a register");
5586       if (ResTy != MatchOperand_Success)
5587         return MatchOperand_ParseFail;
5588 
5589       RBraceLoc = getLoc();
5590       if (trySkipToken(AsmToken::RBrac))
5591         break;
5592 
5593       if (!skipToken(AsmToken::Comma,
5594                      "expected a comma or a closing square bracket")) {
5595         return MatchOperand_ParseFail;
5596       }
5597     }
5598 
5599     if (Operands.size() - Prefix > 1) {
5600       Operands.insert(Operands.begin() + Prefix,
5601                       AMDGPUOperand::CreateToken(this, "[", LBraceLoc));
5602       Operands.push_back(AMDGPUOperand::CreateToken(this, "]", RBraceLoc));
5603     }
5604 
5605     return MatchOperand_Success;
5606   }
5607 
5608   return parseRegOrImm(Operands);
5609 }
5610 
5611 StringRef AMDGPUAsmParser::parseMnemonicSuffix(StringRef Name) {
5612   // Clear any forced encodings from the previous instruction.
5613   setForcedEncodingSize(0);
5614   setForcedDPP(false);
5615   setForcedSDWA(false);
5616 
5617   if (Name.endswith("_e64")) {
5618     setForcedEncodingSize(64);
5619     return Name.substr(0, Name.size() - 4);
5620   } else if (Name.endswith("_e32")) {
5621     setForcedEncodingSize(32);
5622     return Name.substr(0, Name.size() - 4);
5623   } else if (Name.endswith("_dpp")) {
5624     setForcedDPP(true);
5625     return Name.substr(0, Name.size() - 4);
5626   } else if (Name.endswith("_sdwa")) {
5627     setForcedSDWA(true);
5628     return Name.substr(0, Name.size() - 5);
5629   }
5630   return Name;
5631 }
5632 
5633 static void applyMnemonicAliases(StringRef &Mnemonic,
5634                                  const FeatureBitset &Features,
5635                                  unsigned VariantID);
5636 
5637 bool AMDGPUAsmParser::ParseInstruction(ParseInstructionInfo &Info,
5638                                        StringRef Name,
5639                                        SMLoc NameLoc, OperandVector &Operands) {
5640   // Add the instruction mnemonic
5641   Name = parseMnemonicSuffix(Name);
5642 
5643   // If the target architecture uses MnemonicAlias, call it here to parse
5644   // operands correctly.
5645   applyMnemonicAliases(Name, getAvailableFeatures(), 0);
5646 
5647   Operands.push_back(AMDGPUOperand::CreateToken(this, Name, NameLoc));
5648 
5649   bool IsMIMG = Name.startswith("image_");
5650 
5651   while (!trySkipToken(AsmToken::EndOfStatement)) {
5652     OperandMode Mode = OperandMode_Default;
5653     if (IsMIMG && isGFX10Plus() && Operands.size() == 2)
5654       Mode = OperandMode_NSA;
5655     CPolSeen = 0;
5656     OperandMatchResultTy Res = parseOperand(Operands, Name, Mode);
5657 
5658     if (Res != MatchOperand_Success) {
5659       checkUnsupportedInstruction(Name, NameLoc);
5660       if (!Parser.hasPendingError()) {
5661         // FIXME: use real operand location rather than the current location.
5662         StringRef Msg =
5663           (Res == MatchOperand_ParseFail) ? "failed parsing operand." :
5664                                             "not a valid operand.";
5665         Error(getLoc(), Msg);
5666       }
5667       while (!trySkipToken(AsmToken::EndOfStatement)) {
5668         lex();
5669       }
5670       return true;
5671     }
5672 
5673     // Eat the comma or space if there is one.
5674     trySkipToken(AsmToken::Comma);
5675   }
5676 
5677   return false;
5678 }
5679 
5680 //===----------------------------------------------------------------------===//
5681 // Utility functions
5682 //===----------------------------------------------------------------------===//
5683 
5684 OperandMatchResultTy
5685 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, int64_t &IntVal) {
5686 
5687   if (!trySkipId(Prefix, AsmToken::Colon))
5688     return MatchOperand_NoMatch;
5689 
5690   return parseExpr(IntVal) ? MatchOperand_Success : MatchOperand_ParseFail;
5691 }
5692 
5693 OperandMatchResultTy
5694 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
5695                                     AMDGPUOperand::ImmTy ImmTy,
5696                                     bool (*ConvertResult)(int64_t&)) {
5697   SMLoc S = getLoc();
5698   int64_t Value = 0;
5699 
5700   OperandMatchResultTy Res = parseIntWithPrefix(Prefix, Value);
5701   if (Res != MatchOperand_Success)
5702     return Res;
5703 
5704   if (ConvertResult && !ConvertResult(Value)) {
5705     Error(S, "invalid " + StringRef(Prefix) + " value.");
5706   }
5707 
5708   Operands.push_back(AMDGPUOperand::CreateImm(this, Value, S, ImmTy));
5709   return MatchOperand_Success;
5710 }
5711 
5712 OperandMatchResultTy
5713 AMDGPUAsmParser::parseOperandArrayWithPrefix(const char *Prefix,
5714                                              OperandVector &Operands,
5715                                              AMDGPUOperand::ImmTy ImmTy,
5716                                              bool (*ConvertResult)(int64_t&)) {
5717   SMLoc S = getLoc();
5718   if (!trySkipId(Prefix, AsmToken::Colon))
5719     return MatchOperand_NoMatch;
5720 
5721   if (!skipToken(AsmToken::LBrac, "expected a left square bracket"))
5722     return MatchOperand_ParseFail;
5723 
5724   unsigned Val = 0;
5725   const unsigned MaxSize = 4;
5726 
5727   // FIXME: How to verify the number of elements matches the number of src
5728   // operands?
5729   for (int I = 0; ; ++I) {
5730     int64_t Op;
5731     SMLoc Loc = getLoc();
5732     if (!parseExpr(Op))
5733       return MatchOperand_ParseFail;
5734 
5735     if (Op != 0 && Op != 1) {
5736       Error(Loc, "invalid " + StringRef(Prefix) + " value.");
5737       return MatchOperand_ParseFail;
5738     }
5739 
5740     Val |= (Op << I);
5741 
5742     if (trySkipToken(AsmToken::RBrac))
5743       break;
5744 
5745     if (I + 1 == MaxSize) {
5746       Error(getLoc(), "expected a closing square bracket");
5747       return MatchOperand_ParseFail;
5748     }
5749 
5750     if (!skipToken(AsmToken::Comma, "expected a comma"))
5751       return MatchOperand_ParseFail;
5752   }
5753 
5754   Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S, ImmTy));
5755   return MatchOperand_Success;
5756 }
5757 
5758 OperandMatchResultTy
5759 AMDGPUAsmParser::parseNamedBit(StringRef Name, OperandVector &Operands,
5760                                AMDGPUOperand::ImmTy ImmTy) {
5761   int64_t Bit;
5762   SMLoc S = getLoc();
5763 
5764   if (trySkipId(Name)) {
5765     Bit = 1;
5766   } else if (trySkipId("no", Name)) {
5767     Bit = 0;
5768   } else {
5769     return MatchOperand_NoMatch;
5770   }
5771 
5772   if (Name == "r128" && !hasMIMG_R128()) {
5773     Error(S, "r128 modifier is not supported on this GPU");
5774     return MatchOperand_ParseFail;
5775   }
5776   if (Name == "a16" && !isGFX9() && !hasGFX10A16()) {
5777     Error(S, "a16 modifier is not supported on this GPU");
5778     return MatchOperand_ParseFail;
5779   }
5780 
5781   if (isGFX9() && ImmTy == AMDGPUOperand::ImmTyA16)
5782     ImmTy = AMDGPUOperand::ImmTyR128A16;
5783 
5784   Operands.push_back(AMDGPUOperand::CreateImm(this, Bit, S, ImmTy));
5785   return MatchOperand_Success;
5786 }
5787 
5788 OperandMatchResultTy
5789 AMDGPUAsmParser::parseCPol(OperandVector &Operands) {
5790   unsigned CPolOn = 0;
5791   unsigned CPolOff = 0;
5792   SMLoc S = getLoc();
5793 
5794   StringRef Mnemo = ((AMDGPUOperand &)*Operands[0]).getToken();
5795   if (isGFX940() && !Mnemo.startswith("s_")) {
5796     if (trySkipId("sc0"))
5797       CPolOn = AMDGPU::CPol::SC0;
5798     else if (trySkipId("nosc0"))
5799       CPolOff = AMDGPU::CPol::SC0;
5800     else if (trySkipId("nt"))
5801       CPolOn = AMDGPU::CPol::NT;
5802     else if (trySkipId("nont"))
5803       CPolOff = AMDGPU::CPol::NT;
5804     else if (trySkipId("sc1"))
5805       CPolOn = AMDGPU::CPol::SC1;
5806     else if (trySkipId("nosc1"))
5807       CPolOff = AMDGPU::CPol::SC1;
5808     else
5809       return MatchOperand_NoMatch;
5810   }
5811   else if (trySkipId("glc"))
5812     CPolOn = AMDGPU::CPol::GLC;
5813   else if (trySkipId("noglc"))
5814     CPolOff = AMDGPU::CPol::GLC;
5815   else if (trySkipId("slc"))
5816     CPolOn = AMDGPU::CPol::SLC;
5817   else if (trySkipId("noslc"))
5818     CPolOff = AMDGPU::CPol::SLC;
5819   else if (trySkipId("dlc"))
5820     CPolOn = AMDGPU::CPol::DLC;
5821   else if (trySkipId("nodlc"))
5822     CPolOff = AMDGPU::CPol::DLC;
5823   else if (trySkipId("scc"))
5824     CPolOn = AMDGPU::CPol::SCC;
5825   else if (trySkipId("noscc"))
5826     CPolOff = AMDGPU::CPol::SCC;
5827   else
5828     return MatchOperand_NoMatch;
5829 
5830   if (!isGFX10Plus() && ((CPolOn | CPolOff) & AMDGPU::CPol::DLC)) {
5831     Error(S, "dlc modifier is not supported on this GPU");
5832     return MatchOperand_ParseFail;
5833   }
5834 
5835   if (!isGFX90A() && ((CPolOn | CPolOff) & AMDGPU::CPol::SCC)) {
5836     Error(S, "scc modifier is not supported on this GPU");
5837     return MatchOperand_ParseFail;
5838   }
5839 
5840   if (CPolSeen & (CPolOn | CPolOff)) {
5841     Error(S, "duplicate cache policy modifier");
5842     return MatchOperand_ParseFail;
5843   }
5844 
5845   CPolSeen |= (CPolOn | CPolOff);
5846 
5847   for (unsigned I = 1; I != Operands.size(); ++I) {
5848     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
5849     if (Op.isCPol()) {
5850       Op.setImm((Op.getImm() | CPolOn) & ~CPolOff);
5851       return MatchOperand_Success;
5852     }
5853   }
5854 
5855   Operands.push_back(AMDGPUOperand::CreateImm(this, CPolOn, S,
5856                                               AMDGPUOperand::ImmTyCPol));
5857 
5858   return MatchOperand_Success;
5859 }
5860 
5861 static void addOptionalImmOperand(
5862   MCInst& Inst, const OperandVector& Operands,
5863   AMDGPUAsmParser::OptionalImmIndexMap& OptionalIdx,
5864   AMDGPUOperand::ImmTy ImmT,
5865   int64_t Default = 0) {
5866   auto i = OptionalIdx.find(ImmT);
5867   if (i != OptionalIdx.end()) {
5868     unsigned Idx = i->second;
5869     ((AMDGPUOperand &)*Operands[Idx]).addImmOperands(Inst, 1);
5870   } else {
5871     Inst.addOperand(MCOperand::createImm(Default));
5872   }
5873 }
5874 
5875 OperandMatchResultTy
5876 AMDGPUAsmParser::parseStringWithPrefix(StringRef Prefix,
5877                                        StringRef &Value,
5878                                        SMLoc &StringLoc) {
5879   if (!trySkipId(Prefix, AsmToken::Colon))
5880     return MatchOperand_NoMatch;
5881 
5882   StringLoc = getLoc();
5883   return parseId(Value, "expected an identifier") ? MatchOperand_Success
5884                                                   : MatchOperand_ParseFail;
5885 }
5886 
5887 //===----------------------------------------------------------------------===//
5888 // MTBUF format
5889 //===----------------------------------------------------------------------===//
5890 
5891 bool AMDGPUAsmParser::tryParseFmt(const char *Pref,
5892                                   int64_t MaxVal,
5893                                   int64_t &Fmt) {
5894   int64_t Val;
5895   SMLoc Loc = getLoc();
5896 
5897   auto Res = parseIntWithPrefix(Pref, Val);
5898   if (Res == MatchOperand_ParseFail)
5899     return false;
5900   if (Res == MatchOperand_NoMatch)
5901     return true;
5902 
5903   if (Val < 0 || Val > MaxVal) {
5904     Error(Loc, Twine("out of range ", StringRef(Pref)));
5905     return false;
5906   }
5907 
5908   Fmt = Val;
5909   return true;
5910 }
5911 
5912 // dfmt and nfmt (in a tbuffer instruction) are parsed as one to allow their
5913 // values to live in a joint format operand in the MCInst encoding.
5914 OperandMatchResultTy
5915 AMDGPUAsmParser::parseDfmtNfmt(int64_t &Format) {
5916   using namespace llvm::AMDGPU::MTBUFFormat;
5917 
5918   int64_t Dfmt = DFMT_UNDEF;
5919   int64_t Nfmt = NFMT_UNDEF;
5920 
5921   // dfmt and nfmt can appear in either order, and each is optional.
5922   for (int I = 0; I < 2; ++I) {
5923     if (Dfmt == DFMT_UNDEF && !tryParseFmt("dfmt", DFMT_MAX, Dfmt))
5924       return MatchOperand_ParseFail;
5925 
5926     if (Nfmt == NFMT_UNDEF && !tryParseFmt("nfmt", NFMT_MAX, Nfmt)) {
5927       return MatchOperand_ParseFail;
5928     }
5929     // Skip optional comma between dfmt/nfmt
5930     // but guard against 2 commas following each other.
5931     if ((Dfmt == DFMT_UNDEF) != (Nfmt == NFMT_UNDEF) &&
5932         !peekToken().is(AsmToken::Comma)) {
5933       trySkipToken(AsmToken::Comma);
5934     }
5935   }
5936 
5937   if (Dfmt == DFMT_UNDEF && Nfmt == NFMT_UNDEF)
5938     return MatchOperand_NoMatch;
5939 
5940   Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt;
5941   Nfmt = (Nfmt == NFMT_UNDEF) ? NFMT_DEFAULT : Nfmt;
5942 
5943   Format = encodeDfmtNfmt(Dfmt, Nfmt);
5944   return MatchOperand_Success;
5945 }
5946 
5947 OperandMatchResultTy
5948 AMDGPUAsmParser::parseUfmt(int64_t &Format) {
5949   using namespace llvm::AMDGPU::MTBUFFormat;
5950 
5951   int64_t Fmt = UFMT_UNDEF;
5952 
5953   if (!tryParseFmt("format", UFMT_MAX, Fmt))
5954     return MatchOperand_ParseFail;
5955 
5956   if (Fmt == UFMT_UNDEF)
5957     return MatchOperand_NoMatch;
5958 
5959   Format = Fmt;
5960   return MatchOperand_Success;
5961 }
5962 
5963 bool AMDGPUAsmParser::matchDfmtNfmt(int64_t &Dfmt,
5964                                     int64_t &Nfmt,
5965                                     StringRef FormatStr,
5966                                     SMLoc Loc) {
5967   using namespace llvm::AMDGPU::MTBUFFormat;
5968   int64_t Format;
5969 
5970   Format = getDfmt(FormatStr);
5971   if (Format != DFMT_UNDEF) {
5972     Dfmt = Format;
5973     return true;
5974   }
5975 
5976   Format = getNfmt(FormatStr, getSTI());
5977   if (Format != NFMT_UNDEF) {
5978     Nfmt = Format;
5979     return true;
5980   }
5981 
5982   Error(Loc, "unsupported format");
5983   return false;
5984 }
5985 
5986 OperandMatchResultTy
5987 AMDGPUAsmParser::parseSymbolicSplitFormat(StringRef FormatStr,
5988                                           SMLoc FormatLoc,
5989                                           int64_t &Format) {
5990   using namespace llvm::AMDGPU::MTBUFFormat;
5991 
5992   int64_t Dfmt = DFMT_UNDEF;
5993   int64_t Nfmt = NFMT_UNDEF;
5994   if (!matchDfmtNfmt(Dfmt, Nfmt, FormatStr, FormatLoc))
5995     return MatchOperand_ParseFail;
5996 
5997   if (trySkipToken(AsmToken::Comma)) {
5998     StringRef Str;
5999     SMLoc Loc = getLoc();
6000     if (!parseId(Str, "expected a format string") ||
6001         !matchDfmtNfmt(Dfmt, Nfmt, Str, Loc)) {
6002       return MatchOperand_ParseFail;
6003     }
6004     if (Dfmt == DFMT_UNDEF) {
6005       Error(Loc, "duplicate numeric format");
6006       return MatchOperand_ParseFail;
6007     } else if (Nfmt == NFMT_UNDEF) {
6008       Error(Loc, "duplicate data format");
6009       return MatchOperand_ParseFail;
6010     }
6011   }
6012 
6013   Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt;
6014   Nfmt = (Nfmt == NFMT_UNDEF) ? NFMT_DEFAULT : Nfmt;
6015 
6016   if (isGFX10Plus()) {
6017     auto Ufmt = convertDfmtNfmt2Ufmt(Dfmt, Nfmt, getSTI());
6018     if (Ufmt == UFMT_UNDEF) {
6019       Error(FormatLoc, "unsupported format");
6020       return MatchOperand_ParseFail;
6021     }
6022     Format = Ufmt;
6023   } else {
6024     Format = encodeDfmtNfmt(Dfmt, Nfmt);
6025   }
6026 
6027   return MatchOperand_Success;
6028 }
6029 
6030 OperandMatchResultTy
6031 AMDGPUAsmParser::parseSymbolicUnifiedFormat(StringRef FormatStr,
6032                                             SMLoc Loc,
6033                                             int64_t &Format) {
6034   using namespace llvm::AMDGPU::MTBUFFormat;
6035 
6036   auto Id = getUnifiedFormat(FormatStr, getSTI());
6037   if (Id == UFMT_UNDEF)
6038     return MatchOperand_NoMatch;
6039 
6040   if (!isGFX10Plus()) {
6041     Error(Loc, "unified format is not supported on this GPU");
6042     return MatchOperand_ParseFail;
6043   }
6044 
6045   Format = Id;
6046   return MatchOperand_Success;
6047 }
6048 
6049 OperandMatchResultTy
6050 AMDGPUAsmParser::parseNumericFormat(int64_t &Format) {
6051   using namespace llvm::AMDGPU::MTBUFFormat;
6052   SMLoc Loc = getLoc();
6053 
6054   if (!parseExpr(Format))
6055     return MatchOperand_ParseFail;
6056   if (!isValidFormatEncoding(Format, getSTI())) {
6057     Error(Loc, "out of range format");
6058     return MatchOperand_ParseFail;
6059   }
6060 
6061   return MatchOperand_Success;
6062 }
6063 
6064 OperandMatchResultTy
6065 AMDGPUAsmParser::parseSymbolicOrNumericFormat(int64_t &Format) {
6066   using namespace llvm::AMDGPU::MTBUFFormat;
6067 
6068   if (!trySkipId("format", AsmToken::Colon))
6069     return MatchOperand_NoMatch;
6070 
6071   if (trySkipToken(AsmToken::LBrac)) {
6072     StringRef FormatStr;
6073     SMLoc Loc = getLoc();
6074     if (!parseId(FormatStr, "expected a format string"))
6075       return MatchOperand_ParseFail;
6076 
6077     auto Res = parseSymbolicUnifiedFormat(FormatStr, Loc, Format);
6078     if (Res == MatchOperand_NoMatch)
6079       Res = parseSymbolicSplitFormat(FormatStr, Loc, Format);
6080     if (Res != MatchOperand_Success)
6081       return Res;
6082 
6083     if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
6084       return MatchOperand_ParseFail;
6085 
6086     return MatchOperand_Success;
6087   }
6088 
6089   return parseNumericFormat(Format);
6090 }
6091 
6092 OperandMatchResultTy
6093 AMDGPUAsmParser::parseFORMAT(OperandVector &Operands) {
6094   using namespace llvm::AMDGPU::MTBUFFormat;
6095 
6096   int64_t Format = getDefaultFormatEncoding(getSTI());
6097   OperandMatchResultTy Res;
6098   SMLoc Loc = getLoc();
6099 
6100   // Parse legacy format syntax.
6101   Res = isGFX10Plus() ? parseUfmt(Format) : parseDfmtNfmt(Format);
6102   if (Res == MatchOperand_ParseFail)
6103     return Res;
6104 
6105   bool FormatFound = (Res == MatchOperand_Success);
6106 
6107   Operands.push_back(
6108     AMDGPUOperand::CreateImm(this, Format, Loc, AMDGPUOperand::ImmTyFORMAT));
6109 
6110   if (FormatFound)
6111     trySkipToken(AsmToken::Comma);
6112 
6113   if (isToken(AsmToken::EndOfStatement)) {
6114     // We are expecting an soffset operand,
6115     // but let matcher handle the error.
6116     return MatchOperand_Success;
6117   }
6118 
6119   // Parse soffset.
6120   Res = parseRegOrImm(Operands);
6121   if (Res != MatchOperand_Success)
6122     return Res;
6123 
6124   trySkipToken(AsmToken::Comma);
6125 
6126   if (!FormatFound) {
6127     Res = parseSymbolicOrNumericFormat(Format);
6128     if (Res == MatchOperand_ParseFail)
6129       return Res;
6130     if (Res == MatchOperand_Success) {
6131       auto Size = Operands.size();
6132       AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands[Size - 2]);
6133       assert(Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyFORMAT);
6134       Op.setImm(Format);
6135     }
6136     return MatchOperand_Success;
6137   }
6138 
6139   if (isId("format") && peekToken().is(AsmToken::Colon)) {
6140     Error(getLoc(), "duplicate format");
6141     return MatchOperand_ParseFail;
6142   }
6143   return MatchOperand_Success;
6144 }
6145 
6146 //===----------------------------------------------------------------------===//
6147 // ds
6148 //===----------------------------------------------------------------------===//
6149 
6150 void AMDGPUAsmParser::cvtDSOffset01(MCInst &Inst,
6151                                     const OperandVector &Operands) {
6152   OptionalImmIndexMap OptionalIdx;
6153 
6154   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
6155     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
6156 
6157     // Add the register arguments
6158     if (Op.isReg()) {
6159       Op.addRegOperands(Inst, 1);
6160       continue;
6161     }
6162 
6163     // Handle optional arguments
6164     OptionalIdx[Op.getImmTy()] = i;
6165   }
6166 
6167   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset0);
6168   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset1);
6169   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS);
6170 
6171   Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0
6172 }
6173 
6174 void AMDGPUAsmParser::cvtDSImpl(MCInst &Inst, const OperandVector &Operands,
6175                                 bool IsGdsHardcoded) {
6176   OptionalImmIndexMap OptionalIdx;
6177 
6178   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
6179     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
6180 
6181     // Add the register arguments
6182     if (Op.isReg()) {
6183       Op.addRegOperands(Inst, 1);
6184       continue;
6185     }
6186 
6187     if (Op.isToken() && Op.getToken() == "gds") {
6188       IsGdsHardcoded = true;
6189       continue;
6190     }
6191 
6192     // Handle optional arguments
6193     OptionalIdx[Op.getImmTy()] = i;
6194   }
6195 
6196   AMDGPUOperand::ImmTy OffsetType =
6197     (Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_gfx10 ||
6198      Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_gfx6_gfx7 ||
6199      Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_vi) ? AMDGPUOperand::ImmTySwizzle :
6200                                                       AMDGPUOperand::ImmTyOffset;
6201 
6202   addOptionalImmOperand(Inst, Operands, OptionalIdx, OffsetType);
6203 
6204   if (!IsGdsHardcoded) {
6205     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS);
6206   }
6207   Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0
6208 }
6209 
6210 void AMDGPUAsmParser::cvtExp(MCInst &Inst, const OperandVector &Operands) {
6211   OptionalImmIndexMap OptionalIdx;
6212 
6213   unsigned OperandIdx[4];
6214   unsigned EnMask = 0;
6215   int SrcIdx = 0;
6216 
6217   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
6218     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
6219 
6220     // Add the register arguments
6221     if (Op.isReg()) {
6222       assert(SrcIdx < 4);
6223       OperandIdx[SrcIdx] = Inst.size();
6224       Op.addRegOperands(Inst, 1);
6225       ++SrcIdx;
6226       continue;
6227     }
6228 
6229     if (Op.isOff()) {
6230       assert(SrcIdx < 4);
6231       OperandIdx[SrcIdx] = Inst.size();
6232       Inst.addOperand(MCOperand::createReg(AMDGPU::NoRegister));
6233       ++SrcIdx;
6234       continue;
6235     }
6236 
6237     if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyExpTgt) {
6238       Op.addImmOperands(Inst, 1);
6239       continue;
6240     }
6241 
6242     if (Op.isToken() && Op.getToken() == "done")
6243       continue;
6244 
6245     // Handle optional arguments
6246     OptionalIdx[Op.getImmTy()] = i;
6247   }
6248 
6249   assert(SrcIdx == 4);
6250 
6251   bool Compr = false;
6252   if (OptionalIdx.find(AMDGPUOperand::ImmTyExpCompr) != OptionalIdx.end()) {
6253     Compr = true;
6254     Inst.getOperand(OperandIdx[1]) = Inst.getOperand(OperandIdx[2]);
6255     Inst.getOperand(OperandIdx[2]).setReg(AMDGPU::NoRegister);
6256     Inst.getOperand(OperandIdx[3]).setReg(AMDGPU::NoRegister);
6257   }
6258 
6259   for (auto i = 0; i < SrcIdx; ++i) {
6260     if (Inst.getOperand(OperandIdx[i]).getReg() != AMDGPU::NoRegister) {
6261       EnMask |= Compr? (0x3 << i * 2) : (0x1 << i);
6262     }
6263   }
6264 
6265   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpVM);
6266   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpCompr);
6267 
6268   Inst.addOperand(MCOperand::createImm(EnMask));
6269 }
6270 
6271 //===----------------------------------------------------------------------===//
6272 // s_waitcnt
6273 //===----------------------------------------------------------------------===//
6274 
6275 static bool
6276 encodeCnt(
6277   const AMDGPU::IsaVersion ISA,
6278   int64_t &IntVal,
6279   int64_t CntVal,
6280   bool Saturate,
6281   unsigned (*encode)(const IsaVersion &Version, unsigned, unsigned),
6282   unsigned (*decode)(const IsaVersion &Version, unsigned))
6283 {
6284   bool Failed = false;
6285 
6286   IntVal = encode(ISA, IntVal, CntVal);
6287   if (CntVal != decode(ISA, IntVal)) {
6288     if (Saturate) {
6289       IntVal = encode(ISA, IntVal, -1);
6290     } else {
6291       Failed = true;
6292     }
6293   }
6294   return Failed;
6295 }
6296 
6297 bool AMDGPUAsmParser::parseCnt(int64_t &IntVal) {
6298 
6299   SMLoc CntLoc = getLoc();
6300   StringRef CntName = getTokenStr();
6301 
6302   if (!skipToken(AsmToken::Identifier, "expected a counter name") ||
6303       !skipToken(AsmToken::LParen, "expected a left parenthesis"))
6304     return false;
6305 
6306   int64_t CntVal;
6307   SMLoc ValLoc = getLoc();
6308   if (!parseExpr(CntVal))
6309     return false;
6310 
6311   AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
6312 
6313   bool Failed = true;
6314   bool Sat = CntName.endswith("_sat");
6315 
6316   if (CntName == "vmcnt" || CntName == "vmcnt_sat") {
6317     Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeVmcnt, decodeVmcnt);
6318   } else if (CntName == "expcnt" || CntName == "expcnt_sat") {
6319     Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeExpcnt, decodeExpcnt);
6320   } else if (CntName == "lgkmcnt" || CntName == "lgkmcnt_sat") {
6321     Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeLgkmcnt, decodeLgkmcnt);
6322   } else {
6323     Error(CntLoc, "invalid counter name " + CntName);
6324     return false;
6325   }
6326 
6327   if (Failed) {
6328     Error(ValLoc, "too large value for " + CntName);
6329     return false;
6330   }
6331 
6332   if (!skipToken(AsmToken::RParen, "expected a closing parenthesis"))
6333     return false;
6334 
6335   if (trySkipToken(AsmToken::Amp) || trySkipToken(AsmToken::Comma)) {
6336     if (isToken(AsmToken::EndOfStatement)) {
6337       Error(getLoc(), "expected a counter name");
6338       return false;
6339     }
6340   }
6341 
6342   return true;
6343 }
6344 
6345 OperandMatchResultTy
6346 AMDGPUAsmParser::parseSWaitCntOps(OperandVector &Operands) {
6347   AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
6348   int64_t Waitcnt = getWaitcntBitMask(ISA);
6349   SMLoc S = getLoc();
6350 
6351   if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) {
6352     while (!isToken(AsmToken::EndOfStatement)) {
6353       if (!parseCnt(Waitcnt))
6354         return MatchOperand_ParseFail;
6355     }
6356   } else {
6357     if (!parseExpr(Waitcnt))
6358       return MatchOperand_ParseFail;
6359   }
6360 
6361   Operands.push_back(AMDGPUOperand::CreateImm(this, Waitcnt, S));
6362   return MatchOperand_Success;
6363 }
6364 
6365 bool AMDGPUAsmParser::parseDelay(int64_t &Delay) {
6366   SMLoc FieldLoc = getLoc();
6367   StringRef FieldName = getTokenStr();
6368   if (!skipToken(AsmToken::Identifier, "expected a field name") ||
6369       !skipToken(AsmToken::LParen, "expected a left parenthesis"))
6370     return false;
6371 
6372   SMLoc ValueLoc = getLoc();
6373   StringRef ValueName = getTokenStr();
6374   if (!skipToken(AsmToken::Identifier, "expected a value name") ||
6375       !skipToken(AsmToken::RParen, "expected a right parenthesis"))
6376     return false;
6377 
6378   unsigned Shift;
6379   if (FieldName == "instid0") {
6380     Shift = 0;
6381   } else if (FieldName == "instskip") {
6382     Shift = 4;
6383   } else if (FieldName == "instid1") {
6384     Shift = 7;
6385   } else {
6386     Error(FieldLoc, "invalid field name " + FieldName);
6387     return false;
6388   }
6389 
6390   int Value;
6391   if (Shift == 4) {
6392     // Parse values for instskip.
6393     Value = StringSwitch<int>(ValueName)
6394                 .Case("SAME", 0)
6395                 .Case("NEXT", 1)
6396                 .Case("SKIP_1", 2)
6397                 .Case("SKIP_2", 3)
6398                 .Case("SKIP_3", 4)
6399                 .Case("SKIP_4", 5)
6400                 .Default(-1);
6401   } else {
6402     // Parse values for instid0 and instid1.
6403     Value = StringSwitch<int>(ValueName)
6404                 .Case("NO_DEP", 0)
6405                 .Case("VALU_DEP_1", 1)
6406                 .Case("VALU_DEP_2", 2)
6407                 .Case("VALU_DEP_3", 3)
6408                 .Case("VALU_DEP_4", 4)
6409                 .Case("TRANS32_DEP_1", 5)
6410                 .Case("TRANS32_DEP_2", 6)
6411                 .Case("TRANS32_DEP_3", 7)
6412                 .Case("FMA_ACCUM_CYCLE_1", 8)
6413                 .Case("SALU_CYCLE_1", 9)
6414                 .Case("SALU_CYCLE_2", 10)
6415                 .Case("SALU_CYCLE_3", 11)
6416                 .Default(-1);
6417   }
6418   if (Value < 0) {
6419     Error(ValueLoc, "invalid value name " + ValueName);
6420     return false;
6421   }
6422 
6423   Delay |= Value << Shift;
6424   return true;
6425 }
6426 
6427 OperandMatchResultTy
6428 AMDGPUAsmParser::parseSDelayAluOps(OperandVector &Operands) {
6429   int64_t Delay = 0;
6430   SMLoc S = getLoc();
6431 
6432   if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) {
6433     do {
6434       if (!parseDelay(Delay))
6435         return MatchOperand_ParseFail;
6436     } while (trySkipToken(AsmToken::Pipe));
6437   } else {
6438     if (!parseExpr(Delay))
6439       return MatchOperand_ParseFail;
6440   }
6441 
6442   Operands.push_back(AMDGPUOperand::CreateImm(this, Delay, S));
6443   return MatchOperand_Success;
6444 }
6445 
6446 bool
6447 AMDGPUOperand::isSWaitCnt() const {
6448   return isImm();
6449 }
6450 
6451 bool AMDGPUOperand::isSDelayAlu() const { return isImm(); }
6452 
6453 //===----------------------------------------------------------------------===//
6454 // DepCtr
6455 //===----------------------------------------------------------------------===//
6456 
6457 void AMDGPUAsmParser::depCtrError(SMLoc Loc, int ErrorId,
6458                                   StringRef DepCtrName) {
6459   switch (ErrorId) {
6460   case OPR_ID_UNKNOWN:
6461     Error(Loc, Twine("invalid counter name ", DepCtrName));
6462     return;
6463   case OPR_ID_UNSUPPORTED:
6464     Error(Loc, Twine(DepCtrName, " is not supported on this GPU"));
6465     return;
6466   case OPR_ID_DUPLICATE:
6467     Error(Loc, Twine("duplicate counter name ", DepCtrName));
6468     return;
6469   case OPR_VAL_INVALID:
6470     Error(Loc, Twine("invalid value for ", DepCtrName));
6471     return;
6472   default:
6473     assert(false);
6474   }
6475 }
6476 
6477 bool AMDGPUAsmParser::parseDepCtr(int64_t &DepCtr, unsigned &UsedOprMask) {
6478 
6479   using namespace llvm::AMDGPU::DepCtr;
6480 
6481   SMLoc DepCtrLoc = getLoc();
6482   StringRef DepCtrName = getTokenStr();
6483 
6484   if (!skipToken(AsmToken::Identifier, "expected a counter name") ||
6485       !skipToken(AsmToken::LParen, "expected a left parenthesis"))
6486     return false;
6487 
6488   int64_t ExprVal;
6489   if (!parseExpr(ExprVal))
6490     return false;
6491 
6492   unsigned PrevOprMask = UsedOprMask;
6493   int CntVal = encodeDepCtr(DepCtrName, ExprVal, UsedOprMask, getSTI());
6494 
6495   if (CntVal < 0) {
6496     depCtrError(DepCtrLoc, CntVal, DepCtrName);
6497     return false;
6498   }
6499 
6500   if (!skipToken(AsmToken::RParen, "expected a closing parenthesis"))
6501     return false;
6502 
6503   if (trySkipToken(AsmToken::Amp) || trySkipToken(AsmToken::Comma)) {
6504     if (isToken(AsmToken::EndOfStatement)) {
6505       Error(getLoc(), "expected a counter name");
6506       return false;
6507     }
6508   }
6509 
6510   unsigned CntValMask = PrevOprMask ^ UsedOprMask;
6511   DepCtr = (DepCtr & ~CntValMask) | CntVal;
6512   return true;
6513 }
6514 
6515 OperandMatchResultTy AMDGPUAsmParser::parseDepCtrOps(OperandVector &Operands) {
6516   using namespace llvm::AMDGPU::DepCtr;
6517 
6518   int64_t DepCtr = getDefaultDepCtrEncoding(getSTI());
6519   SMLoc Loc = getLoc();
6520 
6521   if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) {
6522     unsigned UsedOprMask = 0;
6523     while (!isToken(AsmToken::EndOfStatement)) {
6524       if (!parseDepCtr(DepCtr, UsedOprMask))
6525         return MatchOperand_ParseFail;
6526     }
6527   } else {
6528     if (!parseExpr(DepCtr))
6529       return MatchOperand_ParseFail;
6530   }
6531 
6532   Operands.push_back(AMDGPUOperand::CreateImm(this, DepCtr, Loc));
6533   return MatchOperand_Success;
6534 }
6535 
6536 bool AMDGPUOperand::isDepCtr() const { return isS16Imm(); }
6537 
6538 //===----------------------------------------------------------------------===//
6539 // hwreg
6540 //===----------------------------------------------------------------------===//
6541 
6542 bool
6543 AMDGPUAsmParser::parseHwregBody(OperandInfoTy &HwReg,
6544                                 OperandInfoTy &Offset,
6545                                 OperandInfoTy &Width) {
6546   using namespace llvm::AMDGPU::Hwreg;
6547 
6548   // The register may be specified by name or using a numeric code
6549   HwReg.Loc = getLoc();
6550   if (isToken(AsmToken::Identifier) &&
6551       (HwReg.Id = getHwregId(getTokenStr(), getSTI())) != OPR_ID_UNKNOWN) {
6552     HwReg.IsSymbolic = true;
6553     lex(); // skip register name
6554   } else if (!parseExpr(HwReg.Id, "a register name")) {
6555     return false;
6556   }
6557 
6558   if (trySkipToken(AsmToken::RParen))
6559     return true;
6560 
6561   // parse optional params
6562   if (!skipToken(AsmToken::Comma, "expected a comma or a closing parenthesis"))
6563     return false;
6564 
6565   Offset.Loc = getLoc();
6566   if (!parseExpr(Offset.Id))
6567     return false;
6568 
6569   if (!skipToken(AsmToken::Comma, "expected a comma"))
6570     return false;
6571 
6572   Width.Loc = getLoc();
6573   return parseExpr(Width.Id) &&
6574          skipToken(AsmToken::RParen, "expected a closing parenthesis");
6575 }
6576 
6577 bool
6578 AMDGPUAsmParser::validateHwreg(const OperandInfoTy &HwReg,
6579                                const OperandInfoTy &Offset,
6580                                const OperandInfoTy &Width) {
6581 
6582   using namespace llvm::AMDGPU::Hwreg;
6583 
6584   if (HwReg.IsSymbolic) {
6585     if (HwReg.Id == OPR_ID_UNSUPPORTED) {
6586       Error(HwReg.Loc,
6587             "specified hardware register is not supported on this GPU");
6588       return false;
6589     }
6590   } else {
6591     if (!isValidHwreg(HwReg.Id)) {
6592       Error(HwReg.Loc,
6593             "invalid code of hardware register: only 6-bit values are legal");
6594       return false;
6595     }
6596   }
6597   if (!isValidHwregOffset(Offset.Id)) {
6598     Error(Offset.Loc, "invalid bit offset: only 5-bit values are legal");
6599     return false;
6600   }
6601   if (!isValidHwregWidth(Width.Id)) {
6602     Error(Width.Loc,
6603           "invalid bitfield width: only values from 1 to 32 are legal");
6604     return false;
6605   }
6606   return true;
6607 }
6608 
6609 OperandMatchResultTy
6610 AMDGPUAsmParser::parseHwreg(OperandVector &Operands) {
6611   using namespace llvm::AMDGPU::Hwreg;
6612 
6613   int64_t ImmVal = 0;
6614   SMLoc Loc = getLoc();
6615 
6616   if (trySkipId("hwreg", AsmToken::LParen)) {
6617     OperandInfoTy HwReg(OPR_ID_UNKNOWN);
6618     OperandInfoTy Offset(OFFSET_DEFAULT_);
6619     OperandInfoTy Width(WIDTH_DEFAULT_);
6620     if (parseHwregBody(HwReg, Offset, Width) &&
6621         validateHwreg(HwReg, Offset, Width)) {
6622       ImmVal = encodeHwreg(HwReg.Id, Offset.Id, Width.Id);
6623     } else {
6624       return MatchOperand_ParseFail;
6625     }
6626   } else if (parseExpr(ImmVal, "a hwreg macro")) {
6627     if (ImmVal < 0 || !isUInt<16>(ImmVal)) {
6628       Error(Loc, "invalid immediate: only 16-bit values are legal");
6629       return MatchOperand_ParseFail;
6630     }
6631   } else {
6632     return MatchOperand_ParseFail;
6633   }
6634 
6635   Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTyHwreg));
6636   return MatchOperand_Success;
6637 }
6638 
6639 bool AMDGPUOperand::isHwreg() const {
6640   return isImmTy(ImmTyHwreg);
6641 }
6642 
6643 //===----------------------------------------------------------------------===//
6644 // sendmsg
6645 //===----------------------------------------------------------------------===//
6646 
6647 bool
6648 AMDGPUAsmParser::parseSendMsgBody(OperandInfoTy &Msg,
6649                                   OperandInfoTy &Op,
6650                                   OperandInfoTy &Stream) {
6651   using namespace llvm::AMDGPU::SendMsg;
6652 
6653   Msg.Loc = getLoc();
6654   if (isToken(AsmToken::Identifier) &&
6655       (Msg.Id = getMsgId(getTokenStr(), getSTI())) != OPR_ID_UNKNOWN) {
6656     Msg.IsSymbolic = true;
6657     lex(); // skip message name
6658   } else if (!parseExpr(Msg.Id, "a message name")) {
6659     return false;
6660   }
6661 
6662   if (trySkipToken(AsmToken::Comma)) {
6663     Op.IsDefined = true;
6664     Op.Loc = getLoc();
6665     if (isToken(AsmToken::Identifier) &&
6666         (Op.Id = getMsgOpId(Msg.Id, getTokenStr())) >= 0) {
6667       lex(); // skip operation name
6668     } else if (!parseExpr(Op.Id, "an operation name")) {
6669       return false;
6670     }
6671 
6672     if (trySkipToken(AsmToken::Comma)) {
6673       Stream.IsDefined = true;
6674       Stream.Loc = getLoc();
6675       if (!parseExpr(Stream.Id))
6676         return false;
6677     }
6678   }
6679 
6680   return skipToken(AsmToken::RParen, "expected a closing parenthesis");
6681 }
6682 
6683 bool
6684 AMDGPUAsmParser::validateSendMsg(const OperandInfoTy &Msg,
6685                                  const OperandInfoTy &Op,
6686                                  const OperandInfoTy &Stream) {
6687   using namespace llvm::AMDGPU::SendMsg;
6688 
6689   // Validation strictness depends on whether message is specified
6690   // in a symbolic or in a numeric form. In the latter case
6691   // only encoding possibility is checked.
6692   bool Strict = Msg.IsSymbolic;
6693 
6694   if (Strict) {
6695     if (Msg.Id == OPR_ID_UNSUPPORTED) {
6696       Error(Msg.Loc, "specified message id is not supported on this GPU");
6697       return false;
6698     }
6699   } else {
6700     if (!isValidMsgId(Msg.Id, getSTI())) {
6701       Error(Msg.Loc, "invalid message id");
6702       return false;
6703     }
6704   }
6705   if (Strict && (msgRequiresOp(Msg.Id, getSTI()) != Op.IsDefined)) {
6706     if (Op.IsDefined) {
6707       Error(Op.Loc, "message does not support operations");
6708     } else {
6709       Error(Msg.Loc, "missing message operation");
6710     }
6711     return false;
6712   }
6713   if (!isValidMsgOp(Msg.Id, Op.Id, getSTI(), Strict)) {
6714     Error(Op.Loc, "invalid operation id");
6715     return false;
6716   }
6717   if (Strict && !msgSupportsStream(Msg.Id, Op.Id, getSTI()) &&
6718       Stream.IsDefined) {
6719     Error(Stream.Loc, "message operation does not support streams");
6720     return false;
6721   }
6722   if (!isValidMsgStream(Msg.Id, Op.Id, Stream.Id, getSTI(), Strict)) {
6723     Error(Stream.Loc, "invalid message stream id");
6724     return false;
6725   }
6726   return true;
6727 }
6728 
6729 OperandMatchResultTy
6730 AMDGPUAsmParser::parseSendMsgOp(OperandVector &Operands) {
6731   using namespace llvm::AMDGPU::SendMsg;
6732 
6733   int64_t ImmVal = 0;
6734   SMLoc Loc = getLoc();
6735 
6736   if (trySkipId("sendmsg", AsmToken::LParen)) {
6737     OperandInfoTy Msg(OPR_ID_UNKNOWN);
6738     OperandInfoTy Op(OP_NONE_);
6739     OperandInfoTy Stream(STREAM_ID_NONE_);
6740     if (parseSendMsgBody(Msg, Op, Stream) &&
6741         validateSendMsg(Msg, Op, Stream)) {
6742       ImmVal = encodeMsg(Msg.Id, Op.Id, Stream.Id);
6743     } else {
6744       return MatchOperand_ParseFail;
6745     }
6746   } else if (parseExpr(ImmVal, "a sendmsg macro")) {
6747     if (ImmVal < 0 || !isUInt<16>(ImmVal)) {
6748       Error(Loc, "invalid immediate: only 16-bit values are legal");
6749       return MatchOperand_ParseFail;
6750     }
6751   } else {
6752     return MatchOperand_ParseFail;
6753   }
6754 
6755   Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTySendMsg));
6756   return MatchOperand_Success;
6757 }
6758 
6759 bool AMDGPUOperand::isSendMsg() const {
6760   return isImmTy(ImmTySendMsg);
6761 }
6762 
6763 //===----------------------------------------------------------------------===//
6764 // v_interp
6765 //===----------------------------------------------------------------------===//
6766 
6767 OperandMatchResultTy AMDGPUAsmParser::parseInterpSlot(OperandVector &Operands) {
6768   StringRef Str;
6769   SMLoc S = getLoc();
6770 
6771   if (!parseId(Str))
6772     return MatchOperand_NoMatch;
6773 
6774   int Slot = StringSwitch<int>(Str)
6775     .Case("p10", 0)
6776     .Case("p20", 1)
6777     .Case("p0", 2)
6778     .Default(-1);
6779 
6780   if (Slot == -1) {
6781     Error(S, "invalid interpolation slot");
6782     return MatchOperand_ParseFail;
6783   }
6784 
6785   Operands.push_back(AMDGPUOperand::CreateImm(this, Slot, S,
6786                                               AMDGPUOperand::ImmTyInterpSlot));
6787   return MatchOperand_Success;
6788 }
6789 
6790 OperandMatchResultTy AMDGPUAsmParser::parseInterpAttr(OperandVector &Operands) {
6791   StringRef Str;
6792   SMLoc S = getLoc();
6793 
6794   if (!parseId(Str))
6795     return MatchOperand_NoMatch;
6796 
6797   if (!Str.startswith("attr")) {
6798     Error(S, "invalid interpolation attribute");
6799     return MatchOperand_ParseFail;
6800   }
6801 
6802   StringRef Chan = Str.take_back(2);
6803   int AttrChan = StringSwitch<int>(Chan)
6804     .Case(".x", 0)
6805     .Case(".y", 1)
6806     .Case(".z", 2)
6807     .Case(".w", 3)
6808     .Default(-1);
6809   if (AttrChan == -1) {
6810     Error(S, "invalid or missing interpolation attribute channel");
6811     return MatchOperand_ParseFail;
6812   }
6813 
6814   Str = Str.drop_back(2).drop_front(4);
6815 
6816   uint8_t Attr;
6817   if (Str.getAsInteger(10, Attr)) {
6818     Error(S, "invalid or missing interpolation attribute number");
6819     return MatchOperand_ParseFail;
6820   }
6821 
6822   if (Attr > 63) {
6823     Error(S, "out of bounds interpolation attribute number");
6824     return MatchOperand_ParseFail;
6825   }
6826 
6827   SMLoc SChan = SMLoc::getFromPointer(Chan.data());
6828 
6829   Operands.push_back(AMDGPUOperand::CreateImm(this, Attr, S,
6830                                               AMDGPUOperand::ImmTyInterpAttr));
6831   Operands.push_back(AMDGPUOperand::CreateImm(this, AttrChan, SChan,
6832                                               AMDGPUOperand::ImmTyAttrChan));
6833   return MatchOperand_Success;
6834 }
6835 
6836 //===----------------------------------------------------------------------===//
6837 // exp
6838 //===----------------------------------------------------------------------===//
6839 
6840 OperandMatchResultTy AMDGPUAsmParser::parseExpTgt(OperandVector &Operands) {
6841   using namespace llvm::AMDGPU::Exp;
6842 
6843   StringRef Str;
6844   SMLoc S = getLoc();
6845 
6846   if (!parseId(Str))
6847     return MatchOperand_NoMatch;
6848 
6849   unsigned Id = getTgtId(Str);
6850   if (Id == ET_INVALID || !isSupportedTgtId(Id, getSTI())) {
6851     Error(S, (Id == ET_INVALID) ?
6852                 "invalid exp target" :
6853                 "exp target is not supported on this GPU");
6854     return MatchOperand_ParseFail;
6855   }
6856 
6857   Operands.push_back(AMDGPUOperand::CreateImm(this, Id, S,
6858                                               AMDGPUOperand::ImmTyExpTgt));
6859   return MatchOperand_Success;
6860 }
6861 
6862 //===----------------------------------------------------------------------===//
6863 // parser helpers
6864 //===----------------------------------------------------------------------===//
6865 
6866 bool
6867 AMDGPUAsmParser::isId(const AsmToken &Token, const StringRef Id) const {
6868   return Token.is(AsmToken::Identifier) && Token.getString() == Id;
6869 }
6870 
6871 bool
6872 AMDGPUAsmParser::isId(const StringRef Id) const {
6873   return isId(getToken(), Id);
6874 }
6875 
6876 bool
6877 AMDGPUAsmParser::isToken(const AsmToken::TokenKind Kind) const {
6878   return getTokenKind() == Kind;
6879 }
6880 
6881 bool
6882 AMDGPUAsmParser::trySkipId(const StringRef Id) {
6883   if (isId(Id)) {
6884     lex();
6885     return true;
6886   }
6887   return false;
6888 }
6889 
6890 bool
6891 AMDGPUAsmParser::trySkipId(const StringRef Pref, const StringRef Id) {
6892   if (isToken(AsmToken::Identifier)) {
6893     StringRef Tok = getTokenStr();
6894     if (Tok.startswith(Pref) && Tok.drop_front(Pref.size()) == Id) {
6895       lex();
6896       return true;
6897     }
6898   }
6899   return false;
6900 }
6901 
6902 bool
6903 AMDGPUAsmParser::trySkipId(const StringRef Id, const AsmToken::TokenKind Kind) {
6904   if (isId(Id) && peekToken().is(Kind)) {
6905     lex();
6906     lex();
6907     return true;
6908   }
6909   return false;
6910 }
6911 
6912 bool
6913 AMDGPUAsmParser::trySkipToken(const AsmToken::TokenKind Kind) {
6914   if (isToken(Kind)) {
6915     lex();
6916     return true;
6917   }
6918   return false;
6919 }
6920 
6921 bool
6922 AMDGPUAsmParser::skipToken(const AsmToken::TokenKind Kind,
6923                            const StringRef ErrMsg) {
6924   if (!trySkipToken(Kind)) {
6925     Error(getLoc(), ErrMsg);
6926     return false;
6927   }
6928   return true;
6929 }
6930 
6931 bool
6932 AMDGPUAsmParser::parseExpr(int64_t &Imm, StringRef Expected) {
6933   SMLoc S = getLoc();
6934 
6935   const MCExpr *Expr;
6936   if (Parser.parseExpression(Expr))
6937     return false;
6938 
6939   if (Expr->evaluateAsAbsolute(Imm))
6940     return true;
6941 
6942   if (Expected.empty()) {
6943     Error(S, "expected absolute expression");
6944   } else {
6945     Error(S, Twine("expected ", Expected) +
6946              Twine(" or an absolute expression"));
6947   }
6948   return false;
6949 }
6950 
6951 bool
6952 AMDGPUAsmParser::parseExpr(OperandVector &Operands) {
6953   SMLoc S = getLoc();
6954 
6955   const MCExpr *Expr;
6956   if (Parser.parseExpression(Expr))
6957     return false;
6958 
6959   int64_t IntVal;
6960   if (Expr->evaluateAsAbsolute(IntVal)) {
6961     Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S));
6962   } else {
6963     Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S));
6964   }
6965   return true;
6966 }
6967 
6968 bool
6969 AMDGPUAsmParser::parseString(StringRef &Val, const StringRef ErrMsg) {
6970   if (isToken(AsmToken::String)) {
6971     Val = getToken().getStringContents();
6972     lex();
6973     return true;
6974   } else {
6975     Error(getLoc(), ErrMsg);
6976     return false;
6977   }
6978 }
6979 
6980 bool
6981 AMDGPUAsmParser::parseId(StringRef &Val, const StringRef ErrMsg) {
6982   if (isToken(AsmToken::Identifier)) {
6983     Val = getTokenStr();
6984     lex();
6985     return true;
6986   } else {
6987     if (!ErrMsg.empty())
6988       Error(getLoc(), ErrMsg);
6989     return false;
6990   }
6991 }
6992 
6993 AsmToken
6994 AMDGPUAsmParser::getToken() const {
6995   return Parser.getTok();
6996 }
6997 
6998 AsmToken
6999 AMDGPUAsmParser::peekToken() {
7000   return isToken(AsmToken::EndOfStatement) ? getToken() : getLexer().peekTok();
7001 }
7002 
7003 void
7004 AMDGPUAsmParser::peekTokens(MutableArrayRef<AsmToken> Tokens) {
7005   auto TokCount = getLexer().peekTokens(Tokens);
7006 
7007   for (auto Idx = TokCount; Idx < Tokens.size(); ++Idx)
7008     Tokens[Idx] = AsmToken(AsmToken::Error, "");
7009 }
7010 
7011 AsmToken::TokenKind
7012 AMDGPUAsmParser::getTokenKind() const {
7013   return getLexer().getKind();
7014 }
7015 
7016 SMLoc
7017 AMDGPUAsmParser::getLoc() const {
7018   return getToken().getLoc();
7019 }
7020 
7021 StringRef
7022 AMDGPUAsmParser::getTokenStr() const {
7023   return getToken().getString();
7024 }
7025 
7026 void
7027 AMDGPUAsmParser::lex() {
7028   Parser.Lex();
7029 }
7030 
7031 SMLoc
7032 AMDGPUAsmParser::getOperandLoc(std::function<bool(const AMDGPUOperand&)> Test,
7033                                const OperandVector &Operands) const {
7034   for (unsigned i = Operands.size() - 1; i > 0; --i) {
7035     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
7036     if (Test(Op))
7037       return Op.getStartLoc();
7038   }
7039   return ((AMDGPUOperand &)*Operands[0]).getStartLoc();
7040 }
7041 
7042 SMLoc
7043 AMDGPUAsmParser::getImmLoc(AMDGPUOperand::ImmTy Type,
7044                            const OperandVector &Operands) const {
7045   auto Test = [=](const AMDGPUOperand& Op) { return Op.isImmTy(Type); };
7046   return getOperandLoc(Test, Operands);
7047 }
7048 
7049 SMLoc
7050 AMDGPUAsmParser::getRegLoc(unsigned Reg,
7051                            const OperandVector &Operands) const {
7052   auto Test = [=](const AMDGPUOperand& Op) {
7053     return Op.isRegKind() && Op.getReg() == Reg;
7054   };
7055   return getOperandLoc(Test, Operands);
7056 }
7057 
7058 SMLoc
7059 AMDGPUAsmParser::getLitLoc(const OperandVector &Operands) const {
7060   auto Test = [](const AMDGPUOperand& Op) {
7061     return Op.IsImmKindLiteral() || Op.isExpr();
7062   };
7063   return getOperandLoc(Test, Operands);
7064 }
7065 
7066 SMLoc
7067 AMDGPUAsmParser::getConstLoc(const OperandVector &Operands) const {
7068   auto Test = [](const AMDGPUOperand& Op) {
7069     return Op.isImmKindConst();
7070   };
7071   return getOperandLoc(Test, Operands);
7072 }
7073 
7074 //===----------------------------------------------------------------------===//
7075 // swizzle
7076 //===----------------------------------------------------------------------===//
7077 
7078 LLVM_READNONE
7079 static unsigned
7080 encodeBitmaskPerm(const unsigned AndMask,
7081                   const unsigned OrMask,
7082                   const unsigned XorMask) {
7083   using namespace llvm::AMDGPU::Swizzle;
7084 
7085   return BITMASK_PERM_ENC |
7086          (AndMask << BITMASK_AND_SHIFT) |
7087          (OrMask  << BITMASK_OR_SHIFT)  |
7088          (XorMask << BITMASK_XOR_SHIFT);
7089 }
7090 
7091 bool
7092 AMDGPUAsmParser::parseSwizzleOperand(int64_t &Op,
7093                                      const unsigned MinVal,
7094                                      const unsigned MaxVal,
7095                                      const StringRef ErrMsg,
7096                                      SMLoc &Loc) {
7097   if (!skipToken(AsmToken::Comma, "expected a comma")) {
7098     return false;
7099   }
7100   Loc = getLoc();
7101   if (!parseExpr(Op)) {
7102     return false;
7103   }
7104   if (Op < MinVal || Op > MaxVal) {
7105     Error(Loc, ErrMsg);
7106     return false;
7107   }
7108 
7109   return true;
7110 }
7111 
7112 bool
7113 AMDGPUAsmParser::parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
7114                                       const unsigned MinVal,
7115                                       const unsigned MaxVal,
7116                                       const StringRef ErrMsg) {
7117   SMLoc Loc;
7118   for (unsigned i = 0; i < OpNum; ++i) {
7119     if (!parseSwizzleOperand(Op[i], MinVal, MaxVal, ErrMsg, Loc))
7120       return false;
7121   }
7122 
7123   return true;
7124 }
7125 
7126 bool
7127 AMDGPUAsmParser::parseSwizzleQuadPerm(int64_t &Imm) {
7128   using namespace llvm::AMDGPU::Swizzle;
7129 
7130   int64_t Lane[LANE_NUM];
7131   if (parseSwizzleOperands(LANE_NUM, Lane, 0, LANE_MAX,
7132                            "expected a 2-bit lane id")) {
7133     Imm = QUAD_PERM_ENC;
7134     for (unsigned I = 0; I < LANE_NUM; ++I) {
7135       Imm |= Lane[I] << (LANE_SHIFT * I);
7136     }
7137     return true;
7138   }
7139   return false;
7140 }
7141 
7142 bool
7143 AMDGPUAsmParser::parseSwizzleBroadcast(int64_t &Imm) {
7144   using namespace llvm::AMDGPU::Swizzle;
7145 
7146   SMLoc Loc;
7147   int64_t GroupSize;
7148   int64_t LaneIdx;
7149 
7150   if (!parseSwizzleOperand(GroupSize,
7151                            2, 32,
7152                            "group size must be in the interval [2,32]",
7153                            Loc)) {
7154     return false;
7155   }
7156   if (!isPowerOf2_64(GroupSize)) {
7157     Error(Loc, "group size must be a power of two");
7158     return false;
7159   }
7160   if (parseSwizzleOperand(LaneIdx,
7161                           0, GroupSize - 1,
7162                           "lane id must be in the interval [0,group size - 1]",
7163                           Loc)) {
7164     Imm = encodeBitmaskPerm(BITMASK_MAX - GroupSize + 1, LaneIdx, 0);
7165     return true;
7166   }
7167   return false;
7168 }
7169 
7170 bool
7171 AMDGPUAsmParser::parseSwizzleReverse(int64_t &Imm) {
7172   using namespace llvm::AMDGPU::Swizzle;
7173 
7174   SMLoc Loc;
7175   int64_t GroupSize;
7176 
7177   if (!parseSwizzleOperand(GroupSize,
7178                            2, 32,
7179                            "group size must be in the interval [2,32]",
7180                            Loc)) {
7181     return false;
7182   }
7183   if (!isPowerOf2_64(GroupSize)) {
7184     Error(Loc, "group size must be a power of two");
7185     return false;
7186   }
7187 
7188   Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize - 1);
7189   return true;
7190 }
7191 
7192 bool
7193 AMDGPUAsmParser::parseSwizzleSwap(int64_t &Imm) {
7194   using namespace llvm::AMDGPU::Swizzle;
7195 
7196   SMLoc Loc;
7197   int64_t GroupSize;
7198 
7199   if (!parseSwizzleOperand(GroupSize,
7200                            1, 16,
7201                            "group size must be in the interval [1,16]",
7202                            Loc)) {
7203     return false;
7204   }
7205   if (!isPowerOf2_64(GroupSize)) {
7206     Error(Loc, "group size must be a power of two");
7207     return false;
7208   }
7209 
7210   Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize);
7211   return true;
7212 }
7213 
7214 bool
7215 AMDGPUAsmParser::parseSwizzleBitmaskPerm(int64_t &Imm) {
7216   using namespace llvm::AMDGPU::Swizzle;
7217 
7218   if (!skipToken(AsmToken::Comma, "expected a comma")) {
7219     return false;
7220   }
7221 
7222   StringRef Ctl;
7223   SMLoc StrLoc = getLoc();
7224   if (!parseString(Ctl)) {
7225     return false;
7226   }
7227   if (Ctl.size() != BITMASK_WIDTH) {
7228     Error(StrLoc, "expected a 5-character mask");
7229     return false;
7230   }
7231 
7232   unsigned AndMask = 0;
7233   unsigned OrMask = 0;
7234   unsigned XorMask = 0;
7235 
7236   for (size_t i = 0; i < Ctl.size(); ++i) {
7237     unsigned Mask = 1 << (BITMASK_WIDTH - 1 - i);
7238     switch(Ctl[i]) {
7239     default:
7240       Error(StrLoc, "invalid mask");
7241       return false;
7242     case '0':
7243       break;
7244     case '1':
7245       OrMask |= Mask;
7246       break;
7247     case 'p':
7248       AndMask |= Mask;
7249       break;
7250     case 'i':
7251       AndMask |= Mask;
7252       XorMask |= Mask;
7253       break;
7254     }
7255   }
7256 
7257   Imm = encodeBitmaskPerm(AndMask, OrMask, XorMask);
7258   return true;
7259 }
7260 
7261 bool
7262 AMDGPUAsmParser::parseSwizzleOffset(int64_t &Imm) {
7263 
7264   SMLoc OffsetLoc = getLoc();
7265 
7266   if (!parseExpr(Imm, "a swizzle macro")) {
7267     return false;
7268   }
7269   if (!isUInt<16>(Imm)) {
7270     Error(OffsetLoc, "expected a 16-bit offset");
7271     return false;
7272   }
7273   return true;
7274 }
7275 
7276 bool
7277 AMDGPUAsmParser::parseSwizzleMacro(int64_t &Imm) {
7278   using namespace llvm::AMDGPU::Swizzle;
7279 
7280   if (skipToken(AsmToken::LParen, "expected a left parentheses")) {
7281 
7282     SMLoc ModeLoc = getLoc();
7283     bool Ok = false;
7284 
7285     if (trySkipId(IdSymbolic[ID_QUAD_PERM])) {
7286       Ok = parseSwizzleQuadPerm(Imm);
7287     } else if (trySkipId(IdSymbolic[ID_BITMASK_PERM])) {
7288       Ok = parseSwizzleBitmaskPerm(Imm);
7289     } else if (trySkipId(IdSymbolic[ID_BROADCAST])) {
7290       Ok = parseSwizzleBroadcast(Imm);
7291     } else if (trySkipId(IdSymbolic[ID_SWAP])) {
7292       Ok = parseSwizzleSwap(Imm);
7293     } else if (trySkipId(IdSymbolic[ID_REVERSE])) {
7294       Ok = parseSwizzleReverse(Imm);
7295     } else {
7296       Error(ModeLoc, "expected a swizzle mode");
7297     }
7298 
7299     return Ok && skipToken(AsmToken::RParen, "expected a closing parentheses");
7300   }
7301 
7302   return false;
7303 }
7304 
7305 OperandMatchResultTy
7306 AMDGPUAsmParser::parseSwizzleOp(OperandVector &Operands) {
7307   SMLoc S = getLoc();
7308   int64_t Imm = 0;
7309 
7310   if (trySkipId("offset")) {
7311 
7312     bool Ok = false;
7313     if (skipToken(AsmToken::Colon, "expected a colon")) {
7314       if (trySkipId("swizzle")) {
7315         Ok = parseSwizzleMacro(Imm);
7316       } else {
7317         Ok = parseSwizzleOffset(Imm);
7318       }
7319     }
7320 
7321     Operands.push_back(AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTySwizzle));
7322 
7323     return Ok? MatchOperand_Success : MatchOperand_ParseFail;
7324   } else {
7325     // Swizzle "offset" operand is optional.
7326     // If it is omitted, try parsing other optional operands.
7327     return parseOptionalOpr(Operands);
7328   }
7329 }
7330 
7331 bool
7332 AMDGPUOperand::isSwizzle() const {
7333   return isImmTy(ImmTySwizzle);
7334 }
7335 
7336 //===----------------------------------------------------------------------===//
7337 // VGPR Index Mode
7338 //===----------------------------------------------------------------------===//
7339 
7340 int64_t AMDGPUAsmParser::parseGPRIdxMacro() {
7341 
7342   using namespace llvm::AMDGPU::VGPRIndexMode;
7343 
7344   if (trySkipToken(AsmToken::RParen)) {
7345     return OFF;
7346   }
7347 
7348   int64_t Imm = 0;
7349 
7350   while (true) {
7351     unsigned Mode = 0;
7352     SMLoc S = getLoc();
7353 
7354     for (unsigned ModeId = ID_MIN; ModeId <= ID_MAX; ++ModeId) {
7355       if (trySkipId(IdSymbolic[ModeId])) {
7356         Mode = 1 << ModeId;
7357         break;
7358       }
7359     }
7360 
7361     if (Mode == 0) {
7362       Error(S, (Imm == 0)?
7363                "expected a VGPR index mode or a closing parenthesis" :
7364                "expected a VGPR index mode");
7365       return UNDEF;
7366     }
7367 
7368     if (Imm & Mode) {
7369       Error(S, "duplicate VGPR index mode");
7370       return UNDEF;
7371     }
7372     Imm |= Mode;
7373 
7374     if (trySkipToken(AsmToken::RParen))
7375       break;
7376     if (!skipToken(AsmToken::Comma,
7377                    "expected a comma or a closing parenthesis"))
7378       return UNDEF;
7379   }
7380 
7381   return Imm;
7382 }
7383 
7384 OperandMatchResultTy
7385 AMDGPUAsmParser::parseGPRIdxMode(OperandVector &Operands) {
7386 
7387   using namespace llvm::AMDGPU::VGPRIndexMode;
7388 
7389   int64_t Imm = 0;
7390   SMLoc S = getLoc();
7391 
7392   if (trySkipId("gpr_idx", AsmToken::LParen)) {
7393     Imm = parseGPRIdxMacro();
7394     if (Imm == UNDEF)
7395       return MatchOperand_ParseFail;
7396   } else {
7397     if (getParser().parseAbsoluteExpression(Imm))
7398       return MatchOperand_ParseFail;
7399     if (Imm < 0 || !isUInt<4>(Imm)) {
7400       Error(S, "invalid immediate: only 4-bit values are legal");
7401       return MatchOperand_ParseFail;
7402     }
7403   }
7404 
7405   Operands.push_back(
7406       AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyGprIdxMode));
7407   return MatchOperand_Success;
7408 }
7409 
7410 bool AMDGPUOperand::isGPRIdxMode() const {
7411   return isImmTy(ImmTyGprIdxMode);
7412 }
7413 
7414 //===----------------------------------------------------------------------===//
7415 // sopp branch targets
7416 //===----------------------------------------------------------------------===//
7417 
7418 OperandMatchResultTy
7419 AMDGPUAsmParser::parseSOppBrTarget(OperandVector &Operands) {
7420 
7421   // Make sure we are not parsing something
7422   // that looks like a label or an expression but is not.
7423   // This will improve error messages.
7424   if (isRegister() || isModifier())
7425     return MatchOperand_NoMatch;
7426 
7427   if (!parseExpr(Operands))
7428     return MatchOperand_ParseFail;
7429 
7430   AMDGPUOperand &Opr = ((AMDGPUOperand &)*Operands[Operands.size() - 1]);
7431   assert(Opr.isImm() || Opr.isExpr());
7432   SMLoc Loc = Opr.getStartLoc();
7433 
7434   // Currently we do not support arbitrary expressions as branch targets.
7435   // Only labels and absolute expressions are accepted.
7436   if (Opr.isExpr() && !Opr.isSymbolRefExpr()) {
7437     Error(Loc, "expected an absolute expression or a label");
7438   } else if (Opr.isImm() && !Opr.isS16Imm()) {
7439     Error(Loc, "expected a 16-bit signed jump offset");
7440   }
7441 
7442   return MatchOperand_Success;
7443 }
7444 
7445 //===----------------------------------------------------------------------===//
7446 // Boolean holding registers
7447 //===----------------------------------------------------------------------===//
7448 
7449 OperandMatchResultTy
7450 AMDGPUAsmParser::parseBoolReg(OperandVector &Operands) {
7451   return parseReg(Operands);
7452 }
7453 
7454 //===----------------------------------------------------------------------===//
7455 // mubuf
7456 //===----------------------------------------------------------------------===//
7457 
7458 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultCPol() const {
7459   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyCPol);
7460 }
7461 
7462 void AMDGPUAsmParser::cvtMubufImpl(MCInst &Inst,
7463                                    const OperandVector &Operands,
7464                                    bool IsAtomic,
7465                                    bool IsLds) {
7466   OptionalImmIndexMap OptionalIdx;
7467   unsigned FirstOperandIdx = 1;
7468   bool IsAtomicReturn = false;
7469 
7470   if (IsAtomic) {
7471     for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) {
7472       AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
7473       if (!Op.isCPol())
7474         continue;
7475       IsAtomicReturn = Op.getImm() & AMDGPU::CPol::GLC;
7476       break;
7477     }
7478 
7479     if (!IsAtomicReturn) {
7480       int NewOpc = AMDGPU::getAtomicNoRetOp(Inst.getOpcode());
7481       if (NewOpc != -1)
7482         Inst.setOpcode(NewOpc);
7483     }
7484 
7485     IsAtomicReturn =  MII.get(Inst.getOpcode()).TSFlags &
7486                       SIInstrFlags::IsAtomicRet;
7487   }
7488 
7489   for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) {
7490     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
7491 
7492     // Add the register arguments
7493     if (Op.isReg()) {
7494       Op.addRegOperands(Inst, 1);
7495       // Insert a tied src for atomic return dst.
7496       // This cannot be postponed as subsequent calls to
7497       // addImmOperands rely on correct number of MC operands.
7498       if (IsAtomicReturn && i == FirstOperandIdx)
7499         Op.addRegOperands(Inst, 1);
7500       continue;
7501     }
7502 
7503     // Handle the case where soffset is an immediate
7504     if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
7505       Op.addImmOperands(Inst, 1);
7506       continue;
7507     }
7508 
7509     // Handle tokens like 'offen' which are sometimes hard-coded into the
7510     // asm string.  There are no MCInst operands for these.
7511     if (Op.isToken()) {
7512       continue;
7513     }
7514     assert(Op.isImm());
7515 
7516     // Handle optional arguments
7517     OptionalIdx[Op.getImmTy()] = i;
7518   }
7519 
7520   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset);
7521   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol, 0);
7522 
7523   if (!IsLds) { // tfe is not legal with lds opcodes
7524     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
7525   }
7526   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySWZ);
7527 }
7528 
7529 void AMDGPUAsmParser::cvtMtbuf(MCInst &Inst, const OperandVector &Operands) {
7530   OptionalImmIndexMap OptionalIdx;
7531 
7532   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
7533     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
7534 
7535     // Add the register arguments
7536     if (Op.isReg()) {
7537       Op.addRegOperands(Inst, 1);
7538       continue;
7539     }
7540 
7541     // Handle the case where soffset is an immediate
7542     if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
7543       Op.addImmOperands(Inst, 1);
7544       continue;
7545     }
7546 
7547     // Handle tokens like 'offen' which are sometimes hard-coded into the
7548     // asm string.  There are no MCInst operands for these.
7549     if (Op.isToken()) {
7550       continue;
7551     }
7552     assert(Op.isImm());
7553 
7554     // Handle optional arguments
7555     OptionalIdx[Op.getImmTy()] = i;
7556   }
7557 
7558   addOptionalImmOperand(Inst, Operands, OptionalIdx,
7559                         AMDGPUOperand::ImmTyOffset);
7560   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyFORMAT);
7561   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol, 0);
7562   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
7563   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySWZ);
7564 }
7565 
7566 //===----------------------------------------------------------------------===//
7567 // mimg
7568 //===----------------------------------------------------------------------===//
7569 
7570 void AMDGPUAsmParser::cvtMIMG(MCInst &Inst, const OperandVector &Operands,
7571                               bool IsAtomic) {
7572   unsigned I = 1;
7573   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
7574   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
7575     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
7576   }
7577 
7578   if (IsAtomic) {
7579     // Add src, same as dst
7580     assert(Desc.getNumDefs() == 1);
7581     ((AMDGPUOperand &)*Operands[I - 1]).addRegOperands(Inst, 1);
7582   }
7583 
7584   OptionalImmIndexMap OptionalIdx;
7585 
7586   for (unsigned E = Operands.size(); I != E; ++I) {
7587     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
7588 
7589     // Add the register arguments
7590     if (Op.isReg()) {
7591       Op.addRegOperands(Inst, 1);
7592     } else if (Op.isImmModifier()) {
7593       OptionalIdx[Op.getImmTy()] = I;
7594     } else if (!Op.isToken()) {
7595       llvm_unreachable("unexpected operand type");
7596     }
7597   }
7598 
7599   bool IsGFX10Plus = isGFX10Plus();
7600 
7601   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDMask);
7602   if (IsGFX10Plus)
7603     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDim, -1);
7604   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyUNorm);
7605   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol);
7606   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyR128A16);
7607   if (IsGFX10Plus)
7608     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyA16);
7609   if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::tfe) != -1)
7610     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
7611   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyLWE);
7612   if (!IsGFX10Plus)
7613     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDA);
7614   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyD16);
7615 }
7616 
7617 void AMDGPUAsmParser::cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands) {
7618   cvtMIMG(Inst, Operands, true);
7619 }
7620 
7621 void AMDGPUAsmParser::cvtSMEMAtomic(MCInst &Inst, const OperandVector &Operands) {
7622   OptionalImmIndexMap OptionalIdx;
7623   bool IsAtomicReturn = false;
7624 
7625   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
7626     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
7627     if (!Op.isCPol())
7628       continue;
7629     IsAtomicReturn = Op.getImm() & AMDGPU::CPol::GLC;
7630     break;
7631   }
7632 
7633   if (!IsAtomicReturn) {
7634     int NewOpc = AMDGPU::getAtomicNoRetOp(Inst.getOpcode());
7635     if (NewOpc != -1)
7636       Inst.setOpcode(NewOpc);
7637   }
7638 
7639   IsAtomicReturn =  MII.get(Inst.getOpcode()).TSFlags &
7640                     SIInstrFlags::IsAtomicRet;
7641 
7642   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
7643     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
7644 
7645     // Add the register arguments
7646     if (Op.isReg()) {
7647       Op.addRegOperands(Inst, 1);
7648       if (IsAtomicReturn && i == 1)
7649         Op.addRegOperands(Inst, 1);
7650       continue;
7651     }
7652 
7653     // Handle the case where soffset is an immediate
7654     if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
7655       Op.addImmOperands(Inst, 1);
7656       continue;
7657     }
7658 
7659     // Handle tokens like 'offen' which are sometimes hard-coded into the
7660     // asm string.  There are no MCInst operands for these.
7661     if (Op.isToken()) {
7662       continue;
7663     }
7664     assert(Op.isImm());
7665 
7666     // Handle optional arguments
7667     OptionalIdx[Op.getImmTy()] = i;
7668   }
7669 
7670   if ((int)Inst.getNumOperands() <=
7671       AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::offset))
7672     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset);
7673   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol, 0);
7674 }
7675 
7676 void AMDGPUAsmParser::cvtIntersectRay(MCInst &Inst,
7677                                       const OperandVector &Operands) {
7678   for (unsigned I = 1; I < Operands.size(); ++I) {
7679     auto &Operand = (AMDGPUOperand &)*Operands[I];
7680     if (Operand.isReg())
7681       Operand.addRegOperands(Inst, 1);
7682   }
7683 
7684   Inst.addOperand(MCOperand::createImm(1)); // a16
7685 }
7686 
7687 //===----------------------------------------------------------------------===//
7688 // smrd
7689 //===----------------------------------------------------------------------===//
7690 
7691 bool AMDGPUOperand::isSMRDOffset8() const {
7692   return isImm() && isUInt<8>(getImm());
7693 }
7694 
7695 bool AMDGPUOperand::isSMEMOffset() const {
7696   return isImm(); // Offset range is checked later by validator.
7697 }
7698 
7699 bool AMDGPUOperand::isSMRDLiteralOffset() const {
7700   // 32-bit literals are only supported on CI and we only want to use them
7701   // when the offset is > 8-bits.
7702   return isImm() && !isUInt<8>(getImm()) && isUInt<32>(getImm());
7703 }
7704 
7705 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDOffset8() const {
7706   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
7707 }
7708 
7709 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMEMOffset() const {
7710   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
7711 }
7712 
7713 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDLiteralOffset() const {
7714   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
7715 }
7716 
7717 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultFlatOffset() const {
7718   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
7719 }
7720 
7721 //===----------------------------------------------------------------------===//
7722 // vop3
7723 //===----------------------------------------------------------------------===//
7724 
7725 static bool ConvertOmodMul(int64_t &Mul) {
7726   if (Mul != 1 && Mul != 2 && Mul != 4)
7727     return false;
7728 
7729   Mul >>= 1;
7730   return true;
7731 }
7732 
7733 static bool ConvertOmodDiv(int64_t &Div) {
7734   if (Div == 1) {
7735     Div = 0;
7736     return true;
7737   }
7738 
7739   if (Div == 2) {
7740     Div = 3;
7741     return true;
7742   }
7743 
7744   return false;
7745 }
7746 
7747 // Both bound_ctrl:0 and bound_ctrl:1 are encoded as 1.
7748 // This is intentional and ensures compatibility with sp3.
7749 // See bug 35397 for details.
7750 static bool ConvertBoundCtrl(int64_t &BoundCtrl) {
7751   if (BoundCtrl == 0 || BoundCtrl == 1) {
7752     BoundCtrl = 1;
7753     return true;
7754   }
7755   return false;
7756 }
7757 
7758 // Note: the order in this table matches the order of operands in AsmString.
7759 static const OptionalOperand AMDGPUOptionalOperandTable[] = {
7760   {"offen",   AMDGPUOperand::ImmTyOffen, true, nullptr},
7761   {"idxen",   AMDGPUOperand::ImmTyIdxen, true, nullptr},
7762   {"addr64",  AMDGPUOperand::ImmTyAddr64, true, nullptr},
7763   {"offset0", AMDGPUOperand::ImmTyOffset0, false, nullptr},
7764   {"offset1", AMDGPUOperand::ImmTyOffset1, false, nullptr},
7765   {"gds",     AMDGPUOperand::ImmTyGDS, true, nullptr},
7766   {"lds",     AMDGPUOperand::ImmTyLDS, true, nullptr},
7767   {"offset",  AMDGPUOperand::ImmTyOffset, false, nullptr},
7768   {"inst_offset", AMDGPUOperand::ImmTyInstOffset, false, nullptr},
7769   {"",        AMDGPUOperand::ImmTyCPol, false, nullptr},
7770   {"swz",     AMDGPUOperand::ImmTySWZ, true, nullptr},
7771   {"tfe",     AMDGPUOperand::ImmTyTFE, true, nullptr},
7772   {"d16",     AMDGPUOperand::ImmTyD16, true, nullptr},
7773   {"high",    AMDGPUOperand::ImmTyHigh, true, nullptr},
7774   {"clamp",   AMDGPUOperand::ImmTyClampSI, true, nullptr},
7775   {"omod",    AMDGPUOperand::ImmTyOModSI, false, ConvertOmodMul},
7776   {"unorm",   AMDGPUOperand::ImmTyUNorm, true, nullptr},
7777   {"da",      AMDGPUOperand::ImmTyDA,    true, nullptr},
7778   {"r128",    AMDGPUOperand::ImmTyR128A16,  true, nullptr},
7779   {"a16",     AMDGPUOperand::ImmTyA16,  true, nullptr},
7780   {"lwe",     AMDGPUOperand::ImmTyLWE,   true, nullptr},
7781   {"d16",     AMDGPUOperand::ImmTyD16,   true, nullptr},
7782   {"dmask",   AMDGPUOperand::ImmTyDMask, false, nullptr},
7783   {"dim",     AMDGPUOperand::ImmTyDim,   false, nullptr},
7784   {"row_mask",   AMDGPUOperand::ImmTyDppRowMask, false, nullptr},
7785   {"bank_mask",  AMDGPUOperand::ImmTyDppBankMask, false, nullptr},
7786   {"bound_ctrl", AMDGPUOperand::ImmTyDppBoundCtrl, false, ConvertBoundCtrl},
7787   {"fi",         AMDGPUOperand::ImmTyDppFi, false, nullptr},
7788   {"dst_sel",    AMDGPUOperand::ImmTySdwaDstSel, false, nullptr},
7789   {"src0_sel",   AMDGPUOperand::ImmTySdwaSrc0Sel, false, nullptr},
7790   {"src1_sel",   AMDGPUOperand::ImmTySdwaSrc1Sel, false, nullptr},
7791   {"dst_unused", AMDGPUOperand::ImmTySdwaDstUnused, false, nullptr},
7792   {"compr", AMDGPUOperand::ImmTyExpCompr, true, nullptr },
7793   {"vm", AMDGPUOperand::ImmTyExpVM, true, nullptr},
7794   {"op_sel", AMDGPUOperand::ImmTyOpSel, false, nullptr},
7795   {"op_sel_hi", AMDGPUOperand::ImmTyOpSelHi, false, nullptr},
7796   {"neg_lo", AMDGPUOperand::ImmTyNegLo, false, nullptr},
7797   {"neg_hi", AMDGPUOperand::ImmTyNegHi, false, nullptr},
7798   {"blgp", AMDGPUOperand::ImmTyBLGP, false, nullptr},
7799   {"cbsz", AMDGPUOperand::ImmTyCBSZ, false, nullptr},
7800   {"abid", AMDGPUOperand::ImmTyABID, false, nullptr}
7801 };
7802 
7803 void AMDGPUAsmParser::onBeginOfFile() {
7804   if (!getParser().getStreamer().getTargetStreamer() ||
7805       getSTI().getTargetTriple().getArch() == Triple::r600)
7806     return;
7807 
7808   if (!getTargetStreamer().getTargetID())
7809     getTargetStreamer().initializeTargetID(getSTI(), getSTI().getFeatureString());
7810 
7811   if (isHsaAbiVersion3AndAbove(&getSTI()))
7812     getTargetStreamer().EmitDirectiveAMDGCNTarget();
7813 }
7814 
7815 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOperand(OperandVector &Operands) {
7816 
7817   OperandMatchResultTy res = parseOptionalOpr(Operands);
7818 
7819   // This is a hack to enable hardcoded mandatory operands which follow
7820   // optional operands.
7821   //
7822   // Current design assumes that all operands after the first optional operand
7823   // are also optional. However implementation of some instructions violates
7824   // this rule (see e.g. flat/global atomic which have hardcoded 'glc' operands).
7825   //
7826   // To alleviate this problem, we have to (implicitly) parse extra operands
7827   // to make sure autogenerated parser of custom operands never hit hardcoded
7828   // mandatory operands.
7829 
7830   for (unsigned i = 0; i < MAX_OPR_LOOKAHEAD; ++i) {
7831     if (res != MatchOperand_Success ||
7832         isToken(AsmToken::EndOfStatement))
7833       break;
7834 
7835     trySkipToken(AsmToken::Comma);
7836     res = parseOptionalOpr(Operands);
7837   }
7838 
7839   return res;
7840 }
7841 
7842 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOpr(OperandVector &Operands) {
7843   OperandMatchResultTy res;
7844   for (const OptionalOperand &Op : AMDGPUOptionalOperandTable) {
7845     // try to parse any optional operand here
7846     if (Op.IsBit) {
7847       res = parseNamedBit(Op.Name, Operands, Op.Type);
7848     } else if (Op.Type == AMDGPUOperand::ImmTyOModSI) {
7849       res = parseOModOperand(Operands);
7850     } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstSel ||
7851                Op.Type == AMDGPUOperand::ImmTySdwaSrc0Sel ||
7852                Op.Type == AMDGPUOperand::ImmTySdwaSrc1Sel) {
7853       res = parseSDWASel(Operands, Op.Name, Op.Type);
7854     } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstUnused) {
7855       res = parseSDWADstUnused(Operands);
7856     } else if (Op.Type == AMDGPUOperand::ImmTyOpSel ||
7857                Op.Type == AMDGPUOperand::ImmTyOpSelHi ||
7858                Op.Type == AMDGPUOperand::ImmTyNegLo ||
7859                Op.Type == AMDGPUOperand::ImmTyNegHi) {
7860       res = parseOperandArrayWithPrefix(Op.Name, Operands, Op.Type,
7861                                         Op.ConvertResult);
7862     } else if (Op.Type == AMDGPUOperand::ImmTyDim) {
7863       res = parseDim(Operands);
7864     } else if (Op.Type == AMDGPUOperand::ImmTyCPol) {
7865       res = parseCPol(Operands);
7866     } else {
7867       res = parseIntWithPrefix(Op.Name, Operands, Op.Type, Op.ConvertResult);
7868       if (Op.Type == AMDGPUOperand::ImmTyBLGP && res == MatchOperand_NoMatch) {
7869         res = parseOperandArrayWithPrefix("neg", Operands,
7870                                           AMDGPUOperand::ImmTyBLGP,
7871                                           nullptr);
7872       }
7873     }
7874     if (res != MatchOperand_NoMatch) {
7875       return res;
7876     }
7877   }
7878   return MatchOperand_NoMatch;
7879 }
7880 
7881 OperandMatchResultTy AMDGPUAsmParser::parseOModOperand(OperandVector &Operands) {
7882   StringRef Name = getTokenStr();
7883   if (Name == "mul") {
7884     return parseIntWithPrefix("mul", Operands,
7885                               AMDGPUOperand::ImmTyOModSI, ConvertOmodMul);
7886   }
7887 
7888   if (Name == "div") {
7889     return parseIntWithPrefix("div", Operands,
7890                               AMDGPUOperand::ImmTyOModSI, ConvertOmodDiv);
7891   }
7892 
7893   return MatchOperand_NoMatch;
7894 }
7895 
7896 void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands) {
7897   cvtVOP3P(Inst, Operands);
7898 
7899   int Opc = Inst.getOpcode();
7900 
7901   int SrcNum;
7902   const int Ops[] = { AMDGPU::OpName::src0,
7903                       AMDGPU::OpName::src1,
7904                       AMDGPU::OpName::src2 };
7905   for (SrcNum = 0;
7906        SrcNum < 3 && AMDGPU::getNamedOperandIdx(Opc, Ops[SrcNum]) != -1;
7907        ++SrcNum);
7908   assert(SrcNum > 0);
7909 
7910   int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
7911   unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
7912 
7913   if ((OpSel & (1 << SrcNum)) != 0) {
7914     int ModIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers);
7915     uint32_t ModVal = Inst.getOperand(ModIdx).getImm();
7916     Inst.getOperand(ModIdx).setImm(ModVal | SISrcMods::DST_OP_SEL);
7917   }
7918 }
7919 
7920 static bool isRegOrImmWithInputMods(const MCInstrDesc &Desc, unsigned OpNum) {
7921       // 1. This operand is input modifiers
7922   return Desc.OpInfo[OpNum].OperandType == AMDGPU::OPERAND_INPUT_MODS
7923       // 2. This is not last operand
7924       && Desc.NumOperands > (OpNum + 1)
7925       // 3. Next operand is register class
7926       && Desc.OpInfo[OpNum + 1].RegClass != -1
7927       // 4. Next register is not tied to any other operand
7928       && Desc.getOperandConstraint(OpNum + 1, MCOI::OperandConstraint::TIED_TO) == -1;
7929 }
7930 
7931 void AMDGPUAsmParser::cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands)
7932 {
7933   OptionalImmIndexMap OptionalIdx;
7934   unsigned Opc = Inst.getOpcode();
7935 
7936   unsigned I = 1;
7937   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
7938   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
7939     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
7940   }
7941 
7942   for (unsigned E = Operands.size(); I != E; ++I) {
7943     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
7944     if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
7945       Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
7946     } else if (Op.isInterpSlot() ||
7947                Op.isInterpAttr() ||
7948                Op.isAttrChan()) {
7949       Inst.addOperand(MCOperand::createImm(Op.getImm()));
7950     } else if (Op.isImmModifier()) {
7951       OptionalIdx[Op.getImmTy()] = I;
7952     } else {
7953       llvm_unreachable("unhandled operand type");
7954     }
7955   }
7956 
7957   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::high) != -1) {
7958     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyHigh);
7959   }
7960 
7961   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) {
7962     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI);
7963   }
7964 
7965   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) {
7966     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI);
7967   }
7968 }
7969 
7970 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands,
7971                               OptionalImmIndexMap &OptionalIdx) {
7972   unsigned Opc = Inst.getOpcode();
7973 
7974   unsigned I = 1;
7975   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
7976   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
7977     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
7978   }
7979 
7980   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers) != -1) {
7981     // This instruction has src modifiers
7982     for (unsigned E = Operands.size(); I != E; ++I) {
7983       AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
7984       if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
7985         Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
7986       } else if (Op.isImmModifier()) {
7987         OptionalIdx[Op.getImmTy()] = I;
7988       } else if (Op.isRegOrImm()) {
7989         Op.addRegOrImmOperands(Inst, 1);
7990       } else {
7991         llvm_unreachable("unhandled operand type");
7992       }
7993     }
7994   } else {
7995     // No src modifiers
7996     for (unsigned E = Operands.size(); I != E; ++I) {
7997       AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
7998       if (Op.isMod()) {
7999         OptionalIdx[Op.getImmTy()] = I;
8000       } else {
8001         Op.addRegOrImmOperands(Inst, 1);
8002       }
8003     }
8004   }
8005 
8006   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) {
8007     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI);
8008   }
8009 
8010   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) {
8011     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI);
8012   }
8013 
8014   // Special case v_mac_{f16, f32} and v_fmac_{f16, f32} (gfx906/gfx10+):
8015   // it has src2 register operand that is tied to dst operand
8016   // we don't allow modifiers for this operand in assembler so src2_modifiers
8017   // should be 0.
8018   if (Opc == AMDGPU::V_MAC_F32_e64_gfx6_gfx7 ||
8019       Opc == AMDGPU::V_MAC_F32_e64_gfx10 ||
8020       Opc == AMDGPU::V_MAC_F32_e64_vi ||
8021       Opc == AMDGPU::V_MAC_LEGACY_F32_e64_gfx6_gfx7 ||
8022       Opc == AMDGPU::V_MAC_LEGACY_F32_e64_gfx10 ||
8023       Opc == AMDGPU::V_MAC_F16_e64_vi ||
8024       Opc == AMDGPU::V_FMAC_F64_e64_gfx90a ||
8025       Opc == AMDGPU::V_FMAC_F32_e64_gfx10 ||
8026       Opc == AMDGPU::V_FMAC_F32_e64_vi ||
8027       Opc == AMDGPU::V_FMAC_LEGACY_F32_e64_gfx10 ||
8028       Opc == AMDGPU::V_FMAC_F16_e64_gfx10) {
8029     auto it = Inst.begin();
8030     std::advance(it, AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2_modifiers));
8031     it = Inst.insert(it, MCOperand::createImm(0)); // no modifiers for src2
8032     ++it;
8033     // Copy the operand to ensure it's not invalidated when Inst grows.
8034     Inst.insert(it, MCOperand(Inst.getOperand(0))); // src2 = dst
8035   }
8036 }
8037 
8038 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands) {
8039   OptionalImmIndexMap OptionalIdx;
8040   cvtVOP3(Inst, Operands, OptionalIdx);
8041 }
8042 
8043 void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands,
8044                                OptionalImmIndexMap &OptIdx) {
8045   const int Opc = Inst.getOpcode();
8046   const MCInstrDesc &Desc = MII.get(Opc);
8047 
8048   const bool IsPacked = (Desc.TSFlags & SIInstrFlags::IsPacked) != 0;
8049 
8050   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst_in) != -1) {
8051     assert(!IsPacked);
8052     Inst.addOperand(Inst.getOperand(0));
8053   }
8054 
8055   // FIXME: This is messy. Parse the modifiers as if it was a normal VOP3
8056   // instruction, and then figure out where to actually put the modifiers
8057 
8058   int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
8059   if (OpSelIdx != -1) {
8060     addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSel);
8061   }
8062 
8063   int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi);
8064   if (OpSelHiIdx != -1) {
8065     int DefaultVal = IsPacked ? -1 : 0;
8066     addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSelHi,
8067                           DefaultVal);
8068   }
8069 
8070   int NegLoIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_lo);
8071   if (NegLoIdx != -1) {
8072     addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegLo);
8073     addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegHi);
8074   }
8075 
8076   const int Ops[] = { AMDGPU::OpName::src0,
8077                       AMDGPU::OpName::src1,
8078                       AMDGPU::OpName::src2 };
8079   const int ModOps[] = { AMDGPU::OpName::src0_modifiers,
8080                          AMDGPU::OpName::src1_modifiers,
8081                          AMDGPU::OpName::src2_modifiers };
8082 
8083   unsigned OpSel = 0;
8084   unsigned OpSelHi = 0;
8085   unsigned NegLo = 0;
8086   unsigned NegHi = 0;
8087 
8088   if (OpSelIdx != -1)
8089     OpSel = Inst.getOperand(OpSelIdx).getImm();
8090 
8091   if (OpSelHiIdx != -1)
8092     OpSelHi = Inst.getOperand(OpSelHiIdx).getImm();
8093 
8094   if (NegLoIdx != -1) {
8095     int NegHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_hi);
8096     NegLo = Inst.getOperand(NegLoIdx).getImm();
8097     NegHi = Inst.getOperand(NegHiIdx).getImm();
8098   }
8099 
8100   for (int J = 0; J < 3; ++J) {
8101     int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]);
8102     if (OpIdx == -1)
8103       break;
8104 
8105     uint32_t ModVal = 0;
8106 
8107     if ((OpSel & (1 << J)) != 0)
8108       ModVal |= SISrcMods::OP_SEL_0;
8109 
8110     if ((OpSelHi & (1 << J)) != 0)
8111       ModVal |= SISrcMods::OP_SEL_1;
8112 
8113     if ((NegLo & (1 << J)) != 0)
8114       ModVal |= SISrcMods::NEG;
8115 
8116     if ((NegHi & (1 << J)) != 0)
8117       ModVal |= SISrcMods::NEG_HI;
8118 
8119     int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]);
8120 
8121     Inst.getOperand(ModIdx).setImm(Inst.getOperand(ModIdx).getImm() | ModVal);
8122   }
8123 }
8124 
8125 void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands) {
8126   OptionalImmIndexMap OptIdx;
8127   cvtVOP3(Inst, Operands, OptIdx);
8128   cvtVOP3P(Inst, Operands, OptIdx);
8129 }
8130 
8131 //===----------------------------------------------------------------------===//
8132 // dpp
8133 //===----------------------------------------------------------------------===//
8134 
8135 bool AMDGPUOperand::isDPP8() const {
8136   return isImmTy(ImmTyDPP8);
8137 }
8138 
8139 bool AMDGPUOperand::isDPPCtrl() const {
8140   using namespace AMDGPU::DPP;
8141 
8142   bool result = isImm() && getImmTy() == ImmTyDppCtrl && isUInt<9>(getImm());
8143   if (result) {
8144     int64_t Imm = getImm();
8145     return (Imm >= DppCtrl::QUAD_PERM_FIRST && Imm <= DppCtrl::QUAD_PERM_LAST) ||
8146            (Imm >= DppCtrl::ROW_SHL_FIRST && Imm <= DppCtrl::ROW_SHL_LAST) ||
8147            (Imm >= DppCtrl::ROW_SHR_FIRST && Imm <= DppCtrl::ROW_SHR_LAST) ||
8148            (Imm >= DppCtrl::ROW_ROR_FIRST && Imm <= DppCtrl::ROW_ROR_LAST) ||
8149            (Imm == DppCtrl::WAVE_SHL1) ||
8150            (Imm == DppCtrl::WAVE_ROL1) ||
8151            (Imm == DppCtrl::WAVE_SHR1) ||
8152            (Imm == DppCtrl::WAVE_ROR1) ||
8153            (Imm == DppCtrl::ROW_MIRROR) ||
8154            (Imm == DppCtrl::ROW_HALF_MIRROR) ||
8155            (Imm == DppCtrl::BCAST15) ||
8156            (Imm == DppCtrl::BCAST31) ||
8157            (Imm >= DppCtrl::ROW_SHARE_FIRST && Imm <= DppCtrl::ROW_SHARE_LAST) ||
8158            (Imm >= DppCtrl::ROW_XMASK_FIRST && Imm <= DppCtrl::ROW_XMASK_LAST);
8159   }
8160   return false;
8161 }
8162 
8163 //===----------------------------------------------------------------------===//
8164 // mAI
8165 //===----------------------------------------------------------------------===//
8166 
8167 bool AMDGPUOperand::isBLGP() const {
8168   return isImm() && getImmTy() == ImmTyBLGP && isUInt<3>(getImm());
8169 }
8170 
8171 bool AMDGPUOperand::isCBSZ() const {
8172   return isImm() && getImmTy() == ImmTyCBSZ && isUInt<3>(getImm());
8173 }
8174 
8175 bool AMDGPUOperand::isABID() const {
8176   return isImm() && getImmTy() == ImmTyABID && isUInt<4>(getImm());
8177 }
8178 
8179 bool AMDGPUOperand::isS16Imm() const {
8180   return isImm() && (isInt<16>(getImm()) || isUInt<16>(getImm()));
8181 }
8182 
8183 bool AMDGPUOperand::isU16Imm() const {
8184   return isImm() && isUInt<16>(getImm());
8185 }
8186 
8187 //===----------------------------------------------------------------------===//
8188 // dim
8189 //===----------------------------------------------------------------------===//
8190 
8191 bool AMDGPUAsmParser::parseDimId(unsigned &Encoding) {
8192   // We want to allow "dim:1D" etc.,
8193   // but the initial 1 is tokenized as an integer.
8194   std::string Token;
8195   if (isToken(AsmToken::Integer)) {
8196     SMLoc Loc = getToken().getEndLoc();
8197     Token = std::string(getTokenStr());
8198     lex();
8199     if (getLoc() != Loc)
8200       return false;
8201   }
8202 
8203   StringRef Suffix;
8204   if (!parseId(Suffix))
8205     return false;
8206   Token += Suffix;
8207 
8208   StringRef DimId = Token;
8209   if (DimId.startswith("SQ_RSRC_IMG_"))
8210     DimId = DimId.drop_front(12);
8211 
8212   const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByAsmSuffix(DimId);
8213   if (!DimInfo)
8214     return false;
8215 
8216   Encoding = DimInfo->Encoding;
8217   return true;
8218 }
8219 
8220 OperandMatchResultTy AMDGPUAsmParser::parseDim(OperandVector &Operands) {
8221   if (!isGFX10Plus())
8222     return MatchOperand_NoMatch;
8223 
8224   SMLoc S = getLoc();
8225 
8226   if (!trySkipId("dim", AsmToken::Colon))
8227     return MatchOperand_NoMatch;
8228 
8229   unsigned Encoding;
8230   SMLoc Loc = getLoc();
8231   if (!parseDimId(Encoding)) {
8232     Error(Loc, "invalid dim value");
8233     return MatchOperand_ParseFail;
8234   }
8235 
8236   Operands.push_back(AMDGPUOperand::CreateImm(this, Encoding, S,
8237                                               AMDGPUOperand::ImmTyDim));
8238   return MatchOperand_Success;
8239 }
8240 
8241 //===----------------------------------------------------------------------===//
8242 // dpp
8243 //===----------------------------------------------------------------------===//
8244 
8245 OperandMatchResultTy AMDGPUAsmParser::parseDPP8(OperandVector &Operands) {
8246   SMLoc S = getLoc();
8247 
8248   if (!isGFX10Plus() || !trySkipId("dpp8", AsmToken::Colon))
8249     return MatchOperand_NoMatch;
8250 
8251   // dpp8:[%d,%d,%d,%d,%d,%d,%d,%d]
8252 
8253   int64_t Sels[8];
8254 
8255   if (!skipToken(AsmToken::LBrac, "expected an opening square bracket"))
8256     return MatchOperand_ParseFail;
8257 
8258   for (size_t i = 0; i < 8; ++i) {
8259     if (i > 0 && !skipToken(AsmToken::Comma, "expected a comma"))
8260       return MatchOperand_ParseFail;
8261 
8262     SMLoc Loc = getLoc();
8263     if (getParser().parseAbsoluteExpression(Sels[i]))
8264       return MatchOperand_ParseFail;
8265     if (0 > Sels[i] || 7 < Sels[i]) {
8266       Error(Loc, "expected a 3-bit value");
8267       return MatchOperand_ParseFail;
8268     }
8269   }
8270 
8271   if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
8272     return MatchOperand_ParseFail;
8273 
8274   unsigned DPP8 = 0;
8275   for (size_t i = 0; i < 8; ++i)
8276     DPP8 |= (Sels[i] << (i * 3));
8277 
8278   Operands.push_back(AMDGPUOperand::CreateImm(this, DPP8, S, AMDGPUOperand::ImmTyDPP8));
8279   return MatchOperand_Success;
8280 }
8281 
8282 bool
8283 AMDGPUAsmParser::isSupportedDPPCtrl(StringRef Ctrl,
8284                                     const OperandVector &Operands) {
8285   if (Ctrl == "row_newbcast")
8286     return isGFX90A();
8287 
8288   if (Ctrl == "row_share" ||
8289       Ctrl == "row_xmask")
8290     return isGFX10Plus();
8291 
8292   if (Ctrl == "wave_shl" ||
8293       Ctrl == "wave_shr" ||
8294       Ctrl == "wave_rol" ||
8295       Ctrl == "wave_ror" ||
8296       Ctrl == "row_bcast")
8297     return isVI() || isGFX9();
8298 
8299   return Ctrl == "row_mirror" ||
8300          Ctrl == "row_half_mirror" ||
8301          Ctrl == "quad_perm" ||
8302          Ctrl == "row_shl" ||
8303          Ctrl == "row_shr" ||
8304          Ctrl == "row_ror";
8305 }
8306 
8307 int64_t
8308 AMDGPUAsmParser::parseDPPCtrlPerm() {
8309   // quad_perm:[%d,%d,%d,%d]
8310 
8311   if (!skipToken(AsmToken::LBrac, "expected an opening square bracket"))
8312     return -1;
8313 
8314   int64_t Val = 0;
8315   for (int i = 0; i < 4; ++i) {
8316     if (i > 0 && !skipToken(AsmToken::Comma, "expected a comma"))
8317       return -1;
8318 
8319     int64_t Temp;
8320     SMLoc Loc = getLoc();
8321     if (getParser().parseAbsoluteExpression(Temp))
8322       return -1;
8323     if (Temp < 0 || Temp > 3) {
8324       Error(Loc, "expected a 2-bit value");
8325       return -1;
8326     }
8327 
8328     Val += (Temp << i * 2);
8329   }
8330 
8331   if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
8332     return -1;
8333 
8334   return Val;
8335 }
8336 
8337 int64_t
8338 AMDGPUAsmParser::parseDPPCtrlSel(StringRef Ctrl) {
8339   using namespace AMDGPU::DPP;
8340 
8341   // sel:%d
8342 
8343   int64_t Val;
8344   SMLoc Loc = getLoc();
8345 
8346   if (getParser().parseAbsoluteExpression(Val))
8347     return -1;
8348 
8349   struct DppCtrlCheck {
8350     int64_t Ctrl;
8351     int Lo;
8352     int Hi;
8353   };
8354 
8355   DppCtrlCheck Check = StringSwitch<DppCtrlCheck>(Ctrl)
8356     .Case("wave_shl",  {DppCtrl::WAVE_SHL1,       1,  1})
8357     .Case("wave_rol",  {DppCtrl::WAVE_ROL1,       1,  1})
8358     .Case("wave_shr",  {DppCtrl::WAVE_SHR1,       1,  1})
8359     .Case("wave_ror",  {DppCtrl::WAVE_ROR1,       1,  1})
8360     .Case("row_shl",   {DppCtrl::ROW_SHL0,        1, 15})
8361     .Case("row_shr",   {DppCtrl::ROW_SHR0,        1, 15})
8362     .Case("row_ror",   {DppCtrl::ROW_ROR0,        1, 15})
8363     .Case("row_share", {DppCtrl::ROW_SHARE_FIRST, 0, 15})
8364     .Case("row_xmask", {DppCtrl::ROW_XMASK_FIRST, 0, 15})
8365     .Case("row_newbcast", {DppCtrl::ROW_NEWBCAST_FIRST, 0, 15})
8366     .Default({-1, 0, 0});
8367 
8368   bool Valid;
8369   if (Check.Ctrl == -1) {
8370     Valid = (Ctrl == "row_bcast" && (Val == 15 || Val == 31));
8371     Val = (Val == 15)? DppCtrl::BCAST15 : DppCtrl::BCAST31;
8372   } else {
8373     Valid = Check.Lo <= Val && Val <= Check.Hi;
8374     Val = (Check.Lo == Check.Hi) ? Check.Ctrl : (Check.Ctrl | Val);
8375   }
8376 
8377   if (!Valid) {
8378     Error(Loc, Twine("invalid ", Ctrl) + Twine(" value"));
8379     return -1;
8380   }
8381 
8382   return Val;
8383 }
8384 
8385 OperandMatchResultTy
8386 AMDGPUAsmParser::parseDPPCtrl(OperandVector &Operands) {
8387   using namespace AMDGPU::DPP;
8388 
8389   if (!isToken(AsmToken::Identifier) ||
8390       !isSupportedDPPCtrl(getTokenStr(), Operands))
8391     return MatchOperand_NoMatch;
8392 
8393   SMLoc S = getLoc();
8394   int64_t Val = -1;
8395   StringRef Ctrl;
8396 
8397   parseId(Ctrl);
8398 
8399   if (Ctrl == "row_mirror") {
8400     Val = DppCtrl::ROW_MIRROR;
8401   } else if (Ctrl == "row_half_mirror") {
8402     Val = DppCtrl::ROW_HALF_MIRROR;
8403   } else {
8404     if (skipToken(AsmToken::Colon, "expected a colon")) {
8405       if (Ctrl == "quad_perm") {
8406         Val = parseDPPCtrlPerm();
8407       } else {
8408         Val = parseDPPCtrlSel(Ctrl);
8409       }
8410     }
8411   }
8412 
8413   if (Val == -1)
8414     return MatchOperand_ParseFail;
8415 
8416   Operands.push_back(
8417     AMDGPUOperand::CreateImm(this, Val, S, AMDGPUOperand::ImmTyDppCtrl));
8418   return MatchOperand_Success;
8419 }
8420 
8421 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultRowMask() const {
8422   return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppRowMask);
8423 }
8424 
8425 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultEndpgmImmOperands() const {
8426   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyEndpgm);
8427 }
8428 
8429 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBankMask() const {
8430   return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppBankMask);
8431 }
8432 
8433 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBoundCtrl() const {
8434   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppBoundCtrl);
8435 }
8436 
8437 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultFI() const {
8438   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppFi);
8439 }
8440 
8441 void AMDGPUAsmParser::cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8) {
8442   OptionalImmIndexMap OptionalIdx;
8443 
8444   unsigned Opc = Inst.getOpcode();
8445   bool HasModifiers =
8446       AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers) != -1;
8447   unsigned I = 1;
8448   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
8449   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
8450     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
8451   }
8452 
8453   int Fi = 0;
8454   for (unsigned E = Operands.size(); I != E; ++I) {
8455     auto TiedTo = Desc.getOperandConstraint(Inst.getNumOperands(),
8456                                             MCOI::TIED_TO);
8457     if (TiedTo != -1) {
8458       assert((unsigned)TiedTo < Inst.getNumOperands());
8459       // handle tied old or src2 for MAC instructions
8460       Inst.addOperand(Inst.getOperand(TiedTo));
8461     }
8462     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
8463     // Add the register arguments
8464     if (Op.isReg() && validateVccOperand(Op.getReg())) {
8465       // VOP2b (v_add_u32, v_sub_u32 ...) dpp use "vcc" token.
8466       // Skip it.
8467       continue;
8468     }
8469 
8470     if (IsDPP8) {
8471       if (Op.isDPP8()) {
8472         Op.addImmOperands(Inst, 1);
8473       } else if (HasModifiers &&
8474                  isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
8475         Op.addRegWithFPInputModsOperands(Inst, 2);
8476       } else if (Op.isFI()) {
8477         Fi = Op.getImm();
8478       } else if (Op.isReg()) {
8479         Op.addRegOperands(Inst, 1);
8480       } else {
8481         llvm_unreachable("Invalid operand type");
8482       }
8483     } else {
8484       if (HasModifiers &&
8485           isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
8486         Op.addRegWithFPInputModsOperands(Inst, 2);
8487       } else if (Op.isReg()) {
8488         Op.addRegOperands(Inst, 1);
8489       } else if (Op.isDPPCtrl()) {
8490         Op.addImmOperands(Inst, 1);
8491       } else if (Op.isImm()) {
8492         // Handle optional arguments
8493         OptionalIdx[Op.getImmTy()] = I;
8494       } else {
8495         llvm_unreachable("Invalid operand type");
8496       }
8497     }
8498   }
8499 
8500   if (IsDPP8) {
8501     using namespace llvm::AMDGPU::DPP;
8502     Inst.addOperand(MCOperand::createImm(Fi? DPP8_FI_1 : DPP8_FI_0));
8503   } else {
8504     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppRowMask, 0xf);
8505     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBankMask, 0xf);
8506     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBoundCtrl);
8507     if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::fi) != -1) {
8508       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppFi);
8509     }
8510   }
8511 }
8512 
8513 //===----------------------------------------------------------------------===//
8514 // sdwa
8515 //===----------------------------------------------------------------------===//
8516 
8517 OperandMatchResultTy
8518 AMDGPUAsmParser::parseSDWASel(OperandVector &Operands, StringRef Prefix,
8519                               AMDGPUOperand::ImmTy Type) {
8520   using namespace llvm::AMDGPU::SDWA;
8521 
8522   SMLoc S = getLoc();
8523   StringRef Value;
8524   OperandMatchResultTy res;
8525 
8526   SMLoc StringLoc;
8527   res = parseStringWithPrefix(Prefix, Value, StringLoc);
8528   if (res != MatchOperand_Success) {
8529     return res;
8530   }
8531 
8532   int64_t Int;
8533   Int = StringSwitch<int64_t>(Value)
8534         .Case("BYTE_0", SdwaSel::BYTE_0)
8535         .Case("BYTE_1", SdwaSel::BYTE_1)
8536         .Case("BYTE_2", SdwaSel::BYTE_2)
8537         .Case("BYTE_3", SdwaSel::BYTE_3)
8538         .Case("WORD_0", SdwaSel::WORD_0)
8539         .Case("WORD_1", SdwaSel::WORD_1)
8540         .Case("DWORD", SdwaSel::DWORD)
8541         .Default(0xffffffff);
8542 
8543   if (Int == 0xffffffff) {
8544     Error(StringLoc, "invalid " + Twine(Prefix) + " value");
8545     return MatchOperand_ParseFail;
8546   }
8547 
8548   Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, Type));
8549   return MatchOperand_Success;
8550 }
8551 
8552 OperandMatchResultTy
8553 AMDGPUAsmParser::parseSDWADstUnused(OperandVector &Operands) {
8554   using namespace llvm::AMDGPU::SDWA;
8555 
8556   SMLoc S = getLoc();
8557   StringRef Value;
8558   OperandMatchResultTy res;
8559 
8560   SMLoc StringLoc;
8561   res = parseStringWithPrefix("dst_unused", Value, StringLoc);
8562   if (res != MatchOperand_Success) {
8563     return res;
8564   }
8565 
8566   int64_t Int;
8567   Int = StringSwitch<int64_t>(Value)
8568         .Case("UNUSED_PAD", DstUnused::UNUSED_PAD)
8569         .Case("UNUSED_SEXT", DstUnused::UNUSED_SEXT)
8570         .Case("UNUSED_PRESERVE", DstUnused::UNUSED_PRESERVE)
8571         .Default(0xffffffff);
8572 
8573   if (Int == 0xffffffff) {
8574     Error(StringLoc, "invalid dst_unused value");
8575     return MatchOperand_ParseFail;
8576   }
8577 
8578   Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, AMDGPUOperand::ImmTySdwaDstUnused));
8579   return MatchOperand_Success;
8580 }
8581 
8582 void AMDGPUAsmParser::cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands) {
8583   cvtSDWA(Inst, Operands, SIInstrFlags::VOP1);
8584 }
8585 
8586 void AMDGPUAsmParser::cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands) {
8587   cvtSDWA(Inst, Operands, SIInstrFlags::VOP2);
8588 }
8589 
8590 void AMDGPUAsmParser::cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands) {
8591   cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, true, true);
8592 }
8593 
8594 void AMDGPUAsmParser::cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands) {
8595   cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, false, true);
8596 }
8597 
8598 void AMDGPUAsmParser::cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands) {
8599   cvtSDWA(Inst, Operands, SIInstrFlags::VOPC, isVI());
8600 }
8601 
8602 void AMDGPUAsmParser::cvtSDWA(MCInst &Inst, const OperandVector &Operands,
8603                               uint64_t BasicInstType,
8604                               bool SkipDstVcc,
8605                               bool SkipSrcVcc) {
8606   using namespace llvm::AMDGPU::SDWA;
8607 
8608   OptionalImmIndexMap OptionalIdx;
8609   bool SkipVcc = SkipDstVcc || SkipSrcVcc;
8610   bool SkippedVcc = false;
8611 
8612   unsigned I = 1;
8613   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
8614   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
8615     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
8616   }
8617 
8618   for (unsigned E = Operands.size(); I != E; ++I) {
8619     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
8620     if (SkipVcc && !SkippedVcc && Op.isReg() &&
8621         (Op.getReg() == AMDGPU::VCC || Op.getReg() == AMDGPU::VCC_LO)) {
8622       // VOP2b (v_add_u32, v_sub_u32 ...) sdwa use "vcc" token as dst.
8623       // Skip it if it's 2nd (e.g. v_add_i32_sdwa v1, vcc, v2, v3)
8624       // or 4th (v_addc_u32_sdwa v1, vcc, v2, v3, vcc) operand.
8625       // Skip VCC only if we didn't skip it on previous iteration.
8626       // Note that src0 and src1 occupy 2 slots each because of modifiers.
8627       if (BasicInstType == SIInstrFlags::VOP2 &&
8628           ((SkipDstVcc && Inst.getNumOperands() == 1) ||
8629            (SkipSrcVcc && Inst.getNumOperands() == 5))) {
8630         SkippedVcc = true;
8631         continue;
8632       } else if (BasicInstType == SIInstrFlags::VOPC &&
8633                  Inst.getNumOperands() == 0) {
8634         SkippedVcc = true;
8635         continue;
8636       }
8637     }
8638     if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
8639       Op.addRegOrImmWithInputModsOperands(Inst, 2);
8640     } else if (Op.isImm()) {
8641       // Handle optional arguments
8642       OptionalIdx[Op.getImmTy()] = I;
8643     } else {
8644       llvm_unreachable("Invalid operand type");
8645     }
8646     SkippedVcc = false;
8647   }
8648 
8649   if (Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx10 &&
8650       Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx9 &&
8651       Inst.getOpcode() != AMDGPU::V_NOP_sdwa_vi) {
8652     // v_nop_sdwa_sdwa_vi/gfx9 has no optional sdwa arguments
8653     switch (BasicInstType) {
8654     case SIInstrFlags::VOP1:
8655       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
8656       if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) {
8657         addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
8658       }
8659       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD);
8660       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE);
8661       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
8662       break;
8663 
8664     case SIInstrFlags::VOP2:
8665       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
8666       if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) {
8667         addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
8668       }
8669       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD);
8670       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE);
8671       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
8672       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD);
8673       break;
8674 
8675     case SIInstrFlags::VOPC:
8676       if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::clamp) != -1)
8677         addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
8678       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
8679       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD);
8680       break;
8681 
8682     default:
8683       llvm_unreachable("Invalid instruction type. Only VOP1, VOP2 and VOPC allowed");
8684     }
8685   }
8686 
8687   // special case v_mac_{f16, f32}:
8688   // it has src2 register operand that is tied to dst operand
8689   if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
8690       Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi)  {
8691     auto it = Inst.begin();
8692     std::advance(
8693       it, AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::src2));
8694     Inst.insert(it, Inst.getOperand(0)); // src2 = dst
8695   }
8696 }
8697 
8698 //===----------------------------------------------------------------------===//
8699 // mAI
8700 //===----------------------------------------------------------------------===//
8701 
8702 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBLGP() const {
8703   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyBLGP);
8704 }
8705 
8706 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultCBSZ() const {
8707   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyCBSZ);
8708 }
8709 
8710 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultABID() const {
8711   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyABID);
8712 }
8713 
8714 /// Force static initialization.
8715 extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUAsmParser() {
8716   RegisterMCAsmParser<AMDGPUAsmParser> A(getTheAMDGPUTarget());
8717   RegisterMCAsmParser<AMDGPUAsmParser> B(getTheGCNTarget());
8718 }
8719 
8720 #define GET_REGISTER_MATCHER
8721 #define GET_MATCHER_IMPLEMENTATION
8722 #define GET_MNEMONIC_SPELL_CHECKER
8723 #define GET_MNEMONIC_CHECKER
8724 #include "AMDGPUGenAsmMatcher.inc"
8725 
8726 // This function should be defined after auto-generated include so that we have
8727 // MatchClassKind enum defined
8728 unsigned AMDGPUAsmParser::validateTargetOperandClass(MCParsedAsmOperand &Op,
8729                                                      unsigned Kind) {
8730   // Tokens like "glc" would be parsed as immediate operands in ParseOperand().
8731   // But MatchInstructionImpl() expects to meet token and fails to validate
8732   // operand. This method checks if we are given immediate operand but expect to
8733   // get corresponding token.
8734   AMDGPUOperand &Operand = (AMDGPUOperand&)Op;
8735   switch (Kind) {
8736   case MCK_addr64:
8737     return Operand.isAddr64() ? Match_Success : Match_InvalidOperand;
8738   case MCK_gds:
8739     return Operand.isGDS() ? Match_Success : Match_InvalidOperand;
8740   case MCK_lds:
8741     return Operand.isLDS() ? Match_Success : Match_InvalidOperand;
8742   case MCK_idxen:
8743     return Operand.isIdxen() ? Match_Success : Match_InvalidOperand;
8744   case MCK_offen:
8745     return Operand.isOffen() ? Match_Success : Match_InvalidOperand;
8746   case MCK_SSrcB32:
8747     // When operands have expression values, they will return true for isToken,
8748     // because it is not possible to distinguish between a token and an
8749     // expression at parse time. MatchInstructionImpl() will always try to
8750     // match an operand as a token, when isToken returns true, and when the
8751     // name of the expression is not a valid token, the match will fail,
8752     // so we need to handle it here.
8753     return Operand.isSSrcB32() ? Match_Success : Match_InvalidOperand;
8754   case MCK_SSrcF32:
8755     return Operand.isSSrcF32() ? Match_Success : Match_InvalidOperand;
8756   case MCK_SoppBrTarget:
8757     return Operand.isSoppBrTarget() ? Match_Success : Match_InvalidOperand;
8758   case MCK_VReg32OrOff:
8759     return Operand.isVReg32OrOff() ? Match_Success : Match_InvalidOperand;
8760   case MCK_InterpSlot:
8761     return Operand.isInterpSlot() ? Match_Success : Match_InvalidOperand;
8762   case MCK_Attr:
8763     return Operand.isInterpAttr() ? Match_Success : Match_InvalidOperand;
8764   case MCK_AttrChan:
8765     return Operand.isAttrChan() ? Match_Success : Match_InvalidOperand;
8766   case MCK_ImmSMEMOffset:
8767     return Operand.isSMEMOffset() ? Match_Success : Match_InvalidOperand;
8768   case MCK_SReg_64:
8769   case MCK_SReg_64_XEXEC:
8770     // Null is defined as a 32-bit register but
8771     // it should also be enabled with 64-bit operands.
8772     // The following code enables it for SReg_64 operands
8773     // used as source and destination. Remaining source
8774     // operands are handled in isInlinableImm.
8775     return Operand.isNull() ? Match_Success : Match_InvalidOperand;
8776   default:
8777     return Match_InvalidOperand;
8778   }
8779 }
8780 
8781 //===----------------------------------------------------------------------===//
8782 // endpgm
8783 //===----------------------------------------------------------------------===//
8784 
8785 OperandMatchResultTy AMDGPUAsmParser::parseEndpgmOp(OperandVector &Operands) {
8786   SMLoc S = getLoc();
8787   int64_t Imm = 0;
8788 
8789   if (!parseExpr(Imm)) {
8790     // The operand is optional, if not present default to 0
8791     Imm = 0;
8792   }
8793 
8794   if (!isUInt<16>(Imm)) {
8795     Error(S, "expected a 16-bit value");
8796     return MatchOperand_ParseFail;
8797   }
8798 
8799   Operands.push_back(
8800       AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyEndpgm));
8801   return MatchOperand_Success;
8802 }
8803 
8804 bool AMDGPUOperand::isEndpgm() const { return isImmTy(ImmTyEndpgm); }
8805