1 //===- AMDGPUAsmParser.cpp - Parse SI asm to MCInst instructions ----------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "AMDKernelCodeT.h"
10 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
11 #include "MCTargetDesc/AMDGPUTargetStreamer.h"
12 #include "SIDefines.h"
13 #include "SIInstrInfo.h"
14 #include "SIRegisterInfo.h"
15 #include "TargetInfo/AMDGPUTargetInfo.h"
16 #include "Utils/AMDGPUAsmUtils.h"
17 #include "Utils/AMDGPUBaseInfo.h"
18 #include "Utils/AMDKernelCodeTUtils.h"
19 #include "llvm/ADT/APFloat.h"
20 #include "llvm/ADT/SmallBitVector.h"
21 #include "llvm/ADT/StringSet.h"
22 #include "llvm/ADT/Twine.h"
23 #include "llvm/BinaryFormat/ELF.h"
24 #include "llvm/MC/MCAsmInfo.h"
25 #include "llvm/MC/MCContext.h"
26 #include "llvm/MC/MCExpr.h"
27 #include "llvm/MC/MCInst.h"
28 #include "llvm/MC/MCParser/MCAsmLexer.h"
29 #include "llvm/MC/MCParser/MCAsmParser.h"
30 #include "llvm/MC/MCParser/MCParsedAsmOperand.h"
31 #include "llvm/MC/MCParser/MCTargetAsmParser.h"
32 #include "llvm/MC/MCSymbol.h"
33 #include "llvm/MC/TargetRegistry.h"
34 #include "llvm/Support/AMDGPUMetadata.h"
35 #include "llvm/Support/AMDHSAKernelDescriptor.h"
36 #include "llvm/Support/Casting.h"
37 #include "llvm/Support/MachineValueType.h"
38 #include "llvm/Support/MathExtras.h"
39 #include "llvm/Support/TargetParser.h"
40 
41 using namespace llvm;
42 using namespace llvm::AMDGPU;
43 using namespace llvm::amdhsa;
44 
45 namespace {
46 
47 class AMDGPUAsmParser;
48 
49 enum RegisterKind { IS_UNKNOWN, IS_VGPR, IS_SGPR, IS_AGPR, IS_TTMP, IS_SPECIAL };
50 
51 //===----------------------------------------------------------------------===//
52 // Operand
53 //===----------------------------------------------------------------------===//
54 
55 class AMDGPUOperand : public MCParsedAsmOperand {
56   enum KindTy {
57     Token,
58     Immediate,
59     Register,
60     Expression
61   } Kind;
62 
63   SMLoc StartLoc, EndLoc;
64   const AMDGPUAsmParser *AsmParser;
65 
66 public:
67   AMDGPUOperand(KindTy Kind_, const AMDGPUAsmParser *AsmParser_)
68       : Kind(Kind_), AsmParser(AsmParser_) {}
69 
70   using Ptr = std::unique_ptr<AMDGPUOperand>;
71 
72   struct Modifiers {
73     bool Abs = false;
74     bool Neg = false;
75     bool Sext = false;
76 
77     bool hasFPModifiers() const { return Abs || Neg; }
78     bool hasIntModifiers() const { return Sext; }
79     bool hasModifiers() const { return hasFPModifiers() || hasIntModifiers(); }
80 
81     int64_t getFPModifiersOperand() const {
82       int64_t Operand = 0;
83       Operand |= Abs ? SISrcMods::ABS : 0u;
84       Operand |= Neg ? SISrcMods::NEG : 0u;
85       return Operand;
86     }
87 
88     int64_t getIntModifiersOperand() const {
89       int64_t Operand = 0;
90       Operand |= Sext ? SISrcMods::SEXT : 0u;
91       return Operand;
92     }
93 
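    // For example, a VOP3 source written as "-|v0|" has both Neg and Abs set,
    // so this returns SISrcMods::NEG | SISrcMods::ABS, while a source written
    // as "sext(v0)" returns SISrcMods::SEXT.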
94     int64_t getModifiersOperand() const {
95       assert(!(hasFPModifiers() && hasIntModifiers())
96            && "fp and int modifiers should not be used simultaneously");
97       if (hasFPModifiers()) {
98         return getFPModifiersOperand();
99       } else if (hasIntModifiers()) {
100         return getIntModifiersOperand();
101       } else {
102         return 0;
103       }
104     }
105 
106     friend raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods);
107   };
108 
109   enum ImmTy {
110     ImmTyNone,
111     ImmTyGDS,
112     ImmTyLDS,
113     ImmTyOffen,
114     ImmTyIdxen,
115     ImmTyAddr64,
116     ImmTyOffset,
117     ImmTyInstOffset,
118     ImmTyOffset0,
119     ImmTyOffset1,
120     ImmTyCPol,
121     ImmTySWZ,
122     ImmTyTFE,
123     ImmTyD16,
124     ImmTyClampSI,
125     ImmTyOModSI,
126     ImmTyDPP8,
127     ImmTyDppCtrl,
128     ImmTyDppRowMask,
129     ImmTyDppBankMask,
130     ImmTyDppBoundCtrl,
131     ImmTyDppFi,
132     ImmTySdwaDstSel,
133     ImmTySdwaSrc0Sel,
134     ImmTySdwaSrc1Sel,
135     ImmTySdwaDstUnused,
136     ImmTyDMask,
137     ImmTyDim,
138     ImmTyUNorm,
139     ImmTyDA,
140     ImmTyR128A16,
141     ImmTyA16,
142     ImmTyLWE,
143     ImmTyExpTgt,
144     ImmTyExpCompr,
145     ImmTyExpVM,
146     ImmTyFORMAT,
147     ImmTyHwreg,
148     ImmTyOff,
149     ImmTySendMsg,
150     ImmTyInterpSlot,
151     ImmTyInterpAttr,
152     ImmTyAttrChan,
153     ImmTyOpSel,
154     ImmTyOpSelHi,
155     ImmTyNegLo,
156     ImmTyNegHi,
157     ImmTySwizzle,
158     ImmTyGprIdxMode,
159     ImmTyHigh,
160     ImmTyBLGP,
161     ImmTyCBSZ,
162     ImmTyABID,
163     ImmTyEndpgm,
164   };
165 
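  // Describes how an immediate was encoded once added to an MCInst:
  // ImmKindTyLiteral when a literal constant is emitted into the instruction
  // stream, ImmKindTyConst when the value fits an inline constant.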
166   enum ImmKindTy {
167     ImmKindTyNone,
168     ImmKindTyLiteral,
169     ImmKindTyConst,
170   };
171 
172 private:
173   struct TokOp {
174     const char *Data;
175     unsigned Length;
176   };
177 
178   struct ImmOp {
179     int64_t Val;
180     ImmTy Type;
181     bool IsFPImm;
182     mutable ImmKindTy Kind;
183     Modifiers Mods;
184   };
185 
186   struct RegOp {
187     unsigned RegNo;
188     Modifiers Mods;
189   };
190 
191   union {
192     TokOp Tok;
193     ImmOp Imm;
194     RegOp Reg;
195     const MCExpr *Expr;
196   };
197 
198 public:
199   bool isToken() const override {
200     if (Kind == Token)
201       return true;
202 
203     // When parsing operands, we can't always tell if something was meant to be
204     // a token, like 'gds', or an expression that references a global variable.
205     // In this case, we assume the string is an expression, and if we need to
206     // interpret it as a token, then we treat the symbol name as the token.
207     return isSymbolRefExpr();
208   }
209 
210   bool isSymbolRefExpr() const {
211     return isExpr() && Expr && isa<MCSymbolRefExpr>(Expr);
212   }
213 
214   bool isImm() const override {
215     return Kind == Immediate;
216   }
217 
218   void setImmKindNone() const {
219     assert(isImm());
220     Imm.Kind = ImmKindTyNone;
221   }
222 
223   void setImmKindLiteral() const {
224     assert(isImm());
225     Imm.Kind = ImmKindTyLiteral;
226   }
227 
228   void setImmKindConst() const {
229     assert(isImm());
230     Imm.Kind = ImmKindTyConst;
231   }
232 
233   bool IsImmKindLiteral() const {
234     return isImm() && Imm.Kind == ImmKindTyLiteral;
235   }
236 
237   bool isImmKindConst() const {
238     return isImm() && Imm.Kind == ImmKindTyConst;
239   }
240 
241   bool isInlinableImm(MVT type) const;
242   bool isLiteralImm(MVT type) const;
243 
244   bool isRegKind() const {
245     return Kind == Register;
246   }
247 
248   bool isReg() const override {
249     return isRegKind() && !hasModifiers();
250   }
251 
252   bool isRegOrInline(unsigned RCID, MVT type) const {
253     return isRegClass(RCID) || isInlinableImm(type);
254   }
255 
256   bool isRegOrImmWithInputMods(unsigned RCID, MVT type) const {
257     return isRegOrInline(RCID, type) || isLiteralImm(type);
258   }
259 
260   bool isRegOrImmWithInt16InputMods() const {
261     return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i16);
262   }
263 
264   bool isRegOrImmWithInt32InputMods() const {
265     return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i32);
266   }
267 
268   bool isRegOrImmWithInt64InputMods() const {
269     return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::i64);
270   }
271 
272   bool isRegOrImmWithFP16InputMods() const {
273     return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f16);
274   }
275 
276   bool isRegOrImmWithFP32InputMods() const {
277     return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f32);
278   }
279 
280   bool isRegOrImmWithFP64InputMods() const {
281     return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::f64);
282   }
283 
284   bool isVReg() const {
285     return isRegClass(AMDGPU::VGPR_32RegClassID) ||
286            isRegClass(AMDGPU::VReg_64RegClassID) ||
287            isRegClass(AMDGPU::VReg_96RegClassID) ||
288            isRegClass(AMDGPU::VReg_128RegClassID) ||
289            isRegClass(AMDGPU::VReg_160RegClassID) ||
290            isRegClass(AMDGPU::VReg_192RegClassID) ||
291            isRegClass(AMDGPU::VReg_256RegClassID) ||
292            isRegClass(AMDGPU::VReg_512RegClassID) ||
293            isRegClass(AMDGPU::VReg_1024RegClassID);
294   }
295 
296   bool isVReg32() const {
297     return isRegClass(AMDGPU::VGPR_32RegClassID);
298   }
299 
300   bool isVReg32OrOff() const {
301     return isOff() || isVReg32();
302   }
303 
304   bool isNull() const {
305     return isRegKind() && getReg() == AMDGPU::SGPR_NULL;
306   }
307 
308   bool isVRegWithInputMods() const;
309 
310   bool isSDWAOperand(MVT type) const;
311   bool isSDWAFP16Operand() const;
312   bool isSDWAFP32Operand() const;
313   bool isSDWAInt16Operand() const;
314   bool isSDWAInt32Operand() const;
315 
316   bool isImmTy(ImmTy ImmT) const {
317     return isImm() && Imm.Type == ImmT;
318   }
319 
320   bool isImmModifier() const {
321     return isImm() && Imm.Type != ImmTyNone;
322   }
323 
324   bool isClampSI() const { return isImmTy(ImmTyClampSI); }
325   bool isOModSI() const { return isImmTy(ImmTyOModSI); }
326   bool isDMask() const { return isImmTy(ImmTyDMask); }
327   bool isDim() const { return isImmTy(ImmTyDim); }
328   bool isUNorm() const { return isImmTy(ImmTyUNorm); }
329   bool isDA() const { return isImmTy(ImmTyDA); }
330   bool isR128A16() const { return isImmTy(ImmTyR128A16); }
331   bool isGFX10A16() const { return isImmTy(ImmTyA16); }
332   bool isLWE() const { return isImmTy(ImmTyLWE); }
333   bool isOff() const { return isImmTy(ImmTyOff); }
334   bool isExpTgt() const { return isImmTy(ImmTyExpTgt); }
335   bool isExpVM() const { return isImmTy(ImmTyExpVM); }
336   bool isExpCompr() const { return isImmTy(ImmTyExpCompr); }
337   bool isOffen() const { return isImmTy(ImmTyOffen); }
338   bool isIdxen() const { return isImmTy(ImmTyIdxen); }
339   bool isAddr64() const { return isImmTy(ImmTyAddr64); }
340   bool isOffset() const { return isImmTy(ImmTyOffset) && isUInt<16>(getImm()); }
341   bool isOffset0() const { return isImmTy(ImmTyOffset0) && isUInt<8>(getImm()); }
342   bool isOffset1() const { return isImmTy(ImmTyOffset1) && isUInt<8>(getImm()); }
343 
344   bool isFlatOffset() const { return isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset); }
345   bool isGDS() const { return isImmTy(ImmTyGDS); }
346   bool isLDS() const { return isImmTy(ImmTyLDS); }
347   bool isCPol() const { return isImmTy(ImmTyCPol); }
348   bool isSWZ() const { return isImmTy(ImmTySWZ); }
349   bool isTFE() const { return isImmTy(ImmTyTFE); }
350   bool isD16() const { return isImmTy(ImmTyD16); }
351   bool isFORMAT() const { return isImmTy(ImmTyFORMAT) && isUInt<7>(getImm()); }
352   bool isBankMask() const { return isImmTy(ImmTyDppBankMask); }
353   bool isRowMask() const { return isImmTy(ImmTyDppRowMask); }
354   bool isBoundCtrl() const { return isImmTy(ImmTyDppBoundCtrl); }
355   bool isFI() const { return isImmTy(ImmTyDppFi); }
356   bool isSDWADstSel() const { return isImmTy(ImmTySdwaDstSel); }
357   bool isSDWASrc0Sel() const { return isImmTy(ImmTySdwaSrc0Sel); }
358   bool isSDWASrc1Sel() const { return isImmTy(ImmTySdwaSrc1Sel); }
359   bool isSDWADstUnused() const { return isImmTy(ImmTySdwaDstUnused); }
360   bool isInterpSlot() const { return isImmTy(ImmTyInterpSlot); }
361   bool isInterpAttr() const { return isImmTy(ImmTyInterpAttr); }
362   bool isAttrChan() const { return isImmTy(ImmTyAttrChan); }
363   bool isOpSel() const { return isImmTy(ImmTyOpSel); }
364   bool isOpSelHi() const { return isImmTy(ImmTyOpSelHi); }
365   bool isNegLo() const { return isImmTy(ImmTyNegLo); }
366   bool isNegHi() const { return isImmTy(ImmTyNegHi); }
367   bool isHigh() const { return isImmTy(ImmTyHigh); }
368 
369   bool isMod() const {
370     return isClampSI() || isOModSI();
371   }
372 
373   bool isRegOrImm() const {
374     return isReg() || isImm();
375   }
376 
377   bool isRegClass(unsigned RCID) const;
378 
379   bool isInlineValue() const;
380 
381   bool isRegOrInlineNoMods(unsigned RCID, MVT type) const {
382     return isRegOrInline(RCID, type) && !hasModifiers();
383   }
384 
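  // The predicates below mirror the operand-class naming used by the matcher
  // tables: "SCSrc" accepts an SGPR or inline constant, "SSrc" additionally
  // accepts a literal, "VCSrc"/"VSrc" are the VALU equivalents, and
  // "VISrc"/"AISrc" accept a VGPR/AGPR or inline constant. The suffix gives
  // the value type (B = integer bits, F = float, V2 = packed pair).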
385   bool isSCSrcB16() const {
386     return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i16);
387   }
388 
389   bool isSCSrcV2B16() const {
390     return isSCSrcB16();
391   }
392 
393   bool isSCSrcB32() const {
394     return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i32);
395   }
396 
397   bool isSCSrcB64() const {
398     return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::i64);
399   }
400 
401   bool isBoolReg() const;
402 
403   bool isSCSrcF16() const {
404     return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f16);
405   }
406 
407   bool isSCSrcV2F16() const {
408     return isSCSrcF16();
409   }
410 
411   bool isSCSrcF32() const {
412     return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f32);
413   }
414 
415   bool isSCSrcF64() const {
416     return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::f64);
417   }
418 
419   bool isSSrcB32() const {
420     return isSCSrcB32() || isLiteralImm(MVT::i32) || isExpr();
421   }
422 
423   bool isSSrcB16() const {
424     return isSCSrcB16() || isLiteralImm(MVT::i16);
425   }
426 
427   bool isSSrcV2B16() const {
428     llvm_unreachable("cannot happen");
429     return isSSrcB16();
430   }
431 
432   bool isSSrcB64() const {
433     // TODO: Find out how SALU supports extension of 32-bit literals to 64 bits.
434     // See isVSrc64().
435     return isSCSrcB64() || isLiteralImm(MVT::i64);
436   }
437 
438   bool isSSrcF32() const {
439     return isSCSrcB32() || isLiteralImm(MVT::f32) || isExpr();
440   }
441 
442   bool isSSrcF64() const {
443     return isSCSrcB64() || isLiteralImm(MVT::f64);
444   }
445 
446   bool isSSrcF16() const {
447     return isSCSrcB16() || isLiteralImm(MVT::f16);
448   }
449 
450   bool isSSrcV2F16() const {
451     llvm_unreachable("cannot happen");
452     return isSSrcF16();
453   }
454 
455   bool isSSrcV2FP32() const {
456     llvm_unreachable("cannot happen");
457     return isSSrcF32();
458   }
459 
460   bool isSCSrcV2FP32() const {
461     llvm_unreachable("cannot happen");
462     return isSCSrcF32();
463   }
464 
465   bool isSSrcV2INT32() const {
466     llvm_unreachable("cannot happen");
467     return isSSrcB32();
468   }
469 
470   bool isSCSrcV2INT32() const {
471     llvm_unreachable("cannot happen");
472     return isSCSrcB32();
473   }
474 
475   bool isSSrcOrLdsB32() const {
476     return isRegOrInlineNoMods(AMDGPU::SRegOrLds_32RegClassID, MVT::i32) ||
477            isLiteralImm(MVT::i32) || isExpr();
478   }
479 
480   bool isVCSrcB32() const {
481     return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i32);
482   }
483 
484   bool isVCSrcB64() const {
485     return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::i64);
486   }
487 
488   bool isVCSrcB16() const {
489     return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i16);
490   }
491 
492   bool isVCSrcV2B16() const {
493     return isVCSrcB16();
494   }
495 
496   bool isVCSrcF32() const {
497     return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f32);
498   }
499 
500   bool isVCSrcF64() const {
501     return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::f64);
502   }
503 
504   bool isVCSrcF16() const {
505     return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f16);
506   }
507 
508   bool isVCSrcV2F16() const {
509     return isVCSrcF16();
510   }
511 
512   bool isVSrcB32() const {
513     return isVCSrcF32() || isLiteralImm(MVT::i32) || isExpr();
514   }
515 
516   bool isVSrcB64() const {
517     return isVCSrcF64() || isLiteralImm(MVT::i64);
518   }
519 
520   bool isVSrcB16() const {
521     return isVCSrcB16() || isLiteralImm(MVT::i16);
522   }
523 
524   bool isVSrcV2B16() const {
525     return isVSrcB16() || isLiteralImm(MVT::v2i16);
526   }
527 
528   bool isVCSrcV2FP32() const {
529     return isVCSrcF64();
530   }
531 
532   bool isVSrcV2FP32() const {
533     return isVSrcF64() || isLiteralImm(MVT::v2f32);
534   }
535 
536   bool isVCSrcV2INT32() const {
537     return isVCSrcB64();
538   }
539 
540   bool isVSrcV2INT32() const {
541     return isVSrcB64() || isLiteralImm(MVT::v2i32);
542   }
543 
544   bool isVSrcF32() const {
545     return isVCSrcF32() || isLiteralImm(MVT::f32) || isExpr();
546   }
547 
548   bool isVSrcF64() const {
549     return isVCSrcF64() || isLiteralImm(MVT::f64);
550   }
551 
552   bool isVSrcF16() const {
553     return isVCSrcF16() || isLiteralImm(MVT::f16);
554   }
555 
556   bool isVSrcV2F16() const {
557     return isVSrcF16() || isLiteralImm(MVT::v2f16);
558   }
559 
560   bool isVISrcB32() const {
561     return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i32);
562   }
563 
564   bool isVISrcB16() const {
565     return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i16);
566   }
567 
568   bool isVISrcV2B16() const {
569     return isVISrcB16();
570   }
571 
572   bool isVISrcF32() const {
573     return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f32);
574   }
575 
576   bool isVISrcF16() const {
577     return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f16);
578   }
579 
580   bool isVISrcV2F16() const {
581     return isVISrcF16() || isVISrcB32();
582   }
583 
584   bool isVISrc_64B64() const {
585     return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i64);
586   }
587 
588   bool isVISrc_64F64() const {
589     return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f64);
590   }
591 
592   bool isVISrc_64V2FP32() const {
593     return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f32);
594   }
595 
596   bool isVISrc_64V2INT32() const {
597     return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i32);
598   }
599 
600   bool isVISrc_256B64() const {
601     return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i64);
602   }
603 
604   bool isVISrc_256F64() const {
605     return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f64);
606   }
607 
608   bool isVISrc_128B16() const {
609     return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i16);
610   }
611 
612   bool isVISrc_128V2B16() const {
613     return isVISrc_128B16();
614   }
615 
616   bool isVISrc_128B32() const {
617     return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i32);
618   }
619 
620   bool isVISrc_128F32() const {
621     return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f32);
622   }
623 
624   bool isVISrc_256V2FP32() const {
625     return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f32);
626   }
627 
628   bool isVISrc_256V2INT32() const {
629     return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i32);
630   }
631 
632   bool isVISrc_512B32() const {
633     return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i32);
634   }
635 
636   bool isVISrc_512B16() const {
637     return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i16);
638   }
639 
640   bool isVISrc_512V2B16() const {
641     return isVISrc_512B16();
642   }
643 
644   bool isVISrc_512F32() const {
645     return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f32);
646   }
647 
648   bool isVISrc_512F16() const {
649     return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f16);
650   }
651 
652   bool isVISrc_512V2F16() const {
653     return isVISrc_512F16() || isVISrc_512B32();
654   }
655 
656   bool isVISrc_1024B32() const {
657     return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i32);
658   }
659 
660   bool isVISrc_1024B16() const {
661     return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i16);
662   }
663 
664   bool isVISrc_1024V2B16() const {
665     return isVISrc_1024B16();
666   }
667 
668   bool isVISrc_1024F32() const {
669     return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f32);
670   }
671 
672   bool isVISrc_1024F16() const {
673     return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f16);
674   }
675 
676   bool isVISrc_1024V2F16() const {
677     return isVISrc_1024F16() || isVISrc_1024B32();
678   }
679 
680   bool isAISrcB32() const {
681     return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i32);
682   }
683 
684   bool isAISrcB16() const {
685     return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i16);
686   }
687 
688   bool isAISrcV2B16() const {
689     return isAISrcB16();
690   }
691 
692   bool isAISrcF32() const {
693     return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f32);
694   }
695 
696   bool isAISrcF16() const {
697     return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f16);
698   }
699 
700   bool isAISrcV2F16() const {
701     return isAISrcF16() || isAISrcB32();
702   }
703 
704   bool isAISrc_64B64() const {
705     return isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::i64);
706   }
707 
708   bool isAISrc_64F64() const {
709     return isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::f64);
710   }
711 
712   bool isAISrc_128B32() const {
713     return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i32);
714   }
715 
716   bool isAISrc_128B16() const {
717     return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i16);
718   }
719 
720   bool isAISrc_128V2B16() const {
721     return isAISrc_128B16();
722   }
723 
724   bool isAISrc_128F32() const {
725     return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f32);
726   }
727 
728   bool isAISrc_128F16() const {
729     return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f16);
730   }
731 
732   bool isAISrc_128V2F16() const {
733     return isAISrc_128F16() || isAISrc_128B32();
734   }
735 
736   bool isVISrc_128F16() const {
737     return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f16);
738   }
739 
740   bool isVISrc_128V2F16() const {
741     return isVISrc_128F16() || isVISrc_128B32();
742   }
743 
744   bool isAISrc_256B64() const {
745     return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::i64);
746   }
747 
748   bool isAISrc_256F64() const {
749     return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::f64);
750   }
751 
752   bool isAISrc_512B32() const {
753     return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i32);
754   }
755 
756   bool isAISrc_512B16() const {
757     return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i16);
758   }
759 
760   bool isAISrc_512V2B16() const {
761     return isAISrc_512B16();
762   }
763 
764   bool isAISrc_512F32() const {
765     return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f32);
766   }
767 
768   bool isAISrc_512F16() const {
769     return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f16);
770   }
771 
772   bool isAISrc_512V2F16() const {
773     return isAISrc_512F16() || isAISrc_512B32();
774   }
775 
776   bool isAISrc_1024B32() const {
777     return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i32);
778   }
779 
780   bool isAISrc_1024B16() const {
781     return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i16);
782   }
783 
784   bool isAISrc_1024V2B16() const {
785     return isAISrc_1024B16();
786   }
787 
788   bool isAISrc_1024F32() const {
789     return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f32);
790   }
791 
792   bool isAISrc_1024F16() const {
793     return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f16);
794   }
795 
796   bool isAISrc_1024V2F16() const {
797     return isAISrc_1024F16() || isAISrc_1024B32();
798   }
799 
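  // KImm operands are the mandatory literal constants used by instructions
  // such as v_madmk_f32 / v_madak_f32.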
800   bool isKImmFP32() const {
801     return isLiteralImm(MVT::f32);
802   }
803 
804   bool isKImmFP16() const {
805     return isLiteralImm(MVT::f16);
806   }
807 
808   bool isMem() const override {
809     return false;
810   }
811 
812   bool isExpr() const {
813     return Kind == Expression;
814   }
815 
816   bool isSoppBrTarget() const {
817     return isExpr() || isImm();
818   }
819 
820   bool isSWaitCnt() const;
821   bool isDepCtr() const;
822   bool isHwreg() const;
823   bool isSendMsg() const;
824   bool isSwizzle() const;
825   bool isSMRDOffset8() const;
826   bool isSMEMOffset() const;
827   bool isSMRDLiteralOffset() const;
828   bool isDPP8() const;
829   bool isDPPCtrl() const;
830   bool isBLGP() const;
831   bool isCBSZ() const;
832   bool isABID() const;
833   bool isGPRIdxMode() const;
834   bool isS16Imm() const;
835   bool isU16Imm() const;
836   bool isEndpgm() const;
837 
838   StringRef getExpressionAsToken() const {
839     assert(isExpr());
840     const MCSymbolRefExpr *S = cast<MCSymbolRefExpr>(Expr);
841     return S->getSymbol().getName();
842   }
843 
844   StringRef getToken() const {
845     assert(isToken());
846 
847     if (Kind == Expression)
848       return getExpressionAsToken();
849 
850     return StringRef(Tok.Data, Tok.Length);
851   }
852 
853   int64_t getImm() const {
854     assert(isImm());
855     return Imm.Val;
856   }
857 
858   void setImm(int64_t Val) {
859     assert(isImm());
860     Imm.Val = Val;
861   }
862 
863   ImmTy getImmTy() const {
864     assert(isImm());
865     return Imm.Type;
866   }
867 
868   unsigned getReg() const override {
869     assert(isRegKind());
870     return Reg.RegNo;
871   }
872 
873   SMLoc getStartLoc() const override {
874     return StartLoc;
875   }
876 
877   SMLoc getEndLoc() const override {
878     return EndLoc;
879   }
880 
881   SMRange getLocRange() const {
882     return SMRange(StartLoc, EndLoc);
883   }
884 
885   Modifiers getModifiers() const {
886     assert(isRegKind() || isImmTy(ImmTyNone));
887     return isRegKind() ? Reg.Mods : Imm.Mods;
888   }
889 
890   void setModifiers(Modifiers Mods) {
891     assert(isRegKind() || isImmTy(ImmTyNone));
892     if (isRegKind())
893       Reg.Mods = Mods;
894     else
895       Imm.Mods = Mods;
896   }
897 
898   bool hasModifiers() const {
899     return getModifiers().hasModifiers();
900   }
901 
902   bool hasFPModifiers() const {
903     return getModifiers().hasFPModifiers();
904   }
905 
906   bool hasIntModifiers() const {
907     return getModifiers().hasIntModifiers();
908   }
909 
910   uint64_t applyInputFPModifiers(uint64_t Val, unsigned Size) const;
911 
912   void addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers = true) const;
913 
914   void addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const;
915 
916   template <unsigned Bitwidth>
917   void addKImmFPOperands(MCInst &Inst, unsigned N) const;
918 
919   void addKImmFP16Operands(MCInst &Inst, unsigned N) const {
920     addKImmFPOperands<16>(Inst, N);
921   }
922 
923   void addKImmFP32Operands(MCInst &Inst, unsigned N) const {
924     addKImmFPOperands<32>(Inst, N);
925   }
926 
927   void addRegOperands(MCInst &Inst, unsigned N) const;
928 
929   void addBoolRegOperands(MCInst &Inst, unsigned N) const {
930     addRegOperands(Inst, N);
931   }
932 
933   void addRegOrImmOperands(MCInst &Inst, unsigned N) const {
934     if (isRegKind())
935       addRegOperands(Inst, N);
936     else if (isExpr())
937       Inst.addOperand(MCOperand::createExpr(Expr));
938     else
939       addImmOperands(Inst, N);
940   }
941 
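  // The helpers below emit the source-modifier bits as a separate immediate
  // placed directly before the value operand, matching instructions that pair
  // a src*_modifiers operand with each source (e.g. VOP3, SDWA and DPP forms).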
942   void addRegOrImmWithInputModsOperands(MCInst &Inst, unsigned N) const {
943     Modifiers Mods = getModifiers();
944     Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
945     if (isRegKind()) {
946       addRegOperands(Inst, N);
947     } else {
948       addImmOperands(Inst, N, false);
949     }
950   }
951 
952   void addRegOrImmWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
953     assert(!hasIntModifiers());
954     addRegOrImmWithInputModsOperands(Inst, N);
955   }
956 
957   void addRegOrImmWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
958     assert(!hasFPModifiers());
959     addRegOrImmWithInputModsOperands(Inst, N);
960   }
961 
962   void addRegWithInputModsOperands(MCInst &Inst, unsigned N) const {
963     Modifiers Mods = getModifiers();
964     Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
965     assert(isRegKind());
966     addRegOperands(Inst, N);
967   }
968 
969   void addRegWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
970     assert(!hasIntModifiers());
971     addRegWithInputModsOperands(Inst, N);
972   }
973 
974   void addRegWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
975     assert(!hasFPModifiers());
976     addRegWithInputModsOperands(Inst, N);
977   }
978 
979   void addSoppBrTargetOperands(MCInst &Inst, unsigned N) const {
980     if (isImm())
981       addImmOperands(Inst, N);
982     else {
983       assert(isExpr());
984       Inst.addOperand(MCOperand::createExpr(Expr));
985     }
986   }
987 
988   static void printImmTy(raw_ostream& OS, ImmTy Type) {
989     switch (Type) {
990     case ImmTyNone: OS << "None"; break;
991     case ImmTyGDS: OS << "GDS"; break;
992     case ImmTyLDS: OS << "LDS"; break;
993     case ImmTyOffen: OS << "Offen"; break;
994     case ImmTyIdxen: OS << "Idxen"; break;
995     case ImmTyAddr64: OS << "Addr64"; break;
996     case ImmTyOffset: OS << "Offset"; break;
997     case ImmTyInstOffset: OS << "InstOffset"; break;
998     case ImmTyOffset0: OS << "Offset0"; break;
999     case ImmTyOffset1: OS << "Offset1"; break;
1000     case ImmTyCPol: OS << "CPol"; break;
1001     case ImmTySWZ: OS << "SWZ"; break;
1002     case ImmTyTFE: OS << "TFE"; break;
1003     case ImmTyD16: OS << "D16"; break;
1004     case ImmTyFORMAT: OS << "FORMAT"; break;
1005     case ImmTyClampSI: OS << "ClampSI"; break;
1006     case ImmTyOModSI: OS << "OModSI"; break;
1007     case ImmTyDPP8: OS << "DPP8"; break;
1008     case ImmTyDppCtrl: OS << "DppCtrl"; break;
1009     case ImmTyDppRowMask: OS << "DppRowMask"; break;
1010     case ImmTyDppBankMask: OS << "DppBankMask"; break;
1011     case ImmTyDppBoundCtrl: OS << "DppBoundCtrl"; break;
1012     case ImmTyDppFi: OS << "FI"; break;
1013     case ImmTySdwaDstSel: OS << "SdwaDstSel"; break;
1014     case ImmTySdwaSrc0Sel: OS << "SdwaSrc0Sel"; break;
1015     case ImmTySdwaSrc1Sel: OS << "SdwaSrc1Sel"; break;
1016     case ImmTySdwaDstUnused: OS << "SdwaDstUnused"; break;
1017     case ImmTyDMask: OS << "DMask"; break;
1018     case ImmTyDim: OS << "Dim"; break;
1019     case ImmTyUNorm: OS << "UNorm"; break;
1020     case ImmTyDA: OS << "DA"; break;
1021     case ImmTyR128A16: OS << "R128A16"; break;
1022     case ImmTyA16: OS << "A16"; break;
1023     case ImmTyLWE: OS << "LWE"; break;
1024     case ImmTyOff: OS << "Off"; break;
1025     case ImmTyExpTgt: OS << "ExpTgt"; break;
1026     case ImmTyExpCompr: OS << "ExpCompr"; break;
1027     case ImmTyExpVM: OS << "ExpVM"; break;
1028     case ImmTyHwreg: OS << "Hwreg"; break;
1029     case ImmTySendMsg: OS << "SendMsg"; break;
1030     case ImmTyInterpSlot: OS << "InterpSlot"; break;
1031     case ImmTyInterpAttr: OS << "InterpAttr"; break;
1032     case ImmTyAttrChan: OS << "AttrChan"; break;
1033     case ImmTyOpSel: OS << "OpSel"; break;
1034     case ImmTyOpSelHi: OS << "OpSelHi"; break;
1035     case ImmTyNegLo: OS << "NegLo"; break;
1036     case ImmTyNegHi: OS << "NegHi"; break;
1037     case ImmTySwizzle: OS << "Swizzle"; break;
1038     case ImmTyGprIdxMode: OS << "GprIdxMode"; break;
1039     case ImmTyHigh: OS << "High"; break;
1040     case ImmTyBLGP: OS << "BLGP"; break;
1041     case ImmTyCBSZ: OS << "CBSZ"; break;
1042     case ImmTyABID: OS << "ABID"; break;
1043     case ImmTyEndpgm: OS << "Endpgm"; break;
1044     }
1045   }
1046 
1047   void print(raw_ostream &OS) const override {
1048     switch (Kind) {
1049     case Register:
1050       OS << "<register " << getReg() << " mods: " << Reg.Mods << '>';
1051       break;
1052     case Immediate:
1053       OS << '<' << getImm();
1054       if (getImmTy() != ImmTyNone) {
1055         OS << " type: "; printImmTy(OS, getImmTy());
1056       }
1057       OS << " mods: " << Imm.Mods << '>';
1058       break;
1059     case Token:
1060       OS << '\'' << getToken() << '\'';
1061       break;
1062     case Expression:
1063       OS << "<expr " << *Expr << '>';
1064       break;
1065     }
1066   }
1067 
1068   static AMDGPUOperand::Ptr CreateImm(const AMDGPUAsmParser *AsmParser,
1069                                       int64_t Val, SMLoc Loc,
1070                                       ImmTy Type = ImmTyNone,
1071                                       bool IsFPImm = false) {
1072     auto Op = std::make_unique<AMDGPUOperand>(Immediate, AsmParser);
1073     Op->Imm.Val = Val;
1074     Op->Imm.IsFPImm = IsFPImm;
1075     Op->Imm.Kind = ImmKindTyNone;
1076     Op->Imm.Type = Type;
1077     Op->Imm.Mods = Modifiers();
1078     Op->StartLoc = Loc;
1079     Op->EndLoc = Loc;
1080     return Op;
1081   }
1082 
1083   static AMDGPUOperand::Ptr CreateToken(const AMDGPUAsmParser *AsmParser,
1084                                         StringRef Str, SMLoc Loc,
1085                                         bool HasExplicitEncodingSize = true) {
1086     auto Res = std::make_unique<AMDGPUOperand>(Token, AsmParser);
1087     Res->Tok.Data = Str.data();
1088     Res->Tok.Length = Str.size();
1089     Res->StartLoc = Loc;
1090     Res->EndLoc = Loc;
1091     return Res;
1092   }
1093 
1094   static AMDGPUOperand::Ptr CreateReg(const AMDGPUAsmParser *AsmParser,
1095                                       unsigned RegNo, SMLoc S,
1096                                       SMLoc E) {
1097     auto Op = std::make_unique<AMDGPUOperand>(Register, AsmParser);
1098     Op->Reg.RegNo = RegNo;
1099     Op->Reg.Mods = Modifiers();
1100     Op->StartLoc = S;
1101     Op->EndLoc = E;
1102     return Op;
1103   }
1104 
1105   static AMDGPUOperand::Ptr CreateExpr(const AMDGPUAsmParser *AsmParser,
1106                                        const class MCExpr *Expr, SMLoc S) {
1107     auto Op = std::make_unique<AMDGPUOperand>(Expression, AsmParser);
1108     Op->Expr = Expr;
1109     Op->StartLoc = S;
1110     Op->EndLoc = S;
1111     return Op;
1112   }
1113 };
1114 
1115 raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods) {
1116   OS << "abs:" << Mods.Abs << " neg: " << Mods.Neg << " sext:" << Mods.Sext;
1117   return OS;
1118 }
1119 
1120 //===----------------------------------------------------------------------===//
1121 // AsmParser
1122 //===----------------------------------------------------------------------===//
1123 
1124 // Holds info related to the current kernel, e.g. count of SGPRs used.
1125 // A kernel scope begins at the .amdgpu_hsa_kernel directive and ends at the
1126 // next .amdgpu_hsa_kernel directive or at EOF.
1127 class KernelScopeInfo {
1128   int SgprIndexUnusedMin = -1;
1129   int VgprIndexUnusedMin = -1;
1130   int AgprIndexUnusedMin = -1;
1131   MCContext *Ctx = nullptr;
1132   MCSubtargetInfo const *MSTI = nullptr;
1133 
1134   void usesSgprAt(int i) {
1135     if (i >= SgprIndexUnusedMin) {
1136       SgprIndexUnusedMin = ++i;
1137       if (Ctx) {
1138         MCSymbol* const Sym =
1139           Ctx->getOrCreateSymbol(Twine(".kernel.sgpr_count"));
1140         Sym->setVariableValue(MCConstantExpr::create(SgprIndexUnusedMin, *Ctx));
1141       }
1142     }
1143   }
1144 
1145   void usesVgprAt(int i) {
1146     if (i >= VgprIndexUnusedMin) {
1147       VgprIndexUnusedMin = ++i;
1148       if (Ctx) {
1149         MCSymbol* const Sym =
1150           Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count"));
1151         int totalVGPR = getTotalNumVGPRs(isGFX90A(*MSTI), AgprIndexUnusedMin,
1152                                          VgprIndexUnusedMin);
1153         Sym->setVariableValue(MCConstantExpr::create(totalVGPR, *Ctx));
1154       }
1155     }
1156   }
1157 
1158   void usesAgprAt(int i) {
1159     // AGPR uses will error in AMDGPUAsmParser::MatchAndEmitInstruction.
1160     if (!hasMAIInsts(*MSTI))
1161       return;
1162 
1163     if (i >= AgprIndexUnusedMin) {
1164       AgprIndexUnusedMin = ++i;
1165       if (Ctx) {
1166         MCSymbol* const Sym =
1167           Ctx->getOrCreateSymbol(Twine(".kernel.agpr_count"));
1168         Sym->setVariableValue(MCConstantExpr::create(AgprIndexUnusedMin, *Ctx));
1169 
1170         // Also update vgpr_count (dependent on agpr_count for gfx908/gfx90a)
1171         MCSymbol* const vSym =
1172           Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count"));
1173         int totalVGPR = getTotalNumVGPRs(isGFX90A(*MSTI), AgprIndexUnusedMin,
1174                                          VgprIndexUnusedMin);
1175         vSym->setVariableValue(MCConstantExpr::create(totalVGPR, *Ctx));
1176       }
1177     }
1178   }
1179 
1180 public:
1181   KernelScopeInfo() = default;
1182 
1183   void initialize(MCContext &Context) {
1184     Ctx = &Context;
1185     MSTI = Ctx->getSubtargetInfo();
1186 
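    // Passing -1 resets each counter and (re)creates the corresponding
    // .kernel.*_count symbol at the start of a new kernel scope.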
1187     usesSgprAt(SgprIndexUnusedMin = -1);
1188     usesVgprAt(VgprIndexUnusedMin = -1);
1189     if (hasMAIInsts(*MSTI)) {
1190       usesAgprAt(AgprIndexUnusedMin = -1);
1191     }
1192   }
1193 
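  // Record a reference to a register of the given kind, starting dword index
  // and width in bits. For example, s[4:7] (DwordRegIndex = 4, RegWidth = 128)
  // marks SGPRs up to index 7 as used, so .kernel.sgpr_count becomes 8.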
1194   void usesRegister(RegisterKind RegKind, unsigned DwordRegIndex,
1195                     unsigned RegWidth) {
1196     switch (RegKind) {
1197     case IS_SGPR:
1198       usesSgprAt(DwordRegIndex + divideCeil(RegWidth, 32) - 1);
1199       break;
1200     case IS_AGPR:
1201       usesAgprAt(DwordRegIndex + divideCeil(RegWidth, 32) - 1);
1202       break;
1203     case IS_VGPR:
1204       usesVgprAt(DwordRegIndex + divideCeil(RegWidth, 32) - 1);
1205       break;
1206     default:
1207       break;
1208     }
1209   }
1210 };
1211 
1212 class AMDGPUAsmParser : public MCTargetAsmParser {
1213   MCAsmParser &Parser;
1214 
1215   // Number of extra operands parsed after the first optional operand.
1216   // This may be necessary to skip hardcoded mandatory operands.
1217   static const unsigned MAX_OPR_LOOKAHEAD = 8;
1218 
1219   unsigned ForcedEncodingSize = 0;
1220   bool ForcedDPP = false;
1221   bool ForcedSDWA = false;
1222   KernelScopeInfo KernelScope;
1223   unsigned CPolSeen;
1224 
1225   /// @name Auto-generated Match Functions
1226   /// {
1227 
1228 #define GET_ASSEMBLER_HEADER
1229 #include "AMDGPUGenAsmMatcher.inc"
1230 
1231   /// }
1232 
1233 private:
1234   bool ParseAsAbsoluteExpression(uint32_t &Ret);
1235   bool OutOfRangeError(SMRange Range);
1236   /// Calculate the VGPR/SGPR blocks required for the given target, reserved
1237   /// registers, and user-specified NextFreeXGPR values.
1238   ///
1239   /// \param Features [in] Target features, used for bug corrections.
1240   /// \param VCCUsed [in] Whether VCC special SGPR is reserved.
1241   /// \param FlatScrUsed [in] Whether FLAT_SCRATCH special SGPR is reserved.
1242   /// \param XNACKUsed [in] Whether XNACK_MASK special SGPR is reserved.
1243   /// \param EnableWavefrontSize32 [in] Value of ENABLE_WAVEFRONT_SIZE32 kernel
1244   /// descriptor field, if valid.
1245   /// \param NextFreeVGPR [in] Max VGPR number referenced, plus one.
1246   /// \param VGPRRange [in] Token range, used for VGPR diagnostics.
1247   /// \param NextFreeSGPR [in] Max SGPR number referenced, plus one.
1248   /// \param SGPRRange [in] Token range, used for SGPR diagnostics.
1249   /// \param VGPRBlocks [out] Result VGPR block count.
1250   /// \param SGPRBlocks [out] Result SGPR block count.
1251   bool calculateGPRBlocks(const FeatureBitset &Features, bool VCCUsed,
1252                           bool FlatScrUsed, bool XNACKUsed,
1253                           Optional<bool> EnableWavefrontSize32, unsigned NextFreeVGPR,
1254                           SMRange VGPRRange, unsigned NextFreeSGPR,
1255                           SMRange SGPRRange, unsigned &VGPRBlocks,
1256                           unsigned &SGPRBlocks);
1257   bool ParseDirectiveAMDGCNTarget();
1258   bool ParseDirectiveAMDHSAKernel();
1259   bool ParseDirectiveMajorMinor(uint32_t &Major, uint32_t &Minor);
1260   bool ParseDirectiveHSACodeObjectVersion();
1261   bool ParseDirectiveHSACodeObjectISA();
1262   bool ParseAMDKernelCodeTValue(StringRef ID, amd_kernel_code_t &Header);
1263   bool ParseDirectiveAMDKernelCodeT();
1264   // TODO: Possibly make subtargetHasRegister const.
1265   bool subtargetHasRegister(const MCRegisterInfo &MRI, unsigned RegNo);
1266   bool ParseDirectiveAMDGPUHsaKernel();
1267 
1268   bool ParseDirectiveISAVersion();
1269   bool ParseDirectiveHSAMetadata();
1270   bool ParseDirectivePALMetadataBegin();
1271   bool ParseDirectivePALMetadata();
1272   bool ParseDirectiveAMDGPULDS();
1273 
1274   /// Common code to parse out a block of text (typically YAML) between start and
1275   /// end directives.
1276   bool ParseToEndDirective(const char *AssemblerDirectiveBegin,
1277                            const char *AssemblerDirectiveEnd,
1278                            std::string &CollectString);
1279 
1280   bool AddNextRegisterToList(unsigned& Reg, unsigned& RegWidth,
1281                              RegisterKind RegKind, unsigned Reg1, SMLoc Loc);
1282   bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
1283                            unsigned &RegNum, unsigned &RegWidth,
1284                            bool RestoreOnFailure = false);
1285   bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
1286                            unsigned &RegNum, unsigned &RegWidth,
1287                            SmallVectorImpl<AsmToken> &Tokens);
1288   unsigned ParseRegularReg(RegisterKind &RegKind, unsigned &RegNum,
1289                            unsigned &RegWidth,
1290                            SmallVectorImpl<AsmToken> &Tokens);
1291   unsigned ParseSpecialReg(RegisterKind &RegKind, unsigned &RegNum,
1292                            unsigned &RegWidth,
1293                            SmallVectorImpl<AsmToken> &Tokens);
1294   unsigned ParseRegList(RegisterKind &RegKind, unsigned &RegNum,
1295                         unsigned &RegWidth, SmallVectorImpl<AsmToken> &Tokens);
1296   bool ParseRegRange(unsigned& Num, unsigned& Width);
1297   unsigned getRegularReg(RegisterKind RegKind,
1298                          unsigned RegNum,
1299                          unsigned RegWidth,
1300                          SMLoc Loc);
1301 
1302   bool isRegister();
1303   bool isRegister(const AsmToken &Token, const AsmToken &NextToken) const;
1304   Optional<StringRef> getGprCountSymbolName(RegisterKind RegKind);
1305   void initializeGprCountSymbol(RegisterKind RegKind);
1306   bool updateGprCountSymbols(RegisterKind RegKind, unsigned DwordRegIndex,
1307                              unsigned RegWidth);
1308   void cvtMubufImpl(MCInst &Inst, const OperandVector &Operands,
1309                     bool IsAtomic, bool IsLds = false);
1310   void cvtDSImpl(MCInst &Inst, const OperandVector &Operands,
1311                  bool IsGdsHardcoded);
1312 
1313 public:
1314   enum AMDGPUMatchResultTy {
1315     Match_PreferE32 = FIRST_TARGET_MATCH_RESULT_TY
1316   };
1317   enum OperandMode {
1318     OperandMode_Default,
1319     OperandMode_NSA,
1320   };
1321 
1322   using OptionalImmIndexMap = std::map<AMDGPUOperand::ImmTy, unsigned>;
1323 
1324   AMDGPUAsmParser(const MCSubtargetInfo &STI, MCAsmParser &_Parser,
1325                const MCInstrInfo &MII,
1326                const MCTargetOptions &Options)
1327       : MCTargetAsmParser(Options, STI, MII), Parser(_Parser) {
1328     MCAsmParserExtension::Initialize(Parser);
1329 
1330     if (getFeatureBits().none()) {
1331       // Set default features.
1332       copySTI().ToggleFeature("southern-islands");
1333     }
1334 
1335     setAvailableFeatures(ComputeAvailableFeatures(getFeatureBits()));
1336 
1337     {
1338       // TODO: make these pre-defined variables read-only.
1339       // Currently there is no suitable machinery in core llvm-mc for this.
1340       // MCSymbol::isRedefinable is intended for another purpose, and
1341       // AsmParser::parseDirectiveSet() cannot be specialized per target.
1342       AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
1343       MCContext &Ctx = getContext();
1344       if (ISA.Major >= 6 && isHsaAbiVersion3AndAbove(&getSTI())) {
1345         MCSymbol *Sym =
1346             Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_number"));
1347         Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
1348         Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_minor"));
1349         Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
1350         Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_stepping"));
1351         Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
1352       } else {
1353         MCSymbol *Sym =
1354             Ctx.getOrCreateSymbol(Twine(".option.machine_version_major"));
1355         Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
1356         Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_minor"));
1357         Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
1358         Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_stepping"));
1359         Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
1360       }
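      // These symbols evaluate as constants in assembly source, so code can
      // be guarded with, for example, ".if .option.machine_version_major >= 8".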
1361       if (ISA.Major >= 6 && isHsaAbiVersion3AndAbove(&getSTI())) {
1362         initializeGprCountSymbol(IS_VGPR);
1363         initializeGprCountSymbol(IS_SGPR);
1364       } else
1365         KernelScope.initialize(getContext());
1366     }
1367   }
1368 
1369   bool hasMIMG_R128() const {
1370     return AMDGPU::hasMIMG_R128(getSTI());
1371   }
1372 
1373   bool hasPackedD16() const {
1374     return AMDGPU::hasPackedD16(getSTI());
1375   }
1376 
1377   bool hasGFX10A16() const {
1378     return AMDGPU::hasGFX10A16(getSTI());
1379   }
1380 
1381   bool hasG16() const { return AMDGPU::hasG16(getSTI()); }
1382 
1383   bool isSI() const {
1384     return AMDGPU::isSI(getSTI());
1385   }
1386 
1387   bool isCI() const {
1388     return AMDGPU::isCI(getSTI());
1389   }
1390 
1391   bool isVI() const {
1392     return AMDGPU::isVI(getSTI());
1393   }
1394 
1395   bool isGFX9() const {
1396     return AMDGPU::isGFX9(getSTI());
1397   }
1398 
1399   // TODO: isGFX90A is also true for GFX940. We need to clean this up.
1400   bool isGFX90A() const {
1401     return AMDGPU::isGFX90A(getSTI());
1402   }
1403 
1404   bool isGFX940() const {
1405     return AMDGPU::isGFX940(getSTI());
1406   }
1407 
1408   bool isGFX9Plus() const {
1409     return AMDGPU::isGFX9Plus(getSTI());
1410   }
1411 
1412   bool isGFX10() const {
1413     return AMDGPU::isGFX10(getSTI());
1414   }
1415 
1416   bool isGFX10Plus() const { return AMDGPU::isGFX10Plus(getSTI()); }
1417 
1418   bool isGFX10_BEncoding() const {
1419     return AMDGPU::isGFX10_BEncoding(getSTI());
1420   }
1421 
1422   bool hasInv2PiInlineImm() const {
1423     return getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm];
1424   }
1425 
1426   bool hasFlatOffsets() const {
1427     return getFeatureBits()[AMDGPU::FeatureFlatInstOffsets];
1428   }
1429 
1430   bool hasArchitectedFlatScratch() const {
1431     return getFeatureBits()[AMDGPU::FeatureArchitectedFlatScratch];
1432   }
1433 
1434   bool hasSGPR102_SGPR103() const {
1435     return !isVI() && !isGFX9();
1436   }
1437 
1438   bool hasSGPR104_SGPR105() const { return isGFX10Plus(); }
1439 
1440   bool hasIntClamp() const {
1441     return getFeatureBits()[AMDGPU::FeatureIntClamp];
1442   }
1443 
1444   AMDGPUTargetStreamer &getTargetStreamer() {
1445     MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer();
1446     return static_cast<AMDGPUTargetStreamer &>(TS);
1447   }
1448 
1449   const MCRegisterInfo *getMRI() const {
1450     // We need this const_cast because for some reason getContext() is not const
1451     // in MCAsmParser.
1452     return const_cast<AMDGPUAsmParser*>(this)->getContext().getRegisterInfo();
1453   }
1454 
1455   const MCInstrInfo *getMII() const {
1456     return &MII;
1457   }
1458 
1459   const FeatureBitset &getFeatureBits() const {
1460     return getSTI().getFeatureBits();
1461   }
1462 
1463   void setForcedEncodingSize(unsigned Size) { ForcedEncodingSize = Size; }
1464   void setForcedDPP(bool ForceDPP_) { ForcedDPP = ForceDPP_; }
1465   void setForcedSDWA(bool ForceSDWA_) { ForcedSDWA = ForceSDWA_; }
1466 
1467   unsigned getForcedEncodingSize() const { return ForcedEncodingSize; }
1468   bool isForcedVOP3() const { return ForcedEncodingSize == 64; }
1469   bool isForcedDPP() const { return ForcedDPP; }
1470   bool isForcedSDWA() const { return ForcedSDWA; }
1471   ArrayRef<unsigned> getMatchedVariants() const;
1472   StringRef getMatchedVariantName() const;
1473 
1474   std::unique_ptr<AMDGPUOperand> parseRegister(bool RestoreOnFailure = false);
1475   bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc,
1476                      bool RestoreOnFailure);
1477   bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override;
1478   OperandMatchResultTy tryParseRegister(unsigned &RegNo, SMLoc &StartLoc,
1479                                         SMLoc &EndLoc) override;
1480   unsigned checkTargetMatchPredicate(MCInst &Inst) override;
1481   unsigned validateTargetOperandClass(MCParsedAsmOperand &Op,
1482                                       unsigned Kind) override;
1483   bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
1484                                OperandVector &Operands, MCStreamer &Out,
1485                                uint64_t &ErrorInfo,
1486                                bool MatchingInlineAsm) override;
1487   bool ParseDirective(AsmToken DirectiveID) override;
1488   OperandMatchResultTy parseOperand(OperandVector &Operands, StringRef Mnemonic,
1489                                     OperandMode Mode = OperandMode_Default);
1490   StringRef parseMnemonicSuffix(StringRef Name);
1491   bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
1492                         SMLoc NameLoc, OperandVector &Operands) override;
1493   //bool ProcessInstruction(MCInst &Inst);
1494 
1495   OperandMatchResultTy parseIntWithPrefix(const char *Prefix, int64_t &Int);
1496 
1497   OperandMatchResultTy
1498   parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
1499                      AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
1500                      bool (*ConvertResult)(int64_t &) = nullptr);
1501 
1502   OperandMatchResultTy
1503   parseOperandArrayWithPrefix(const char *Prefix,
1504                               OperandVector &Operands,
1505                               AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
1506                               bool (*ConvertResult)(int64_t&) = nullptr);
1507 
1508   OperandMatchResultTy
1509   parseNamedBit(StringRef Name, OperandVector &Operands,
1510                 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone);
1511   OperandMatchResultTy parseCPol(OperandVector &Operands);
1512   OperandMatchResultTy parseStringWithPrefix(StringRef Prefix,
1513                                              StringRef &Value,
1514                                              SMLoc &StringLoc);
1515 
1516   bool isModifier();
1517   bool isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1518   bool isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1519   bool isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1520   bool isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const;
1521   bool parseSP3NegModifier();
1522   OperandMatchResultTy parseImm(OperandVector &Operands, bool HasSP3AbsModifier = false);
1523   OperandMatchResultTy parseReg(OperandVector &Operands);
1524   OperandMatchResultTy parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod = false);
1525   OperandMatchResultTy parseRegOrImmWithFPInputMods(OperandVector &Operands, bool AllowImm = true);
1526   OperandMatchResultTy parseRegOrImmWithIntInputMods(OperandVector &Operands, bool AllowImm = true);
1527   OperandMatchResultTy parseRegWithFPInputMods(OperandVector &Operands);
1528   OperandMatchResultTy parseRegWithIntInputMods(OperandVector &Operands);
1529   OperandMatchResultTy parseVReg32OrOff(OperandVector &Operands);
1530   OperandMatchResultTy parseDfmtNfmt(int64_t &Format);
1531   OperandMatchResultTy parseUfmt(int64_t &Format);
1532   OperandMatchResultTy parseSymbolicSplitFormat(StringRef FormatStr, SMLoc Loc, int64_t &Format);
1533   OperandMatchResultTy parseSymbolicUnifiedFormat(StringRef FormatStr, SMLoc Loc, int64_t &Format);
1534   OperandMatchResultTy parseFORMAT(OperandVector &Operands);
1535   OperandMatchResultTy parseSymbolicOrNumericFormat(int64_t &Format);
1536   OperandMatchResultTy parseNumericFormat(int64_t &Format);
1537   bool tryParseFmt(const char *Pref, int64_t MaxVal, int64_t &Val);
1538   bool matchDfmtNfmt(int64_t &Dfmt, int64_t &Nfmt, StringRef FormatStr, SMLoc Loc);
1539 
1540   void cvtDSOffset01(MCInst &Inst, const OperandVector &Operands);
1541   void cvtDS(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, false); }
1542   void cvtDSGds(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, true); }
1543   void cvtExp(MCInst &Inst, const OperandVector &Operands);
1544 
1545   bool parseCnt(int64_t &IntVal);
1546   OperandMatchResultTy parseSWaitCntOps(OperandVector &Operands);
1547 
1548   bool parseDepCtr(int64_t &IntVal, unsigned &Mask);
1549   void depCtrError(SMLoc Loc, int ErrorId, StringRef DepCtrName);
1550   OperandMatchResultTy parseDepCtrOps(OperandVector &Operands);
1551 
1552   OperandMatchResultTy parseHwreg(OperandVector &Operands);
1553 
1554 private:
1555   struct OperandInfoTy {
1556     SMLoc Loc;
1557     int64_t Id;
1558     bool IsSymbolic = false;
1559     bool IsDefined = false;
1560 
1561     OperandInfoTy(int64_t Id_) : Id(Id_) {}
1562   };
1563 
1564   bool parseSendMsgBody(OperandInfoTy &Msg, OperandInfoTy &Op, OperandInfoTy &Stream);
1565   bool validateSendMsg(const OperandInfoTy &Msg,
1566                        const OperandInfoTy &Op,
1567                        const OperandInfoTy &Stream);
1568 
1569   bool parseHwregBody(OperandInfoTy &HwReg,
1570                       OperandInfoTy &Offset,
1571                       OperandInfoTy &Width);
1572   bool validateHwreg(const OperandInfoTy &HwReg,
1573                      const OperandInfoTy &Offset,
1574                      const OperandInfoTy &Width);
1575 
1576   SMLoc getFlatOffsetLoc(const OperandVector &Operands) const;
1577   SMLoc getSMEMOffsetLoc(const OperandVector &Operands) const;
1578   SMLoc getBLGPLoc(const OperandVector &Operands) const;
1579 
1580   SMLoc getOperandLoc(std::function<bool(const AMDGPUOperand&)> Test,
1581                       const OperandVector &Operands) const;
1582   SMLoc getImmLoc(AMDGPUOperand::ImmTy Type, const OperandVector &Operands) const;
1583   SMLoc getRegLoc(unsigned Reg, const OperandVector &Operands) const;
1584   SMLoc getLitLoc(const OperandVector &Operands) const;
1585   SMLoc getConstLoc(const OperandVector &Operands) const;
1586 
1587   bool validateInstruction(const MCInst &Inst, const SMLoc &IDLoc, const OperandVector &Operands);
1588   bool validateFlatOffset(const MCInst &Inst, const OperandVector &Operands);
1589   bool validateSMEMOffset(const MCInst &Inst, const OperandVector &Operands);
1590   bool validateSOPLiteral(const MCInst &Inst) const;
1591   bool validateConstantBusLimitations(const MCInst &Inst, const OperandVector &Operands);
1592   bool validateEarlyClobberLimitations(const MCInst &Inst, const OperandVector &Operands);
1593   bool validateIntClampSupported(const MCInst &Inst);
1594   bool validateMIMGAtomicDMask(const MCInst &Inst);
1595   bool validateMIMGGatherDMask(const MCInst &Inst);
1596   bool validateMovrels(const MCInst &Inst, const OperandVector &Operands);
1597   Optional<StringRef> validateMIMGDataSize(const MCInst &Inst);
1598   bool validateMIMGAddrSize(const MCInst &Inst);
1599   bool validateMIMGD16(const MCInst &Inst);
1600   bool validateMIMGDim(const MCInst &Inst);
1601   bool validateMIMGMSAA(const MCInst &Inst);
1602   bool validateOpSel(const MCInst &Inst);
1603   bool validateDPP(const MCInst &Inst, const OperandVector &Operands);
1604   bool validateVccOperand(unsigned Reg) const;
1605   bool validateVOPLiteral(const MCInst &Inst, const OperandVector &Operands);
1606   bool validateMAIAccWrite(const MCInst &Inst, const OperandVector &Operands);
1607   bool validateMFMA(const MCInst &Inst, const OperandVector &Operands);
1608   bool validateAGPRLdSt(const MCInst &Inst) const;
1609   bool validateVGPRAlign(const MCInst &Inst) const;
1610   bool validateBLGP(const MCInst &Inst, const OperandVector &Operands);
1611   bool validateGWS(const MCInst &Inst, const OperandVector &Operands);
1612   bool validateDivScale(const MCInst &Inst);
1613   bool validateCoherencyBits(const MCInst &Inst, const OperandVector &Operands,
1614                              const SMLoc &IDLoc);
1615   Optional<StringRef> validateLdsDirect(const MCInst &Inst);
1616   unsigned getConstantBusLimit(unsigned Opcode) const;
1617   bool usesConstantBus(const MCInst &Inst, unsigned OpIdx);
1618   bool isInlineConstant(const MCInst &Inst, unsigned OpIdx) const;
1619   unsigned findImplicitSGPRReadInVOP(const MCInst &Inst) const;
1620 
1621   bool isSupportedMnemo(StringRef Mnemo,
1622                         const FeatureBitset &FBS);
1623   bool isSupportedMnemo(StringRef Mnemo,
1624                         const FeatureBitset &FBS,
1625                         ArrayRef<unsigned> Variants);
1626   bool checkUnsupportedInstruction(StringRef Name, const SMLoc &IDLoc);
1627 
1628   bool isId(const StringRef Id) const;
1629   bool isId(const AsmToken &Token, const StringRef Id) const;
1630   bool isToken(const AsmToken::TokenKind Kind) const;
1631   bool trySkipId(const StringRef Id);
1632   bool trySkipId(const StringRef Pref, const StringRef Id);
1633   bool trySkipId(const StringRef Id, const AsmToken::TokenKind Kind);
1634   bool trySkipToken(const AsmToken::TokenKind Kind);
1635   bool skipToken(const AsmToken::TokenKind Kind, const StringRef ErrMsg);
1636   bool parseString(StringRef &Val, const StringRef ErrMsg = "expected a string");
1637   bool parseId(StringRef &Val, const StringRef ErrMsg = "");
1638 
1639   void peekTokens(MutableArrayRef<AsmToken> Tokens);
1640   AsmToken::TokenKind getTokenKind() const;
1641   bool parseExpr(int64_t &Imm, StringRef Expected = "");
1642   bool parseExpr(OperandVector &Operands);
1643   StringRef getTokenStr() const;
1644   AsmToken peekToken();
1645   AsmToken getToken() const;
1646   SMLoc getLoc() const;
1647   void lex();
1648 
1649 public:
1650   void onBeginOfFile() override;
1651 
1652   OperandMatchResultTy parseOptionalOperand(OperandVector &Operands);
1653   OperandMatchResultTy parseOptionalOpr(OperandVector &Operands);
1654 
1655   OperandMatchResultTy parseExpTgt(OperandVector &Operands);
1656   OperandMatchResultTy parseSendMsgOp(OperandVector &Operands);
1657   OperandMatchResultTy parseInterpSlot(OperandVector &Operands);
1658   OperandMatchResultTy parseInterpAttr(OperandVector &Operands);
1659   OperandMatchResultTy parseSOppBrTarget(OperandVector &Operands);
1660   OperandMatchResultTy parseBoolReg(OperandVector &Operands);
1661 
1662   bool parseSwizzleOperand(int64_t &Op,
1663                            const unsigned MinVal,
1664                            const unsigned MaxVal,
1665                            const StringRef ErrMsg,
1666                            SMLoc &Loc);
1667   bool parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
1668                             const unsigned MinVal,
1669                             const unsigned MaxVal,
1670                             const StringRef ErrMsg);
1671   OperandMatchResultTy parseSwizzleOp(OperandVector &Operands);
1672   bool parseSwizzleOffset(int64_t &Imm);
1673   bool parseSwizzleMacro(int64_t &Imm);
1674   bool parseSwizzleQuadPerm(int64_t &Imm);
1675   bool parseSwizzleBitmaskPerm(int64_t &Imm);
1676   bool parseSwizzleBroadcast(int64_t &Imm);
1677   bool parseSwizzleSwap(int64_t &Imm);
1678   bool parseSwizzleReverse(int64_t &Imm);
1679 
1680   OperandMatchResultTy parseGPRIdxMode(OperandVector &Operands);
1681   int64_t parseGPRIdxMacro();
1682 
1683   void cvtMubuf(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false); }
1684   void cvtMubufAtomic(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true); }
1685   void cvtMubufLds(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, true); }
1686   void cvtMtbuf(MCInst &Inst, const OperandVector &Operands);
1687 
1688   AMDGPUOperand::Ptr defaultCPol() const;
1689 
1690   AMDGPUOperand::Ptr defaultSMRDOffset8() const;
1691   AMDGPUOperand::Ptr defaultSMEMOffset() const;
1692   AMDGPUOperand::Ptr defaultSMRDLiteralOffset() const;
1693   AMDGPUOperand::Ptr defaultFlatOffset() const;
1694 
1695   OperandMatchResultTy parseOModOperand(OperandVector &Operands);
1696 
1697   void cvtVOP3(MCInst &Inst, const OperandVector &Operands,
1698                OptionalImmIndexMap &OptionalIdx);
1699   void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands);
1700   void cvtVOP3(MCInst &Inst, const OperandVector &Operands);
1701   void cvtVOP3P(MCInst &Inst, const OperandVector &Operands);
1702   void cvtVOP3P(MCInst &Inst, const OperandVector &Operands,
1703                 OptionalImmIndexMap &OptionalIdx);
1704 
1705   void cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands);
1706 
1707   void cvtMIMG(MCInst &Inst, const OperandVector &Operands,
1708                bool IsAtomic = false);
1709   void cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands);
1710   void cvtIntersectRay(MCInst &Inst, const OperandVector &Operands);
1711 
1712   void cvtSMEMAtomic(MCInst &Inst, const OperandVector &Operands);
1713 
1714   bool parseDimId(unsigned &Encoding);
1715   OperandMatchResultTy parseDim(OperandVector &Operands);
1716   OperandMatchResultTy parseDPP8(OperandVector &Operands);
1717   OperandMatchResultTy parseDPPCtrl(OperandVector &Operands);
1718   bool isSupportedDPPCtrl(StringRef Ctrl, const OperandVector &Operands);
1719   int64_t parseDPPCtrlSel(StringRef Ctrl);
1720   int64_t parseDPPCtrlPerm();
1721   AMDGPUOperand::Ptr defaultRowMask() const;
1722   AMDGPUOperand::Ptr defaultBankMask() const;
1723   AMDGPUOperand::Ptr defaultBoundCtrl() const;
1724   AMDGPUOperand::Ptr defaultFI() const;
1725   void cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8 = false);
1726   void cvtDPP8(MCInst &Inst, const OperandVector &Operands) { cvtDPP(Inst, Operands, true); }
1727 
1728   OperandMatchResultTy parseSDWASel(OperandVector &Operands, StringRef Prefix,
1729                                     AMDGPUOperand::ImmTy Type);
1730   OperandMatchResultTy parseSDWADstUnused(OperandVector &Operands);
1731   void cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands);
1732   void cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands);
1733   void cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands);
1734   void cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands);
1735   void cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands);
1736   void cvtSDWA(MCInst &Inst, const OperandVector &Operands,
1737                uint64_t BasicInstType,
1738                bool SkipDstVcc = false,
1739                bool SkipSrcVcc = false);
1740 
1741   AMDGPUOperand::Ptr defaultBLGP() const;
1742   AMDGPUOperand::Ptr defaultCBSZ() const;
1743   AMDGPUOperand::Ptr defaultABID() const;
1744 
1745   OperandMatchResultTy parseEndpgmOp(OperandVector &Operands);
1746   AMDGPUOperand::Ptr defaultEndpgmImmOperands() const;
1747 };
1748 
1749 struct OptionalOperand {
1750   const char *Name;
1751   AMDGPUOperand::ImmTy Type;
1752   bool IsBit;
1753   bool (*ConvertResult)(int64_t&);
1754 };
1755 
1756 } // end anonymous namespace
1757 
1758 // May be called with an integer type of equivalent bitwidth.
1759 static const fltSemantics *getFltSemantics(unsigned Size) {
1760   switch (Size) {
1761   case 4:
1762     return &APFloat::IEEEsingle();
1763   case 8:
1764     return &APFloat::IEEEdouble();
1765   case 2:
1766     return &APFloat::IEEEhalf();
1767   default:
1768     llvm_unreachable("unsupported fp type");
1769   }
1770 }
1771 
1772 static const fltSemantics *getFltSemantics(MVT VT) {
1773   return getFltSemantics(VT.getSizeInBits() / 8);
1774 }
1775 
1776 static const fltSemantics *getOpFltSemantics(uint8_t OperandType) {
1777   switch (OperandType) {
1778   case AMDGPU::OPERAND_REG_IMM_INT32:
1779   case AMDGPU::OPERAND_REG_IMM_FP32:
1780   case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED:
1781   case AMDGPU::OPERAND_REG_INLINE_C_INT32:
1782   case AMDGPU::OPERAND_REG_INLINE_C_FP32:
1783   case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
1784   case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
1785   case AMDGPU::OPERAND_REG_INLINE_C_V2FP32:
1786   case AMDGPU::OPERAND_REG_IMM_V2FP32:
1787   case AMDGPU::OPERAND_REG_INLINE_C_V2INT32:
1788   case AMDGPU::OPERAND_REG_IMM_V2INT32:
1789   case AMDGPU::OPERAND_KIMM32:
1790     return &APFloat::IEEEsingle();
1791   case AMDGPU::OPERAND_REG_IMM_INT64:
1792   case AMDGPU::OPERAND_REG_IMM_FP64:
1793   case AMDGPU::OPERAND_REG_INLINE_C_INT64:
1794   case AMDGPU::OPERAND_REG_INLINE_C_FP64:
1795   case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
1796     return &APFloat::IEEEdouble();
1797   case AMDGPU::OPERAND_REG_IMM_INT16:
1798   case AMDGPU::OPERAND_REG_IMM_FP16:
1799   case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED:
1800   case AMDGPU::OPERAND_REG_INLINE_C_INT16:
1801   case AMDGPU::OPERAND_REG_INLINE_C_FP16:
1802   case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
1803   case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
1804   case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
1805   case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
1806   case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
1807   case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
1808   case AMDGPU::OPERAND_REG_IMM_V2INT16:
1809   case AMDGPU::OPERAND_REG_IMM_V2FP16:
1810   case AMDGPU::OPERAND_KIMM16:
1811     return &APFloat::IEEEhalf();
1812   default:
1813     llvm_unreachable("unsupported fp type");
1814   }
1815 }
1816 
1817 //===----------------------------------------------------------------------===//
1818 // Operand
1819 //===----------------------------------------------------------------------===//
1820 
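// Check whether an FP literal (kept as a double) can be converted to the
// operand type VT without overflow or underflow; mere precision loss is
// tolerated. For example, 1.5 converts to f16 exactly, whereas 1.0e10
// overflows f16 and is rejected.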
1821 static bool canLosslesslyConvertToFPType(APFloat &FPLiteral, MVT VT) {
1822   bool Lost;
1823 
1824   // Convert the literal to the operand's floating-point type.
1825   APFloat::opStatus Status = FPLiteral.convert(*getFltSemantics(VT),
1826                                                APFloat::rmNearestTiesToEven,
1827                                                &Lost);
1828   // We allow precision loss but not overflow or underflow.
1829   if (Status != APFloat::opOK &&
1830       Lost &&
1831       ((Status & APFloat::opOverflow)  != 0 ||
1832        (Status & APFloat::opUnderflow) != 0)) {
1833     return false;
1834   }
1835 
1836   return true;
1837 }
1838 
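// Check whether Val fits into Size bits as either an unsigned or a signed
// value, i.e. whether truncating it to Size bits loses no information.
// For example, isSafeTruncation(255, 8) and isSafeTruncation(-128, 8) hold,
// while isSafeTruncation(256, 8) does not.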
1839 static bool isSafeTruncation(int64_t Val, unsigned Size) {
1840   return isUIntN(Size, Val) || isIntN(Size, Val);
1841 }
1842 
1843 static bool isInlineableLiteralOp16(int64_t Val, MVT VT, bool HasInv2Pi) {
1844   if (VT.getScalarType() == MVT::i16) {
1845     // FP immediate values are broken.
1846     return isInlinableIntLiteral(Val);
1847   }
1848 
1849   // f16/v2f16 operands work correctly for all values.
1850   return AMDGPU::isInlinableLiteral16(Val, HasInv2Pi);
1851 }
1852 
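// Check whether this parsed immediate can be encoded as an inline constant
// for an operand of the given type. FP tokens are first converted to the
// operand's format; e.g. the token 1.0 is inlinable for f32 and f16
// operands, while 3.14 is not and has to be encoded as a literal.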
1853 bool AMDGPUOperand::isInlinableImm(MVT type) const {
1854 
1855   // This is a hack to enable named inline values like
1856   // shared_base with both 32-bit and 64-bit operands.
1857   // Note that these values are defined as
1858   // 32-bit operands only.
1859   if (isInlineValue()) {
1860     return true;
1861   }
1862 
1863   if (!isImmTy(ImmTyNone)) {
1864     // Only plain immediates are inlinable (e.g. "clamp" attribute is not)
1865     return false;
1866   }
1867   // TODO: We should avoid using host float here. It would be better to
1868   // check the float bit values, which is what a few other places do.
1869   // We've had bot failures before due to weird NaN support on mips hosts.
1870 
1871   APInt Literal(64, Imm.Val);
1872 
1873   if (Imm.IsFPImm) { // We got fp literal token
1874     if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
1875       return AMDGPU::isInlinableLiteral64(Imm.Val,
1876                                           AsmParser->hasInv2PiInlineImm());
1877     }
1878 
1879     APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
1880     if (!canLosslesslyConvertToFPType(FPLiteral, type))
1881       return false;
1882 
1883     if (type.getScalarSizeInBits() == 16) {
1884       return isInlineableLiteralOp16(
1885         static_cast<int16_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
1886         type, AsmParser->hasInv2PiInlineImm());
1887     }
1888 
1889     // Check if single precision literal is inlinable
1890     return AMDGPU::isInlinableLiteral32(
1891       static_cast<int32_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
1892       AsmParser->hasInv2PiInlineImm());
1893   }
1894 
1895   // We got int literal token.
1896   if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
1897     return AMDGPU::isInlinableLiteral64(Imm.Val,
1898                                         AsmParser->hasInv2PiInlineImm());
1899   }
1900 
1901   if (!isSafeTruncation(Imm.Val, type.getScalarSizeInBits())) {
1902     return false;
1903   }
1904 
1905   if (type.getScalarSizeInBits() == 16) {
1906     return isInlineableLiteralOp16(
1907       static_cast<int16_t>(Literal.getLoBits(16).getSExtValue()),
1908       type, AsmParser->hasInv2PiInlineImm());
1909   }
1910 
1911   return AMDGPU::isInlinableLiteral32(
1912     static_cast<int32_t>(Literal.getLoBits(32).getZExtValue()),
1913     AsmParser->hasInv2PiInlineImm());
1914 }
1915 
1916 bool AMDGPUOperand::isLiteralImm(MVT type) const {
1917   // Check that this immediate can be added as literal
1918   if (!isImmTy(ImmTyNone)) {
1919     return false;
1920   }
1921 
1922   if (!Imm.IsFPImm) {
1923     // We got int literal token.
1924 
1925     if (type == MVT::f64 && hasFPModifiers()) {
1926       // FP modifiers cannot be applied to int literals while preserving the
1927       // same semantics for VOP1/2/C and VOP3 because of integer truncation.
1928       // To avoid ambiguity, reject these cases.
1929       return false;
1930     }
1931 
1932     unsigned Size = type.getSizeInBits();
1933     if (Size == 64)
1934       Size = 32;
1935 
1936     // FIXME: 64-bit operands can zero extend, sign extend, or pad zeroes for FP
1937     // types.
1938     return isSafeTruncation(Imm.Val, Size);
1939   }
1940 
1941   // We got fp literal token
1942   if (type == MVT::f64) { // Expected 64-bit fp operand
1943     // The low 32 bits of the literal would be set to zeroes, but we accept such literals.
1944     return true;
1945   }
1946 
1947   if (type == MVT::i64) { // Expected 64-bit int operand
1948     // We don't allow fp literals in 64-bit integer instructions. It is
1949     // unclear how we should encode them.
1950     return false;
1951   }
1952 
1953   // We allow fp literals with f16x2 operands assuming that the specified
1954   // literal goes into the lower half and the upper half is zero. We also
1955   // require that the literal may be losslessly converted to f16.
1956   MVT ExpectedType = (type == MVT::v2f16)? MVT::f16 :
1957                      (type == MVT::v2i16)? MVT::i16 :
1958                      (type == MVT::v2f32)? MVT::f32 : type;
1959 
1960   APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
1961   return canLosslesslyConvertToFPType(FPLiteral, ExpectedType);
1962 }
1963 
1964 bool AMDGPUOperand::isRegClass(unsigned RCID) const {
1965   return isRegKind() && AsmParser->getMRI()->getRegClass(RCID).contains(getReg());
1966 }
1967 
1968 bool AMDGPUOperand::isVRegWithInputMods() const {
1969   return isRegClass(AMDGPU::VGPR_32RegClassID) ||
1970          // GFX90A allows DPP on 64-bit operands.
1971          (isRegClass(AMDGPU::VReg_64RegClassID) &&
1972           AsmParser->getFeatureBits()[AMDGPU::Feature64BitDPP]);
1973 }
1974 
1975 bool AMDGPUOperand::isSDWAOperand(MVT type) const {
1976   if (AsmParser->isVI())
1977     return isVReg32();
1978   else if (AsmParser->isGFX9Plus())
1979     return isRegClass(AMDGPU::VS_32RegClassID) || isInlinableImm(type);
1980   else
1981     return false;
1982 }
1983 
1984 bool AMDGPUOperand::isSDWAFP16Operand() const {
1985   return isSDWAOperand(MVT::f16);
1986 }
1987 
1988 bool AMDGPUOperand::isSDWAFP32Operand() const {
1989   return isSDWAOperand(MVT::f32);
1990 }
1991 
1992 bool AMDGPUOperand::isSDWAInt16Operand() const {
1993   return isSDWAOperand(MVT::i16);
1994 }
1995 
1996 bool AMDGPUOperand::isSDWAInt32Operand() const {
1997   return isSDWAOperand(MVT::i32);
1998 }
1999 
2000 bool AMDGPUOperand::isBoolReg() const {
2001   auto FB = AsmParser->getFeatureBits();
2002   return isReg() && ((FB[AMDGPU::FeatureWavefrontSize64] && isSCSrcB64()) ||
2003                      (FB[AMDGPU::FeatureWavefrontSize32] && isSCSrcB32()));
2004 }
2005 
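// Apply parsed abs/neg modifiers directly to the bit pattern of an FP
// literal of the given Size in bytes: abs() clears the sign bit and neg()
// toggles it. For a 32-bit literal the sign mask is 1ULL << 31, so, for
// example, neg(1.0f) turns 0x3F800000 into 0xBF800000.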
2006 uint64_t AMDGPUOperand::applyInputFPModifiers(uint64_t Val, unsigned Size) const
2007 {
2008   assert(isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
2009   assert(Size == 2 || Size == 4 || Size == 8);
2010 
2011   const uint64_t FpSignMask = (1ULL << (Size * 8 - 1));
2012 
2013   if (Imm.Mods.Abs) {
2014     Val &= ~FpSignMask;
2015   }
2016   if (Imm.Mods.Neg) {
2017     Val ^= FpSignMask;
2018   }
2019 
2020   return Val;
2021 }
2022 
2023 void AMDGPUOperand::addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers) const {
2024   if (AMDGPU::isSISrcOperand(AsmParser->getMII()->get(Inst.getOpcode()),
2025                              Inst.getNumOperands())) {
2026     addLiteralImmOperand(Inst, Imm.Val,
2027                          ApplyModifiers &&
2028                          isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
2029   } else {
2030     assert(!isImmTy(ImmTyNone) || !hasModifiers());
2031     Inst.addOperand(MCOperand::createImm(Imm.Val));
2032     setImmKindNone();
2033   }
2034 }
2035 
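// Add an immediate that occupies a literal/constant slot of the instruction.
// Inline constants are emitted as-is; other values are reduced to the 32
// (or 16) bits that the literal encoding can hold, e.g. a non-inlinable
// 64-bit integer keeps only its low 32 bits while a 64-bit FP literal keeps
// only its high 32 bits.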
2036 void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const {
2037   const auto& InstDesc = AsmParser->getMII()->get(Inst.getOpcode());
2038   auto OpNum = Inst.getNumOperands();
2039   // Check that this operand accepts literals
2040   assert(AMDGPU::isSISrcOperand(InstDesc, OpNum));
2041 
2042   if (ApplyModifiers) {
2043     assert(AMDGPU::isSISrcFPOperand(InstDesc, OpNum));
2044     const unsigned Size = Imm.IsFPImm ? sizeof(double) : getOperandSize(InstDesc, OpNum);
2045     Val = applyInputFPModifiers(Val, Size);
2046   }
2047 
2048   APInt Literal(64, Val);
2049   uint8_t OpTy = InstDesc.OpInfo[OpNum].OperandType;
2050 
2051   if (Imm.IsFPImm) { // We got fp literal token
2052     switch (OpTy) {
2053     case AMDGPU::OPERAND_REG_IMM_INT64:
2054     case AMDGPU::OPERAND_REG_IMM_FP64:
2055     case AMDGPU::OPERAND_REG_INLINE_C_INT64:
2056     case AMDGPU::OPERAND_REG_INLINE_C_FP64:
2057     case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
2058       if (AMDGPU::isInlinableLiteral64(Literal.getZExtValue(),
2059                                        AsmParser->hasInv2PiInlineImm())) {
2060         Inst.addOperand(MCOperand::createImm(Literal.getZExtValue()));
2061         setImmKindConst();
2062         return;
2063       }
2064 
2065       // Non-inlineable
2066       if (AMDGPU::isSISrcFPOperand(InstDesc, OpNum)) { // Expected 64-bit fp operand
2067         // For fp operands we check if low 32 bits are zeros
2068         if (Literal.getLoBits(32) != 0) {
2069           const_cast<AMDGPUAsmParser *>(AsmParser)->Warning(Inst.getLoc(),
2070           "Can't encode literal as exact 64-bit floating-point operand. "
2071           "Low 32-bits will be set to zero");
2072         }
2073 
2074         Inst.addOperand(MCOperand::createImm(Literal.lshr(32).getZExtValue()));
2075         setImmKindLiteral();
2076         return;
2077       }
2078 
2079       // We don't allow fp literals in 64-bit integer instructions. It is
2080       // unclear how we should encode them. This case should be checked earlier
2081       // in predicate methods (isLiteralImm())
2082       llvm_unreachable("fp literal in 64-bit integer instruction.");
2083 
2084     case AMDGPU::OPERAND_REG_IMM_INT32:
2085     case AMDGPU::OPERAND_REG_IMM_FP32:
2086     case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED:
2087     case AMDGPU::OPERAND_REG_INLINE_C_INT32:
2088     case AMDGPU::OPERAND_REG_INLINE_C_FP32:
2089     case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
2090     case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
2091     case AMDGPU::OPERAND_REG_IMM_INT16:
2092     case AMDGPU::OPERAND_REG_IMM_FP16:
2093     case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED:
2094     case AMDGPU::OPERAND_REG_INLINE_C_INT16:
2095     case AMDGPU::OPERAND_REG_INLINE_C_FP16:
2096     case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
2097     case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
2098     case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
2099     case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
2100     case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
2101     case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
2102     case AMDGPU::OPERAND_REG_IMM_V2INT16:
2103     case AMDGPU::OPERAND_REG_IMM_V2FP16:
2104     case AMDGPU::OPERAND_REG_INLINE_C_V2FP32:
2105     case AMDGPU::OPERAND_REG_IMM_V2FP32:
2106     case AMDGPU::OPERAND_REG_INLINE_C_V2INT32:
2107     case AMDGPU::OPERAND_REG_IMM_V2INT32:
2108     case AMDGPU::OPERAND_KIMM32:
2109     case AMDGPU::OPERAND_KIMM16: {
2110       bool lost;
2111       APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
2112       // Convert the literal to the operand's floating-point type.
2113       FPLiteral.convert(*getOpFltSemantics(OpTy),
2114                         APFloat::rmNearestTiesToEven, &lost);
2115       // We allow precision loss but not overflow or underflow. This should
2116       // have been checked earlier by isLiteralImm().
2117 
2118       uint64_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue();
2119       Inst.addOperand(MCOperand::createImm(ImmVal));
2120       setImmKindLiteral();
2121       return;
2122     }
2123     default:
2124       llvm_unreachable("invalid operand size");
2125     }
2126 
2127     return;
2128   }
2129 
2130   // We got int literal token.
2131   // Only sign extend inline immediates.
2132   switch (OpTy) {
2133   case AMDGPU::OPERAND_REG_IMM_INT32:
2134   case AMDGPU::OPERAND_REG_IMM_FP32:
2135   case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED:
2136   case AMDGPU::OPERAND_REG_INLINE_C_INT32:
2137   case AMDGPU::OPERAND_REG_INLINE_C_FP32:
2138   case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
2139   case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
2140   case AMDGPU::OPERAND_REG_IMM_V2INT16:
2141   case AMDGPU::OPERAND_REG_IMM_V2FP16:
2142   case AMDGPU::OPERAND_REG_IMM_V2FP32:
2143   case AMDGPU::OPERAND_REG_INLINE_C_V2FP32:
2144   case AMDGPU::OPERAND_REG_IMM_V2INT32:
2145   case AMDGPU::OPERAND_REG_INLINE_C_V2INT32:
2146     if (isSafeTruncation(Val, 32) &&
2147         AMDGPU::isInlinableLiteral32(static_cast<int32_t>(Val),
2148                                      AsmParser->hasInv2PiInlineImm())) {
2149       Inst.addOperand(MCOperand::createImm(Val));
2150       setImmKindConst();
2151       return;
2152     }
2153 
2154     Inst.addOperand(MCOperand::createImm(Val & 0xffffffff));
2155     setImmKindLiteral();
2156     return;
2157 
2158   case AMDGPU::OPERAND_REG_IMM_INT64:
2159   case AMDGPU::OPERAND_REG_IMM_FP64:
2160   case AMDGPU::OPERAND_REG_INLINE_C_INT64:
2161   case AMDGPU::OPERAND_REG_INLINE_C_FP64:
2162   case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
2163     if (AMDGPU::isInlinableLiteral64(Val, AsmParser->hasInv2PiInlineImm())) {
2164       Inst.addOperand(MCOperand::createImm(Val));
2165       setImmKindConst();
2166       return;
2167     }
2168 
2169     Inst.addOperand(MCOperand::createImm(Lo_32(Val)));
2170     setImmKindLiteral();
2171     return;
2172 
2173   case AMDGPU::OPERAND_REG_IMM_INT16:
2174   case AMDGPU::OPERAND_REG_IMM_FP16:
2175   case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED:
2176   case AMDGPU::OPERAND_REG_INLINE_C_INT16:
2177   case AMDGPU::OPERAND_REG_INLINE_C_FP16:
2178   case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
2179   case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
2180     if (isSafeTruncation(Val, 16) &&
2181         AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val),
2182                                      AsmParser->hasInv2PiInlineImm())) {
2183       Inst.addOperand(MCOperand::createImm(Val));
2184       setImmKindConst();
2185       return;
2186     }
2187 
2188     Inst.addOperand(MCOperand::createImm(Val & 0xffff));
2189     setImmKindLiteral();
2190     return;
2191 
2192   case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
2193   case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
2194   case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
2195   case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: {
2196     assert(isSafeTruncation(Val, 16));
2197     assert(AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val),
2198                                         AsmParser->hasInv2PiInlineImm()));
2199 
2200     Inst.addOperand(MCOperand::createImm(Val));
2201     return;
2202   }
2203   case AMDGPU::OPERAND_KIMM32:
2204     Inst.addOperand(MCOperand::createImm(Literal.getLoBits(32).getZExtValue()));
2205     setImmKindNone();
2206     return;
2207   case AMDGPU::OPERAND_KIMM16:
2208     Inst.addOperand(MCOperand::createImm(Literal.getLoBits(16).getZExtValue()));
2209     setImmKindNone();
2210     return;
2211   default:
2212     llvm_unreachable("invalid operand size");
2213   }
2214 }
2215 
2216 template <unsigned Bitwidth>
2217 void AMDGPUOperand::addKImmFPOperands(MCInst &Inst, unsigned N) const {
2218   APInt Literal(64, Imm.Val);
2219   setImmKindNone();
2220 
2221   if (!Imm.IsFPImm) {
2222     // We got int literal token.
2223     Inst.addOperand(MCOperand::createImm(Literal.getLoBits(Bitwidth).getZExtValue()));
2224     return;
2225   }
2226 
2227   bool Lost;
2228   APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
2229   FPLiteral.convert(*getFltSemantics(Bitwidth / 8),
2230                     APFloat::rmNearestTiesToEven, &Lost);
2231   Inst.addOperand(MCOperand::createImm(FPLiteral.bitcastToAPInt().getZExtValue()));
2232 }
2233 
2234 void AMDGPUOperand::addRegOperands(MCInst &Inst, unsigned N) const {
2235   Inst.addOperand(MCOperand::createReg(AMDGPU::getMCReg(getReg(), AsmParser->getSTI())));
2236 }
2237 
2238 static bool isInlineValue(unsigned Reg) {
2239   switch (Reg) {
2240   case AMDGPU::SRC_SHARED_BASE:
2241   case AMDGPU::SRC_SHARED_LIMIT:
2242   case AMDGPU::SRC_PRIVATE_BASE:
2243   case AMDGPU::SRC_PRIVATE_LIMIT:
2244   case AMDGPU::SRC_POPS_EXITING_WAVE_ID:
2245     return true;
2246   case AMDGPU::SRC_VCCZ:
2247   case AMDGPU::SRC_EXECZ:
2248   case AMDGPU::SRC_SCC:
2249     return true;
2250   case AMDGPU::SGPR_NULL:
2251     return true;
2252   default:
2253     return false;
2254   }
2255 }
2256 
2257 bool AMDGPUOperand::isInlineValue() const {
2258   return isRegKind() && ::isInlineValue(getReg());
2259 }
2260 
2261 //===----------------------------------------------------------------------===//
2262 // AsmParser
2263 //===----------------------------------------------------------------------===//
2264 
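// Map a register kind and a width in bits to the corresponding register
// class ID, or -1 if no class of that width exists for the kind. For
// example, (IS_VGPR, 64) maps to AMDGPU::VReg_64RegClassID.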
2265 static int getRegClass(RegisterKind Is, unsigned RegWidth) {
2266   if (Is == IS_VGPR) {
2267     switch (RegWidth) {
2268       default: return -1;
2269       case 32:
2270         return AMDGPU::VGPR_32RegClassID;
2271       case 64:
2272         return AMDGPU::VReg_64RegClassID;
2273       case 96:
2274         return AMDGPU::VReg_96RegClassID;
2275       case 128:
2276         return AMDGPU::VReg_128RegClassID;
2277       case 160:
2278         return AMDGPU::VReg_160RegClassID;
2279       case 192:
2280         return AMDGPU::VReg_192RegClassID;
2281       case 224:
2282         return AMDGPU::VReg_224RegClassID;
2283       case 256:
2284         return AMDGPU::VReg_256RegClassID;
2285       case 512:
2286         return AMDGPU::VReg_512RegClassID;
2287       case 1024:
2288         return AMDGPU::VReg_1024RegClassID;
2289     }
2290   } else if (Is == IS_TTMP) {
2291     switch (RegWidth) {
2292       default: return -1;
2293       case 32:
2294         return AMDGPU::TTMP_32RegClassID;
2295       case 64:
2296         return AMDGPU::TTMP_64RegClassID;
2297       case 128:
2298         return AMDGPU::TTMP_128RegClassID;
2299       case 256:
2300         return AMDGPU::TTMP_256RegClassID;
2301       case 512:
2302         return AMDGPU::TTMP_512RegClassID;
2303     }
2304   } else if (Is == IS_SGPR) {
2305     switch (RegWidth) {
2306       default: return -1;
2307       case 32:
2308         return AMDGPU::SGPR_32RegClassID;
2309       case 64:
2310         return AMDGPU::SGPR_64RegClassID;
2311       case 96:
2312         return AMDGPU::SGPR_96RegClassID;
2313       case 128:
2314         return AMDGPU::SGPR_128RegClassID;
2315       case 160:
2316         return AMDGPU::SGPR_160RegClassID;
2317       case 192:
2318         return AMDGPU::SGPR_192RegClassID;
2319       case 224:
2320         return AMDGPU::SGPR_224RegClassID;
2321       case 256:
2322         return AMDGPU::SGPR_256RegClassID;
2323       case 512:
2324         return AMDGPU::SGPR_512RegClassID;
2325     }
2326   } else if (Is == IS_AGPR) {
2327     switch (RegWidth) {
2328       default: return -1;
2329       case 32:
2330         return AMDGPU::AGPR_32RegClassID;
2331       case 64:
2332         return AMDGPU::AReg_64RegClassID;
2333       case 96:
2334         return AMDGPU::AReg_96RegClassID;
2335       case 128:
2336         return AMDGPU::AReg_128RegClassID;
2337       case 160:
2338         return AMDGPU::AReg_160RegClassID;
2339       case 192:
2340         return AMDGPU::AReg_192RegClassID;
2341       case 224:
2342         return AMDGPU::AReg_224RegClassID;
2343       case 256:
2344         return AMDGPU::AReg_256RegClassID;
2345       case 512:
2346         return AMDGPU::AReg_512RegClassID;
2347       case 1024:
2348         return AMDGPU::AReg_1024RegClassID;
2349     }
2350   }
2351   return -1;
2352 }
2353 
2354 static unsigned getSpecialRegForName(StringRef RegName) {
2355   return StringSwitch<unsigned>(RegName)
2356     .Case("exec", AMDGPU::EXEC)
2357     .Case("vcc", AMDGPU::VCC)
2358     .Case("flat_scratch", AMDGPU::FLAT_SCR)
2359     .Case("xnack_mask", AMDGPU::XNACK_MASK)
2360     .Case("shared_base", AMDGPU::SRC_SHARED_BASE)
2361     .Case("src_shared_base", AMDGPU::SRC_SHARED_BASE)
2362     .Case("shared_limit", AMDGPU::SRC_SHARED_LIMIT)
2363     .Case("src_shared_limit", AMDGPU::SRC_SHARED_LIMIT)
2364     .Case("private_base", AMDGPU::SRC_PRIVATE_BASE)
2365     .Case("src_private_base", AMDGPU::SRC_PRIVATE_BASE)
2366     .Case("private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
2367     .Case("src_private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
2368     .Case("pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
2369     .Case("src_pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
2370     .Case("lds_direct", AMDGPU::LDS_DIRECT)
2371     .Case("src_lds_direct", AMDGPU::LDS_DIRECT)
2372     .Case("m0", AMDGPU::M0)
2373     .Case("vccz", AMDGPU::SRC_VCCZ)
2374     .Case("src_vccz", AMDGPU::SRC_VCCZ)
2375     .Case("execz", AMDGPU::SRC_EXECZ)
2376     .Case("src_execz", AMDGPU::SRC_EXECZ)
2377     .Case("scc", AMDGPU::SRC_SCC)
2378     .Case("src_scc", AMDGPU::SRC_SCC)
2379     .Case("tba", AMDGPU::TBA)
2380     .Case("tma", AMDGPU::TMA)
2381     .Case("flat_scratch_lo", AMDGPU::FLAT_SCR_LO)
2382     .Case("flat_scratch_hi", AMDGPU::FLAT_SCR_HI)
2383     .Case("xnack_mask_lo", AMDGPU::XNACK_MASK_LO)
2384     .Case("xnack_mask_hi", AMDGPU::XNACK_MASK_HI)
2385     .Case("vcc_lo", AMDGPU::VCC_LO)
2386     .Case("vcc_hi", AMDGPU::VCC_HI)
2387     .Case("exec_lo", AMDGPU::EXEC_LO)
2388     .Case("exec_hi", AMDGPU::EXEC_HI)
2389     .Case("tma_lo", AMDGPU::TMA_LO)
2390     .Case("tma_hi", AMDGPU::TMA_HI)
2391     .Case("tba_lo", AMDGPU::TBA_LO)
2392     .Case("tba_hi", AMDGPU::TBA_HI)
2393     .Case("pc", AMDGPU::PC_REG)
2394     .Case("null", AMDGPU::SGPR_NULL)
2395     .Default(AMDGPU::NoRegister);
2396 }
2397 
2398 bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
2399                                     SMLoc &EndLoc, bool RestoreOnFailure) {
2400   auto R = parseRegister();
2401   if (!R) return true;
2402   assert(R->isReg());
2403   RegNo = R->getReg();
2404   StartLoc = R->getStartLoc();
2405   EndLoc = R->getEndLoc();
2406   return false;
2407 }
2408 
2409 bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
2410                                     SMLoc &EndLoc) {
2411   return ParseRegister(RegNo, StartLoc, EndLoc, /*RestoreOnFailure=*/false);
2412 }
2413 
2414 OperandMatchResultTy AMDGPUAsmParser::tryParseRegister(unsigned &RegNo,
2415                                                        SMLoc &StartLoc,
2416                                                        SMLoc &EndLoc) {
2417   bool Result =
2418       ParseRegister(RegNo, StartLoc, EndLoc, /*RestoreOnFailure=*/true);
2419   bool PendingErrors = getParser().hasPendingError();
2420   getParser().clearPendingErrors();
2421   if (PendingErrors)
2422     return MatchOperand_ParseFail;
2423   if (Result)
2424     return MatchOperand_NoMatch;
2425   return MatchOperand_Success;
2426 }
2427 
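// Try to append Reg1 to the register list currently described by Reg and
// RegWidth. Special registers are only merged as known lo/hi pairs, e.g.
// [exec_lo, exec_hi] becomes exec. For regular registers Reg1 must have the
// next consecutive index, and the accumulated width grows by 32 bits.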
2428 bool AMDGPUAsmParser::AddNextRegisterToList(unsigned &Reg, unsigned &RegWidth,
2429                                             RegisterKind RegKind, unsigned Reg1,
2430                                             SMLoc Loc) {
2431   switch (RegKind) {
2432   case IS_SPECIAL:
2433     if (Reg == AMDGPU::EXEC_LO && Reg1 == AMDGPU::EXEC_HI) {
2434       Reg = AMDGPU::EXEC;
2435       RegWidth = 64;
2436       return true;
2437     }
2438     if (Reg == AMDGPU::FLAT_SCR_LO && Reg1 == AMDGPU::FLAT_SCR_HI) {
2439       Reg = AMDGPU::FLAT_SCR;
2440       RegWidth = 64;
2441       return true;
2442     }
2443     if (Reg == AMDGPU::XNACK_MASK_LO && Reg1 == AMDGPU::XNACK_MASK_HI) {
2444       Reg = AMDGPU::XNACK_MASK;
2445       RegWidth = 64;
2446       return true;
2447     }
2448     if (Reg == AMDGPU::VCC_LO && Reg1 == AMDGPU::VCC_HI) {
2449       Reg = AMDGPU::VCC;
2450       RegWidth = 64;
2451       return true;
2452     }
2453     if (Reg == AMDGPU::TBA_LO && Reg1 == AMDGPU::TBA_HI) {
2454       Reg = AMDGPU::TBA;
2455       RegWidth = 64;
2456       return true;
2457     }
2458     if (Reg == AMDGPU::TMA_LO && Reg1 == AMDGPU::TMA_HI) {
2459       Reg = AMDGPU::TMA;
2460       RegWidth = 64;
2461       return true;
2462     }
2463     Error(Loc, "register does not fit in the list");
2464     return false;
2465   case IS_VGPR:
2466   case IS_SGPR:
2467   case IS_AGPR:
2468   case IS_TTMP:
2469     if (Reg1 != Reg + RegWidth / 32) {
2470       Error(Loc, "registers in a list must have consecutive indices");
2471       return false;
2472     }
2473     RegWidth += 32;
2474     return true;
2475   default:
2476     llvm_unreachable("unexpected register kind");
2477   }
2478 }
2479 
2480 struct RegInfo {
2481   StringLiteral Name;
2482   RegisterKind Kind;
2483 };
2484 
2485 static constexpr RegInfo RegularRegisters[] = {
2486   {{"v"},    IS_VGPR},
2487   {{"s"},    IS_SGPR},
2488   {{"ttmp"}, IS_TTMP},
2489   {{"acc"},  IS_AGPR},
2490   {{"a"},    IS_AGPR},
2491 };
2492 
2493 static bool isRegularReg(RegisterKind Kind) {
2494   return Kind == IS_VGPR ||
2495          Kind == IS_SGPR ||
2496          Kind == IS_TTMP ||
2497          Kind == IS_AGPR;
2498 }
2499 
2500 static const RegInfo* getRegularRegInfo(StringRef Str) {
2501   for (const RegInfo &Reg : RegularRegisters)
2502     if (Str.startswith(Reg.Name))
2503       return &Reg;
2504   return nullptr;
2505 }
2506 
2507 static bool getRegNum(StringRef Str, unsigned& Num) {
2508   return !Str.getAsInteger(10, Num);
2509 }
2510 
2511 bool
2512 AMDGPUAsmParser::isRegister(const AsmToken &Token,
2513                             const AsmToken &NextToken) const {
2514 
2515   // A list of consecutive registers: [s0,s1,s2,s3]
2516   if (Token.is(AsmToken::LBrac))
2517     return true;
2518 
2519   if (!Token.is(AsmToken::Identifier))
2520     return false;
2521 
2522   // A single register like s0 or a range of registers like s[0:1]
2523 
2524   StringRef Str = Token.getString();
2525   const RegInfo *Reg = getRegularRegInfo(Str);
2526   if (Reg) {
2527     StringRef RegName = Reg->Name;
2528     StringRef RegSuffix = Str.substr(RegName.size());
2529     if (!RegSuffix.empty()) {
2530       unsigned Num;
2531       // A single register with an index: rXX
2532       if (getRegNum(RegSuffix, Num))
2533         return true;
2534     } else {
2535       // A range of registers: r[XX:YY].
2536       if (NextToken.is(AsmToken::LBrac))
2537         return true;
2538     }
2539   }
2540 
2541   return getSpecialRegForName(Str) != AMDGPU::NoRegister;
2542 }
2543 
2544 bool
2545 AMDGPUAsmParser::isRegister()
2546 {
2547   return isRegister(getToken(), peekToken());
2548 }
2549 
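// Compute the MC register for a regular (v/s/ttmp/a) register from its
// kind, first 32-bit index and total width in bits. As an illustration,
// IS_SGPR with RegNum = 4 and RegWidth = 64 should select s[4:5]: the
// 64-bit SGPR tuple must be aligned to 2 dwords, so RegIdx becomes 2 in
// the SGPR_64 register class.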
2550 unsigned
2551 AMDGPUAsmParser::getRegularReg(RegisterKind RegKind,
2552                                unsigned RegNum,
2553                                unsigned RegWidth,
2554                                SMLoc Loc) {
2555 
2556   assert(isRegularReg(RegKind));
2557 
2558   unsigned AlignSize = 1;
2559   if (RegKind == IS_SGPR || RegKind == IS_TTMP) {
2560     // SGPR and TTMP registers must be aligned.
2561     // Max required alignment is 4 dwords.
2562     AlignSize = std::min(RegWidth / 32, 4u);
2563   }
2564 
2565   if (RegNum % AlignSize != 0) {
2566     Error(Loc, "invalid register alignment");
2567     return AMDGPU::NoRegister;
2568   }
2569 
2570   unsigned RegIdx = RegNum / AlignSize;
2571   int RCID = getRegClass(RegKind, RegWidth);
2572   if (RCID == -1) {
2573     Error(Loc, "invalid or unsupported register size");
2574     return AMDGPU::NoRegister;
2575   }
2576 
2577   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
2578   const MCRegisterClass RC = TRI->getRegClass(RCID);
2579   if (RegIdx >= RC.getNumRegs()) {
2580     Error(Loc, "register index is out of range");
2581     return AMDGPU::NoRegister;
2582   }
2583 
2584   return RC.getRegister(RegIdx);
2585 }
2586 
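// Parse a register index or an index range in square brackets, e.g. "[0]"
// or "[0:3]". On success Num holds the first index and RegWidth the total
// width in bits; for "[0:3]" that is Num = 0 and RegWidth = 128.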
2587 bool AMDGPUAsmParser::ParseRegRange(unsigned &Num, unsigned &RegWidth) {
2588   int64_t RegLo, RegHi;
2589   if (!skipToken(AsmToken::LBrac, "missing register index"))
2590     return false;
2591 
2592   SMLoc FirstIdxLoc = getLoc();
2593   SMLoc SecondIdxLoc;
2594 
2595   if (!parseExpr(RegLo))
2596     return false;
2597 
2598   if (trySkipToken(AsmToken::Colon)) {
2599     SecondIdxLoc = getLoc();
2600     if (!parseExpr(RegHi))
2601       return false;
2602   } else {
2603     RegHi = RegLo;
2604   }
2605 
2606   if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
2607     return false;
2608 
2609   if (!isUInt<32>(RegLo)) {
2610     Error(FirstIdxLoc, "invalid register index");
2611     return false;
2612   }
2613 
2614   if (!isUInt<32>(RegHi)) {
2615     Error(SecondIdxLoc, "invalid register index");
2616     return false;
2617   }
2618 
2619   if (RegLo > RegHi) {
2620     Error(FirstIdxLoc, "first register index should not exceed second index");
2621     return false;
2622   }
2623 
2624   Num = static_cast<unsigned>(RegLo);
2625   RegWidth = 32 * ((RegHi - RegLo) + 1);
2626   return true;
2627 }
2628 
2629 unsigned AMDGPUAsmParser::ParseSpecialReg(RegisterKind &RegKind,
2630                                           unsigned &RegNum, unsigned &RegWidth,
2631                                           SmallVectorImpl<AsmToken> &Tokens) {
2632   assert(isToken(AsmToken::Identifier));
2633   unsigned Reg = getSpecialRegForName(getTokenStr());
2634   if (Reg) {
2635     RegNum = 0;
2636     RegWidth = 32;
2637     RegKind = IS_SPECIAL;
2638     Tokens.push_back(getToken());
2639     lex(); // skip register name
2640   }
2641   return Reg;
2642 }
2643 
2644 unsigned AMDGPUAsmParser::ParseRegularReg(RegisterKind &RegKind,
2645                                           unsigned &RegNum, unsigned &RegWidth,
2646                                           SmallVectorImpl<AsmToken> &Tokens) {
2647   assert(isToken(AsmToken::Identifier));
2648   StringRef RegName = getTokenStr();
2649   auto Loc = getLoc();
2650 
2651   const RegInfo *RI = getRegularRegInfo(RegName);
2652   if (!RI) {
2653     Error(Loc, "invalid register name");
2654     return AMDGPU::NoRegister;
2655   }
2656 
2657   Tokens.push_back(getToken());
2658   lex(); // skip register name
2659 
2660   RegKind = RI->Kind;
2661   StringRef RegSuffix = RegName.substr(RI->Name.size());
2662   if (!RegSuffix.empty()) {
2663     // Single 32-bit register: vXX.
2664     if (!getRegNum(RegSuffix, RegNum)) {
2665       Error(Loc, "invalid register index");
2666       return AMDGPU::NoRegister;
2667     }
2668     RegWidth = 32;
2669   } else {
2670     // Range of registers: v[XX:YY]. ":YY" is optional.
2671     if (!ParseRegRange(RegNum, RegWidth))
2672       return AMDGPU::NoRegister;
2673   }
2674 
2675   return getRegularReg(RegKind, RegNum, RegWidth, Loc);
2676 }
2677 
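// Parse a bracketed list of consecutive 32-bit registers such as
// "[s0,s1,s2,s3]" and fold it into a single wide register (s[0:3] in this
// example).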
2678 unsigned AMDGPUAsmParser::ParseRegList(RegisterKind &RegKind, unsigned &RegNum,
2679                                        unsigned &RegWidth,
2680                                        SmallVectorImpl<AsmToken> &Tokens) {
2681   unsigned Reg = AMDGPU::NoRegister;
2682   auto ListLoc = getLoc();
2683 
2684   if (!skipToken(AsmToken::LBrac,
2685                  "expected a register or a list of registers")) {
2686     return AMDGPU::NoRegister;
2687   }
2688 
2689   // List of consecutive registers, e.g.: [s0,s1,s2,s3]
2690 
2691   auto Loc = getLoc();
2692   if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth))
2693     return AMDGPU::NoRegister;
2694   if (RegWidth != 32) {
2695     Error(Loc, "expected a single 32-bit register");
2696     return AMDGPU::NoRegister;
2697   }
2698 
2699   for (; trySkipToken(AsmToken::Comma); ) {
2700     RegisterKind NextRegKind;
2701     unsigned NextReg, NextRegNum, NextRegWidth;
2702     Loc = getLoc();
2703 
2704     if (!ParseAMDGPURegister(NextRegKind, NextReg,
2705                              NextRegNum, NextRegWidth,
2706                              Tokens)) {
2707       return AMDGPU::NoRegister;
2708     }
2709     if (NextRegWidth != 32) {
2710       Error(Loc, "expected a single 32-bit register");
2711       return AMDGPU::NoRegister;
2712     }
2713     if (NextRegKind != RegKind) {
2714       Error(Loc, "registers in a list must be of the same kind");
2715       return AMDGPU::NoRegister;
2716     }
2717     if (!AddNextRegisterToList(Reg, RegWidth, RegKind, NextReg, Loc))
2718       return AMDGPU::NoRegister;
2719   }
2720 
2721   if (!skipToken(AsmToken::RBrac,
2722                  "expected a comma or a closing square bracket")) {
2723     return AMDGPU::NoRegister;
2724   }
2725 
2726   if (isRegularReg(RegKind))
2727     Reg = getRegularReg(RegKind, RegNum, RegWidth, ListLoc);
2728 
2729   return Reg;
2730 }
2731 
2732 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
2733                                           unsigned &RegNum, unsigned &RegWidth,
2734                                           SmallVectorImpl<AsmToken> &Tokens) {
2735   auto Loc = getLoc();
2736   Reg = AMDGPU::NoRegister;
2737 
2738   if (isToken(AsmToken::Identifier)) {
2739     Reg = ParseSpecialReg(RegKind, RegNum, RegWidth, Tokens);
2740     if (Reg == AMDGPU::NoRegister)
2741       Reg = ParseRegularReg(RegKind, RegNum, RegWidth, Tokens);
2742   } else {
2743     Reg = ParseRegList(RegKind, RegNum, RegWidth, Tokens);
2744   }
2745 
2746   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
2747   if (Reg == AMDGPU::NoRegister) {
2748     assert(Parser.hasPendingError());
2749     return false;
2750   }
2751 
2752   if (!subtargetHasRegister(*TRI, Reg)) {
2753     if (Reg == AMDGPU::SGPR_NULL) {
2754       Error(Loc, "'null' operand is not supported on this GPU");
2755     } else {
2756       Error(Loc, "register not available on this GPU");
2757     }
2758     return false;
2759   }
2760 
2761   return true;
2762 }
2763 
2764 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
2765                                           unsigned &RegNum, unsigned &RegWidth,
2766                                           bool RestoreOnFailure /*=false*/) {
2767   Reg = AMDGPU::NoRegister;
2768 
2769   SmallVector<AsmToken, 1> Tokens;
2770   if (ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, Tokens)) {
2771     if (RestoreOnFailure) {
2772       while (!Tokens.empty()) {
2773         getLexer().UnLex(Tokens.pop_back_val());
2774       }
2775     }
2776     return true;
2777   }
2778   return false;
2779 }
2780 
2781 Optional<StringRef>
2782 AMDGPUAsmParser::getGprCountSymbolName(RegisterKind RegKind) {
2783   switch (RegKind) {
2784   case IS_VGPR:
2785     return StringRef(".amdgcn.next_free_vgpr");
2786   case IS_SGPR:
2787     return StringRef(".amdgcn.next_free_sgpr");
2788   default:
2789     return None;
2790   }
2791 }
2792 
2793 void AMDGPUAsmParser::initializeGprCountSymbol(RegisterKind RegKind) {
2794   auto SymbolName = getGprCountSymbolName(RegKind);
2795   assert(SymbolName && "initializing invalid register kind");
2796   MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);
2797   Sym->setVariableValue(MCConstantExpr::create(0, getContext()));
2798 }
2799 
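// Record register usage in the .amdgcn.next_free_{v,s}gpr symbols so they
// always hold one past the highest 32-bit register index seen. For example,
// a use of v[6:7] (DwordRegIndex = 6, RegWidth = 64) raises
// .amdgcn.next_free_vgpr to at least 8.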
2800 bool AMDGPUAsmParser::updateGprCountSymbols(RegisterKind RegKind,
2801                                             unsigned DwordRegIndex,
2802                                             unsigned RegWidth) {
2803   // Symbols are only defined for GCN targets
2804   if (AMDGPU::getIsaVersion(getSTI().getCPU()).Major < 6)
2805     return true;
2806 
2807   auto SymbolName = getGprCountSymbolName(RegKind);
2808   if (!SymbolName)
2809     return true;
2810   MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);
2811 
2812   int64_t NewMax = DwordRegIndex + divideCeil(RegWidth, 32) - 1;
2813   int64_t OldCount;
2814 
2815   if (!Sym->isVariable())
2816     return !Error(getLoc(),
2817                   ".amdgcn.next_free_{v,s}gpr symbols must be variable");
2818   if (!Sym->getVariableValue(false)->evaluateAsAbsolute(OldCount))
2819     return !Error(
2820         getLoc(),
2821         ".amdgcn.next_free_{v,s}gpr symbols must be absolute expressions");
2822 
2823   if (OldCount <= NewMax)
2824     Sym->setVariableValue(MCConstantExpr::create(NewMax + 1, getContext()));
2825 
2826   return true;
2827 }
2828 
2829 std::unique_ptr<AMDGPUOperand>
2830 AMDGPUAsmParser::parseRegister(bool RestoreOnFailure) {
2831   const auto &Tok = getToken();
2832   SMLoc StartLoc = Tok.getLoc();
2833   SMLoc EndLoc = Tok.getEndLoc();
2834   RegisterKind RegKind;
2835   unsigned Reg, RegNum, RegWidth;
2836 
2837   if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth)) {
2838     return nullptr;
2839   }
2840   if (isHsaAbiVersion3AndAbove(&getSTI())) {
2841     if (!updateGprCountSymbols(RegKind, RegNum, RegWidth))
2842       return nullptr;
2843   } else
2844     KernelScope.usesRegister(RegKind, RegNum, RegWidth);
2845   return AMDGPUOperand::CreateReg(this, Reg, StartLoc, EndLoc);
2846 }
2847 
2848 OperandMatchResultTy
2849 AMDGPUAsmParser::parseImm(OperandVector &Operands, bool HasSP3AbsModifier) {
2850   // TODO: add syntactic sugar for 1/(2*PI)
2851 
2852   assert(!isRegister());
2853   assert(!isModifier());
2854 
2855   const auto& Tok = getToken();
2856   const auto& NextTok = peekToken();
2857   bool IsReal = Tok.is(AsmToken::Real);
2858   SMLoc S = getLoc();
2859   bool Negate = false;
2860 
2861   if (!IsReal && Tok.is(AsmToken::Minus) && NextTok.is(AsmToken::Real)) {
2862     lex();
2863     IsReal = true;
2864     Negate = true;
2865   }
2866 
2867   if (IsReal) {
2868     // Floating-point expressions are not supported.
2869     // Only floating-point literals with an optional
2870     // sign are allowed here.
2871 
2872     StringRef Num = getTokenStr();
2873     lex();
2874 
2875     APFloat RealVal(APFloat::IEEEdouble());
2876     auto roundMode = APFloat::rmNearestTiesToEven;
2877     if (errorToBool(RealVal.convertFromString(Num, roundMode).takeError())) {
2878       return MatchOperand_ParseFail;
2879     }
2880     if (Negate)
2881       RealVal.changeSign();
2882 
2883     Operands.push_back(
2884       AMDGPUOperand::CreateImm(this, RealVal.bitcastToAPInt().getZExtValue(), S,
2885                                AMDGPUOperand::ImmTyNone, true));
2886 
2887     return MatchOperand_Success;
2888 
2889   } else {
2890     int64_t IntVal;
2891     const MCExpr *Expr;
2892     SMLoc S = getLoc();
2893 
2894     if (HasSP3AbsModifier) {
2895       // This is a workaround for handling expressions
2896       // as arguments of SP3 'abs' modifier, for example:
2897       //     |1.0|
2898       //     |-1|
2899       //     |1+x|
2900       // This syntax is not compatible with the syntax of standard
2901       // MC expressions (due to the trailing '|').
2902       SMLoc EndLoc;
2903       if (getParser().parsePrimaryExpr(Expr, EndLoc, nullptr))
2904         return MatchOperand_ParseFail;
2905     } else {
2906       if (Parser.parseExpression(Expr))
2907         return MatchOperand_ParseFail;
2908     }
2909 
2910     if (Expr->evaluateAsAbsolute(IntVal)) {
2911       Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S));
2912     } else {
2913       Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S));
2914     }
2915 
2916     return MatchOperand_Success;
2917   }
2918 
2919   return MatchOperand_NoMatch;
2920 }
2921 
2922 OperandMatchResultTy
2923 AMDGPUAsmParser::parseReg(OperandVector &Operands) {
2924   if (!isRegister())
2925     return MatchOperand_NoMatch;
2926 
2927   if (auto R = parseRegister()) {
2928     assert(R->isReg());
2929     Operands.push_back(std::move(R));
2930     return MatchOperand_Success;
2931   }
2932   return MatchOperand_ParseFail;
2933 }
2934 
2935 OperandMatchResultTy
2936 AMDGPUAsmParser::parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod) {
2937   auto res = parseReg(Operands);
2938   if (res != MatchOperand_NoMatch) {
2939     return res;
2940   } else if (isModifier()) {
2941     return MatchOperand_NoMatch;
2942   } else {
2943     return parseImm(Operands, HasSP3AbsMod);
2944   }
2945 }
2946 
2947 bool
2948 AMDGPUAsmParser::isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
2949   if (Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::LParen)) {
2950     const auto &str = Token.getString();
2951     return str == "abs" || str == "neg" || str == "sext";
2952   }
2953   return false;
2954 }
2955 
2956 bool
2957 AMDGPUAsmParser::isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const {
2958   return Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::Colon);
2959 }
2960 
2961 bool
2962 AMDGPUAsmParser::isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
2963   return isNamedOperandModifier(Token, NextToken) || Token.is(AsmToken::Pipe);
2964 }
2965 
2966 bool
2967 AMDGPUAsmParser::isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
2968   return isRegister(Token, NextToken) || isOperandModifier(Token, NextToken);
2969 }
2970 
2971 // Check if this is an operand modifier or an opcode modifier
2972 // which may look like an expression but it is not. We should
2973 // avoid parsing these modifiers as expressions. Currently
2974 // recognized sequences are:
2975 //   |...|
2976 //   abs(...)
2977 //   neg(...)
2978 //   sext(...)
2979 //   -reg
2980 //   -|...|
2981 //   -abs(...)
2982 //   name:...
2983 // Note that simple opcode modifiers like 'gds' may be parsed as
2984 // expressions; this is a special case. See getExpressionAsToken.
2985 //
2986 bool
2987 AMDGPUAsmParser::isModifier() {
2988 
2989   AsmToken Tok = getToken();
2990   AsmToken NextToken[2];
2991   peekTokens(NextToken);
2992 
2993   return isOperandModifier(Tok, NextToken[0]) ||
2994          (Tok.is(AsmToken::Minus) && isRegOrOperandModifier(NextToken[0], NextToken[1])) ||
2995          isOpcodeModifierWithVal(Tok, NextToken[0]);
2996 }
2997 
2998 // Check if the current token is an SP3 'neg' modifier.
2999 // Currently this modifier is allowed in the following context:
3000 //
3001 // 1. Before a register, e.g. "-v0", "-v[...]" or "-[v0,v1]".
3002 // 2. Before an 'abs' modifier: -abs(...)
3003 // 3. Before an SP3 'abs' modifier: -|...|
3004 //
3005 // In all other cases "-" is handled as a part
3006 // of an expression that follows the sign.
3007 //
3008 // Note: When "-" is followed by an integer literal N,
3009 // this is interpreted as integer negation rather
3010 // than a floating-point NEG modifier applied to N.
3011 // Besides being counter-intuitive, such use of a floating-point
3012 // NEG modifier would have resulted in different meanings
3013 // of integer literals used with VOP1/2/C and VOP3,
3014 // for example:
3015 //    v_exp_f32_e32 v5, -1 // VOP1: src0 = 0xFFFFFFFF
3016 //    v_exp_f32_e64 v5, -1 // VOP3: src0 = 0x80000001
3017 // Negative fp literals with a preceding "-" are
3018 // handled likewise, for uniformity.
3019 //
3020 bool
3021 AMDGPUAsmParser::parseSP3NegModifier() {
3022 
3023   AsmToken NextToken[2];
3024   peekTokens(NextToken);
3025 
3026   if (isToken(AsmToken::Minus) &&
3027       (isRegister(NextToken[0], NextToken[1]) ||
3028        NextToken[0].is(AsmToken::Pipe) ||
3029        isId(NextToken[0], "abs"))) {
3030     lex();
3031     return true;
3032   }
3033 
3034   return false;
3035 }
3036 
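// Parse an operand optionally wrapped in FP input modifiers, using either
// the named syntax (neg(...), abs(...)) or the SP3 syntax (-..., |...|).
// For example, "-|v0|" yields a register operand with both Neg and Abs set,
// while "abs(v1)" sets Abs only.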
3037 OperandMatchResultTy
3038 AMDGPUAsmParser::parseRegOrImmWithFPInputMods(OperandVector &Operands,
3039                                               bool AllowImm) {
3040   bool Neg, SP3Neg;
3041   bool Abs, SP3Abs;
3042   SMLoc Loc;
3043 
3044   // Disable ambiguous constructs like '--1' etc. Should use neg(-1) instead.
3045   if (isToken(AsmToken::Minus) && peekToken().is(AsmToken::Minus)) {
3046     Error(getLoc(), "invalid syntax, expected 'neg' modifier");
3047     return MatchOperand_ParseFail;
3048   }
3049 
3050   SP3Neg = parseSP3NegModifier();
3051 
3052   Loc = getLoc();
3053   Neg = trySkipId("neg");
3054   if (Neg && SP3Neg) {
3055     Error(Loc, "expected register or immediate");
3056     return MatchOperand_ParseFail;
3057   }
3058   if (Neg && !skipToken(AsmToken::LParen, "expected left paren after neg"))
3059     return MatchOperand_ParseFail;
3060 
3061   Abs = trySkipId("abs");
3062   if (Abs && !skipToken(AsmToken::LParen, "expected left paren after abs"))
3063     return MatchOperand_ParseFail;
3064 
3065   Loc = getLoc();
3066   SP3Abs = trySkipToken(AsmToken::Pipe);
3067   if (Abs && SP3Abs) {
3068     Error(Loc, "expected register or immediate");
3069     return MatchOperand_ParseFail;
3070   }
3071 
3072   OperandMatchResultTy Res;
3073   if (AllowImm) {
3074     Res = parseRegOrImm(Operands, SP3Abs);
3075   } else {
3076     Res = parseReg(Operands);
3077   }
3078   if (Res != MatchOperand_Success) {
3079     return (SP3Neg || Neg || SP3Abs || Abs)? MatchOperand_ParseFail : Res;
3080   }
3081 
3082   if (SP3Abs && !skipToken(AsmToken::Pipe, "expected vertical bar"))
3083     return MatchOperand_ParseFail;
3084   if (Abs && !skipToken(AsmToken::RParen, "expected closing parentheses"))
3085     return MatchOperand_ParseFail;
3086   if (Neg && !skipToken(AsmToken::RParen, "expected closing parentheses"))
3087     return MatchOperand_ParseFail;
3088 
3089   AMDGPUOperand::Modifiers Mods;
3090   Mods.Abs = Abs || SP3Abs;
3091   Mods.Neg = Neg || SP3Neg;
3092 
3093   if (Mods.hasFPModifiers()) {
3094     AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
3095     if (Op.isExpr()) {
3096       Error(Op.getStartLoc(), "expected an absolute expression");
3097       return MatchOperand_ParseFail;
3098     }
3099     Op.setModifiers(Mods);
3100   }
3101   return MatchOperand_Success;
3102 }
3103 
3104 OperandMatchResultTy
3105 AMDGPUAsmParser::parseRegOrImmWithIntInputMods(OperandVector &Operands,
3106                                                bool AllowImm) {
3107   bool Sext = trySkipId("sext");
3108   if (Sext && !skipToken(AsmToken::LParen, "expected left paren after sext"))
3109     return MatchOperand_ParseFail;
3110 
3111   OperandMatchResultTy Res;
3112   if (AllowImm) {
3113     Res = parseRegOrImm(Operands);
3114   } else {
3115     Res = parseReg(Operands);
3116   }
3117   if (Res != MatchOperand_Success) {
3118     return Sext? MatchOperand_ParseFail : Res;
3119   }
3120 
3121   if (Sext && !skipToken(AsmToken::RParen, "expected closing parentheses"))
3122     return MatchOperand_ParseFail;
3123 
3124   AMDGPUOperand::Modifiers Mods;
3125   Mods.Sext = Sext;
3126 
3127   if (Mods.hasIntModifiers()) {
3128     AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
3129     if (Op.isExpr()) {
3130       Error(Op.getStartLoc(), "expected an absolute expression");
3131       return MatchOperand_ParseFail;
3132     }
3133     Op.setModifiers(Mods);
3134   }
3135 
3136   return MatchOperand_Success;
3137 }
3138 
3139 OperandMatchResultTy
3140 AMDGPUAsmParser::parseRegWithFPInputMods(OperandVector &Operands) {
3141   return parseRegOrImmWithFPInputMods(Operands, false);
3142 }
3143 
3144 OperandMatchResultTy
3145 AMDGPUAsmParser::parseRegWithIntInputMods(OperandVector &Operands) {
3146   return parseRegOrImmWithIntInputMods(Operands, false);
3147 }
3148 
3149 OperandMatchResultTy AMDGPUAsmParser::parseVReg32OrOff(OperandVector &Operands) {
3150   auto Loc = getLoc();
3151   if (trySkipId("off")) {
3152     Operands.push_back(AMDGPUOperand::CreateImm(this, 0, Loc,
3153                                                 AMDGPUOperand::ImmTyOff, false));
3154     return MatchOperand_Success;
3155   }
3156 
3157   if (!isRegister())
3158     return MatchOperand_NoMatch;
3159 
3160   std::unique_ptr<AMDGPUOperand> Reg = parseRegister();
3161   if (Reg) {
3162     Operands.push_back(std::move(Reg));
3163     return MatchOperand_Success;
3164   }
3165 
3166   return MatchOperand_ParseFail;
3167 
3168 }
3169 
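// Reject matches whose encoding does not agree with the encoding forced
// by the mnemonic suffix (_e32, _e64, _sdwa or _dpp), and enforce
// additional per-opcode constraints such as dst_sel for v_mac SDWA.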
3170 unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) {
3171   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
3172 
3173   if ((getForcedEncodingSize() == 32 && (TSFlags & SIInstrFlags::VOP3)) ||
3174       (getForcedEncodingSize() == 64 && !(TSFlags & SIInstrFlags::VOP3)) ||
3175       (isForcedDPP() && !(TSFlags & SIInstrFlags::DPP)) ||
3176       (isForcedSDWA() && !(TSFlags & SIInstrFlags::SDWA)) )
3177     return Match_InvalidOperand;
3178 
3179   if ((TSFlags & SIInstrFlags::VOP3) &&
3180       (TSFlags & SIInstrFlags::VOPAsmPrefer32Bit) &&
3181       getForcedEncodingSize() != 64)
3182     return Match_PreferE32;
3183 
3184   if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
3185       Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) {
3186     // v_mac_f32/16 allow only dst_sel == DWORD.
3187     auto OpNum =
3188         AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::dst_sel);
3189     const auto &Op = Inst.getOperand(OpNum);
3190     if (!Op.isImm() || Op.getImm() != AMDGPU::SDWA::SdwaSel::DWORD) {
3191       return Match_InvalidOperand;
3192     }
3193   }
3194 
3195   return Match_Success;
3196 }
3197 
3198 static ArrayRef<unsigned> getAllVariants() {
3199   static const unsigned Variants[] = {
3200     AMDGPUAsmVariants::DEFAULT, AMDGPUAsmVariants::VOP3,
3201     AMDGPUAsmVariants::SDWA, AMDGPUAsmVariants::SDWA9, AMDGPUAsmVariants::DPP
3202   };
3203 
3204   return makeArrayRef(Variants);
3205 }
3206 
3207 // Return the list of asm variants that should be checked when matching.
3208 ArrayRef<unsigned> AMDGPUAsmParser::getMatchedVariants() const {
3209   if (getForcedEncodingSize() == 32) {
3210     static const unsigned Variants[] = {AMDGPUAsmVariants::DEFAULT};
3211     return makeArrayRef(Variants);
3212   }
3213 
3214   if (isForcedVOP3()) {
3215     static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3};
3216     return makeArrayRef(Variants);
3217   }
3218 
3219   if (isForcedSDWA()) {
3220     static const unsigned Variants[] = {AMDGPUAsmVariants::SDWA,
3221                                         AMDGPUAsmVariants::SDWA9};
3222     return makeArrayRef(Variants);
3223   }
3224 
3225   if (isForcedDPP()) {
3226     static const unsigned Variants[] = {AMDGPUAsmVariants::DPP};
3227     return makeArrayRef(Variants);
3228   }
3229 
3230   return getAllVariants();
3231 }
3232 
3233 StringRef AMDGPUAsmParser::getMatchedVariantName() const {
3234   if (getForcedEncodingSize() == 32)
3235     return "e32";
3236 
3237   if (isForcedVOP3())
3238     return "e64";
3239 
3240   if (isForcedSDWA())
3241     return "sdwa";
3242 
3243   if (isForcedDPP())
3244     return "dpp";
3245 
3246   return "";
3247 }
3248 
3249 unsigned AMDGPUAsmParser::findImplicitSGPRReadInVOP(const MCInst &Inst) const {
3250   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
3251   const unsigned Num = Desc.getNumImplicitUses();
3252   for (unsigned i = 0; i < Num; ++i) {
3253     unsigned Reg = Desc.ImplicitUses[i];
3254     switch (Reg) {
3255     case AMDGPU::FLAT_SCR:
3256     case AMDGPU::VCC:
3257     case AMDGPU::VCC_LO:
3258     case AMDGPU::VCC_HI:
3259     case AMDGPU::M0:
3260       return Reg;
3261     default:
3262       break;
3263     }
3264   }
3265   return AMDGPU::NoRegister;
3266 }
3267 
3268 // NB: This code is correct only when used to check constant
3269 // bus limitations because GFX7 supports no f16 inline constants.
3270 // Note that there are no cases when a GFX7 opcode violates
3271 // constant bus limitations due to the use of an f16 constant.
3272 bool AMDGPUAsmParser::isInlineConstant(const MCInst &Inst,
3273                                        unsigned OpIdx) const {
3274   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
3275 
3276   if (!AMDGPU::isSISrcOperand(Desc, OpIdx)) {
3277     return false;
3278   }
3279 
3280   const MCOperand &MO = Inst.getOperand(OpIdx);
3281 
3282   int64_t Val = MO.getImm();
3283   auto OpSize = AMDGPU::getOperandSize(Desc, OpIdx);
3284 
3285   switch (OpSize) { // expected operand size
3286   case 8:
3287     return AMDGPU::isInlinableLiteral64(Val, hasInv2PiInlineImm());
3288   case 4:
3289     return AMDGPU::isInlinableLiteral32(Val, hasInv2PiInlineImm());
3290   case 2: {
3291     const unsigned OperandType = Desc.OpInfo[OpIdx].OperandType;
3292     if (OperandType == AMDGPU::OPERAND_REG_IMM_INT16 ||
3293         OperandType == AMDGPU::OPERAND_REG_INLINE_C_INT16 ||
3294         OperandType == AMDGPU::OPERAND_REG_INLINE_AC_INT16)
3295       return AMDGPU::isInlinableIntLiteral(Val);
3296 
3297     if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2INT16 ||
3298         OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2INT16 ||
3299         OperandType == AMDGPU::OPERAND_REG_IMM_V2INT16)
3300       return AMDGPU::isInlinableIntLiteralV216(Val);
3301 
3302     if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2FP16 ||
3303         OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2FP16 ||
3304         OperandType == AMDGPU::OPERAND_REG_IMM_V2FP16)
3305       return AMDGPU::isInlinableLiteralV216(Val, hasInv2PiInlineImm());
3306 
3307     return AMDGPU::isInlinableLiteral16(Val, hasInv2PiInlineImm());
3308   }
3309   default:
3310     llvm_unreachable("invalid operand size");
3311   }
3312 }
3313 
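// Return the maximum number of scalar values (SGPRs plus literals) that
// a single instruction may read through the constant bus.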
3314 unsigned AMDGPUAsmParser::getConstantBusLimit(unsigned Opcode) const {
3315   if (!isGFX10Plus())
3316     return 1;
3317 
3318   switch (Opcode) {
3319   // 64-bit shift instructions can use only one scalar value input
3320   case AMDGPU::V_LSHLREV_B64_e64:
3321   case AMDGPU::V_LSHLREV_B64_gfx10:
3322   case AMDGPU::V_LSHRREV_B64_e64:
3323   case AMDGPU::V_LSHRREV_B64_gfx10:
3324   case AMDGPU::V_ASHRREV_I64_e64:
3325   case AMDGPU::V_ASHRREV_I64_gfx10:
3326   case AMDGPU::V_LSHL_B64_e64:
3327   case AMDGPU::V_LSHR_B64_e64:
3328   case AMDGPU::V_ASHR_I64_e64:
3329     return 1;
3330   default:
3331     return 2;
3332   }
3333 }
3334 
3335 bool AMDGPUAsmParser::usesConstantBus(const MCInst &Inst, unsigned OpIdx) {
3336   const MCOperand &MO = Inst.getOperand(OpIdx);
3337   if (MO.isImm()) {
3338     return !isInlineConstant(Inst, OpIdx);
3339   } else if (MO.isReg()) {
3340     auto Reg = MO.getReg();
3341     const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3342     auto PReg = mc2PseudoReg(Reg);
3343     return isSGPR(PReg, TRI) && PReg != SGPR_NULL;
3344   } else {
3345     return true;
3346   }
3347 }
3348 
3349 bool
3350 AMDGPUAsmParser::validateConstantBusLimitations(const MCInst &Inst,
3351                                                 const OperandVector &Operands) {
3352   const unsigned Opcode = Inst.getOpcode();
3353   const MCInstrDesc &Desc = MII.get(Opcode);
3354   unsigned LastSGPR = AMDGPU::NoRegister;
3355   unsigned ConstantBusUseCount = 0;
3356   unsigned NumLiterals = 0;
3357   unsigned LiteralSize;
3358 
3359   if (Desc.TSFlags &
3360       (SIInstrFlags::VOPC |
3361        SIInstrFlags::VOP1 | SIInstrFlags::VOP2 |
3362        SIInstrFlags::VOP3 | SIInstrFlags::VOP3P |
3363        SIInstrFlags::SDWA)) {
3364     // Check special imm operands (used by madmk, etc)
3365     if (AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm) != -1) {
3366       ++NumLiterals;
3367       LiteralSize = 4;
3368     }
3369 
3370     SmallDenseSet<unsigned> SGPRsUsed;
3371     unsigned SGPRUsed = findImplicitSGPRReadInVOP(Inst);
3372     if (SGPRUsed != AMDGPU::NoRegister) {
3373       SGPRsUsed.insert(SGPRUsed);
3374       ++ConstantBusUseCount;
3375     }
3376 
3377     const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
3378     const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
3379     const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
3380 
3381     const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx };
3382 
3383     for (int OpIdx : OpIndices) {
3384       if (OpIdx == -1) break;
3385 
3386       const MCOperand &MO = Inst.getOperand(OpIdx);
3387       if (usesConstantBus(Inst, OpIdx)) {
3388         if (MO.isReg()) {
3389           LastSGPR = mc2PseudoReg(MO.getReg());
3390           // Pairs of registers with partial intersections like these
3391           //   s0, s[0:1]
3392           //   flat_scratch_lo, flat_scratch
3393           //   flat_scratch_lo, flat_scratch_hi
3394           // are theoretically valid but they are disabled anyway.
3395           // Note that this code mimics SIInstrInfo::verifyInstruction
3396           if (!SGPRsUsed.count(LastSGPR)) {
3397             SGPRsUsed.insert(LastSGPR);
3398             ++ConstantBusUseCount;
3399           }
3400         } else { // Expression or a literal
3401 
3402           if (Desc.OpInfo[OpIdx].OperandType == MCOI::OPERAND_IMMEDIATE)
3403             continue; // special operand like VINTERP attr_chan
3404 
3405           // An instruction may use only one literal.
3406           // This has been validated on the previous step.
3407           // See validateVOPLiteral.
3408           // This literal may be used as more than one operand.
3409           // If all these operands are of the same size,
3410           // this literal counts as one scalar value.
3411           // Otherwise it counts as 2 scalar values.
3412           // See "GFX10 Shader Programming", section 3.6.2.3.
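          // For example, if a literal is used both as a 32-bit and as a
          // 64-bit operand, it occupies the constant bus as two values.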
3413 
3414           unsigned Size = AMDGPU::getOperandSize(Desc, OpIdx);
3415           if (Size < 4) Size = 4;
3416 
3417           if (NumLiterals == 0) {
3418             NumLiterals = 1;
3419             LiteralSize = Size;
3420           } else if (LiteralSize != Size) {
3421             NumLiterals = 2;
3422           }
3423         }
3424       }
3425     }
3426   }
3427   ConstantBusUseCount += NumLiterals;
3428 
3429   if (ConstantBusUseCount <= getConstantBusLimit(Opcode))
3430     return true;
3431 
3432   SMLoc LitLoc = getLitLoc(Operands);
3433   SMLoc RegLoc = getRegLoc(LastSGPR, Operands);
3434   SMLoc Loc = (LitLoc.getPointer() < RegLoc.getPointer()) ? RegLoc : LitLoc;
3435   Error(Loc, "invalid operand (violates constant bus restrictions)");
3436   return false;
3437 }
3438 
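// For instructions with an earlyclobber destination, the destination
// register must not overlap any of the source registers.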
3439 bool
3440 AMDGPUAsmParser::validateEarlyClobberLimitations(const MCInst &Inst,
3441                                                  const OperandVector &Operands) {
3442   const unsigned Opcode = Inst.getOpcode();
3443   const MCInstrDesc &Desc = MII.get(Opcode);
3444 
3445   const int DstIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vdst);
3446   if (DstIdx == -1 ||
3447       Desc.getOperandConstraint(DstIdx, MCOI::EARLY_CLOBBER) == -1) {
3448     return true;
3449   }
3450 
3451   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3452 
3453   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
3454   const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
3455   const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
3456 
3457   assert(DstIdx != -1);
3458   const MCOperand &Dst = Inst.getOperand(DstIdx);
3459   assert(Dst.isReg());
3460 
3461   const int SrcIndices[] = { Src0Idx, Src1Idx, Src2Idx };
3462 
3463   for (int SrcIdx : SrcIndices) {
3464     if (SrcIdx == -1) break;
3465     const MCOperand &Src = Inst.getOperand(SrcIdx);
3466     if (Src.isReg()) {
3467       if (TRI->regsOverlap(Dst.getReg(), Src.getReg())) {
3468         const unsigned SrcReg = mc2PseudoReg(Src.getReg());
3469         Error(getRegLoc(SrcReg, Operands),
3470           "destination must be different than all sources");
3471         return false;
3472       }
3473     }
3474   }
3475 
3476   return true;
3477 }
3478 
3479 bool AMDGPUAsmParser::validateIntClampSupported(const MCInst &Inst) {
3480 
3481   const unsigned Opc = Inst.getOpcode();
3482   const MCInstrDesc &Desc = MII.get(Opc);
3483 
3484   if ((Desc.TSFlags & SIInstrFlags::IntClamp) != 0 && !hasIntClamp()) {
3485     int ClampIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp);
3486     assert(ClampIdx != -1);
3487     return Inst.getOperand(ClampIdx).getImm() == 0;
3488   }
3489 
3490   return true;
3491 }
3492 
3493 Optional<StringRef> AMDGPUAsmParser::validateMIMGDataSize(const MCInst &Inst) {
3494 
3495   const unsigned Opc = Inst.getOpcode();
3496   const MCInstrDesc &Desc = MII.get(Opc);
3497 
3498   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3499     return None;
3500 
3501   int VDataIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdata);
3502   int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
3503   int TFEIdx   = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::tfe);
3504 
3505   assert(VDataIdx != -1);
3506 
3507   if (DMaskIdx == -1 || TFEIdx == -1) // intersect_ray
3508     return None;
3509 
3510   unsigned VDataSize = AMDGPU::getRegOperandSize(getMRI(), Desc, VDataIdx);
3511   unsigned TFESize = (TFEIdx != -1 && Inst.getOperand(TFEIdx).getImm()) ? 1 : 0;
3512   unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
3513   if (DMask == 0)
3514     DMask = 1;
3515 
3516   bool isPackedD16 = false;
3517   unsigned DataSize =
3518     (Desc.TSFlags & SIInstrFlags::Gather4) ? 4 : countPopulation(DMask);
3519   if (hasPackedD16()) {
3520     int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
3521     isPackedD16 = D16Idx >= 0;
3522     if (isPackedD16 && Inst.getOperand(D16Idx).getImm())
3523       DataSize = (DataSize + 1) / 2;
3524   }
3525 
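  // One 32-bit VGPR of vdata is expected per enabled dmask channel
  // (half as many when packed d16 is used), plus one more when tfe is set.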
3526   if ((VDataSize / 4) == DataSize + TFESize)
3527     return None;
3528 
3529   return StringRef(isPackedD16
3530                        ? "image data size does not match dmask, d16 and tfe"
3531                        : "image data size does not match dmask and tfe");
3532 }
3533 
3534 bool AMDGPUAsmParser::validateMIMGAddrSize(const MCInst &Inst) {
3535   const unsigned Opc = Inst.getOpcode();
3536   const MCInstrDesc &Desc = MII.get(Opc);
3537 
3538   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0 || !isGFX10Plus())
3539     return true;
3540 
3541   const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc);
3542 
3543   const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
3544       AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode);
3545   int VAddr0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr0);
3546   int SrsrcIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::srsrc);
3547   int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
3548   int A16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::a16);
3549 
3550   assert(VAddr0Idx != -1);
3551   assert(SrsrcIdx != -1);
3552   assert(SrsrcIdx > VAddr0Idx);
3553 
3554   if (DimIdx == -1)
3555     return true; // intersect_ray
3556 
3557   unsigned Dim = Inst.getOperand(DimIdx).getImm();
3558   const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim);
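  // With the NSA (non-sequential address) encoding each address component
  // is a separate VGPR operand, so the operand distance between vaddr0 and
  // srsrc gives the address size directly.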
3559   bool IsNSA = SrsrcIdx - VAddr0Idx > 1;
3560   unsigned ActualAddrSize =
3561       IsNSA ? SrsrcIdx - VAddr0Idx
3562             : AMDGPU::getRegOperandSize(getMRI(), Desc, VAddr0Idx) / 4;
3563   bool IsA16 = (A16Idx != -1 && Inst.getOperand(A16Idx).getImm());
3564 
3565   unsigned ExpectedAddrSize =
3566       AMDGPU::getAddrSizeMIMGOp(BaseOpcode, DimInfo, IsA16, hasG16());
3567 
3568   if (!IsNSA) {
3569     if (ExpectedAddrSize > 8)
3570       ExpectedAddrSize = 16;
3571 
3572     // Allow oversized 8 VGPR vaddr when only 5/6/7 VGPRs are required.
3573     // This provides backward compatibility for assembly created
3574     // before 160b/192b/224b types were directly supported.
3575     if (ActualAddrSize == 8 && (ExpectedAddrSize >= 5 && ExpectedAddrSize <= 7))
3576       return true;
3577   }
3578 
3579   return ActualAddrSize == ExpectedAddrSize;
3580 }
3581 
3582 bool AMDGPUAsmParser::validateMIMGAtomicDMask(const MCInst &Inst) {
3583 
3584   const unsigned Opc = Inst.getOpcode();
3585   const MCInstrDesc &Desc = MII.get(Opc);
3586 
3587   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3588     return true;
3589   if (!Desc.mayLoad() || !Desc.mayStore())
3590     return true; // Not atomic
3591 
3592   int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
3593   unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
3594 
3595   // This is an incomplete check because image_atomic_cmpswap
3596   // may only use 0x3 and 0xf while other atomic operations
3597   // may use 0x1 and 0x3. However these limitations are
3598   // verified when we check that dmask matches dst size.
3599   return DMask == 0x1 || DMask == 0x3 || DMask == 0xf;
3600 }
3601 
3602 bool AMDGPUAsmParser::validateMIMGGatherDMask(const MCInst &Inst) {
3603 
3604   const unsigned Opc = Inst.getOpcode();
3605   const MCInstrDesc &Desc = MII.get(Opc);
3606 
3607   if ((Desc.TSFlags & SIInstrFlags::Gather4) == 0)
3608     return true;
3609 
3610   int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
3611   unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
3612 
3613   // GATHER4 instructions use dmask in a different fashion compared to
3614   // other MIMG instructions. The only useful DMASK values are
3615   // 1=red, 2=green, 4=blue, 8=alpha. (e.g. 1 returns
3616   // (red,red,red,red) etc.) The ISA document doesn't mention
3617   // this.
3618   return DMask == 0x1 || DMask == 0x2 || DMask == 0x4 || DMask == 0x8;
3619 }
3620 
3621 bool AMDGPUAsmParser::validateMIMGMSAA(const MCInst &Inst) {
3622   const unsigned Opc = Inst.getOpcode();
3623   const MCInstrDesc &Desc = MII.get(Opc);
3624 
3625   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3626     return true;
3627 
3628   const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc);
3629   const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
3630       AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode);
3631 
3632   if (!BaseOpcode->MSAA)
3633     return true;
3634 
3635   int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
3636   assert(DimIdx != -1);
3637 
3638   unsigned Dim = Inst.getOperand(DimIdx).getImm();
3639   const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim);
3640 
3641   return DimInfo->MSAA;
3642 }
3643 
3644 static bool IsMovrelsSDWAOpcode(const unsigned Opcode)
3645 {
3646   switch (Opcode) {
3647   case AMDGPU::V_MOVRELS_B32_sdwa_gfx10:
3648   case AMDGPU::V_MOVRELSD_B32_sdwa_gfx10:
3649   case AMDGPU::V_MOVRELSD_2_B32_sdwa_gfx10:
3650     return true;
3651   default:
3652     return false;
3653   }
3654 }
3655 
3656 // movrels* opcodes should only allow VGPRs as src0.
3657 // This is specified in .td description for vop1/vop3,
3658 // but sdwa is handled differently. See isSDWAOperand.
3659 bool AMDGPUAsmParser::validateMovrels(const MCInst &Inst,
3660                                       const OperandVector &Operands) {
3661 
3662   const unsigned Opc = Inst.getOpcode();
3663   const MCInstrDesc &Desc = MII.get(Opc);
3664 
3665   if ((Desc.TSFlags & SIInstrFlags::SDWA) == 0 || !IsMovrelsSDWAOpcode(Opc))
3666     return true;
3667 
3668   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
3669   assert(Src0Idx != -1);
3670 
3671   SMLoc ErrLoc;
3672   const MCOperand &Src0 = Inst.getOperand(Src0Idx);
3673   if (Src0.isReg()) {
3674     auto Reg = mc2PseudoReg(Src0.getReg());
3675     const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3676     if (!isSGPR(Reg, TRI))
3677       return true;
3678     ErrLoc = getRegLoc(Reg, Operands);
3679   } else {
3680     ErrLoc = getConstLoc(Operands);
3681   }
3682 
3683   Error(ErrLoc, "source operand must be a VGPR");
3684   return false;
3685 }
3686 
3687 bool AMDGPUAsmParser::validateMAIAccWrite(const MCInst &Inst,
3688                                           const OperandVector &Operands) {
3689 
3690   const unsigned Opc = Inst.getOpcode();
3691 
3692   if (Opc != AMDGPU::V_ACCVGPR_WRITE_B32_vi)
3693     return true;
3694 
3695   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
3696   assert(Src0Idx != -1);
3697 
3698   const MCOperand &Src0 = Inst.getOperand(Src0Idx);
3699   if (!Src0.isReg())
3700     return true;
3701 
3702   auto Reg = mc2PseudoReg(Src0.getReg());
3703   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3704   if (!isGFX90A() && isSGPR(Reg, TRI)) {
3705     Error(getRegLoc(Reg, Operands),
3706           "source operand must be either a VGPR or an inline constant");
3707     return false;
3708   }
3709 
3710   return true;
3711 }
3712 
3713 bool AMDGPUAsmParser::validateMFMA(const MCInst &Inst,
3714                                    const OperandVector &Operands) {
3715   const unsigned Opc = Inst.getOpcode();
3716   const MCInstrDesc &Desc = MII.get(Opc);
3717 
3718   if ((Desc.TSFlags & SIInstrFlags::IsMAI) == 0)
3719     return true;
3720 
3721   const int Src2Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2);
3722   if (Src2Idx == -1)
3723     return true;
3724 
3725   const MCOperand &Src2 = Inst.getOperand(Src2Idx);
3726   if (!Src2.isReg())
3727     return true;
3728 
3729   MCRegister Src2Reg = Src2.getReg();
3730   MCRegister DstReg = Inst.getOperand(0).getReg();
3731   if (Src2Reg == DstReg)
3732     return true;
3733 
3734   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3735   if (TRI->getRegClass(Desc.OpInfo[0].RegClass).getSizeInBits() <= 128)
3736     return true;
3737 
3738   if (TRI->regsOverlap(Src2Reg, DstReg)) {
3739     Error(getRegLoc(mc2PseudoReg(Src2Reg), Operands),
3740           "source 2 operand must not partially overlap with dst");
3741     return false;
3742   }
3743 
3744   return true;
3745 }
3746 
3747 bool AMDGPUAsmParser::validateDivScale(const MCInst &Inst) {
3748   switch (Inst.getOpcode()) {
3749   default:
3750     return true;
3751   case V_DIV_SCALE_F32_gfx6_gfx7:
3752   case V_DIV_SCALE_F32_vi:
3753   case V_DIV_SCALE_F32_gfx10:
3754   case V_DIV_SCALE_F64_gfx6_gfx7:
3755   case V_DIV_SCALE_F64_vi:
3756   case V_DIV_SCALE_F64_gfx10:
3757     break;
3758   }
3759 
3760   // TODO: Check that src0 = src1 or src2.
3761 
3762   for (auto Name : {AMDGPU::OpName::src0_modifiers,
3763                     AMDGPU::OpName::src1_modifiers,
3764                     AMDGPU::OpName::src2_modifiers}) {
3765     if (Inst.getOperand(AMDGPU::getNamedOperandIdx(Inst.getOpcode(), Name))
3766             .getImm() &
3767         SISrcMods::ABS) {
3768       return false;
3769     }
3770   }
3771 
3772   return true;
3773 }
3774 
3775 bool AMDGPUAsmParser::validateMIMGD16(const MCInst &Inst) {
3776 
3777   const unsigned Opc = Inst.getOpcode();
3778   const MCInstrDesc &Desc = MII.get(Opc);
3779 
3780   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3781     return true;
3782 
3783   int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
3784   if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm()) {
3785     if (isCI() || isSI())
3786       return false;
3787   }
3788 
3789   return true;
3790 }
3791 
3792 bool AMDGPUAsmParser::validateMIMGDim(const MCInst &Inst) {
3793   const unsigned Opc = Inst.getOpcode();
3794   const MCInstrDesc &Desc = MII.get(Opc);
3795 
3796   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3797     return true;
3798 
3799   int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
3800   if (DimIdx < 0)
3801     return true;
3802 
3803   long Imm = Inst.getOperand(DimIdx).getImm();
3804   if (Imm < 0 || Imm >= 8)
3805     return false;
3806 
3807   return true;
3808 }
3809 
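// Opcodes with a "rev" suffix read their sources in reversed order,
// e.g. v_subrev_f32 computes src1 - src0.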
3810 static bool IsRevOpcode(const unsigned Opcode)
3811 {
3812   switch (Opcode) {
3813   case AMDGPU::V_SUBREV_F32_e32:
3814   case AMDGPU::V_SUBREV_F32_e64:
3815   case AMDGPU::V_SUBREV_F32_e32_gfx10:
3816   case AMDGPU::V_SUBREV_F32_e32_gfx6_gfx7:
3817   case AMDGPU::V_SUBREV_F32_e32_vi:
3818   case AMDGPU::V_SUBREV_F32_e64_gfx10:
3819   case AMDGPU::V_SUBREV_F32_e64_gfx6_gfx7:
3820   case AMDGPU::V_SUBREV_F32_e64_vi:
3821 
3822   case AMDGPU::V_SUBREV_CO_U32_e32:
3823   case AMDGPU::V_SUBREV_CO_U32_e64:
3824   case AMDGPU::V_SUBREV_I32_e32_gfx6_gfx7:
3825   case AMDGPU::V_SUBREV_I32_e64_gfx6_gfx7:
3826 
3827   case AMDGPU::V_SUBBREV_U32_e32:
3828   case AMDGPU::V_SUBBREV_U32_e64:
3829   case AMDGPU::V_SUBBREV_U32_e32_gfx6_gfx7:
3830   case AMDGPU::V_SUBBREV_U32_e32_vi:
3831   case AMDGPU::V_SUBBREV_U32_e64_gfx6_gfx7:
3832   case AMDGPU::V_SUBBREV_U32_e64_vi:
3833 
3834   case AMDGPU::V_SUBREV_U32_e32:
3835   case AMDGPU::V_SUBREV_U32_e64:
3836   case AMDGPU::V_SUBREV_U32_e32_gfx9:
3837   case AMDGPU::V_SUBREV_U32_e32_vi:
3838   case AMDGPU::V_SUBREV_U32_e64_gfx9:
3839   case AMDGPU::V_SUBREV_U32_e64_vi:
3840 
3841   case AMDGPU::V_SUBREV_F16_e32:
3842   case AMDGPU::V_SUBREV_F16_e64:
3843   case AMDGPU::V_SUBREV_F16_e32_gfx10:
3844   case AMDGPU::V_SUBREV_F16_e32_vi:
3845   case AMDGPU::V_SUBREV_F16_e64_gfx10:
3846   case AMDGPU::V_SUBREV_F16_e64_vi:
3847 
3848   case AMDGPU::V_SUBREV_U16_e32:
3849   case AMDGPU::V_SUBREV_U16_e64:
3850   case AMDGPU::V_SUBREV_U16_e32_vi:
3851   case AMDGPU::V_SUBREV_U16_e64_vi:
3852 
3853   case AMDGPU::V_SUBREV_CO_U32_e32_gfx9:
3854   case AMDGPU::V_SUBREV_CO_U32_e64_gfx10:
3855   case AMDGPU::V_SUBREV_CO_U32_e64_gfx9:
3856 
3857   case AMDGPU::V_SUBBREV_CO_U32_e32_gfx9:
3858   case AMDGPU::V_SUBBREV_CO_U32_e64_gfx9:
3859 
3860   case AMDGPU::V_SUBREV_NC_U32_e32_gfx10:
3861   case AMDGPU::V_SUBREV_NC_U32_e64_gfx10:
3862 
3863   case AMDGPU::V_SUBREV_CO_CI_U32_e32_gfx10:
3864   case AMDGPU::V_SUBREV_CO_CI_U32_e64_gfx10:
3865 
3866   case AMDGPU::V_LSHRREV_B32_e32:
3867   case AMDGPU::V_LSHRREV_B32_e64:
3868   case AMDGPU::V_LSHRREV_B32_e32_gfx6_gfx7:
3869   case AMDGPU::V_LSHRREV_B32_e64_gfx6_gfx7:
3870   case AMDGPU::V_LSHRREV_B32_e32_vi:
3871   case AMDGPU::V_LSHRREV_B32_e64_vi:
3872   case AMDGPU::V_LSHRREV_B32_e32_gfx10:
3873   case AMDGPU::V_LSHRREV_B32_e64_gfx10:
3874 
3875   case AMDGPU::V_ASHRREV_I32_e32:
3876   case AMDGPU::V_ASHRREV_I32_e64:
3877   case AMDGPU::V_ASHRREV_I32_e32_gfx10:
3878   case AMDGPU::V_ASHRREV_I32_e32_gfx6_gfx7:
3879   case AMDGPU::V_ASHRREV_I32_e32_vi:
3880   case AMDGPU::V_ASHRREV_I32_e64_gfx10:
3881   case AMDGPU::V_ASHRREV_I32_e64_gfx6_gfx7:
3882   case AMDGPU::V_ASHRREV_I32_e64_vi:
3883 
3884   case AMDGPU::V_LSHLREV_B32_e32:
3885   case AMDGPU::V_LSHLREV_B32_e64:
3886   case AMDGPU::V_LSHLREV_B32_e32_gfx10:
3887   case AMDGPU::V_LSHLREV_B32_e32_gfx6_gfx7:
3888   case AMDGPU::V_LSHLREV_B32_e32_vi:
3889   case AMDGPU::V_LSHLREV_B32_e64_gfx10:
3890   case AMDGPU::V_LSHLREV_B32_e64_gfx6_gfx7:
3891   case AMDGPU::V_LSHLREV_B32_e64_vi:
3892 
3893   case AMDGPU::V_LSHLREV_B16_e32:
3894   case AMDGPU::V_LSHLREV_B16_e64:
3895   case AMDGPU::V_LSHLREV_B16_e32_vi:
3896   case AMDGPU::V_LSHLREV_B16_e64_vi:
3897   case AMDGPU::V_LSHLREV_B16_gfx10:
3898 
3899   case AMDGPU::V_LSHRREV_B16_e32:
3900   case AMDGPU::V_LSHRREV_B16_e64:
3901   case AMDGPU::V_LSHRREV_B16_e32_vi:
3902   case AMDGPU::V_LSHRREV_B16_e64_vi:
3903   case AMDGPU::V_LSHRREV_B16_gfx10:
3904 
3905   case AMDGPU::V_ASHRREV_I16_e32:
3906   case AMDGPU::V_ASHRREV_I16_e64:
3907   case AMDGPU::V_ASHRREV_I16_e32_vi:
3908   case AMDGPU::V_ASHRREV_I16_e64_vi:
3909   case AMDGPU::V_ASHRREV_I16_gfx10:
3910 
3911   case AMDGPU::V_LSHLREV_B64_e64:
3912   case AMDGPU::V_LSHLREV_B64_gfx10:
3913   case AMDGPU::V_LSHLREV_B64_vi:
3914 
3915   case AMDGPU::V_LSHRREV_B64_e64:
3916   case AMDGPU::V_LSHRREV_B64_gfx10:
3917   case AMDGPU::V_LSHRREV_B64_vi:
3918 
3919   case AMDGPU::V_ASHRREV_I64_e64:
3920   case AMDGPU::V_ASHRREV_I64_gfx10:
3921   case AMDGPU::V_ASHRREV_I64_vi:
3922 
3923   case AMDGPU::V_PK_LSHLREV_B16:
3924   case AMDGPU::V_PK_LSHLREV_B16_gfx10:
3925   case AMDGPU::V_PK_LSHLREV_B16_vi:
3926 
3927   case AMDGPU::V_PK_LSHRREV_B16:
3928   case AMDGPU::V_PK_LSHRREV_B16_gfx10:
3929   case AMDGPU::V_PK_LSHRREV_B16_vi:
3930   case AMDGPU::V_PK_ASHRREV_I16:
3931   case AMDGPU::V_PK_ASHRREV_I16_gfx10:
3932   case AMDGPU::V_PK_ASHRREV_I16_vi:
3933     return true;
3934   default:
3935     return false;
3936   }
3937 }
3938 
3939 Optional<StringRef> AMDGPUAsmParser::validateLdsDirect(const MCInst &Inst) {
3940 
3941   using namespace SIInstrFlags;
3942   const unsigned Opcode = Inst.getOpcode();
3943   const MCInstrDesc &Desc = MII.get(Opcode);
3944 
3945   // lds_direct register is defined so that it can be used
3946   // with 9-bit operands only. Ignore encodings which do not accept these.
3947   const auto Enc = VOP1 | VOP2 | VOP3 | VOPC | VOP3P | SIInstrFlags::SDWA;
3948   if ((Desc.TSFlags & Enc) == 0)
3949     return None;
3950 
3951   for (auto SrcName : {OpName::src0, OpName::src1, OpName::src2}) {
3952     auto SrcIdx = getNamedOperandIdx(Opcode, SrcName);
3953     if (SrcIdx == -1)
3954       break;
3955     const auto &Src = Inst.getOperand(SrcIdx);
3956     if (Src.isReg() && Src.getReg() == LDS_DIRECT) {
3957 
3958       if (isGFX90A())
3959         return StringRef("lds_direct is not supported on this GPU");
3960 
3961       if (IsRevOpcode(Opcode) || (Desc.TSFlags & SIInstrFlags::SDWA))
3962         return StringRef("lds_direct cannot be used with this instruction");
3963 
3964       if (SrcName != OpName::src0)
3965         return StringRef("lds_direct may be used as src0 only");
3966     }
3967   }
3968 
3969   return None;
3970 }
3971 
3972 SMLoc AMDGPUAsmParser::getFlatOffsetLoc(const OperandVector &Operands) const {
3973   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
3974     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
3975     if (Op.isFlatOffset())
3976       return Op.getStartLoc();
3977   }
3978   return getLoc();
3979 }
3980 
3981 bool AMDGPUAsmParser::validateFlatOffset(const MCInst &Inst,
3982                                          const OperandVector &Operands) {
3983   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
3984   if ((TSFlags & SIInstrFlags::FLAT) == 0)
3985     return true;
3986 
3987   auto Opcode = Inst.getOpcode();
3988   auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
3989   assert(OpNum != -1);
3990 
3991   const auto &Op = Inst.getOperand(OpNum);
3992   if (!hasFlatOffsets() && Op.getImm() != 0) {
3993     Error(getFlatOffsetLoc(Operands),
3994           "flat offset modifier is not supported on this GPU");
3995     return false;
3996   }
3997 
3998   // GLOBAL and SCRATCH accept a signed offset. For plain FLAT the offset
3999   // must be positive; the MSB is ignored and forced to zero.
4000   if (TSFlags & (SIInstrFlags::FlatGlobal | SIInstrFlags::FlatScratch)) {
4001     unsigned OffsetSize = AMDGPU::getNumFlatOffsetBits(getSTI(), true);
4002     if (!isIntN(OffsetSize, Op.getImm())) {
4003       Error(getFlatOffsetLoc(Operands),
4004             Twine("expected a ") + Twine(OffsetSize) + "-bit signed offset");
4005       return false;
4006     }
4007   } else {
4008     unsigned OffsetSize = AMDGPU::getNumFlatOffsetBits(getSTI(), false);
4009     if (!isUIntN(OffsetSize, Op.getImm())) {
4010       Error(getFlatOffsetLoc(Operands),
4011             Twine("expected a ") + Twine(OffsetSize) + "-bit unsigned offset");
4012       return false;
4013     }
4014   }
4015 
4016   return true;
4017 }
4018 
4019 SMLoc AMDGPUAsmParser::getSMEMOffsetLoc(const OperandVector &Operands) const {
4020   // Start with second operand because SMEM Offset cannot be dst or src0.
4021   for (unsigned i = 2, e = Operands.size(); i != e; ++i) {
4022     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
4023     if (Op.isSMEMOffset())
4024       return Op.getStartLoc();
4025   }
4026   return getLoc();
4027 }
4028 
4029 bool AMDGPUAsmParser::validateSMEMOffset(const MCInst &Inst,
4030                                          const OperandVector &Operands) {
4031   if (isCI() || isSI())
4032     return true;
4033 
4034   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4035   if ((TSFlags & SIInstrFlags::SMRD) == 0)
4036     return true;
4037 
4038   auto Opcode = Inst.getOpcode();
4039   auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
4040   if (OpNum == -1)
4041     return true;
4042 
4043   const auto &Op = Inst.getOperand(OpNum);
4044   if (!Op.isImm())
4045     return true;
4046 
4047   uint64_t Offset = Op.getImm();
4048   bool IsBuffer = AMDGPU::getSMEMIsBuffer(Opcode);
4049   if (AMDGPU::isLegalSMRDEncodedUnsignedOffset(getSTI(), Offset) ||
4050       AMDGPU::isLegalSMRDEncodedSignedOffset(getSTI(), Offset, IsBuffer))
4051     return true;
4052 
4053   Error(getSMEMOffsetLoc(Operands),
4054         (isVI() || IsBuffer) ? "expected a 20-bit unsigned offset" :
4055                                "expected a 21-bit signed offset");
4056 
4057   return false;
4058 }
4059 
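// SOP2/SOPC instructions may use at most one literal or expression
// across their source operands.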
4060 bool AMDGPUAsmParser::validateSOPLiteral(const MCInst &Inst) const {
4061   unsigned Opcode = Inst.getOpcode();
4062   const MCInstrDesc &Desc = MII.get(Opcode);
4063   if (!(Desc.TSFlags & (SIInstrFlags::SOP2 | SIInstrFlags::SOPC)))
4064     return true;
4065 
4066   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
4067   const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
4068 
4069   const int OpIndices[] = { Src0Idx, Src1Idx };
4070 
4071   unsigned NumExprs = 0;
4072   unsigned NumLiterals = 0;
4073   uint32_t LiteralValue;
4074 
4075   for (int OpIdx : OpIndices) {
4076     if (OpIdx == -1) break;
4077 
4078     const MCOperand &MO = Inst.getOperand(OpIdx);
4079     // Exclude special imm operands (like the one used by s_set_gpr_idx_on)
4080     if (AMDGPU::isSISrcOperand(Desc, OpIdx)) {
4081       if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) {
4082         uint32_t Value = static_cast<uint32_t>(MO.getImm());
4083         if (NumLiterals == 0 || LiteralValue != Value) {
4084           LiteralValue = Value;
4085           ++NumLiterals;
4086         }
4087       } else if (MO.isExpr()) {
4088         ++NumExprs;
4089       }
4090     }
4091   }
4092 
4093   return NumLiterals + NumExprs <= 1;
4094 }
4095 
4096 bool AMDGPUAsmParser::validateOpSel(const MCInst &Inst) {
4097   const unsigned Opc = Inst.getOpcode();
4098   if (Opc == AMDGPU::V_PERMLANE16_B32_gfx10 ||
4099       Opc == AMDGPU::V_PERMLANEX16_B32_gfx10) {
4100     int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
4101     unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
4102 
4103     if (OpSel & ~3)
4104       return false;
4105   }
4106 
4107   if (isGFX940() && (MII.get(Opc).TSFlags & SIInstrFlags::IsDOT)) {
4108     int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
4109     if (OpSelIdx != -1) {
4110       if (Inst.getOperand(OpSelIdx).getImm() != 0)
4111         return false;
4112     }
4113     int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi);
4114     if (OpSelHiIdx != -1) {
4115       if (Inst.getOperand(OpSelHiIdx).getImm() != -1)
4116         return false;
4117     }
4118   }
4119 
4120   return true;
4121 }
4122 
4123 bool AMDGPUAsmParser::validateDPP(const MCInst &Inst,
4124                                   const OperandVector &Operands) {
4125   const unsigned Opc = Inst.getOpcode();
4126   int DppCtrlIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dpp_ctrl);
4127   if (DppCtrlIdx < 0)
4128     return true;
4129   unsigned DppCtrl = Inst.getOperand(DppCtrlIdx).getImm();
4130 
4131   if (!AMDGPU::isLegal64BitDPPControl(DppCtrl)) {
4132     // DPP64 is supported for row_newbcast only.
4133     int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
4134     if (Src0Idx >= 0 &&
4135         getMRI()->getSubReg(Inst.getOperand(Src0Idx).getReg(), AMDGPU::sub1)) {
4136       SMLoc S = getImmLoc(AMDGPUOperand::ImmTyDppCtrl, Operands);
4137       Error(S, "64 bit dpp only supports row_newbcast");
4138       return false;
4139     }
4140   }
4141 
4142   return true;
4143 }
4144 
4145 // Check if VCC register matches wavefront size
4146 bool AMDGPUAsmParser::validateVccOperand(unsigned Reg) const {
4147   auto FB = getFeatureBits();
4148   return (FB[AMDGPU::FeatureWavefrontSize64] && Reg == AMDGPU::VCC) ||
4149     (FB[AMDGPU::FeatureWavefrontSize32] && Reg == AMDGPU::VCC_LO);
4150 }
4151 
4152 // Only one unique literal may be used. A VOP3 literal is only allowed on GFX10+.
4153 bool AMDGPUAsmParser::validateVOPLiteral(const MCInst &Inst,
4154                                          const OperandVector &Operands) {
4155   unsigned Opcode = Inst.getOpcode();
4156   const MCInstrDesc &Desc = MII.get(Opcode);
4157   const int ImmIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm);
4158   if (!(Desc.TSFlags & (SIInstrFlags::VOP3 | SIInstrFlags::VOP3P)) &&
4159       ImmIdx == -1)
4160     return true;
4161 
4162   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
4163   const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
4164   const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
4165 
4166   const int OpIndices[] = {Src0Idx, Src1Idx, Src2Idx, ImmIdx};
4167 
4168   unsigned NumExprs = 0;
4169   unsigned NumLiterals = 0;
4170   uint32_t LiteralValue;
4171 
4172   for (int OpIdx : OpIndices) {
4173     if (OpIdx == -1)
4174       continue;
4175 
4176     const MCOperand &MO = Inst.getOperand(OpIdx);
4177     if (!MO.isImm() && !MO.isExpr())
4178       continue;
4179     if (!AMDGPU::isSISrcOperand(Desc, OpIdx))
4180       continue;
4181 
4182     if (OpIdx == Src2Idx && (Desc.TSFlags & SIInstrFlags::IsMAI) &&
4183         getFeatureBits()[AMDGPU::FeatureMFMAInlineLiteralBug]) {
4184       Error(getConstLoc(Operands),
4185             "inline constants are not allowed for this operand");
4186       return false;
4187     }
4188 
4189     if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) {
4190       uint32_t Value = static_cast<uint32_t>(MO.getImm());
4191       if (NumLiterals == 0 || LiteralValue != Value) {
4192         LiteralValue = Value;
4193         ++NumLiterals;
4194       }
4195     } else if (MO.isExpr()) {
4196       ++NumExprs;
4197     }
4198   }
4199   NumLiterals += NumExprs;
4200 
4201   if (!NumLiterals)
4202     return true;
4203 
4204   if (ImmIdx == -1 && !getFeatureBits()[AMDGPU::FeatureVOP3Literal]) {
4205     Error(getLitLoc(Operands), "literal operands are not supported");
4206     return false;
4207   }
4208 
4209   if (NumLiterals > 1) {
4210     Error(getLitLoc(Operands), "only one literal operand is allowed");
4211     return false;
4212   }
4213 
4214   return true;
4215 }
4216 
4217 // Returns -1 if not a register, 0 if VGPR and 1 if AGPR.
4218 static int IsAGPROperand(const MCInst &Inst, uint16_t NameIdx,
4219                          const MCRegisterInfo *MRI) {
4220   int OpIdx = AMDGPU::getNamedOperandIdx(Inst.getOpcode(), NameIdx);
4221   if (OpIdx < 0)
4222     return -1;
4223 
4224   const MCOperand &Op = Inst.getOperand(OpIdx);
4225   if (!Op.isReg())
4226     return -1;
4227 
4228   unsigned Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0);
4229   auto Reg = Sub ? Sub : Op.getReg();
4230   const MCRegisterClass &AGPR32 = MRI->getRegClass(AMDGPU::AGPR_32RegClassID);
4231   return AGPR32.contains(Reg) ? 1 : 0;
4232 }
4233 
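// For FLAT, MUBUF, MTBUF, MIMG and DS memory instructions, the data and
// destination operands must be either all VGPRs or all AGPRs; AGPR forms
// are only accepted on targets with gfx90a instructions.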
4234 bool AMDGPUAsmParser::validateAGPRLdSt(const MCInst &Inst) const {
4235   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4236   if ((TSFlags & (SIInstrFlags::FLAT | SIInstrFlags::MUBUF |
4237                   SIInstrFlags::MTBUF | SIInstrFlags::MIMG |
4238                   SIInstrFlags::DS)) == 0)
4239     return true;
4240 
4241   uint16_t DataNameIdx = (TSFlags & SIInstrFlags::DS) ? AMDGPU::OpName::data0
4242                                                       : AMDGPU::OpName::vdata;
4243 
4244   const MCRegisterInfo *MRI = getMRI();
4245   int DstAreg = IsAGPROperand(Inst, AMDGPU::OpName::vdst, MRI);
4246   int DataAreg = IsAGPROperand(Inst, DataNameIdx, MRI);
4247 
4248   if ((TSFlags & SIInstrFlags::DS) && DataAreg >= 0) {
4249     int Data2Areg = IsAGPROperand(Inst, AMDGPU::OpName::data1, MRI);
4250     if (Data2Areg >= 0 && Data2Areg != DataAreg)
4251       return false;
4252   }
4253 
4254   auto FB = getFeatureBits();
4255   if (FB[AMDGPU::FeatureGFX90AInsts]) {
4256     if (DataAreg < 0 || DstAreg < 0)
4257       return true;
4258     return DstAreg == DataAreg;
4259   }
4260 
4261   return DstAreg < 1 && DataAreg < 1;
4262 }
4263 
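// On targets with gfx90a instructions, VGPR and AGPR tuples must start at
// an even-numbered register, i.e. they must be 64-bit aligned.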
4264 bool AMDGPUAsmParser::validateVGPRAlign(const MCInst &Inst) const {
4265   auto FB = getFeatureBits();
4266   if (!FB[AMDGPU::FeatureGFX90AInsts])
4267     return true;
4268 
4269   const MCRegisterInfo *MRI = getMRI();
4270   const MCRegisterClass &VGPR32 = MRI->getRegClass(AMDGPU::VGPR_32RegClassID);
4271   const MCRegisterClass &AGPR32 = MRI->getRegClass(AMDGPU::AGPR_32RegClassID);
4272   for (unsigned I = 0, E = Inst.getNumOperands(); I != E; ++I) {
4273     const MCOperand &Op = Inst.getOperand(I);
4274     if (!Op.isReg())
4275       continue;
4276 
4277     unsigned Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0);
4278     if (!Sub)
4279       continue;
4280 
4281     if (VGPR32.contains(Sub) && ((Sub - AMDGPU::VGPR0) & 1))
4282       return false;
4283     if (AGPR32.contains(Sub) && ((Sub - AMDGPU::AGPR0) & 1))
4284       return false;
4285   }
4286 
4287   return true;
4288 }
4289 
4290 SMLoc AMDGPUAsmParser::getBLGPLoc(const OperandVector &Operands) const {
4291   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
4292     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
4293     if (Op.isBLGP())
4294       return Op.getStartLoc();
4295   }
4296   return SMLoc();
4297 }
4298 
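// On gfx940 the F64 MFMA opcodes accept a 'neg' modifier in place of
// 'blgp'; reject whichever spelling the opcode does not support.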
4299 bool AMDGPUAsmParser::validateBLGP(const MCInst &Inst,
4300                                    const OperandVector &Operands) {
4301   unsigned Opc = Inst.getOpcode();
4302   int BlgpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::blgp);
4303   if (BlgpIdx == -1)
4304     return true;
4305   SMLoc BLGPLoc = getBLGPLoc(Operands);
4306   if (!BLGPLoc.isValid())
4307     return true;
4308   bool IsNeg = StringRef(BLGPLoc.getPointer()).startswith("neg:");
4309   auto FB = getFeatureBits();
4310   bool UsesNeg = false;
4311   if (FB[AMDGPU::FeatureGFX940Insts]) {
4312     switch (Opc) {
4313     case AMDGPU::V_MFMA_F64_16X16X4F64_gfx940_acd:
4314     case AMDGPU::V_MFMA_F64_16X16X4F64_gfx940_vcd:
4315     case AMDGPU::V_MFMA_F64_4X4X4F64_gfx940_acd:
4316     case AMDGPU::V_MFMA_F64_4X4X4F64_gfx940_vcd:
4317       UsesNeg = true;
4318     }
4319   }
4320 
4321   if (IsNeg == UsesNeg)
4322     return true;
4323 
4324   Error(BLGPLoc,
4325         UsesNeg ? "invalid modifier: blgp is not supported"
4326                 : "invalid modifier: neg is not supported");
4327 
4328   return false;
4329 }
4330 
4331 // gfx90a has an undocumented limitation:
4332 // DS_GWS opcodes must use even aligned registers.
4333 bool AMDGPUAsmParser::validateGWS(const MCInst &Inst,
4334                                   const OperandVector &Operands) {
4335   if (!getFeatureBits()[AMDGPU::FeatureGFX90AInsts])
4336     return true;
4337 
4338   int Opc = Inst.getOpcode();
4339   if (Opc != AMDGPU::DS_GWS_INIT_vi && Opc != AMDGPU::DS_GWS_BARRIER_vi &&
4340       Opc != AMDGPU::DS_GWS_SEMA_BR_vi)
4341     return true;
4342 
4343   const MCRegisterInfo *MRI = getMRI();
4344   const MCRegisterClass &VGPR32 = MRI->getRegClass(AMDGPU::VGPR_32RegClassID);
4345   int Data0Pos =
4346       AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::data0);
4347   assert(Data0Pos != -1);
4348   auto Reg = Inst.getOperand(Data0Pos).getReg();
4349   auto RegIdx = Reg - (VGPR32.contains(Reg) ? AMDGPU::VGPR0 : AMDGPU::AGPR0);
4350   if (RegIdx & 1) {
4351     SMLoc RegLoc = getRegLoc(Reg, Operands);
4352     Error(RegLoc, "vgpr must be even aligned");
4353     return false;
4354   }
4355 
4356   return true;
4357 }
4358 
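// Validate the cache policy (cpol) operand against the target and the
// instruction kind, e.g. SMRD accepts only glc/dlc and returning atomics
// must set glc (sc0 on gfx940).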
4359 bool AMDGPUAsmParser::validateCoherencyBits(const MCInst &Inst,
4360                                             const OperandVector &Operands,
4361                                             const SMLoc &IDLoc) {
4362   int CPolPos = AMDGPU::getNamedOperandIdx(Inst.getOpcode(),
4363                                            AMDGPU::OpName::cpol);
4364   if (CPolPos == -1)
4365     return true;
4366 
4367   unsigned CPol = Inst.getOperand(CPolPos).getImm();
4368 
4369   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4370   if ((TSFlags & (SIInstrFlags::SMRD)) &&
4371       (CPol & ~(AMDGPU::CPol::GLC | AMDGPU::CPol::DLC))) {
4372     Error(IDLoc, "invalid cache policy for SMRD instruction");
4373     return false;
4374   }
4375 
4376   if (isGFX90A() && !isGFX940() && (CPol & CPol::SCC)) {
4377     SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
4378     StringRef CStr(S.getPointer());
4379     S = SMLoc::getFromPointer(&CStr.data()[CStr.find("scc")]);
4380     Error(S, "scc is not supported on this GPU");
4381     return false;
4382   }
4383 
4384   if (!(TSFlags & (SIInstrFlags::IsAtomicNoRet | SIInstrFlags::IsAtomicRet)))
4385     return true;
4386 
4387   if (TSFlags & SIInstrFlags::IsAtomicRet) {
4388     if (!(TSFlags & SIInstrFlags::MIMG) && !(CPol & CPol::GLC)) {
4389       Error(IDLoc, isGFX940() ? "instruction must use sc0"
4390                               : "instruction must use glc");
4391       return false;
4392     }
4393   } else {
4394     if (CPol & CPol::GLC) {
4395       SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
4396       StringRef CStr(S.getPointer());
4397       S = SMLoc::getFromPointer(
4398           &CStr.data()[CStr.find(isGFX940() ? "sc0" : "glc")]);
4399       Error(S, isGFX940() ? "instruction must not use sc0"
4400                           : "instruction must not use glc");
4401       return false;
4402     }
4403   }
4404 
4405   return true;
4406 }
4407 
4408 bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst,
4409                                           const SMLoc &IDLoc,
4410                                           const OperandVector &Operands) {
4411   if (auto ErrMsg = validateLdsDirect(Inst)) {
4412     Error(getRegLoc(LDS_DIRECT, Operands), *ErrMsg);
4413     return false;
4414   }
4415   if (!validateSOPLiteral(Inst)) {
4416     Error(getLitLoc(Operands),
4417       "only one literal operand is allowed");
4418     return false;
4419   }
4420   if (!validateVOPLiteral(Inst, Operands)) {
4421     return false;
4422   }
4423   if (!validateConstantBusLimitations(Inst, Operands)) {
4424     return false;
4425   }
4426   if (!validateEarlyClobberLimitations(Inst, Operands)) {
4427     return false;
4428   }
4429   if (!validateIntClampSupported(Inst)) {
4430     Error(getImmLoc(AMDGPUOperand::ImmTyClampSI, Operands),
4431       "integer clamping is not supported on this GPU");
4432     return false;
4433   }
4434   if (!validateOpSel(Inst)) {
4435     Error(getImmLoc(AMDGPUOperand::ImmTyOpSel, Operands),
4436       "invalid op_sel operand");
4437     return false;
4438   }
4439   if (!validateDPP(Inst, Operands)) {
4440     return false;
4441   }
4442   // For MUBUF/MTBUF d16 is a part of opcode, so there is nothing to validate.
4443   if (!validateMIMGD16(Inst)) {
4444     Error(getImmLoc(AMDGPUOperand::ImmTyD16, Operands),
4445       "d16 modifier is not supported on this GPU");
4446     return false;
4447   }
4448   if (!validateMIMGDim(Inst)) {
4449     Error(IDLoc, "dim modifier is required on this GPU");
4450     return false;
4451   }
4452   if (!validateMIMGMSAA(Inst)) {
4453     Error(getImmLoc(AMDGPUOperand::ImmTyDim, Operands),
4454           "invalid dim; must be MSAA type");
4455     return false;
4456   }
4457   if (auto ErrMsg = validateMIMGDataSize(Inst)) {
4458     Error(IDLoc, *ErrMsg);
4459     return false;
4460   }
4461   if (!validateMIMGAddrSize(Inst)) {
4462     Error(IDLoc,
4463       "image address size does not match dim and a16");
4464     return false;
4465   }
4466   if (!validateMIMGAtomicDMask(Inst)) {
4467     Error(getImmLoc(AMDGPUOperand::ImmTyDMask, Operands),
4468       "invalid atomic image dmask");
4469     return false;
4470   }
4471   if (!validateMIMGGatherDMask(Inst)) {
4472     Error(getImmLoc(AMDGPUOperand::ImmTyDMask, Operands),
4473       "invalid image_gather dmask: only one bit must be set");
4474     return false;
4475   }
4476   if (!validateMovrels(Inst, Operands)) {
4477     return false;
4478   }
4479   if (!validateFlatOffset(Inst, Operands)) {
4480     return false;
4481   }
4482   if (!validateSMEMOffset(Inst, Operands)) {
4483     return false;
4484   }
4485   if (!validateMAIAccWrite(Inst, Operands)) {
4486     return false;
4487   }
4488   if (!validateMFMA(Inst, Operands)) {
4489     return false;
4490   }
4491   if (!validateCoherencyBits(Inst, Operands, IDLoc)) {
4492     return false;
4493   }
4494 
4495   if (!validateAGPRLdSt(Inst)) {
4496     Error(IDLoc, getFeatureBits()[AMDGPU::FeatureGFX90AInsts]
4497     ? "invalid register class: data and dst should be all VGPR or AGPR"
4498     : "invalid register class: agpr loads and stores not supported on this GPU"
4499     );
4500     return false;
4501   }
4502   if (!validateVGPRAlign(Inst)) {
4503     Error(IDLoc,
4504       "invalid register class: vgpr tuples must be 64 bit aligned");
4505     return false;
4506   }
4507   if (!validateGWS(Inst, Operands)) {
4508     return false;
4509   }
4510 
4511   if (!validateBLGP(Inst, Operands)) {
4512     return false;
4513   }
4514 
4515   if (!validateDivScale(Inst)) {
4516     Error(IDLoc, "ABS not allowed in VOP3B instructions");
4517     return false;
4518   }
4522 
4523   return true;
4524 }
4525 
4526 static std::string AMDGPUMnemonicSpellCheck(StringRef S,
4527                                             const FeatureBitset &FBS,
4528                                             unsigned VariantID = 0);
4529 
4530 static bool AMDGPUCheckMnemonic(StringRef Mnemonic,
4531                                 const FeatureBitset &AvailableFeatures,
4532                                 unsigned VariantID);
4533 
4534 bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo,
4535                                        const FeatureBitset &FBS) {
4536   return isSupportedMnemo(Mnemo, FBS, getAllVariants());
4537 }
4538 
4539 bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo,
4540                                        const FeatureBitset &FBS,
4541                                        ArrayRef<unsigned> Variants) {
4542   for (auto Variant : Variants) {
4543     if (AMDGPUCheckMnemonic(Mnemo, FBS, Variant))
4544       return true;
4545   }
4546 
4547   return false;
4548 }
4549 
4550 bool AMDGPUAsmParser::checkUnsupportedInstruction(StringRef Mnemo,
4551                                                   const SMLoc &IDLoc) {
4552   FeatureBitset FBS = ComputeAvailableFeatures(getSTI().getFeatureBits());
4553 
4554   // Check if requested instruction variant is supported.
4555   if (isSupportedMnemo(Mnemo, FBS, getMatchedVariants()))
4556     return false;
4557 
4558   // This instruction is not supported.
4559   // Clear any other pending errors because they are no longer relevant.
4560   getParser().clearPendingErrors();
4561 
4562   // Requested instruction variant is not supported.
4563   // Check if any other variants are supported.
4564   StringRef VariantName = getMatchedVariantName();
4565   if (!VariantName.empty() && isSupportedMnemo(Mnemo, FBS)) {
4566     return Error(IDLoc,
4567                  Twine(VariantName,
4568                        " variant of this instruction is not supported"));
4569   }
4570 
4571   // Finally check if this instruction is supported on any other GPU.
4572   if (isSupportedMnemo(Mnemo, FeatureBitset().set())) {
4573     return Error(IDLoc, "instruction not supported on this GPU");
4574   }
4575 
4576   // Instruction not supported on any GPU. Probably a typo.
4577   std::string Suggestion = AMDGPUMnemonicSpellCheck(Mnemo, FBS);
4578   return Error(IDLoc, "invalid instruction" + Suggestion);
4579 }
4580 
4581 bool AMDGPUAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
4582                                               OperandVector &Operands,
4583                                               MCStreamer &Out,
4584                                               uint64_t &ErrorInfo,
4585                                               bool MatchingInlineAsm) {
4586   MCInst Inst;
4587   unsigned Result = Match_Success;
4588   for (auto Variant : getMatchedVariants()) {
4589     uint64_t EI;
4590     auto R = MatchInstructionImpl(Operands, Inst, EI, MatchingInlineAsm,
4591                                   Variant);
4592     // Match statuses are ordered from least to most specific; keep the most
4593     // specific status seen so far as the result:
4594     // Match_MnemonicFail < Match_InvalidOperand < Match_MissingFeature < Match_PreferE32
4595     if ((R == Match_Success) ||
4596         (R == Match_PreferE32) ||
4597         (R == Match_MissingFeature && Result != Match_PreferE32) ||
4598         (R == Match_InvalidOperand && Result != Match_MissingFeature
4599                                    && Result != Match_PreferE32) ||
4600         (R == Match_MnemonicFail   && Result != Match_InvalidOperand
4601                                    && Result != Match_MissingFeature
4602                                    && Result != Match_PreferE32)) {
4603       Result = R;
4604       ErrorInfo = EI;
4605     }
4606     if (R == Match_Success)
4607       break;
4608   }
4609 
4610   if (Result == Match_Success) {
4611     if (!validateInstruction(Inst, IDLoc, Operands)) {
4612       return true;
4613     }
4614     Inst.setLoc(IDLoc);
4615     Out.emitInstruction(Inst, getSTI());
4616     return false;
4617   }
4618 
4619   StringRef Mnemo = ((AMDGPUOperand &)*Operands[0]).getToken();
4620   if (checkUnsupportedInstruction(Mnemo, IDLoc)) {
4621     return true;
4622   }
4623 
4624   switch (Result) {
4625   default: break;
4626   case Match_MissingFeature:
4627     // It has been verified that the specified instruction
4628     // mnemonic is valid. A match was found but it requires
4629     // features which are not supported on this GPU.
4630     return Error(IDLoc, "operands are not valid for this GPU or mode");
4631 
4632   case Match_InvalidOperand: {
4633     SMLoc ErrorLoc = IDLoc;
4634     if (ErrorInfo != ~0ULL) {
4635       if (ErrorInfo >= Operands.size()) {
4636         return Error(IDLoc, "too few operands for instruction");
4637       }
4638       ErrorLoc = ((AMDGPUOperand &)*Operands[ErrorInfo]).getStartLoc();
4639       if (ErrorLoc == SMLoc())
4640         ErrorLoc = IDLoc;
4641     }
4642     return Error(ErrorLoc, "invalid operand for instruction");
4643   }
4644 
4645   case Match_PreferE32:
4646     return Error(IDLoc, "internal error: instruction without _e64 suffix "
4647                         "should be encoded as e32");
4648   case Match_MnemonicFail:
4649     llvm_unreachable("Invalid instructions should have been handled already");
4650   }
4651   llvm_unreachable("Implement any new match types added!");
4652 }
4653 
4654 bool AMDGPUAsmParser::ParseAsAbsoluteExpression(uint32_t &Ret) {
4655   int64_t Tmp = -1;
4656   if (!isToken(AsmToken::Integer) && !isToken(AsmToken::Identifier)) {
4657     return true;
4658   }
4659   if (getParser().parseAbsoluteExpression(Tmp)) {
4660     return true;
4661   }
4662   Ret = static_cast<uint32_t>(Tmp);
4663   return false;
4664 }
4665 
4666 bool AMDGPUAsmParser::ParseDirectiveMajorMinor(uint32_t &Major,
4667                                                uint32_t &Minor) {
4668   if (ParseAsAbsoluteExpression(Major))
4669     return TokError("invalid major version");
4670 
4671   if (!trySkipToken(AsmToken::Comma))
4672     return TokError("minor version number required, comma expected");
4673 
4674   if (ParseAsAbsoluteExpression(Minor))
4675     return TokError("invalid minor version");
4676 
4677   return false;
4678 }
4679 
4680 bool AMDGPUAsmParser::ParseDirectiveAMDGCNTarget() {
4681   if (getSTI().getTargetTriple().getArch() != Triple::amdgcn)
4682     return TokError("directive only supported for amdgcn architecture");
4683 
4684   std::string TargetIDDirective;
4685   SMLoc TargetStart = getTok().getLoc();
4686   if (getParser().parseEscapedString(TargetIDDirective))
4687     return true;
4688 
4689   SMRange TargetRange = SMRange(TargetStart, getTok().getLoc());
4690   if (getTargetStreamer().getTargetID()->toString() != TargetIDDirective)
4691     return getParser().Error(TargetRange.Start,
4692         (Twine(".amdgcn_target directive's target id ") +
4693          Twine(TargetIDDirective) +
4694          Twine(" does not match the specified target id ") +
4695          Twine(getTargetStreamer().getTargetID()->toString())).str());
4696 
4697   return false;
4698 }
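// Illustrative only: the escaped string parsed above is expected to repeat
// the target id implied by the selected subtarget, e.g. something like
//   .amdgcn_target "amdgcn-amd-amdhsa--gfx900"
// (the string must equal getTargetID()->toString(), otherwise the mismatch
// error above is emitted).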
4699 
4700 bool AMDGPUAsmParser::OutOfRangeError(SMRange Range) {
4701   return Error(Range.Start, "value out of range", Range);
4702 }
4703 
4704 bool AMDGPUAsmParser::calculateGPRBlocks(
4705     const FeatureBitset &Features, bool VCCUsed, bool FlatScrUsed,
4706     bool XNACKUsed, Optional<bool> EnableWavefrontSize32, unsigned NextFreeVGPR,
4707     SMRange VGPRRange, unsigned NextFreeSGPR, SMRange SGPRRange,
4708     unsigned &VGPRBlocks, unsigned &SGPRBlocks) {
4709   // TODO(scott.linder): These calculations are duplicated from
4710   // AMDGPUAsmPrinter::getSIProgramInfo and could be unified.
4711   IsaVersion Version = getIsaVersion(getSTI().getCPU());
4712 
4713   unsigned NumVGPRs = NextFreeVGPR;
4714   unsigned NumSGPRs = NextFreeSGPR;
4715 
4716   if (Version.Major >= 10)
4717     NumSGPRs = 0;
4718   else {
4719     unsigned MaxAddressableNumSGPRs =
4720         IsaInfo::getAddressableNumSGPRs(&getSTI());
4721 
4722     if (Version.Major >= 8 && !Features.test(FeatureSGPRInitBug) &&
4723         NumSGPRs > MaxAddressableNumSGPRs)
4724       return OutOfRangeError(SGPRRange);
4725 
4726     NumSGPRs +=
4727         IsaInfo::getNumExtraSGPRs(&getSTI(), VCCUsed, FlatScrUsed, XNACKUsed);
4728 
4729     if ((Version.Major <= 7 || Features.test(FeatureSGPRInitBug)) &&
4730         NumSGPRs > MaxAddressableNumSGPRs)
4731       return OutOfRangeError(SGPRRange);
4732 
4733     if (Features.test(FeatureSGPRInitBug))
4734       NumSGPRs = IsaInfo::FIXED_NUM_SGPRS_FOR_INIT_BUG;
4735   }
4736 
4737   VGPRBlocks =
4738       IsaInfo::getNumVGPRBlocks(&getSTI(), NumVGPRs, EnableWavefrontSize32);
4739   SGPRBlocks = IsaInfo::getNumSGPRBlocks(&getSTI(), NumSGPRs);
4740 
4741   return false;
4742 }
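// Rough worked example (assuming a VGPR allocation granule of 4, which is
// not guaranteed on every subtarget): NextFreeVGPR = 10 would be rounded up
// to 12 registers and encoded as 12 / 4 - 1 = 2 granulated VGPR blocks by
// IsaInfo::getNumVGPRBlocks; SGPR blocks are derived the same way from the
// adjusted SGPR count.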
4743 
4744 bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
4745   if (getSTI().getTargetTriple().getArch() != Triple::amdgcn)
4746     return TokError("directive only supported for amdgcn architecture");
4747 
4748   if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA)
4749     return TokError("directive only supported for amdhsa OS");
4750 
4751   StringRef KernelName;
4752   if (getParser().parseIdentifier(KernelName))
4753     return true;
4754 
4755   kernel_descriptor_t KD = getDefaultAmdhsaKernelDescriptor(&getSTI());
4756 
4757   StringSet<> Seen;
4758 
4759   IsaVersion IVersion = getIsaVersion(getSTI().getCPU());
4760 
4761   SMRange VGPRRange;
4762   uint64_t NextFreeVGPR = 0;
4763   uint64_t AccumOffset = 0;
4764   uint64_t SharedVGPRCount = 0;
4765   SMRange SGPRRange;
4766   uint64_t NextFreeSGPR = 0;
4767 
4768   // Count the number of user SGPRs implied by the enabled feature bits.
4769   unsigned ImpliedUserSGPRCount = 0;
4770 
4771   // Track if the asm explicitly contains the directive for the user SGPR
4772   // count.
4773   Optional<unsigned> ExplicitUserSGPRCount;
4774   bool ReserveVCC = true;
4775   bool ReserveFlatScr = true;
4776   Optional<bool> EnableWavefrontSize32;
4777 
4778   while (true) {
4779     while (trySkipToken(AsmToken::EndOfStatement));
4780 
4781     StringRef ID;
4782     SMRange IDRange = getTok().getLocRange();
4783     if (!parseId(ID, "expected .amdhsa_ directive or .end_amdhsa_kernel"))
4784       return true;
4785 
4786     if (ID == ".end_amdhsa_kernel")
4787       break;
4788 
4789     if (Seen.find(ID) != Seen.end())
4790       return TokError(".amdhsa_ directives cannot be repeated");
4791     Seen.insert(ID);
4792 
4793     SMLoc ValStart = getLoc();
4794     int64_t IVal;
4795     if (getParser().parseAbsoluteExpression(IVal))
4796       return true;
4797     SMLoc ValEnd = getLoc();
4798     SMRange ValRange = SMRange(ValStart, ValEnd);
4799 
4800     if (IVal < 0)
4801       return OutOfRangeError(ValRange);
4802 
4803     uint64_t Val = IVal;
4804 
4805 #define PARSE_BITS_ENTRY(FIELD, ENTRY, VALUE, RANGE)                           \
4806   if (!isUInt<ENTRY##_WIDTH>(VALUE))                                           \
4807     return OutOfRangeError(RANGE);                                             \
4808   AMDHSA_BITS_SET(FIELD, ENTRY, VALUE);
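// For instance (purely an illustration of the textual expansion), a handler
// below such as
//   PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
//                    COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, Val, ValRange)
// first rejects values wider than the field's *_WIDTH and then stores Val
// into the kernel descriptor via AMDHSA_BITS_SET.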
4809 
4810     if (ID == ".amdhsa_group_segment_fixed_size") {
4811       if (!isUInt<sizeof(KD.group_segment_fixed_size) * CHAR_BIT>(Val))
4812         return OutOfRangeError(ValRange);
4813       KD.group_segment_fixed_size = Val;
4814     } else if (ID == ".amdhsa_private_segment_fixed_size") {
4815       if (!isUInt<sizeof(KD.private_segment_fixed_size) * CHAR_BIT>(Val))
4816         return OutOfRangeError(ValRange);
4817       KD.private_segment_fixed_size = Val;
4818     } else if (ID == ".amdhsa_kernarg_size") {
4819       if (!isUInt<sizeof(KD.kernarg_size) * CHAR_BIT>(Val))
4820         return OutOfRangeError(ValRange);
4821       KD.kernarg_size = Val;
4822     } else if (ID == ".amdhsa_user_sgpr_count") {
4823       ExplicitUserSGPRCount = Val;
4824     } else if (ID == ".amdhsa_user_sgpr_private_segment_buffer") {
4825       if (hasArchitectedFlatScratch())
4826         return Error(IDRange.Start,
4827                      "directive is not supported with architected flat scratch",
4828                      IDRange);
4829       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4830                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER,
4831                        Val, ValRange);
4832       if (Val)
4833         ImpliedUserSGPRCount += 4;
4834     } else if (ID == ".amdhsa_user_sgpr_dispatch_ptr") {
4835       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4836                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR, Val,
4837                        ValRange);
4838       if (Val)
4839         ImpliedUserSGPRCount += 2;
4840     } else if (ID == ".amdhsa_user_sgpr_queue_ptr") {
4841       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4842                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR, Val,
4843                        ValRange);
4844       if (Val)
4845         ImpliedUserSGPRCount += 2;
4846     } else if (ID == ".amdhsa_user_sgpr_kernarg_segment_ptr") {
4847       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4848                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR,
4849                        Val, ValRange);
4850       if (Val)
4851         ImpliedUserSGPRCount += 2;
4852     } else if (ID == ".amdhsa_user_sgpr_dispatch_id") {
4853       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4854                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID, Val,
4855                        ValRange);
4856       if (Val)
4857         ImpliedUserSGPRCount += 2;
4858     } else if (ID == ".amdhsa_user_sgpr_flat_scratch_init") {
4859       if (hasArchitectedFlatScratch())
4860         return Error(IDRange.Start,
4861                      "directive is not supported with architected flat scratch",
4862                      IDRange);
4863       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4864                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT, Val,
4865                        ValRange);
4866       if (Val)
4867         ImpliedUserSGPRCount += 2;
4868     } else if (ID == ".amdhsa_user_sgpr_private_segment_size") {
4869       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4870                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE,
4871                        Val, ValRange);
4872       if (Val)
4873         ImpliedUserSGPRCount += 1;
4874     } else if (ID == ".amdhsa_wavefront_size32") {
4875       if (IVersion.Major < 10)
4876         return Error(IDRange.Start, "directive requires gfx10+", IDRange);
4877       EnableWavefrontSize32 = Val;
4878       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4879                        KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32,
4880                        Val, ValRange);
4881     } else if (ID == ".amdhsa_system_sgpr_private_segment_wavefront_offset") {
4882       if (hasArchitectedFlatScratch())
4883         return Error(IDRange.Start,
4884                      "directive is not supported with architected flat scratch",
4885                      IDRange);
4886       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4887                        COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, Val, ValRange);
4888     } else if (ID == ".amdhsa_enable_private_segment") {
4889       if (!hasArchitectedFlatScratch())
4890         return Error(
4891             IDRange.Start,
4892             "directive is not supported without architected flat scratch",
4893             IDRange);
4894       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4895                        COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, Val, ValRange);
4896     } else if (ID == ".amdhsa_system_sgpr_workgroup_id_x") {
4897       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4898                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, Val,
4899                        ValRange);
4900     } else if (ID == ".amdhsa_system_sgpr_workgroup_id_y") {
4901       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4902                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y, Val,
4903                        ValRange);
4904     } else if (ID == ".amdhsa_system_sgpr_workgroup_id_z") {
4905       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4906                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z, Val,
4907                        ValRange);
4908     } else if (ID == ".amdhsa_system_sgpr_workgroup_info") {
4909       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4910                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO, Val,
4911                        ValRange);
4912     } else if (ID == ".amdhsa_system_vgpr_workitem_id") {
4913       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4914                        COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID, Val,
4915                        ValRange);
4916     } else if (ID == ".amdhsa_next_free_vgpr") {
4917       VGPRRange = ValRange;
4918       NextFreeVGPR = Val;
4919     } else if (ID == ".amdhsa_next_free_sgpr") {
4920       SGPRRange = ValRange;
4921       NextFreeSGPR = Val;
4922     } else if (ID == ".amdhsa_accum_offset") {
4923       if (!isGFX90A())
4924         return Error(IDRange.Start, "directive requires gfx90a+", IDRange);
4925       AccumOffset = Val;
4926     } else if (ID == ".amdhsa_reserve_vcc") {
4927       if (!isUInt<1>(Val))
4928         return OutOfRangeError(ValRange);
4929       ReserveVCC = Val;
4930     } else if (ID == ".amdhsa_reserve_flat_scratch") {
4931       if (IVersion.Major < 7)
4932         return Error(IDRange.Start, "directive requires gfx7+", IDRange);
4933       if (hasArchitectedFlatScratch())
4934         return Error(IDRange.Start,
4935                      "directive is not supported with architected flat scratch",
4936                      IDRange);
4937       if (!isUInt<1>(Val))
4938         return OutOfRangeError(ValRange);
4939       ReserveFlatScr = Val;
4940     } else if (ID == ".amdhsa_reserve_xnack_mask") {
4941       if (IVersion.Major < 8)
4942         return Error(IDRange.Start, "directive requires gfx8+", IDRange);
4943       if (!isUInt<1>(Val))
4944         return OutOfRangeError(ValRange);
4945       if (Val != getTargetStreamer().getTargetID()->isXnackOnOrAny())
4946         return getParser().Error(IDRange.Start, ".amdhsa_reserve_xnack_mask does not match target id",
4947                                  IDRange);
4948     } else if (ID == ".amdhsa_float_round_mode_32") {
4949       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
4950                        COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32, Val, ValRange);
4951     } else if (ID == ".amdhsa_float_round_mode_16_64") {
4952       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
4953                        COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64, Val, ValRange);
4954     } else if (ID == ".amdhsa_float_denorm_mode_32") {
4955       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
4956                        COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32, Val, ValRange);
4957     } else if (ID == ".amdhsa_float_denorm_mode_16_64") {
4958       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
4959                        COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64, Val,
4960                        ValRange);
4961     } else if (ID == ".amdhsa_dx10_clamp") {
4962       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
4963                        COMPUTE_PGM_RSRC1_ENABLE_DX10_CLAMP, Val, ValRange);
4964     } else if (ID == ".amdhsa_ieee_mode") {
4965       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_ENABLE_IEEE_MODE,
4966                        Val, ValRange);
4967     } else if (ID == ".amdhsa_fp16_overflow") {
4968       if (IVersion.Major < 9)
4969         return Error(IDRange.Start, "directive requires gfx9+", IDRange);
4970       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FP16_OVFL, Val,
4971                        ValRange);
4972     } else if (ID == ".amdhsa_tg_split") {
4973       if (!isGFX90A())
4974         return Error(IDRange.Start, "directive requires gfx90a+", IDRange);
4975       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc3, COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT, Val,
4976                        ValRange);
4977     } else if (ID == ".amdhsa_workgroup_processor_mode") {
4978       if (IVersion.Major < 10)
4979         return Error(IDRange.Start, "directive requires gfx10+", IDRange);
4980       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_WGP_MODE, Val,
4981                        ValRange);
4982     } else if (ID == ".amdhsa_memory_ordered") {
4983       if (IVersion.Major < 10)
4984         return Error(IDRange.Start, "directive requires gfx10+", IDRange);
4985       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_MEM_ORDERED, Val,
4986                        ValRange);
4987     } else if (ID == ".amdhsa_forward_progress") {
4988       if (IVersion.Major < 10)
4989         return Error(IDRange.Start, "directive requires gfx10+", IDRange);
4990       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FWD_PROGRESS, Val,
4991                        ValRange);
4992     } else if (ID == ".amdhsa_shared_vgpr_count") {
4993       if (IVersion.Major < 10)
4994         return Error(IDRange.Start, "directive requires gfx10+", IDRange);
4995       SharedVGPRCount = Val;
4996       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc3,
4997                        COMPUTE_PGM_RSRC3_GFX10_SHARED_VGPR_COUNT, Val,
4998                        ValRange);
4999     } else if (ID == ".amdhsa_exception_fp_ieee_invalid_op") {
5000       PARSE_BITS_ENTRY(
5001           KD.compute_pgm_rsrc2,
5002           COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION, Val,
5003           ValRange);
5004     } else if (ID == ".amdhsa_exception_fp_denorm_src") {
5005       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5006                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE,
5007                        Val, ValRange);
5008     } else if (ID == ".amdhsa_exception_fp_ieee_div_zero") {
5009       PARSE_BITS_ENTRY(
5010           KD.compute_pgm_rsrc2,
5011           COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO, Val,
5012           ValRange);
5013     } else if (ID == ".amdhsa_exception_fp_ieee_overflow") {
5014       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5015                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW,
5016                        Val, ValRange);
5017     } else if (ID == ".amdhsa_exception_fp_ieee_underflow") {
5018       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5019                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW,
5020                        Val, ValRange);
5021     } else if (ID == ".amdhsa_exception_fp_ieee_inexact") {
5022       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5023                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT,
5024                        Val, ValRange);
5025     } else if (ID == ".amdhsa_exception_int_div_zero") {
5026       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5027                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO,
5028                        Val, ValRange);
5029     } else {
5030       return Error(IDRange.Start, "unknown .amdhsa_kernel directive", IDRange);
5031     }
5032 
5033 #undef PARSE_BITS_ENTRY
5034   }
5035 
5036   if (Seen.find(".amdhsa_next_free_vgpr") == Seen.end())
5037     return TokError(".amdhsa_next_free_vgpr directive is required");
5038 
5039   if (Seen.find(".amdhsa_next_free_sgpr") == Seen.end())
5040     return TokError(".amdhsa_next_free_sgpr directive is required");
5041 
5042   unsigned VGPRBlocks;
5043   unsigned SGPRBlocks;
5044   if (calculateGPRBlocks(getFeatureBits(), ReserveVCC, ReserveFlatScr,
5045                          getTargetStreamer().getTargetID()->isXnackOnOrAny(),
5046                          EnableWavefrontSize32, NextFreeVGPR,
5047                          VGPRRange, NextFreeSGPR, SGPRRange, VGPRBlocks,
5048                          SGPRBlocks))
5049     return true;
5050 
5051   if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_WIDTH>(
5052           VGPRBlocks))
5053     return OutOfRangeError(VGPRRange);
5054   AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
5055                   COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT, VGPRBlocks);
5056 
5057   if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_WIDTH>(
5058           SGPRBlocks))
5059     return OutOfRangeError(SGPRRange);
5060   AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
5061                   COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT,
5062                   SGPRBlocks);
5063 
5064   if (ExplicitUserSGPRCount && ImpliedUserSGPRCount > *ExplicitUserSGPRCount)
5065     return TokError(".amdhsa_user_sgpr_count smaller than implied by "
5066                     "enabled user SGPRs");
5067 
5068   unsigned UserSGPRCount =
5069       ExplicitUserSGPRCount ? *ExplicitUserSGPRCount : ImpliedUserSGPRCount;
5070 
5071   if (!isUInt<COMPUTE_PGM_RSRC2_USER_SGPR_COUNT_WIDTH>(UserSGPRCount))
5072     return TokError("too many user SGPRs enabled");
5073   AMDHSA_BITS_SET(KD.compute_pgm_rsrc2, COMPUTE_PGM_RSRC2_USER_SGPR_COUNT,
5074                   UserSGPRCount);
5075 
5076   if (isGFX90A()) {
5077     if (Seen.find(".amdhsa_accum_offset") == Seen.end())
5078       return TokError(".amdhsa_accum_offset directive is required");
5079     if (AccumOffset < 4 || AccumOffset > 256 || (AccumOffset & 3))
5080       return TokError("accum_offset should be in range [4..256] in "
5081                       "increments of 4");
5082     if (AccumOffset > alignTo(std::max((uint64_t)1, NextFreeVGPR), 4))
5083       return TokError("accum_offset exceeds total VGPR allocation");
5084     AMDHSA_BITS_SET(KD.compute_pgm_rsrc3, COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET,
5085                     (AccumOffset / 4 - 1));
5086   }
5087 
5088   if (IVersion.Major == 10) {
5089     // SharedVGPRCount < 16 checked by PARSE_BITS_ENTRY
5090     if (SharedVGPRCount && EnableWavefrontSize32) {
5091       return TokError("shared_vgpr_count directive not valid on "
5092                       "wavefront size 32");
5093     }
5094     if (SharedVGPRCount * 2 + VGPRBlocks > 63) {
5095       return TokError("shared_vgpr_count*2 + "
5096                       "compute_pgm_rsrc1.GRANULATED_WORKITEM_VGPR_COUNT cannot "
5097                       "exceed 63");
5098     }
5099   }
5100 
5101   getTargetStreamer().EmitAmdhsaKernelDescriptor(
5102       getSTI(), KernelName, KD, NextFreeVGPR, NextFreeSGPR, ReserveVCC,
5103       ReserveFlatScr);
5104   return false;
5105 }
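// A minimal illustrative block accepted by the parser above (values are
// examples only; .amdhsa_next_free_vgpr and .amdhsa_next_free_sgpr are the
// two directives required unconditionally):
//   .amdhsa_kernel my_kernel
//     .amdhsa_next_free_vgpr 8
//     .amdhsa_next_free_sgpr 16
//   .end_amdhsa_kernel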
5106 
5107 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectVersion() {
5108   uint32_t Major;
5109   uint32_t Minor;
5110 
5111   if (ParseDirectiveMajorMinor(Major, Minor))
5112     return true;
5113 
5114   getTargetStreamer().EmitDirectiveHSACodeObjectVersion(Major, Minor);
5115   return false;
5116 }
5117 
5118 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectISA() {
5119   uint32_t Major;
5120   uint32_t Minor;
5121   uint32_t Stepping;
5122   StringRef VendorName;
5123   StringRef ArchName;
5124 
5125   // If this directive has no arguments, then use the ISA version for the
5126   // targeted GPU.
5127   if (isToken(AsmToken::EndOfStatement)) {
5128     AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
5129     getTargetStreamer().EmitDirectiveHSACodeObjectISAV2(ISA.Major, ISA.Minor,
5130                                                         ISA.Stepping,
5131                                                         "AMD", "AMDGPU");
5132     return false;
5133   }
5134 
5135   if (ParseDirectiveMajorMinor(Major, Minor))
5136     return true;
5137 
5138   if (!trySkipToken(AsmToken::Comma))
5139     return TokError("stepping version number required, comma expected");
5140 
5141   if (ParseAsAbsoluteExpression(Stepping))
5142     return TokError("invalid stepping version");
5143 
5144   if (!trySkipToken(AsmToken::Comma))
5145     return TokError("vendor name required, comma expected");
5146 
5147   if (!parseString(VendorName, "invalid vendor name"))
5148     return true;
5149 
5150   if (!trySkipToken(AsmToken::Comma))
5151     return TokError("arch name required, comma expected");
5152 
5153   if (!parseString(ArchName, "invalid arch name"))
5154     return true;
5155 
5156   getTargetStreamer().EmitDirectiveHSACodeObjectISAV2(Major, Minor, Stepping,
5157                                                       VendorName, ArchName);
5158   return false;
5159 }
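// Illustrative forms handled above (not taken from the original tests):
//   .hsa_code_object_isa                        -> uses the target's own ISA version
//   .hsa_code_object_isa 7,0,0,"AMD","AMDGPU"   -> explicit major,minor,stepping,vendor,arch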
5160 
5161 bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID,
5162                                                amd_kernel_code_t &Header) {
5163   // max_scratch_backing_memory_byte_size is deprecated. Ignore it while parsing
5164   // assembly for backwards compatibility.
5165   if (ID == "max_scratch_backing_memory_byte_size") {
5166     Parser.eatToEndOfStatement();
5167     return false;
5168   }
5169 
5170   SmallString<40> ErrStr;
5171   raw_svector_ostream Err(ErrStr);
5172   if (!parseAmdKernelCodeField(ID, getParser(), Header, Err)) {
5173     return TokError(Err.str());
5174   }
5175   Lex();
5176 
5177   if (ID == "enable_wavefront_size32") {
5178     if (Header.code_properties & AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32) {
5179       if (!isGFX10Plus())
5180         return TokError("enable_wavefront_size32=1 is only allowed on GFX10+");
5181       if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32])
5182         return TokError("enable_wavefront_size32=1 requires +WavefrontSize32");
5183     } else {
5184       if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64])
5185         return TokError("enable_wavefront_size32=0 requires +WavefrontSize64");
5186     }
5187   }
5188 
5189   if (ID == "wavefront_size") {
5190     if (Header.wavefront_size == 5) {
5191       if (!isGFX10Plus())
5192         return TokError("wavefront_size=5 is only allowed on GFX10+");
5193       if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32])
5194         return TokError("wavefront_size=5 requires +WavefrontSize32");
5195     } else if (Header.wavefront_size == 6) {
5196       if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64])
5197         return TokError("wavefront_size=6 requires +WavefrontSize64");
5198     }
5199   }
5200 
5201   if (ID == "enable_wgp_mode") {
5202     if (G_00B848_WGP_MODE(Header.compute_pgm_resource_registers) &&
5203         !isGFX10Plus())
5204       return TokError("enable_wgp_mode=1 is only allowed on GFX10+");
5205   }
5206 
5207   if (ID == "enable_mem_ordered") {
5208     if (G_00B848_MEM_ORDERED(Header.compute_pgm_resource_registers) &&
5209         !isGFX10Plus())
5210       return TokError("enable_mem_ordered=1 is only allowed on GFX10+");
5211   }
5212 
5213   if (ID == "enable_fwd_progress") {
5214     if (G_00B848_FWD_PROGRESS(Header.compute_pgm_resource_registers) &&
5215         !isGFX10Plus())
5216       return TokError("enable_fwd_progress=1 is only allowed on GFX10+");
5217   }
5218 
5219   return false;
5220 }
5221 
5222 bool AMDGPUAsmParser::ParseDirectiveAMDKernelCodeT() {
5223   amd_kernel_code_t Header;
5224   AMDGPU::initDefaultAMDKernelCodeT(Header, &getSTI());
5225 
5226   while (true) {
5227     // Lex EndOfStatement.  This is in a while loop, because lexing a comment
5228     // will set the current token to EndOfStatement.
5229     while(trySkipToken(AsmToken::EndOfStatement));
5230 
5231     StringRef ID;
5232     if (!parseId(ID, "expected value identifier or .end_amd_kernel_code_t"))
5233       return true;
5234 
5235     if (ID == ".end_amd_kernel_code_t")
5236       break;
5237 
5238     if (ParseAMDKernelCodeTValue(ID, Header))
5239       return true;
5240   }
5241 
5242   getTargetStreamer().EmitAMDKernelCodeT(Header);
5243 
5244   return false;
5245 }
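// Illustrative sketch of the block consumed above; field names are parsed by
// parseAmdKernelCodeField, and the values here are examples only:
//   .amd_kernel_code_t
//     wavefront_size = 6
//     enable_wgp_mode = 0
//   .end_amd_kernel_code_t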
5246 
5247 bool AMDGPUAsmParser::ParseDirectiveAMDGPUHsaKernel() {
5248   StringRef KernelName;
5249   if (!parseId(KernelName, "expected symbol name"))
5250     return true;
5251 
5252   getTargetStreamer().EmitAMDGPUSymbolType(KernelName,
5253                                            ELF::STT_AMDGPU_HSA_KERNEL);
5254 
5255   KernelScope.initialize(getContext());
5256   return false;
5257 }
5258 
5259 bool AMDGPUAsmParser::ParseDirectiveISAVersion() {
5260   if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) {
5261     return Error(getLoc(),
5262                  ".amd_amdgpu_isa directive is not available on non-amdgcn "
5263                  "architectures");
5264   }
5265 
5266   auto TargetIDDirective = getLexer().getTok().getStringContents();
5267   if (getTargetStreamer().getTargetID()->toString() != TargetIDDirective)
5268     return Error(getParser().getTok().getLoc(), "target id must match options");
5269 
5270   getTargetStreamer().EmitISAVersion();
5271   Lex();
5272 
5273   return false;
5274 }
5275 
5276 bool AMDGPUAsmParser::ParseDirectiveHSAMetadata() {
5277   const char *AssemblerDirectiveBegin;
5278   const char *AssemblerDirectiveEnd;
5279   std::tie(AssemblerDirectiveBegin, AssemblerDirectiveEnd) =
5280       isHsaAbiVersion3AndAbove(&getSTI())
5281           ? std::make_tuple(HSAMD::V3::AssemblerDirectiveBegin,
5282                             HSAMD::V3::AssemblerDirectiveEnd)
5283           : std::make_tuple(HSAMD::AssemblerDirectiveBegin,
5284                             HSAMD::AssemblerDirectiveEnd);
5285 
5286   if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA) {
5287     return Error(getLoc(),
5288                  (Twine(AssemblerDirectiveBegin) + Twine(" directive is "
5289                  "not available on non-amdhsa OSes")).str());
5290   }
5291 
5292   std::string HSAMetadataString;
5293   if (ParseToEndDirective(AssemblerDirectiveBegin, AssemblerDirectiveEnd,
5294                           HSAMetadataString))
5295     return true;
5296 
5297   if (isHsaAbiVersion3AndAbove(&getSTI())) {
5298     if (!getTargetStreamer().EmitHSAMetadataV3(HSAMetadataString))
5299       return Error(getLoc(), "invalid HSA metadata");
5300   } else {
5301     if (!getTargetStreamer().EmitHSAMetadataV2(HSAMetadataString))
5302       return Error(getLoc(), "invalid HSA metadata");
5303   }
5304 
5305   return false;
5306 }
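// Illustrative only: for code object v3 and above the metadata is a YAML
// block bracketed by the V3 begin/end directives (e.g. .amdgpu_metadata /
// .end_amdgpu_metadata); for v2 the older .amd_amdgpu_hsa_metadata pair is
// used. The text in between is collected verbatim by ParseToEndDirective
// below and handed to the target streamer.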
5307 
5308 /// Common code to parse out a block of text (typically YAML) between start and
5309 /// end directives.
5310 bool AMDGPUAsmParser::ParseToEndDirective(const char *AssemblerDirectiveBegin,
5311                                           const char *AssemblerDirectiveEnd,
5312                                           std::string &CollectString) {
5313 
5314   raw_string_ostream CollectStream(CollectString);
5315 
5316   getLexer().setSkipSpace(false);
5317 
5318   bool FoundEnd = false;
5319   while (!isToken(AsmToken::Eof)) {
5320     while (isToken(AsmToken::Space)) {
5321       CollectStream << getTokenStr();
5322       Lex();
5323     }
5324 
5325     if (trySkipId(AssemblerDirectiveEnd)) {
5326       FoundEnd = true;
5327       break;
5328     }
5329 
5330     CollectStream << Parser.parseStringToEndOfStatement()
5331                   << getContext().getAsmInfo()->getSeparatorString();
5332 
5333     Parser.eatToEndOfStatement();
5334   }
5335 
5336   getLexer().setSkipSpace(true);
5337 
5338   if (isToken(AsmToken::Eof) && !FoundEnd) {
5339     return TokError(Twine("expected directive ") +
5340                     Twine(AssemblerDirectiveEnd) + Twine(" not found"));
5341   }
5342 
5343   CollectStream.flush();
5344   return false;
5345 }
5346 
5347 /// Parse the assembler directive for new MsgPack-format PAL metadata.
5348 bool AMDGPUAsmParser::ParseDirectivePALMetadataBegin() {
5349   std::string String;
5350   if (ParseToEndDirective(AMDGPU::PALMD::AssemblerDirectiveBegin,
5351                           AMDGPU::PALMD::AssemblerDirectiveEnd, String))
5352     return true;
5353 
5354   auto PALMetadata = getTargetStreamer().getPALMetadata();
5355   if (!PALMetadata->setFromString(String))
5356     return Error(getLoc(), "invalid PAL metadata");
5357   return false;
5358 }
5359 
5360 /// Parse the assembler directive for old linear-format PAL metadata.
5361 bool AMDGPUAsmParser::ParseDirectivePALMetadata() {
5362   if (getSTI().getTargetTriple().getOS() != Triple::AMDPAL) {
5363     return Error(getLoc(),
5364                  (Twine(PALMD::AssemblerDirective) + Twine(" directive is "
5365                  "not available on non-amdpal OSes")).str());
5366   }
5367 
5368   auto PALMetadata = getTargetStreamer().getPALMetadata();
5369   PALMetadata->setLegacy();
5370   for (;;) {
5371     uint32_t Key, Value;
5372     if (ParseAsAbsoluteExpression(Key)) {
5373       return TokError(Twine("invalid value in ") +
5374                       Twine(PALMD::AssemblerDirective));
5375     }
5376     if (!trySkipToken(AsmToken::Comma)) {
5377       return TokError(Twine("expected an even number of values in ") +
5378                       Twine(PALMD::AssemblerDirective));
5379     }
5380     if (ParseAsAbsoluteExpression(Value)) {
5381       return TokError(Twine("invalid value in ") +
5382                       Twine(PALMD::AssemblerDirective));
5383     }
5384     PALMetadata->setRegister(Key, Value);
5385     if (!trySkipToken(AsmToken::Comma))
5386       break;
5387   }
5388   return false;
5389 }
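// Illustrative use of the legacy form parsed above: a comma-separated list
// of register/value pairs, e.g.
//   .amd_amdgpu_pal_metadata 0x2c0a,0x12345678, 0x2c0b,0x0
// (register keys and values here are examples; each pair ends up in
// PALMetadata->setRegister(Key, Value)).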
5390 
5391 /// ParseDirectiveAMDGPULDS
5392 ///  ::= .amdgpu_lds identifier ',' size_expression [',' align_expression]
5393 bool AMDGPUAsmParser::ParseDirectiveAMDGPULDS() {
5394   if (getParser().checkForValidSection())
5395     return true;
5396 
5397   StringRef Name;
5398   SMLoc NameLoc = getLoc();
5399   if (getParser().parseIdentifier(Name))
5400     return TokError("expected identifier in directive");
5401 
5402   MCSymbol *Symbol = getContext().getOrCreateSymbol(Name);
5403   if (parseToken(AsmToken::Comma, "expected ','"))
5404     return true;
5405 
5406   unsigned LocalMemorySize = AMDGPU::IsaInfo::getLocalMemorySize(&getSTI());
5407 
5408   int64_t Size;
5409   SMLoc SizeLoc = getLoc();
5410   if (getParser().parseAbsoluteExpression(Size))
5411     return true;
5412   if (Size < 0)
5413     return Error(SizeLoc, "size must be non-negative");
5414   if (Size > LocalMemorySize)
5415     return Error(SizeLoc, "size is too large");
5416 
5417   int64_t Alignment = 4;
5418   if (trySkipToken(AsmToken::Comma)) {
5419     SMLoc AlignLoc = getLoc();
5420     if (getParser().parseAbsoluteExpression(Alignment))
5421       return true;
5422     if (Alignment < 0 || !isPowerOf2_64(Alignment))
5423       return Error(AlignLoc, "alignment must be a power of two");
5424 
5425     // Alignment larger than the size of LDS is possible in theory, as long
5426     // as the linker manages to place the symbol at address 0, but we do want
5427     // to make sure the alignment fits nicely into a 32-bit integer.
5428     if (Alignment >= 1u << 31)
5429       return Error(AlignLoc, "alignment is too large");
5430   }
5431 
5432   if (parseToken(AsmToken::EndOfStatement,
5433                  "unexpected token in '.amdgpu_lds' directive"))
5434     return true;
5435 
5436   Symbol->redefineIfPossible();
5437   if (!Symbol->isUndefined())
5438     return Error(NameLoc, "invalid symbol redefinition");
5439 
5440   getTargetStreamer().emitAMDGPULDS(Symbol, Size, Align(Alignment));
5441   return false;
5442 }
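// Illustrative use of the grammar documented above (size and alignment are
// arbitrary examples):
//   .amdgpu_lds my_shared_buffer, 4096, 16
// declares an LDS symbol of 4096 bytes with 16-byte alignment; omitting the
// third operand defaults the alignment to 4.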
5443 
5444 bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) {
5445   StringRef IDVal = DirectiveID.getString();
5446 
5447   if (isHsaAbiVersion3AndAbove(&getSTI())) {
5448     if (IDVal == ".amdhsa_kernel")
5449      return ParseDirectiveAMDHSAKernel();
5450 
5451     // TODO: Restructure/combine with PAL metadata directive.
5452     if (IDVal == AMDGPU::HSAMD::V3::AssemblerDirectiveBegin)
5453       return ParseDirectiveHSAMetadata();
5454   } else {
5455     if (IDVal == ".hsa_code_object_version")
5456       return ParseDirectiveHSACodeObjectVersion();
5457 
5458     if (IDVal == ".hsa_code_object_isa")
5459       return ParseDirectiveHSACodeObjectISA();
5460 
5461     if (IDVal == ".amd_kernel_code_t")
5462       return ParseDirectiveAMDKernelCodeT();
5463 
5464     if (IDVal == ".amdgpu_hsa_kernel")
5465       return ParseDirectiveAMDGPUHsaKernel();
5466 
5467     if (IDVal == ".amd_amdgpu_isa")
5468       return ParseDirectiveISAVersion();
5469 
5470     if (IDVal == AMDGPU::HSAMD::AssemblerDirectiveBegin)
5471       return ParseDirectiveHSAMetadata();
5472   }
5473 
5474   if (IDVal == ".amdgcn_target")
5475     return ParseDirectiveAMDGCNTarget();
5476 
5477   if (IDVal == ".amdgpu_lds")
5478     return ParseDirectiveAMDGPULDS();
5479 
5480   if (IDVal == PALMD::AssemblerDirectiveBegin)
5481     return ParseDirectivePALMetadataBegin();
5482 
5483   if (IDVal == PALMD::AssemblerDirective)
5484     return ParseDirectivePALMetadata();
5485 
5486   return true;
5487 }
5488 
5489 bool AMDGPUAsmParser::subtargetHasRegister(const MCRegisterInfo &MRI,
5490                                            unsigned RegNo) {
5491 
5492   if (MRI.regsOverlap(AMDGPU::TTMP12_TTMP13_TTMP14_TTMP15, RegNo))
5493     return isGFX9Plus();
5494 
5495   // GFX10 has 2 more SGPRs 104 and 105.
5496   if (MRI.regsOverlap(AMDGPU::SGPR104_SGPR105, RegNo))
5497     return hasSGPR104_SGPR105();
5498 
5499   switch (RegNo) {
5500   case AMDGPU::SRC_SHARED_BASE:
5501   case AMDGPU::SRC_SHARED_LIMIT:
5502   case AMDGPU::SRC_PRIVATE_BASE:
5503   case AMDGPU::SRC_PRIVATE_LIMIT:
5504   case AMDGPU::SRC_POPS_EXITING_WAVE_ID:
5505     return isGFX9Plus();
5506   case AMDGPU::TBA:
5507   case AMDGPU::TBA_LO:
5508   case AMDGPU::TBA_HI:
5509   case AMDGPU::TMA:
5510   case AMDGPU::TMA_LO:
5511   case AMDGPU::TMA_HI:
5512     return !isGFX9Plus();
5513   case AMDGPU::XNACK_MASK:
5514   case AMDGPU::XNACK_MASK_LO:
5515   case AMDGPU::XNACK_MASK_HI:
5516     return (isVI() || isGFX9()) && getTargetStreamer().getTargetID()->isXnackSupported();
5517   case AMDGPU::SGPR_NULL:
5518     return isGFX10Plus();
5519   default:
5520     break;
5521   }
5522 
5523   if (isCI())
5524     return true;
5525 
5526   if (isSI() || isGFX10Plus()) {
5527     // No flat_scr on SI.
5528     // On GFX10 flat scratch is not a valid register operand and can only be
5529     // accessed with s_setreg/s_getreg.
5530     switch (RegNo) {
5531     case AMDGPU::FLAT_SCR:
5532     case AMDGPU::FLAT_SCR_LO:
5533     case AMDGPU::FLAT_SCR_HI:
5534       return false;
5535     default:
5536       return true;
5537     }
5538   }
5539 
5540   // VI only has 102 SGPRs, so make sure we aren't trying to use the 2 more that
5541   // SI/CI have.
5542   if (MRI.regsOverlap(AMDGPU::SGPR102_SGPR103, RegNo))
5543     return hasSGPR102_SGPR103();
5544 
5545   return true;
5546 }
5547 
5548 OperandMatchResultTy
5549 AMDGPUAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic,
5550                               OperandMode Mode) {
5551   // Try to parse with a custom parser
5552   OperandMatchResultTy ResTy = MatchOperandParserImpl(Operands, Mnemonic);
5553 
5554   // If we successfully parsed the operand or if there was an error parsing,
5555   // we are done.
5556   //
5557   // If we are parsing after we reach EndOfStatement then this means we
5558   // are appending default values to the Operands list.  This is only done
5559   // by a custom parser, so we shouldn't continue on to the generic parsing.
5560   if (ResTy == MatchOperand_Success || ResTy == MatchOperand_ParseFail ||
5561       isToken(AsmToken::EndOfStatement))
5562     return ResTy;
5563 
5564   SMLoc RBraceLoc;
5565   SMLoc LBraceLoc = getLoc();
5566   if (Mode == OperandMode_NSA && trySkipToken(AsmToken::LBrac)) {
5567     unsigned Prefix = Operands.size();
5568 
5569     for (;;) {
5570       auto Loc = getLoc();
5571       ResTy = parseReg(Operands);
5572       if (ResTy == MatchOperand_NoMatch)
5573         Error(Loc, "expected a register");
5574       if (ResTy != MatchOperand_Success)
5575         return MatchOperand_ParseFail;
5576 
5577       RBraceLoc = getLoc();
5578       if (trySkipToken(AsmToken::RBrac))
5579         break;
5580 
5581       if (!skipToken(AsmToken::Comma,
5582                      "expected a comma or a closing square bracket")) {
5583         return MatchOperand_ParseFail;
5584       }
5585     }
5586 
5587     if (Operands.size() - Prefix > 1) {
5588       Operands.insert(Operands.begin() + Prefix,
5589                       AMDGPUOperand::CreateToken(this, "[", LBraceLoc));
5590       Operands.push_back(AMDGPUOperand::CreateToken(this, "]", RBraceLoc));
5591     }
5592 
5593     return MatchOperand_Success;
5594   }
5595 
5596   return parseRegOrImm(Operands);
5597 }
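// Illustrative NSA form handled above for gfx10+ MIMG instructions (the
// operand values are made up): a bracketed address list such as
//   image_sample v[0:3], [v4, v6, v9], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_3D
// is parsed register-by-register; the surrounding "[" and "]" tokens are
// only materialized when more than one register was supplied.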
5598 
5599 StringRef AMDGPUAsmParser::parseMnemonicSuffix(StringRef Name) {
5600   // Clear any forced encodings from the previous instruction.
5601   setForcedEncodingSize(0);
5602   setForcedDPP(false);
5603   setForcedSDWA(false);
5604 
5605   if (Name.endswith("_e64")) {
5606     setForcedEncodingSize(64);
5607     return Name.substr(0, Name.size() - 4);
5608   } else if (Name.endswith("_e32")) {
5609     setForcedEncodingSize(32);
5610     return Name.substr(0, Name.size() - 4);
5611   } else if (Name.endswith("_dpp")) {
5612     setForcedDPP(true);
5613     return Name.substr(0, Name.size() - 4);
5614   } else if (Name.endswith("_sdwa")) {
5615     setForcedSDWA(true);
5616     return Name.substr(0, Name.size() - 5);
5617   }
5618   return Name;
5619 }
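// For example (mnemonic chosen for illustration only), "v_add_f32_e64"
// forces the 64-bit encoding and is matched as "v_add_f32"; the "_e32",
// "_dpp" and "_sdwa" suffixes force their variants in the same way.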
5620 
5621 static void applyMnemonicAliases(StringRef &Mnemonic,
5622                                  const FeatureBitset &Features,
5623                                  unsigned VariantID);
5624 
5625 bool AMDGPUAsmParser::ParseInstruction(ParseInstructionInfo &Info,
5626                                        StringRef Name,
5627                                        SMLoc NameLoc, OperandVector &Operands) {
5628   // Add the instruction mnemonic
5629   Name = parseMnemonicSuffix(Name);
5630 
5631   // If the target architecture uses MnemonicAlias, call it here to parse
5632   // operands correctly.
5633   applyMnemonicAliases(Name, getAvailableFeatures(), 0);
5634 
5635   Operands.push_back(AMDGPUOperand::CreateToken(this, Name, NameLoc));
5636 
5637   bool IsMIMG = Name.startswith("image_");
5638 
5639   while (!trySkipToken(AsmToken::EndOfStatement)) {
5640     OperandMode Mode = OperandMode_Default;
5641     if (IsMIMG && isGFX10Plus() && Operands.size() == 2)
5642       Mode = OperandMode_NSA;
5643     CPolSeen = 0;
5644     OperandMatchResultTy Res = parseOperand(Operands, Name, Mode);
5645 
5646     if (Res != MatchOperand_Success) {
5647       checkUnsupportedInstruction(Name, NameLoc);
5648       if (!Parser.hasPendingError()) {
5649         // FIXME: use real operand location rather than the current location.
5650         StringRef Msg =
5651           (Res == MatchOperand_ParseFail) ? "failed parsing operand." :
5652                                             "not a valid operand.";
5653         Error(getLoc(), Msg);
5654       }
5655       while (!trySkipToken(AsmToken::EndOfStatement)) {
5656         lex();
5657       }
5658       return true;
5659     }
5660 
5661     // Eat the comma or space if there is one.
5662     trySkipToken(AsmToken::Comma);
5663   }
5664 
5665   return false;
5666 }
5667 
5668 //===----------------------------------------------------------------------===//
5669 // Utility functions
5670 //===----------------------------------------------------------------------===//
5671 
5672 OperandMatchResultTy
5673 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, int64_t &IntVal) {
5674 
5675   if (!trySkipId(Prefix, AsmToken::Colon))
5676     return MatchOperand_NoMatch;
5677 
5678   return parseExpr(IntVal) ? MatchOperand_Success : MatchOperand_ParseFail;
5679 }
5680 
5681 OperandMatchResultTy
5682 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
5683                                     AMDGPUOperand::ImmTy ImmTy,
5684                                     bool (*ConvertResult)(int64_t&)) {
5685   SMLoc S = getLoc();
5686   int64_t Value = 0;
5687 
5688   OperandMatchResultTy Res = parseIntWithPrefix(Prefix, Value);
5689   if (Res != MatchOperand_Success)
5690     return Res;
5691 
5692   if (ConvertResult && !ConvertResult(Value)) {
5693     Error(S, "invalid " + StringRef(Prefix) + " value.");
5694   }
5695 
5696   Operands.push_back(AMDGPUOperand::CreateImm(this, Value, S, ImmTy));
5697   return MatchOperand_Success;
5698 }
5699 
5700 OperandMatchResultTy
5701 AMDGPUAsmParser::parseOperandArrayWithPrefix(const char *Prefix,
5702                                              OperandVector &Operands,
5703                                              AMDGPUOperand::ImmTy ImmTy,
5704                                              bool (*ConvertResult)(int64_t&)) {
5705   SMLoc S = getLoc();
5706   if (!trySkipId(Prefix, AsmToken::Colon))
5707     return MatchOperand_NoMatch;
5708 
5709   if (!skipToken(AsmToken::LBrac, "expected a left square bracket"))
5710     return MatchOperand_ParseFail;
5711 
5712   unsigned Val = 0;
5713   const unsigned MaxSize = 4;
5714 
5715   // FIXME: How to verify the number of elements matches the number of src
5716   // operands?
5717   for (int I = 0; ; ++I) {
5718     int64_t Op;
5719     SMLoc Loc = getLoc();
5720     if (!parseExpr(Op))
5721       return MatchOperand_ParseFail;
5722 
5723     if (Op != 0 && Op != 1) {
5724       Error(Loc, "invalid " + StringRef(Prefix) + " value.");
5725       return MatchOperand_ParseFail;
5726     }
5727 
5728     Val |= (Op << I);
5729 
5730     if (trySkipToken(AsmToken::RBrac))
5731       break;
5732 
5733     if (I + 1 == MaxSize) {
5734       Error(getLoc(), "expected a closing square bracket");
5735       return MatchOperand_ParseFail;
5736     }
5737 
5738     if (!skipToken(AsmToken::Comma, "expected a comma"))
5739       return MatchOperand_ParseFail;
5740   }
5741 
5742   Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S, ImmTy));
5743   return MatchOperand_Success;
5744 }
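// Illustrative syntax for the array form parsed above (op_sel is one example
// of a modifier that uses it):
//   op_sel:[0,1,1,0]
// Each element must be 0 or 1 and is packed into bit I of the resulting
// immediate; at most four elements are accepted.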
5745 
5746 OperandMatchResultTy
5747 AMDGPUAsmParser::parseNamedBit(StringRef Name, OperandVector &Operands,
5748                                AMDGPUOperand::ImmTy ImmTy) {
5749   int64_t Bit;
5750   SMLoc S = getLoc();
5751 
5752   if (trySkipId(Name)) {
5753     Bit = 1;
5754   } else if (trySkipId("no", Name)) {
5755     Bit = 0;
5756   } else {
5757     return MatchOperand_NoMatch;
5758   }
5759 
5760   if (Name == "r128" && !hasMIMG_R128()) {
5761     Error(S, "r128 modifier is not supported on this GPU");
5762     return MatchOperand_ParseFail;
5763   }
5764   if (Name == "a16" && !isGFX9() && !hasGFX10A16()) {
5765     Error(S, "a16 modifier is not supported on this GPU");
5766     return MatchOperand_ParseFail;
5767   }
5768 
5769   if (isGFX9() && ImmTy == AMDGPUOperand::ImmTyA16)
5770     ImmTy = AMDGPUOperand::ImmTyR128A16;
5771 
5772   Operands.push_back(AMDGPUOperand::CreateImm(this, Bit, S, ImmTy));
5773   return MatchOperand_Success;
5774 }
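// Illustrative only: a named bit such as "gds" sets the immediate to 1 and
// its negated spelling "nogds" sets it to 0; "r128" and "a16" additionally
// get the GPU-specific checks above.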
5775 
5776 OperandMatchResultTy
5777 AMDGPUAsmParser::parseCPol(OperandVector &Operands) {
5778   unsigned CPolOn = 0;
5779   unsigned CPolOff = 0;
5780   SMLoc S = getLoc();
5781 
5782   StringRef Mnemo = ((AMDGPUOperand &)*Operands[0]).getToken();
5783   if (isGFX940() && !Mnemo.startswith("s_")) {
5784     if (trySkipId("sc0"))
5785       CPolOn = AMDGPU::CPol::SC0;
5786     else if (trySkipId("nosc0"))
5787       CPolOff = AMDGPU::CPol::SC0;
5788     else if (trySkipId("nt"))
5789       CPolOn = AMDGPU::CPol::NT;
5790     else if (trySkipId("nont"))
5791       CPolOff = AMDGPU::CPol::NT;
5792     else if (trySkipId("sc1"))
5793       CPolOn = AMDGPU::CPol::SC1;
5794     else if (trySkipId("nosc1"))
5795       CPolOff = AMDGPU::CPol::SC1;
5796     else
5797       return MatchOperand_NoMatch;
5798   }
5799   else if (trySkipId("glc"))
5800     CPolOn = AMDGPU::CPol::GLC;
5801   else if (trySkipId("noglc"))
5802     CPolOff = AMDGPU::CPol::GLC;
5803   else if (trySkipId("slc"))
5804     CPolOn = AMDGPU::CPol::SLC;
5805   else if (trySkipId("noslc"))
5806     CPolOff = AMDGPU::CPol::SLC;
5807   else if (trySkipId("dlc"))
5808     CPolOn = AMDGPU::CPol::DLC;
5809   else if (trySkipId("nodlc"))
5810     CPolOff = AMDGPU::CPol::DLC;
5811   else if (trySkipId("scc"))
5812     CPolOn = AMDGPU::CPol::SCC;
5813   else if (trySkipId("noscc"))
5814     CPolOff = AMDGPU::CPol::SCC;
5815   else
5816     return MatchOperand_NoMatch;
5817 
5818   if (!isGFX10Plus() && ((CPolOn | CPolOff) & AMDGPU::CPol::DLC)) {
5819     Error(S, "dlc modifier is not supported on this GPU");
5820     return MatchOperand_ParseFail;
5821   }
5822 
5823   if (!isGFX90A() && ((CPolOn | CPolOff) & AMDGPU::CPol::SCC)) {
5824     Error(S, "scc modifier is not supported on this GPU");
5825     return MatchOperand_ParseFail;
5826   }
5827 
5828   if (CPolSeen & (CPolOn | CPolOff)) {
5829     Error(S, "duplicate cache policy modifier");
5830     return MatchOperand_ParseFail;
5831   }
5832 
5833   CPolSeen |= (CPolOn | CPolOff);
5834 
5835   for (unsigned I = 1; I != Operands.size(); ++I) {
5836     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
5837     if (Op.isCPol()) {
5838       Op.setImm((Op.getImm() | CPolOn) & ~CPolOff);
5839       return MatchOperand_Success;
5840     }
5841   }
5842 
5843   Operands.push_back(AMDGPUOperand::CreateImm(this, CPolOn, S,
5844                                               AMDGPUOperand::ImmTyCPol));
5845 
5846   return MatchOperand_Success;
5847 }
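// Illustrative combinations accepted by the cache-policy parser above (the
// instruction context is hypothetical): "glc slc" on most targets, "glc slc
// dlc" on gfx10+, and the "sc0"/"nt"/"sc1" spellings on gfx940 for mnemonics
// that do not start with "s_"; repeating a modifier triggers the
// duplicate-policy error.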
5848 
5849 static void addOptionalImmOperand(
5850   MCInst& Inst, const OperandVector& Operands,
5851   AMDGPUAsmParser::OptionalImmIndexMap& OptionalIdx,
5852   AMDGPUOperand::ImmTy ImmT,
5853   int64_t Default = 0) {
5854   auto i = OptionalIdx.find(ImmT);
5855   if (i != OptionalIdx.end()) {
5856     unsigned Idx = i->second;
5857     ((AMDGPUOperand &)*Operands[Idx]).addImmOperands(Inst, 1);
5858   } else {
5859     Inst.addOperand(MCOperand::createImm(Default));
5860   }
5861 }
5862 
5863 OperandMatchResultTy
5864 AMDGPUAsmParser::parseStringWithPrefix(StringRef Prefix,
5865                                        StringRef &Value,
5866                                        SMLoc &StringLoc) {
5867   if (!trySkipId(Prefix, AsmToken::Colon))
5868     return MatchOperand_NoMatch;
5869 
5870   StringLoc = getLoc();
5871   return parseId(Value, "expected an identifier") ? MatchOperand_Success
5872                                                   : MatchOperand_ParseFail;
5873 }
5874 
5875 //===----------------------------------------------------------------------===//
5876 // MTBUF format
5877 //===----------------------------------------------------------------------===//
5878 
5879 bool AMDGPUAsmParser::tryParseFmt(const char *Pref,
5880                                   int64_t MaxVal,
5881                                   int64_t &Fmt) {
5882   int64_t Val;
5883   SMLoc Loc = getLoc();
5884 
5885   auto Res = parseIntWithPrefix(Pref, Val);
5886   if (Res == MatchOperand_ParseFail)
5887     return false;
5888   if (Res == MatchOperand_NoMatch)
5889     return true;
5890 
5891   if (Val < 0 || Val > MaxVal) {
5892     Error(Loc, Twine("out of range ", StringRef(Pref)));
5893     return false;
5894   }
5895 
5896   Fmt = Val;
5897   return true;
5898 }
5899 
5900 // dfmt and nfmt (in a tbuffer instruction) are parsed as one to allow their
5901 // values to live in a joint format operand in the MCInst encoding.
5902 OperandMatchResultTy
5903 AMDGPUAsmParser::parseDfmtNfmt(int64_t &Format) {
5904   using namespace llvm::AMDGPU::MTBUFFormat;
5905 
5906   int64_t Dfmt = DFMT_UNDEF;
5907   int64_t Nfmt = NFMT_UNDEF;
5908 
5909   // dfmt and nfmt can appear in either order, and each is optional.
5910   for (int I = 0; I < 2; ++I) {
5911     if (Dfmt == DFMT_UNDEF && !tryParseFmt("dfmt", DFMT_MAX, Dfmt))
5912       return MatchOperand_ParseFail;
5913 
5914     if (Nfmt == NFMT_UNDEF && !tryParseFmt("nfmt", NFMT_MAX, Nfmt)) {
5915       return MatchOperand_ParseFail;
5916     }
5917     // Skip optional comma between dfmt/nfmt
5918     // but guard against 2 commas following each other.
5919     if ((Dfmt == DFMT_UNDEF) != (Nfmt == NFMT_UNDEF) &&
5920         !peekToken().is(AsmToken::Comma)) {
5921       trySkipToken(AsmToken::Comma);
5922     }
5923   }
5924 
5925   if (Dfmt == DFMT_UNDEF && Nfmt == NFMT_UNDEF)
5926     return MatchOperand_NoMatch;
5927 
5928   Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt;
5929   Nfmt = (Nfmt == NFMT_UNDEF) ? NFMT_DEFAULT : Nfmt;
5930 
5931   Format = encodeDfmtNfmt(Dfmt, Nfmt);
5932   return MatchOperand_Success;
5933 }
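// Illustrative pre-gfx10 syntax handled above (the numeric values are
// examples):
//   dfmt:4, nfmt:7
// Either key may be omitted or they may appear in the other order; the two
// values are folded into a single format operand by encodeDfmtNfmt.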
5934 
5935 OperandMatchResultTy
5936 AMDGPUAsmParser::parseUfmt(int64_t &Format) {
5937   using namespace llvm::AMDGPU::MTBUFFormat;
5938 
5939   int64_t Fmt = UFMT_UNDEF;
5940 
5941   if (!tryParseFmt("format", UFMT_MAX, Fmt))
5942     return MatchOperand_ParseFail;
5943 
5944   if (Fmt == UFMT_UNDEF)
5945     return MatchOperand_NoMatch;
5946 
5947   Format = Fmt;
5948   return MatchOperand_Success;
5949 }
5950 
5951 bool AMDGPUAsmParser::matchDfmtNfmt(int64_t &Dfmt,
5952                                     int64_t &Nfmt,
5953                                     StringRef FormatStr,
5954                                     SMLoc Loc) {
5955   using namespace llvm::AMDGPU::MTBUFFormat;
5956   int64_t Format;
5957 
5958   Format = getDfmt(FormatStr);
5959   if (Format != DFMT_UNDEF) {
5960     Dfmt = Format;
5961     return true;
5962   }
5963 
5964   Format = getNfmt(FormatStr, getSTI());
5965   if (Format != NFMT_UNDEF) {
5966     Nfmt = Format;
5967     return true;
5968   }
5969 
5970   Error(Loc, "unsupported format");
5971   return false;
5972 }
5973 
5974 OperandMatchResultTy
5975 AMDGPUAsmParser::parseSymbolicSplitFormat(StringRef FormatStr,
5976                                           SMLoc FormatLoc,
5977                                           int64_t &Format) {
5978   using namespace llvm::AMDGPU::MTBUFFormat;
5979 
5980   int64_t Dfmt = DFMT_UNDEF;
5981   int64_t Nfmt = NFMT_UNDEF;
5982   if (!matchDfmtNfmt(Dfmt, Nfmt, FormatStr, FormatLoc))
5983     return MatchOperand_ParseFail;
5984 
5985   if (trySkipToken(AsmToken::Comma)) {
5986     StringRef Str;
5987     SMLoc Loc = getLoc();
5988     if (!parseId(Str, "expected a format string") ||
5989         !matchDfmtNfmt(Dfmt, Nfmt, Str, Loc)) {
5990       return MatchOperand_ParseFail;
5991     }
5992     if (Dfmt == DFMT_UNDEF) {
5993       Error(Loc, "duplicate numeric format");
5994       return MatchOperand_ParseFail;
5995     } else if (Nfmt == NFMT_UNDEF) {
5996       Error(Loc, "duplicate data format");
5997       return MatchOperand_ParseFail;
5998     }
5999   }
6000 
6001   Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt;
6002   Nfmt = (Nfmt == NFMT_UNDEF) ? NFMT_DEFAULT : Nfmt;
6003 
6004   if (isGFX10Plus()) {
6005     auto Ufmt = convertDfmtNfmt2Ufmt(Dfmt, Nfmt, getSTI());
6006     if (Ufmt == UFMT_UNDEF) {
6007       Error(FormatLoc, "unsupported format");
6008       return MatchOperand_ParseFail;
6009     }
6010     Format = Ufmt;
6011   } else {
6012     Format = encodeDfmtNfmt(Dfmt, Nfmt);
6013   }
6014 
6015   return MatchOperand_Success;
6016 }
6017 
6018 OperandMatchResultTy
6019 AMDGPUAsmParser::parseSymbolicUnifiedFormat(StringRef FormatStr,
6020                                             SMLoc Loc,
6021                                             int64_t &Format) {
6022   using namespace llvm::AMDGPU::MTBUFFormat;
6023 
6024   auto Id = getUnifiedFormat(FormatStr, getSTI());
6025   if (Id == UFMT_UNDEF)
6026     return MatchOperand_NoMatch;
6027 
6028   if (!isGFX10Plus()) {
6029     Error(Loc, "unified format is not supported on this GPU");
6030     return MatchOperand_ParseFail;
6031   }
6032 
6033   Format = Id;
6034   return MatchOperand_Success;
6035 }
6036 
6037 OperandMatchResultTy
6038 AMDGPUAsmParser::parseNumericFormat(int64_t &Format) {
6039   using namespace llvm::AMDGPU::MTBUFFormat;
6040   SMLoc Loc = getLoc();
6041 
6042   if (!parseExpr(Format))
6043     return MatchOperand_ParseFail;
6044   if (!isValidFormatEncoding(Format, getSTI())) {
6045     Error(Loc, "out of range format");
6046     return MatchOperand_ParseFail;
6047   }
6048 
6049   return MatchOperand_Success;
6050 }
6051 
6052 OperandMatchResultTy
6053 AMDGPUAsmParser::parseSymbolicOrNumericFormat(int64_t &Format) {
6054   using namespace llvm::AMDGPU::MTBUFFormat;
6055 
6056   if (!trySkipId("format", AsmToken::Colon))
6057     return MatchOperand_NoMatch;
6058 
6059   if (trySkipToken(AsmToken::LBrac)) {
6060     StringRef FormatStr;
6061     SMLoc Loc = getLoc();
6062     if (!parseId(FormatStr, "expected a format string"))
6063       return MatchOperand_ParseFail;
6064 
6065     auto Res = parseSymbolicUnifiedFormat(FormatStr, Loc, Format);
6066     if (Res == MatchOperand_NoMatch)
6067       Res = parseSymbolicSplitFormat(FormatStr, Loc, Format);
6068     if (Res != MatchOperand_Success)
6069       return Res;
6070 
6071     if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
6072       return MatchOperand_ParseFail;
6073 
6074     return MatchOperand_Success;
6075   }
6076 
6077   return parseNumericFormat(Format);
6078 }
6079 
6080 OperandMatchResultTy
6081 AMDGPUAsmParser::parseFORMAT(OperandVector &Operands) {
6082   using namespace llvm::AMDGPU::MTBUFFormat;
6083 
6084   int64_t Format = getDefaultFormatEncoding(getSTI());
6085   OperandMatchResultTy Res;
6086   SMLoc Loc = getLoc();
6087 
6088   // Parse legacy format syntax.
6089   Res = isGFX10Plus() ? parseUfmt(Format) : parseDfmtNfmt(Format);
6090   if (Res == MatchOperand_ParseFail)
6091     return Res;
6092 
6093   bool FormatFound = (Res == MatchOperand_Success);
6094 
6095   Operands.push_back(
6096     AMDGPUOperand::CreateImm(this, Format, Loc, AMDGPUOperand::ImmTyFORMAT));
6097 
6098   if (FormatFound)
6099     trySkipToken(AsmToken::Comma);
6100 
6101   if (isToken(AsmToken::EndOfStatement)) {
6102     // We are expecting an soffset operand,
6103     // but let the matcher handle the error.
6104     return MatchOperand_Success;
6105   }
6106 
6107   // Parse soffset.
6108   Res = parseRegOrImm(Operands);
6109   if (Res != MatchOperand_Success)
6110     return Res;
6111 
6112   trySkipToken(AsmToken::Comma);
6113 
6114   if (!FormatFound) {
6115     Res = parseSymbolicOrNumericFormat(Format);
6116     if (Res == MatchOperand_ParseFail)
6117       return Res;
6118     if (Res == MatchOperand_Success) {
6119       auto Size = Operands.size();
6120       AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands[Size - 2]);
6121       assert(Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyFORMAT);
6122       Op.setImm(Format);
6123     }
6124     return MatchOperand_Success;
6125   }
6126 
6127   if (isId("format") && peekToken().is(AsmToken::Colon)) {
6128     Error(getLoc(), "duplicate format");
6129     return MatchOperand_ParseFail;
6130   }
6131   return MatchOperand_Success;
6132 }
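// Illustrative forms the combined format parser above accepts (the symbol
// names are examples of the symbolic spellings, not an exhaustive list):
//   format:[BUF_DATA_FORMAT_32, BUF_NUM_FORMAT_FLOAT]   split dfmt/nfmt form
//   format:[BUF_FMT_32_FLOAT]                           gfx10+ unified form
//   format:22                                           raw numeric encoding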
6133 
6134 //===----------------------------------------------------------------------===//
6135 // ds
6136 //===----------------------------------------------------------------------===//
6137 
6138 void AMDGPUAsmParser::cvtDSOffset01(MCInst &Inst,
6139                                     const OperandVector &Operands) {
6140   OptionalImmIndexMap OptionalIdx;
6141 
6142   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
6143     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
6144 
6145     // Add the register arguments
6146     if (Op.isReg()) {
6147       Op.addRegOperands(Inst, 1);
6148       continue;
6149     }
6150 
6151     // Handle optional arguments
6152     OptionalIdx[Op.getImmTy()] = i;
6153   }
6154 
6155   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset0);
6156   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset1);
6157   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS);
6158 
6159   Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0
6160 }
6161 
6162 void AMDGPUAsmParser::cvtDSImpl(MCInst &Inst, const OperandVector &Operands,
6163                                 bool IsGdsHardcoded) {
6164   OptionalImmIndexMap OptionalIdx;
6165 
6166   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
6167     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
6168 
6169     // Add the register arguments
6170     if (Op.isReg()) {
6171       Op.addRegOperands(Inst, 1);
6172       continue;
6173     }
6174 
6175     if (Op.isToken() && Op.getToken() == "gds") {
6176       IsGdsHardcoded = true;
6177       continue;
6178     }
6179 
6180     // Handle optional arguments
6181     OptionalIdx[Op.getImmTy()] = i;
6182   }
6183 
6184   AMDGPUOperand::ImmTy OffsetType =
6185     (Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_gfx10 ||
6186      Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_gfx6_gfx7 ||
6187      Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_vi) ? AMDGPUOperand::ImmTySwizzle :
6188                                                       AMDGPUOperand::ImmTyOffset;
6189 
6190   addOptionalImmOperand(Inst, Operands, OptionalIdx, OffsetType);
6191 
6192   if (!IsGdsHardcoded) {
6193     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS);
6194   }
6195   Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0
6196 }
6197 
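// Convert parsed exp operands: collect up to four source registers (or 'off'
// placeholders), derive the 'en' enable mask from the sources that are
// present, and append the optional vm/compr flags and the mask itself.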
6198 void AMDGPUAsmParser::cvtExp(MCInst &Inst, const OperandVector &Operands) {
6199   OptionalImmIndexMap OptionalIdx;
6200 
6201   unsigned OperandIdx[4];
6202   unsigned EnMask = 0;
6203   int SrcIdx = 0;
6204 
6205   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
6206     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
6207 
6208     // Add the register arguments
6209     if (Op.isReg()) {
6210       assert(SrcIdx < 4);
6211       OperandIdx[SrcIdx] = Inst.size();
6212       Op.addRegOperands(Inst, 1);
6213       ++SrcIdx;
6214       continue;
6215     }
6216 
6217     if (Op.isOff()) {
6218       assert(SrcIdx < 4);
6219       OperandIdx[SrcIdx] = Inst.size();
6220       Inst.addOperand(MCOperand::createReg(AMDGPU::NoRegister));
6221       ++SrcIdx;
6222       continue;
6223     }
6224 
6225     if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyExpTgt) {
6226       Op.addImmOperands(Inst, 1);
6227       continue;
6228     }
6229 
6230     if (Op.isToken() && Op.getToken() == "done")
6231       continue;
6232 
6233     // Handle optional arguments
6234     OptionalIdx[Op.getImmTy()] = i;
6235   }
6236 
6237   assert(SrcIdx == 4);
6238 
6239   bool Compr = false;
6240   if (OptionalIdx.find(AMDGPUOperand::ImmTyExpCompr) != OptionalIdx.end()) {
6241     Compr = true;
6242     Inst.getOperand(OperandIdx[1]) = Inst.getOperand(OperandIdx[2]);
6243     Inst.getOperand(OperandIdx[2]).setReg(AMDGPU::NoRegister);
6244     Inst.getOperand(OperandIdx[3]).setReg(AMDGPU::NoRegister);
6245   }
6246 
6247   for (auto i = 0; i < SrcIdx; ++i) {
6248     if (Inst.getOperand(OperandIdx[i]).getReg() != AMDGPU::NoRegister) {
6249       EnMask |= Compr? (0x3 << i * 2) : (0x1 << i);
6250     }
6251   }
6252 
6253   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpVM);
6254   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpCompr);
6255 
6256   Inst.addOperand(MCOperand::createImm(EnMask));
6257 }
6258 
6259 //===----------------------------------------------------------------------===//
6260 // s_waitcnt
6261 //===----------------------------------------------------------------------===//
6262 
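// Merge a single counter value into the combined s_waitcnt encoding.
// Returns true on failure, i.e. when the requested value does not fit into
// the counter field; if Saturate is set, an oversized value is clamped to
// the maximum encodable value instead.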
6263 static bool
6264 encodeCnt(
6265   const AMDGPU::IsaVersion ISA,
6266   int64_t &IntVal,
6267   int64_t CntVal,
6268   bool Saturate,
6269   unsigned (*encode)(const IsaVersion &Version, unsigned, unsigned),
6270   unsigned (*decode)(const IsaVersion &Version, unsigned))
6271 {
6272   bool Failed = false;
6273 
6274   IntVal = encode(ISA, IntVal, CntVal);
6275   if (CntVal != decode(ISA, IntVal)) {
6276     if (Saturate) {
6277       IntVal = encode(ISA, IntVal, -1);
6278     } else {
6279       Failed = true;
6280     }
6281   }
6282   return Failed;
6283 }
6284 
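// Parse one counter specification of the form <name>(<expr>), where <name>
// is vmcnt, expcnt or lgkmcnt, optionally with a '_sat' suffix that clamps
// out-of-range values instead of reporting an error, and merge the result
// into the combined waitcnt value.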
6285 bool AMDGPUAsmParser::parseCnt(int64_t &IntVal) {
6286 
6287   SMLoc CntLoc = getLoc();
6288   StringRef CntName = getTokenStr();
6289 
6290   if (!skipToken(AsmToken::Identifier, "expected a counter name") ||
6291       !skipToken(AsmToken::LParen, "expected a left parenthesis"))
6292     return false;
6293 
6294   int64_t CntVal;
6295   SMLoc ValLoc = getLoc();
6296   if (!parseExpr(CntVal))
6297     return false;
6298 
6299   AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
6300 
6301   bool Failed = true;
6302   bool Sat = CntName.endswith("_sat");
6303 
6304   if (CntName == "vmcnt" || CntName == "vmcnt_sat") {
6305     Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeVmcnt, decodeVmcnt);
6306   } else if (CntName == "expcnt" || CntName == "expcnt_sat") {
6307     Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeExpcnt, decodeExpcnt);
6308   } else if (CntName == "lgkmcnt" || CntName == "lgkmcnt_sat") {
6309     Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeLgkmcnt, decodeLgkmcnt);
6310   } else {
6311     Error(CntLoc, "invalid counter name " + CntName);
6312     return false;
6313   }
6314 
6315   if (Failed) {
6316     Error(ValLoc, "value too large for " + CntName);
6317     return false;
6318   }
6319 
6320   if (!skipToken(AsmToken::RParen, "expected a closing parenthesis"))
6321     return false;
6322 
6323   if (trySkipToken(AsmToken::Amp) || trySkipToken(AsmToken::Comma)) {
6324     if (isToken(AsmToken::EndOfStatement)) {
6325       Error(getLoc(), "expected a counter name");
6326       return false;
6327     }
6328   }
6329 
6330   return true;
6331 }
6332 
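// An s_waitcnt operand is either a list of named counters, e.g.
//   s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
// or a single absolute expression giving the raw encoding.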
6333 OperandMatchResultTy
6334 AMDGPUAsmParser::parseSWaitCntOps(OperandVector &Operands) {
6335   AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
6336   int64_t Waitcnt = getWaitcntBitMask(ISA);
6337   SMLoc S = getLoc();
6338 
6339   if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) {
6340     while (!isToken(AsmToken::EndOfStatement)) {
6341       if (!parseCnt(Waitcnt))
6342         return MatchOperand_ParseFail;
6343     }
6344   } else {
6345     if (!parseExpr(Waitcnt))
6346       return MatchOperand_ParseFail;
6347   }
6348 
6349   Operands.push_back(AMDGPUOperand::CreateImm(this, Waitcnt, S));
6350   return MatchOperand_Success;
6351 }
6352 
6353 bool
6354 AMDGPUOperand::isSWaitCnt() const {
6355   return isImm();
6356 }
6357 
6358 //===----------------------------------------------------------------------===//
6359 // DepCtr
6360 //===----------------------------------------------------------------------===//
6361 
6362 void AMDGPUAsmParser::depCtrError(SMLoc Loc, int ErrorId,
6363                                   StringRef DepCtrName) {
6364   switch (ErrorId) {
6365   case OPR_ID_UNKNOWN:
6366     Error(Loc, Twine("invalid counter name ", DepCtrName));
6367     return;
6368   case OPR_ID_UNSUPPORTED:
6369     Error(Loc, Twine(DepCtrName, " is not supported on this GPU"));
6370     return;
6371   case OPR_ID_DUPLICATE:
6372     Error(Loc, Twine("duplicate counter name ", DepCtrName));
6373     return;
6374   case OPR_VAL_INVALID:
6375     Error(Loc, Twine("invalid value for ", DepCtrName));
6376     return;
6377   default:
6378     assert(false);
6379   }
6380 }
6381 
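// Parse one named counter of a depctr operand, <name>(<expr>), and merge its
// encoding into DepCtr. UsedOprMask records which fields have already been
// specified so that duplicate counters can be diagnosed.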
6382 bool AMDGPUAsmParser::parseDepCtr(int64_t &DepCtr, unsigned &UsedOprMask) {
6383 
6384   using namespace llvm::AMDGPU::DepCtr;
6385 
6386   SMLoc DepCtrLoc = getLoc();
6387   StringRef DepCtrName = getTokenStr();
6388 
6389   if (!skipToken(AsmToken::Identifier, "expected a counter name") ||
6390       !skipToken(AsmToken::LParen, "expected a left parenthesis"))
6391     return false;
6392 
6393   int64_t ExprVal;
6394   if (!parseExpr(ExprVal))
6395     return false;
6396 
6397   unsigned PrevOprMask = UsedOprMask;
6398   int CntVal = encodeDepCtr(DepCtrName, ExprVal, UsedOprMask, getSTI());
6399 
6400   if (CntVal < 0) {
6401     depCtrError(DepCtrLoc, CntVal, DepCtrName);
6402     return false;
6403   }
6404 
6405   if (!skipToken(AsmToken::RParen, "expected a closing parenthesis"))
6406     return false;
6407 
6408   if (trySkipToken(AsmToken::Amp) || trySkipToken(AsmToken::Comma)) {
6409     if (isToken(AsmToken::EndOfStatement)) {
6410       Error(getLoc(), "expected a counter name");
6411       return false;
6412     }
6413   }
6414 
6415   unsigned CntValMask = PrevOprMask ^ UsedOprMask;
6416   DepCtr = (DepCtr & ~CntValMask) | CntVal;
6417   return true;
6418 }
6419 
6420 OperandMatchResultTy AMDGPUAsmParser::parseDepCtrOps(OperandVector &Operands) {
6421   using namespace llvm::AMDGPU::DepCtr;
6422 
6423   int64_t DepCtr = getDefaultDepCtrEncoding(getSTI());
6424   SMLoc Loc = getLoc();
6425 
6426   if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) {
6427     unsigned UsedOprMask = 0;
6428     while (!isToken(AsmToken::EndOfStatement)) {
6429       if (!parseDepCtr(DepCtr, UsedOprMask))
6430         return MatchOperand_ParseFail;
6431     }
6432   } else {
6433     if (!parseExpr(DepCtr))
6434       return MatchOperand_ParseFail;
6435   }
6436 
6437   Operands.push_back(AMDGPUOperand::CreateImm(this, DepCtr, Loc));
6438   return MatchOperand_Success;
6439 }
6440 
6441 bool AMDGPUOperand::isDepCtr() const { return isS16Imm(); }
6442 
6443 //===----------------------------------------------------------------------===//
6444 // hwreg
6445 //===----------------------------------------------------------------------===//
6446 
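// Parse the body of a hwreg(...) operand, i.e. everything after the opening
// parenthesis: <name or code> [, <bit offset>, <bit width>], including the
// closing parenthesis.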
6447 bool
6448 AMDGPUAsmParser::parseHwregBody(OperandInfoTy &HwReg,
6449                                 OperandInfoTy &Offset,
6450                                 OperandInfoTy &Width) {
6451   using namespace llvm::AMDGPU::Hwreg;
6452 
6453   // The register may be specified by name or using a numeric code
6454   HwReg.Loc = getLoc();
6455   if (isToken(AsmToken::Identifier) &&
6456       (HwReg.Id = getHwregId(getTokenStr(), getSTI())) != OPR_ID_UNKNOWN) {
6457     HwReg.IsSymbolic = true;
6458     lex(); // skip register name
6459   } else if (!parseExpr(HwReg.Id, "a register name")) {
6460     return false;
6461   }
6462 
6463   if (trySkipToken(AsmToken::RParen))
6464     return true;
6465 
6466   // parse optional params
6467   if (!skipToken(AsmToken::Comma, "expected a comma or a closing parenthesis"))
6468     return false;
6469 
6470   Offset.Loc = getLoc();
6471   if (!parseExpr(Offset.Id))
6472     return false;
6473 
6474   if (!skipToken(AsmToken::Comma, "expected a comma"))
6475     return false;
6476 
6477   Width.Loc = getLoc();
6478   return parseExpr(Width.Id) &&
6479          skipToken(AsmToken::RParen, "expected a closing parenthesis");
6480 }
6481 
6482 bool
6483 AMDGPUAsmParser::validateHwreg(const OperandInfoTy &HwReg,
6484                                const OperandInfoTy &Offset,
6485                                const OperandInfoTy &Width) {
6486 
6487   using namespace llvm::AMDGPU::Hwreg;
6488 
6489   if (HwReg.IsSymbolic) {
6490     if (HwReg.Id == OPR_ID_UNSUPPORTED) {
6491       Error(HwReg.Loc,
6492             "specified hardware register is not supported on this GPU");
6493       return false;
6494     }
6495   } else {
6496     if (!isValidHwreg(HwReg.Id)) {
6497       Error(HwReg.Loc,
6498             "invalid code of hardware register: only 6-bit values are legal");
6499       return false;
6500     }
6501   }
6502   if (!isValidHwregOffset(Offset.Id)) {
6503     Error(Offset.Loc, "invalid bit offset: only 5-bit values are legal");
6504     return false;
6505   }
6506   if (!isValidHwregWidth(Width.Id)) {
6507     Error(Width.Loc,
6508           "invalid bitfield width: only values from 1 to 32 are legal");
6509     return false;
6510   }
6511   return true;
6512 }
6513 
6514 OperandMatchResultTy
6515 AMDGPUAsmParser::parseHwreg(OperandVector &Operands) {
6516   using namespace llvm::AMDGPU::Hwreg;
6517 
6518   int64_t ImmVal = 0;
6519   SMLoc Loc = getLoc();
6520 
6521   if (trySkipId("hwreg", AsmToken::LParen)) {
6522     OperandInfoTy HwReg(OPR_ID_UNKNOWN);
6523     OperandInfoTy Offset(OFFSET_DEFAULT_);
6524     OperandInfoTy Width(WIDTH_DEFAULT_);
6525     if (parseHwregBody(HwReg, Offset, Width) &&
6526         validateHwreg(HwReg, Offset, Width)) {
6527       ImmVal = encodeHwreg(HwReg.Id, Offset.Id, Width.Id);
6528     } else {
6529       return MatchOperand_ParseFail;
6530     }
6531   } else if (parseExpr(ImmVal, "a hwreg macro")) {
6532     if (ImmVal < 0 || !isUInt<16>(ImmVal)) {
6533       Error(Loc, "invalid immediate: only 16-bit values are legal");
6534       return MatchOperand_ParseFail;
6535     }
6536   } else {
6537     return MatchOperand_ParseFail;
6538   }
6539 
6540   Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTyHwreg));
6541   return MatchOperand_Success;
6542 }
6543 
6544 bool AMDGPUOperand::isHwreg() const {
6545   return isImmTy(ImmTyHwreg);
6546 }
6547 
6548 //===----------------------------------------------------------------------===//
6549 // sendmsg
6550 //===----------------------------------------------------------------------===//
6551 
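// Parse the body of a sendmsg(...) operand, i.e. everything after the opening
// parenthesis: <message> [, <operation> [, <stream id>]], including the
// closing parenthesis.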
6552 bool
6553 AMDGPUAsmParser::parseSendMsgBody(OperandInfoTy &Msg,
6554                                   OperandInfoTy &Op,
6555                                   OperandInfoTy &Stream) {
6556   using namespace llvm::AMDGPU::SendMsg;
6557 
6558   Msg.Loc = getLoc();
6559   if (isToken(AsmToken::Identifier) &&
6560       (Msg.Id = getMsgId(getTokenStr(), getSTI())) != OPR_ID_UNKNOWN) {
6561     Msg.IsSymbolic = true;
6562     lex(); // skip message name
6563   } else if (!parseExpr(Msg.Id, "a message name")) {
6564     return false;
6565   }
6566 
6567   if (trySkipToken(AsmToken::Comma)) {
6568     Op.IsDefined = true;
6569     Op.Loc = getLoc();
6570     if (isToken(AsmToken::Identifier) &&
6571         (Op.Id = getMsgOpId(Msg.Id, getTokenStr())) >= 0) {
6572       lex(); // skip operation name
6573     } else if (!parseExpr(Op.Id, "an operation name")) {
6574       return false;
6575     }
6576 
6577     if (trySkipToken(AsmToken::Comma)) {
6578       Stream.IsDefined = true;
6579       Stream.Loc = getLoc();
6580       if (!parseExpr(Stream.Id))
6581         return false;
6582     }
6583   }
6584 
6585   return skipToken(AsmToken::RParen, "expected a closing parenthesis");
6586 }
6587 
6588 bool
6589 AMDGPUAsmParser::validateSendMsg(const OperandInfoTy &Msg,
6590                                  const OperandInfoTy &Op,
6591                                  const OperandInfoTy &Stream) {
6592   using namespace llvm::AMDGPU::SendMsg;
6593 
6594   // Validation strictness depends on whether the message is specified
6595   // in symbolic or in numeric form. In the latter case
6596   // only the possibility of encoding is checked.
6597   bool Strict = Msg.IsSymbolic;
6598 
6599   if (Strict) {
6600     if (Msg.Id == OPR_ID_UNSUPPORTED) {
6601       Error(Msg.Loc, "specified message id is not supported on this GPU");
6602       return false;
6603     }
6604   } else {
6605     if (!isValidMsgId(Msg.Id)) {
6606       Error(Msg.Loc, "invalid message id");
6607       return false;
6608     }
6609   }
6610   if (Strict && (msgRequiresOp(Msg.Id) != Op.IsDefined)) {
6611     if (Op.IsDefined) {
6612       Error(Op.Loc, "message does not support operations");
6613     } else {
6614       Error(Msg.Loc, "missing message operation");
6615     }
6616     return false;
6617   }
6618   if (!isValidMsgOp(Msg.Id, Op.Id, getSTI(), Strict)) {
6619     Error(Op.Loc, "invalid operation id");
6620     return false;
6621   }
6622   if (Strict && !msgSupportsStream(Msg.Id, Op.Id) && Stream.IsDefined) {
6623     Error(Stream.Loc, "message operation does not support streams");
6624     return false;
6625   }
6626   if (!isValidMsgStream(Msg.Id, Op.Id, Stream.Id, getSTI(), Strict)) {
6627     Error(Stream.Loc, "invalid message stream id");
6628     return false;
6629   }
6630   return true;
6631 }
6632 
6633 OperandMatchResultTy
6634 AMDGPUAsmParser::parseSendMsgOp(OperandVector &Operands) {
6635   using namespace llvm::AMDGPU::SendMsg;
6636 
6637   int64_t ImmVal = 0;
6638   SMLoc Loc = getLoc();
6639 
6640   if (trySkipId("sendmsg", AsmToken::LParen)) {
6641     OperandInfoTy Msg(OPR_ID_UNKNOWN);
6642     OperandInfoTy Op(OP_NONE_);
6643     OperandInfoTy Stream(STREAM_ID_NONE_);
6644     if (parseSendMsgBody(Msg, Op, Stream) &&
6645         validateSendMsg(Msg, Op, Stream)) {
6646       ImmVal = encodeMsg(Msg.Id, Op.Id, Stream.Id);
6647     } else {
6648       return MatchOperand_ParseFail;
6649     }
6650   } else if (parseExpr(ImmVal, "a sendmsg macro")) {
6651     if (ImmVal < 0 || !isUInt<16>(ImmVal)) {
6652       Error(Loc, "invalid immediate: only 16-bit values are legal");
6653       return MatchOperand_ParseFail;
6654     }
6655   } else {
6656     return MatchOperand_ParseFail;
6657   }
6658 
6659   Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTySendMsg));
6660   return MatchOperand_Success;
6661 }
6662 
6663 bool AMDGPUOperand::isSendMsg() const {
6664   return isImmTy(ImmTySendMsg);
6665 }
6666 
6667 //===----------------------------------------------------------------------===//
6668 // v_interp
6669 //===----------------------------------------------------------------------===//
6670 
6671 OperandMatchResultTy AMDGPUAsmParser::parseInterpSlot(OperandVector &Operands) {
6672   StringRef Str;
6673   SMLoc S = getLoc();
6674 
6675   if (!parseId(Str))
6676     return MatchOperand_NoMatch;
6677 
6678   int Slot = StringSwitch<int>(Str)
6679     .Case("p10", 0)
6680     .Case("p20", 1)
6681     .Case("p0", 2)
6682     .Default(-1);
6683 
6684   if (Slot == -1) {
6685     Error(S, "invalid interpolation slot");
6686     return MatchOperand_ParseFail;
6687   }
6688 
6689   Operands.push_back(AMDGPUOperand::CreateImm(this, Slot, S,
6690                                               AMDGPUOperand::ImmTyInterpSlot));
6691   return MatchOperand_Success;
6692 }
6693 
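// An interpolation attribute has the form attr<N>.<chan>, where <N> is a
// decimal attribute number in the range [0, 63] and <chan> is one of
// .x, .y, .z or .w.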
6694 OperandMatchResultTy AMDGPUAsmParser::parseInterpAttr(OperandVector &Operands) {
6695   StringRef Str;
6696   SMLoc S = getLoc();
6697 
6698   if (!parseId(Str))
6699     return MatchOperand_NoMatch;
6700 
6701   if (!Str.startswith("attr")) {
6702     Error(S, "invalid interpolation attribute");
6703     return MatchOperand_ParseFail;
6704   }
6705 
6706   StringRef Chan = Str.take_back(2);
6707   int AttrChan = StringSwitch<int>(Chan)
6708     .Case(".x", 0)
6709     .Case(".y", 1)
6710     .Case(".z", 2)
6711     .Case(".w", 3)
6712     .Default(-1);
6713   if (AttrChan == -1) {
6714     Error(S, "invalid or missing interpolation attribute channel");
6715     return MatchOperand_ParseFail;
6716   }
6717 
6718   Str = Str.drop_back(2).drop_front(4);
6719 
6720   uint8_t Attr;
6721   if (Str.getAsInteger(10, Attr)) {
6722     Error(S, "invalid or missing interpolation attribute number");
6723     return MatchOperand_ParseFail;
6724   }
6725 
6726   if (Attr > 63) {
6727     Error(S, "out of bounds interpolation attribute number");
6728     return MatchOperand_ParseFail;
6729   }
6730 
6731   SMLoc SChan = SMLoc::getFromPointer(Chan.data());
6732 
6733   Operands.push_back(AMDGPUOperand::CreateImm(this, Attr, S,
6734                                               AMDGPUOperand::ImmTyInterpAttr));
6735   Operands.push_back(AMDGPUOperand::CreateImm(this, AttrChan, SChan,
6736                                               AMDGPUOperand::ImmTyAttrChan));
6737   return MatchOperand_Success;
6738 }
6739 
6740 //===----------------------------------------------------------------------===//
6741 // exp
6742 //===----------------------------------------------------------------------===//
6743 
6744 OperandMatchResultTy AMDGPUAsmParser::parseExpTgt(OperandVector &Operands) {
6745   using namespace llvm::AMDGPU::Exp;
6746 
6747   StringRef Str;
6748   SMLoc S = getLoc();
6749 
6750   if (!parseId(Str))
6751     return MatchOperand_NoMatch;
6752 
6753   unsigned Id = getTgtId(Str);
6754   if (Id == ET_INVALID || !isSupportedTgtId(Id, getSTI())) {
6755     Error(S, (Id == ET_INVALID) ?
6756                 "invalid exp target" :
6757                 "exp target is not supported on this GPU");
6758     return MatchOperand_ParseFail;
6759   }
6760 
6761   Operands.push_back(AMDGPUOperand::CreateImm(this, Id, S,
6762                                               AMDGPUOperand::ImmTyExpTgt));
6763   return MatchOperand_Success;
6764 }
6765 
6766 //===----------------------------------------------------------------------===//
6767 // parser helpers
6768 //===----------------------------------------------------------------------===//
6769 
6770 bool
6771 AMDGPUAsmParser::isId(const AsmToken &Token, const StringRef Id) const {
6772   return Token.is(AsmToken::Identifier) && Token.getString() == Id;
6773 }
6774 
6775 bool
6776 AMDGPUAsmParser::isId(const StringRef Id) const {
6777   return isId(getToken(), Id);
6778 }
6779 
6780 bool
6781 AMDGPUAsmParser::isToken(const AsmToken::TokenKind Kind) const {
6782   return getTokenKind() == Kind;
6783 }
6784 
6785 bool
6786 AMDGPUAsmParser::trySkipId(const StringRef Id) {
6787   if (isId(Id)) {
6788     lex();
6789     return true;
6790   }
6791   return false;
6792 }
6793 
6794 bool
6795 AMDGPUAsmParser::trySkipId(const StringRef Pref, const StringRef Id) {
6796   if (isToken(AsmToken::Identifier)) {
6797     StringRef Tok = getTokenStr();
6798     if (Tok.startswith(Pref) && Tok.drop_front(Pref.size()) == Id) {
6799       lex();
6800       return true;
6801     }
6802   }
6803   return false;
6804 }
6805 
6806 bool
6807 AMDGPUAsmParser::trySkipId(const StringRef Id, const AsmToken::TokenKind Kind) {
6808   if (isId(Id) && peekToken().is(Kind)) {
6809     lex();
6810     lex();
6811     return true;
6812   }
6813   return false;
6814 }
6815 
6816 bool
6817 AMDGPUAsmParser::trySkipToken(const AsmToken::TokenKind Kind) {
6818   if (isToken(Kind)) {
6819     lex();
6820     return true;
6821   }
6822   return false;
6823 }
6824 
6825 bool
6826 AMDGPUAsmParser::skipToken(const AsmToken::TokenKind Kind,
6827                            const StringRef ErrMsg) {
6828   if (!trySkipToken(Kind)) {
6829     Error(getLoc(), ErrMsg);
6830     return false;
6831   }
6832   return true;
6833 }
6834 
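// Parse an expression and require it to evaluate to an absolute value.
// On failure report a generic error or, if Expected is provided, an error
// mentioning what was expected.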
6835 bool
6836 AMDGPUAsmParser::parseExpr(int64_t &Imm, StringRef Expected) {
6837   SMLoc S = getLoc();
6838 
6839   const MCExpr *Expr;
6840   if (Parser.parseExpression(Expr))
6841     return false;
6842 
6843   if (Expr->evaluateAsAbsolute(Imm))
6844     return true;
6845 
6846   if (Expected.empty()) {
6847     Error(S, "expected absolute expression");
6848   } else {
6849     Error(S, Twine("expected ", Expected) +
6850              Twine(" or an absolute expression"));
6851   }
6852   return false;
6853 }
6854 
6855 bool
6856 AMDGPUAsmParser::parseExpr(OperandVector &Operands) {
6857   SMLoc S = getLoc();
6858 
6859   const MCExpr *Expr;
6860   if (Parser.parseExpression(Expr))
6861     return false;
6862 
6863   int64_t IntVal;
6864   if (Expr->evaluateAsAbsolute(IntVal)) {
6865     Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S));
6866   } else {
6867     Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S));
6868   }
6869   return true;
6870 }
6871 
6872 bool
6873 AMDGPUAsmParser::parseString(StringRef &Val, const StringRef ErrMsg) {
6874   if (isToken(AsmToken::String)) {
6875     Val = getToken().getStringContents();
6876     lex();
6877     return true;
6878   } else {
6879     Error(getLoc(), ErrMsg);
6880     return false;
6881   }
6882 }
6883 
6884 bool
6885 AMDGPUAsmParser::parseId(StringRef &Val, const StringRef ErrMsg) {
6886   if (isToken(AsmToken::Identifier)) {
6887     Val = getTokenStr();
6888     lex();
6889     return true;
6890   } else {
6891     if (!ErrMsg.empty())
6892       Error(getLoc(), ErrMsg);
6893     return false;
6894   }
6895 }
6896 
6897 AsmToken
6898 AMDGPUAsmParser::getToken() const {
6899   return Parser.getTok();
6900 }
6901 
6902 AsmToken
6903 AMDGPUAsmParser::peekToken() {
6904   return isToken(AsmToken::EndOfStatement) ? getToken() : getLexer().peekTok();
6905 }
6906 
6907 void
6908 AMDGPUAsmParser::peekTokens(MutableArrayRef<AsmToken> Tokens) {
6909   auto TokCount = getLexer().peekTokens(Tokens);
6910 
6911   for (auto Idx = TokCount; Idx < Tokens.size(); ++Idx)
6912     Tokens[Idx] = AsmToken(AsmToken::Error, "");
6913 }
6914 
6915 AsmToken::TokenKind
6916 AMDGPUAsmParser::getTokenKind() const {
6917   return getLexer().getKind();
6918 }
6919 
6920 SMLoc
6921 AMDGPUAsmParser::getLoc() const {
6922   return getToken().getLoc();
6923 }
6924 
6925 StringRef
6926 AMDGPUAsmParser::getTokenStr() const {
6927   return getToken().getString();
6928 }
6929 
6930 void
6931 AMDGPUAsmParser::lex() {
6932   Parser.Lex();
6933 }
6934 
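// Return the location of the most recently parsed operand that satisfies
// Test; if none matches, fall back to the location of Operands[0] (the
// instruction mnemonic).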
6935 SMLoc
6936 AMDGPUAsmParser::getOperandLoc(std::function<bool(const AMDGPUOperand&)> Test,
6937                                const OperandVector &Operands) const {
6938   for (unsigned i = Operands.size() - 1; i > 0; --i) {
6939     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
6940     if (Test(Op))
6941       return Op.getStartLoc();
6942   }
6943   return ((AMDGPUOperand &)*Operands[0]).getStartLoc();
6944 }
6945 
6946 SMLoc
6947 AMDGPUAsmParser::getImmLoc(AMDGPUOperand::ImmTy Type,
6948                            const OperandVector &Operands) const {
6949   auto Test = [=](const AMDGPUOperand& Op) { return Op.isImmTy(Type); };
6950   return getOperandLoc(Test, Operands);
6951 }
6952 
6953 SMLoc
6954 AMDGPUAsmParser::getRegLoc(unsigned Reg,
6955                            const OperandVector &Operands) const {
6956   auto Test = [=](const AMDGPUOperand& Op) {
6957     return Op.isRegKind() && Op.getReg() == Reg;
6958   };
6959   return getOperandLoc(Test, Operands);
6960 }
6961 
6962 SMLoc
6963 AMDGPUAsmParser::getLitLoc(const OperandVector &Operands) const {
6964   auto Test = [](const AMDGPUOperand& Op) {
6965     return Op.IsImmKindLiteral() || Op.isExpr();
6966   };
6967   return getOperandLoc(Test, Operands);
6968 }
6969 
6970 SMLoc
6971 AMDGPUAsmParser::getConstLoc(const OperandVector &Operands) const {
6972   auto Test = [](const AMDGPUOperand& Op) {
6973     return Op.isImmKindConst();
6974   };
6975   return getOperandLoc(Test, Operands);
6976 }
6977 
6978 //===----------------------------------------------------------------------===//
6979 // swizzle
6980 //===----------------------------------------------------------------------===//
6981 
6982 LLVM_READNONE
6983 static unsigned
6984 encodeBitmaskPerm(const unsigned AndMask,
6985                   const unsigned OrMask,
6986                   const unsigned XorMask) {
6987   using namespace llvm::AMDGPU::Swizzle;
6988 
6989   return BITMASK_PERM_ENC |
6990          (AndMask << BITMASK_AND_SHIFT) |
6991          (OrMask  << BITMASK_OR_SHIFT)  |
6992          (XorMask << BITMASK_XOR_SHIFT);
6993 }
6994 
6995 bool
6996 AMDGPUAsmParser::parseSwizzleOperand(int64_t &Op,
6997                                      const unsigned MinVal,
6998                                      const unsigned MaxVal,
6999                                      const StringRef ErrMsg,
7000                                      SMLoc &Loc) {
7001   if (!skipToken(AsmToken::Comma, "expected a comma")) {
7002     return false;
7003   }
7004   Loc = getLoc();
7005   if (!parseExpr(Op)) {
7006     return false;
7007   }
7008   if (Op < MinVal || Op > MaxVal) {
7009     Error(Loc, ErrMsg);
7010     return false;
7011   }
7012 
7013   return true;
7014 }
7015 
7016 bool
7017 AMDGPUAsmParser::parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
7018                                       const unsigned MinVal,
7019                                       const unsigned MaxVal,
7020                                       const StringRef ErrMsg) {
7021   SMLoc Loc;
7022   for (unsigned i = 0; i < OpNum; ++i) {
7023     if (!parseSwizzleOperand(Op[i], MinVal, MaxVal, ErrMsg, Loc))
7024       return false;
7025   }
7026 
7027   return true;
7028 }
7029 
7030 bool
7031 AMDGPUAsmParser::parseSwizzleQuadPerm(int64_t &Imm) {
7032   using namespace llvm::AMDGPU::Swizzle;
7033 
7034   int64_t Lane[LANE_NUM];
7035   if (parseSwizzleOperands(LANE_NUM, Lane, 0, LANE_MAX,
7036                            "expected a 2-bit lane id")) {
7037     Imm = QUAD_PERM_ENC;
7038     for (unsigned I = 0; I < LANE_NUM; ++I) {
7039       Imm |= Lane[I] << (LANE_SHIFT * I);
7040     }
7041     return true;
7042   }
7043   return false;
7044 }
7045 
7046 bool
7047 AMDGPUAsmParser::parseSwizzleBroadcast(int64_t &Imm) {
7048   using namespace llvm::AMDGPU::Swizzle;
7049 
7050   SMLoc Loc;
7051   int64_t GroupSize;
7052   int64_t LaneIdx;
7053 
7054   if (!parseSwizzleOperand(GroupSize,
7055                            2, 32,
7056                            "group size must be in the interval [2,32]",
7057                            Loc)) {
7058     return false;
7059   }
7060   if (!isPowerOf2_64(GroupSize)) {
7061     Error(Loc, "group size must be a power of two");
7062     return false;
7063   }
7064   if (parseSwizzleOperand(LaneIdx,
7065                           0, GroupSize - 1,
7066                           "lane id must be in the interval [0,group size - 1]",
7067                           Loc)) {
7068     Imm = encodeBitmaskPerm(BITMASK_MAX - GroupSize + 1, LaneIdx, 0);
7069     return true;
7070   }
7071   return false;
7072 }
7073 
7074 bool
7075 AMDGPUAsmParser::parseSwizzleReverse(int64_t &Imm) {
7076   using namespace llvm::AMDGPU::Swizzle;
7077 
7078   SMLoc Loc;
7079   int64_t GroupSize;
7080 
7081   if (!parseSwizzleOperand(GroupSize,
7082                            2, 32,
7083                            "group size must be in the interval [2,32]",
7084                            Loc)) {
7085     return false;
7086   }
7087   if (!isPowerOf2_64(GroupSize)) {
7088     Error(Loc, "group size must be a power of two");
7089     return false;
7090   }
7091 
7092   Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize - 1);
7093   return true;
7094 }
7095 
7096 bool
7097 AMDGPUAsmParser::parseSwizzleSwap(int64_t &Imm) {
7098   using namespace llvm::AMDGPU::Swizzle;
7099 
7100   SMLoc Loc;
7101   int64_t GroupSize;
7102 
7103   if (!parseSwizzleOperand(GroupSize,
7104                            1, 16,
7105                            "group size must be in the interval [1,16]",
7106                            Loc)) {
7107     return false;
7108   }
7109   if (!isPowerOf2_64(GroupSize)) {
7110     Error(Loc, "group size must be a power of two");
7111     return false;
7112   }
7113 
7114   Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize);
7115   return true;
7116 }
7117 
7118 bool
7119 AMDGPUAsmParser::parseSwizzleBitmaskPerm(int64_t &Imm) {
7120   using namespace llvm::AMDGPU::Swizzle;
7121 
7122   if (!skipToken(AsmToken::Comma, "expected a comma")) {
7123     return false;
7124   }
7125 
7126   StringRef Ctl;
7127   SMLoc StrLoc = getLoc();
7128   if (!parseString(Ctl)) {
7129     return false;
7130   }
7131   if (Ctl.size() != BITMASK_WIDTH) {
7132     Error(StrLoc, "expected a 5-character mask");
7133     return false;
7134   }
7135 
7136   unsigned AndMask = 0;
7137   unsigned OrMask = 0;
7138   unsigned XorMask = 0;
7139 
7140   for (size_t i = 0; i < Ctl.size(); ++i) {
7141     unsigned Mask = 1 << (BITMASK_WIDTH - 1 - i);
7142     switch(Ctl[i]) {
7143     default:
7144       Error(StrLoc, "invalid mask");
7145       return false;
7146     case '0':
7147       break;
7148     case '1':
7149       OrMask |= Mask;
7150       break;
7151     case 'p':
7152       AndMask |= Mask;
7153       break;
7154     case 'i':
7155       AndMask |= Mask;
7156       XorMask |= Mask;
7157       break;
7158     }
7159   }
7160 
7161   Imm = encodeBitmaskPerm(AndMask, OrMask, XorMask);
7162   return true;
7163 }
7164 
7165 bool
7166 AMDGPUAsmParser::parseSwizzleOffset(int64_t &Imm) {
7167 
7168   SMLoc OffsetLoc = getLoc();
7169 
7170   if (!parseExpr(Imm, "a swizzle macro")) {
7171     return false;
7172   }
7173   if (!isUInt<16>(Imm)) {
7174     Error(OffsetLoc, "expected a 16-bit offset");
7175     return false;
7176   }
7177   return true;
7178 }
7179 
7180 bool
7181 AMDGPUAsmParser::parseSwizzleMacro(int64_t &Imm) {
7182   using namespace llvm::AMDGPU::Swizzle;
7183 
7184   if (skipToken(AsmToken::LParen, "expected a left parenthesis")) {
7185 
7186     SMLoc ModeLoc = getLoc();
7187     bool Ok = false;
7188 
7189     if (trySkipId(IdSymbolic[ID_QUAD_PERM])) {
7190       Ok = parseSwizzleQuadPerm(Imm);
7191     } else if (trySkipId(IdSymbolic[ID_BITMASK_PERM])) {
7192       Ok = parseSwizzleBitmaskPerm(Imm);
7193     } else if (trySkipId(IdSymbolic[ID_BROADCAST])) {
7194       Ok = parseSwizzleBroadcast(Imm);
7195     } else if (trySkipId(IdSymbolic[ID_SWAP])) {
7196       Ok = parseSwizzleSwap(Imm);
7197     } else if (trySkipId(IdSymbolic[ID_REVERSE])) {
7198       Ok = parseSwizzleReverse(Imm);
7199     } else {
7200       Error(ModeLoc, "expected a swizzle mode");
7201     }
7202 
7203     return Ok && skipToken(AsmToken::RParen, "expected a closing parenthesis");
7204   }
7205 
7206   return false;
7207 }
7208 
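// A swizzle operand is either a literal 16-bit value, offset:<expr>, or a
// symbolic macro of the form offset:swizzle(<mode>, <args>...). If the
// 'offset' prefix is absent, other optional operands are tried instead.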
7209 OperandMatchResultTy
7210 AMDGPUAsmParser::parseSwizzleOp(OperandVector &Operands) {
7211   SMLoc S = getLoc();
7212   int64_t Imm = 0;
7213 
7214   if (trySkipId("offset")) {
7215 
7216     bool Ok = false;
7217     if (skipToken(AsmToken::Colon, "expected a colon")) {
7218       if (trySkipId("swizzle")) {
7219         Ok = parseSwizzleMacro(Imm);
7220       } else {
7221         Ok = parseSwizzleOffset(Imm);
7222       }
7223     }
7224 
7225     Operands.push_back(AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTySwizzle));
7226 
7227     return Ok? MatchOperand_Success : MatchOperand_ParseFail;
7228   } else {
7229     // Swizzle "offset" operand is optional.
7230     // If it is omitted, try parsing other optional operands.
7231     return parseOptionalOpr(Operands);
7232   }
7233 }
7234 
7235 bool
7236 AMDGPUOperand::isSwizzle() const {
7237   return isImmTy(ImmTySwizzle);
7238 }
7239 
7240 //===----------------------------------------------------------------------===//
7241 // VGPR Index Mode
7242 //===----------------------------------------------------------------------===//
7243 
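// Parse the body of a gpr_idx(...) operand: a comma-separated list of VGPR
// index modes whose bits are OR-ed together; an empty list means 'off'.
// Returns UNDEF on error.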
7244 int64_t AMDGPUAsmParser::parseGPRIdxMacro() {
7245 
7246   using namespace llvm::AMDGPU::VGPRIndexMode;
7247 
7248   if (trySkipToken(AsmToken::RParen)) {
7249     return OFF;
7250   }
7251 
7252   int64_t Imm = 0;
7253 
7254   while (true) {
7255     unsigned Mode = 0;
7256     SMLoc S = getLoc();
7257 
7258     for (unsigned ModeId = ID_MIN; ModeId <= ID_MAX; ++ModeId) {
7259       if (trySkipId(IdSymbolic[ModeId])) {
7260         Mode = 1 << ModeId;
7261         break;
7262       }
7263     }
7264 
7265     if (Mode == 0) {
7266       Error(S, (Imm == 0)?
7267                "expected a VGPR index mode or a closing parenthesis" :
7268                "expected a VGPR index mode");
7269       return UNDEF;
7270     }
7271 
7272     if (Imm & Mode) {
7273       Error(S, "duplicate VGPR index mode");
7274       return UNDEF;
7275     }
7276     Imm |= Mode;
7277 
7278     if (trySkipToken(AsmToken::RParen))
7279       break;
7280     if (!skipToken(AsmToken::Comma,
7281                    "expected a comma or a closing parenthesis"))
7282       return UNDEF;
7283   }
7284 
7285   return Imm;
7286 }
7287 
7288 OperandMatchResultTy
7289 AMDGPUAsmParser::parseGPRIdxMode(OperandVector &Operands) {
7290 
7291   using namespace llvm::AMDGPU::VGPRIndexMode;
7292 
7293   int64_t Imm = 0;
7294   SMLoc S = getLoc();
7295 
7296   if (trySkipId("gpr_idx", AsmToken::LParen)) {
7297     Imm = parseGPRIdxMacro();
7298     if (Imm == UNDEF)
7299       return MatchOperand_ParseFail;
7300   } else {
7301     if (getParser().parseAbsoluteExpression(Imm))
7302       return MatchOperand_ParseFail;
7303     if (Imm < 0 || !isUInt<4>(Imm)) {
7304       Error(S, "invalid immediate: only 4-bit values are legal");
7305       return MatchOperand_ParseFail;
7306     }
7307   }
7308 
7309   Operands.push_back(
7310       AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyGprIdxMode));
7311   return MatchOperand_Success;
7312 }
7313 
7314 bool AMDGPUOperand::isGPRIdxMode() const {
7315   return isImmTy(ImmTyGprIdxMode);
7316 }
7317 
7318 //===----------------------------------------------------------------------===//
7319 // sopp branch targets
7320 //===----------------------------------------------------------------------===//
7321 
7322 OperandMatchResultTy
7323 AMDGPUAsmParser::parseSOppBrTarget(OperandVector &Operands) {
7324 
7325   // Make sure we are not parsing something
7326   // that looks like a label or an expression but is not.
7327   // This will improve error messages.
7328   if (isRegister() || isModifier())
7329     return MatchOperand_NoMatch;
7330 
7331   if (!parseExpr(Operands))
7332     return MatchOperand_ParseFail;
7333 
7334   AMDGPUOperand &Opr = ((AMDGPUOperand &)*Operands[Operands.size() - 1]);
7335   assert(Opr.isImm() || Opr.isExpr());
7336   SMLoc Loc = Opr.getStartLoc();
7337 
7338   // Currently we do not support arbitrary expressions as branch targets.
7339   // Only labels and absolute expressions are accepted.
7340   if (Opr.isExpr() && !Opr.isSymbolRefExpr()) {
7341     Error(Loc, "expected an absolute expression or a label");
7342   } else if (Opr.isImm() && !Opr.isS16Imm()) {
7343     Error(Loc, "expected a 16-bit signed jump offset");
7344   }
7345 
7346   return MatchOperand_Success;
7347 }
7348 
7349 //===----------------------------------------------------------------------===//
7350 // Boolean holding registers
7351 //===----------------------------------------------------------------------===//
7352 
7353 OperandMatchResultTy
7354 AMDGPUAsmParser::parseBoolReg(OperandVector &Operands) {
7355   return parseReg(Operands);
7356 }
7357 
7358 //===----------------------------------------------------------------------===//
7359 // mubuf
7360 //===----------------------------------------------------------------------===//
7361 
7362 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultCPol() const {
7363   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyCPol);
7364 }
7365 
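// Convert parsed MUBUF operands. For atomics, the glc bit selects the
// returning form: without it the opcode is switched to the no-return variant,
// and for returning atomics the destination register is also added as a tied
// data source.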
7366 void AMDGPUAsmParser::cvtMubufImpl(MCInst &Inst,
7367                                    const OperandVector &Operands,
7368                                    bool IsAtomic,
7369                                    bool IsLds) {
7370   OptionalImmIndexMap OptionalIdx;
7371   unsigned FirstOperandIdx = 1;
7372   bool IsAtomicReturn = false;
7373 
7374   if (IsAtomic) {
7375     for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) {
7376       AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
7377       if (!Op.isCPol())
7378         continue;
7379       IsAtomicReturn = Op.getImm() & AMDGPU::CPol::GLC;
7380       break;
7381     }
7382 
7383     if (!IsAtomicReturn) {
7384       int NewOpc = AMDGPU::getAtomicNoRetOp(Inst.getOpcode());
7385       if (NewOpc != -1)
7386         Inst.setOpcode(NewOpc);
7387     }
7388 
7389     IsAtomicReturn =  MII.get(Inst.getOpcode()).TSFlags &
7390                       SIInstrFlags::IsAtomicRet;
7391   }
7392 
7393   for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) {
7394     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
7395 
7396     // Add the register arguments
7397     if (Op.isReg()) {
7398       Op.addRegOperands(Inst, 1);
7399       // Insert a tied src for atomic return dst.
7400       // This cannot be postponed as subsequent calls to
7401       // addImmOperands rely on correct number of MC operands.
7402       if (IsAtomicReturn && i == FirstOperandIdx)
7403         Op.addRegOperands(Inst, 1);
7404       continue;
7405     }
7406 
7407     // Handle the case where soffset is an immediate
7408     if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
7409       Op.addImmOperands(Inst, 1);
7410       continue;
7411     }
7412 
7413     // Handle tokens like 'offen' which are sometimes hard-coded into the
7414     // asm string.  There are no MCInst operands for these.
7415     if (Op.isToken()) {
7416       continue;
7417     }
7418     assert(Op.isImm());
7419 
7420     // Handle optional arguments
7421     OptionalIdx[Op.getImmTy()] = i;
7422   }
7423 
7424   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset);
7425   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol, 0);
7426 
7427   if (!IsLds) { // tfe is not legal with lds opcodes
7428     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
7429   }
7430   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySWZ);
7431 }
7432 
7433 void AMDGPUAsmParser::cvtMtbuf(MCInst &Inst, const OperandVector &Operands) {
7434   OptionalImmIndexMap OptionalIdx;
7435 
7436   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
7437     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
7438 
7439     // Add the register arguments
7440     if (Op.isReg()) {
7441       Op.addRegOperands(Inst, 1);
7442       continue;
7443     }
7444 
7445     // Handle the case where soffset is an immediate
7446     if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
7447       Op.addImmOperands(Inst, 1);
7448       continue;
7449     }
7450 
7451     // Handle tokens like 'offen' which are sometimes hard-coded into the
7452     // asm string.  There are no MCInst operands for these.
7453     if (Op.isToken()) {
7454       continue;
7455     }
7456     assert(Op.isImm());
7457 
7458     // Handle optional arguments
7459     OptionalIdx[Op.getImmTy()] = i;
7460   }
7461 
7462   addOptionalImmOperand(Inst, Operands, OptionalIdx,
7463                         AMDGPUOperand::ImmTyOffset);
7464   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyFORMAT);
7465   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol, 0);
7466   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
7467   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySWZ);
7468 }
7469 
7470 //===----------------------------------------------------------------------===//
7471 // mimg
7472 //===----------------------------------------------------------------------===//
7473 
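// Convert parsed MIMG operands: the defs come first, atomics repeat the
// destination as the data source, and the remaining register operands are
// followed by the optional modifier immediates in encoding order.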
7474 void AMDGPUAsmParser::cvtMIMG(MCInst &Inst, const OperandVector &Operands,
7475                               bool IsAtomic) {
7476   unsigned I = 1;
7477   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
7478   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
7479     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
7480   }
7481 
7482   if (IsAtomic) {
7483     // Add src, same as dst
7484     assert(Desc.getNumDefs() == 1);
7485     ((AMDGPUOperand &)*Operands[I - 1]).addRegOperands(Inst, 1);
7486   }
7487 
7488   OptionalImmIndexMap OptionalIdx;
7489 
7490   for (unsigned E = Operands.size(); I != E; ++I) {
7491     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
7492 
7493     // Add the register arguments
7494     if (Op.isReg()) {
7495       Op.addRegOperands(Inst, 1);
7496     } else if (Op.isImmModifier()) {
7497       OptionalIdx[Op.getImmTy()] = I;
7498     } else if (!Op.isToken()) {
7499       llvm_unreachable("unexpected operand type");
7500     }
7501   }
7502 
7503   bool IsGFX10Plus = isGFX10Plus();
7504 
7505   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDMask);
7506   if (IsGFX10Plus)
7507     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDim, -1);
7508   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyUNorm);
7509   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol);
7510   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyR128A16);
7511   if (IsGFX10Plus)
7512     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyA16);
7513   if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::tfe) != -1)
7514     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
7515   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyLWE);
7516   if (!IsGFX10Plus)
7517     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDA);
7518   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyD16);
7519 }
7520 
7521 void AMDGPUAsmParser::cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands) {
7522   cvtMIMG(Inst, Operands, true);
7523 }
7524 
7525 void AMDGPUAsmParser::cvtSMEMAtomic(MCInst &Inst, const OperandVector &Operands) {
7526   OptionalImmIndexMap OptionalIdx;
7527   bool IsAtomicReturn = false;
7528 
7529   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
7530     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
7531     if (!Op.isCPol())
7532       continue;
7533     IsAtomicReturn = Op.getImm() & AMDGPU::CPol::GLC;
7534     break;
7535   }
7536 
7537   if (!IsAtomicReturn) {
7538     int NewOpc = AMDGPU::getAtomicNoRetOp(Inst.getOpcode());
7539     if (NewOpc != -1)
7540       Inst.setOpcode(NewOpc);
7541   }
7542 
7543   IsAtomicReturn =  MII.get(Inst.getOpcode()).TSFlags &
7544                     SIInstrFlags::IsAtomicRet;
7545 
7546   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
7547     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
7548 
7549     // Add the register arguments
7550     if (Op.isReg()) {
7551       Op.addRegOperands(Inst, 1);
7552       if (IsAtomicReturn && i == 1)
7553         Op.addRegOperands(Inst, 1);
7554       continue;
7555     }
7556 
7557     // Handle the case where soffset is an immediate
7558     if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
7559       Op.addImmOperands(Inst, 1);
7560       continue;
7561     }
7562 
7563     // Handle tokens like 'offen' which are sometimes hard-coded into the
7564     // asm string.  There are no MCInst operands for these.
7565     if (Op.isToken()) {
7566       continue;
7567     }
7568     assert(Op.isImm());
7569 
7570     // Handle optional arguments
7571     OptionalIdx[Op.getImmTy()] = i;
7572   }
7573 
7574   if ((int)Inst.getNumOperands() <=
7575       AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::offset))
7576     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset);
7577   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol, 0);
7578 }
7579 
7580 void AMDGPUAsmParser::cvtIntersectRay(MCInst &Inst,
7581                                       const OperandVector &Operands) {
7582   for (unsigned I = 1; I < Operands.size(); ++I) {
7583     auto &Operand = (AMDGPUOperand &)*Operands[I];
7584     if (Operand.isReg())
7585       Operand.addRegOperands(Inst, 1);
7586   }
7587 
7588   Inst.addOperand(MCOperand::createImm(1)); // a16
7589 }
7590 
7591 //===----------------------------------------------------------------------===//
7592 // smrd
7593 //===----------------------------------------------------------------------===//
7594 
7595 bool AMDGPUOperand::isSMRDOffset8() const {
7596   return isImm() && isUInt<8>(getImm());
7597 }
7598 
7599 bool AMDGPUOperand::isSMEMOffset() const {
7600   return isImm(); // Offset range is checked later by validator.
7601 }
7602 
7603 bool AMDGPUOperand::isSMRDLiteralOffset() const {
7604   // 32-bit literals are only supported on CI and we only want to use them
7605   // when the offset does not fit into 8 bits.
7606   return isImm() && !isUInt<8>(getImm()) && isUInt<32>(getImm());
7607 }
7608 
7609 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDOffset8() const {
7610   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
7611 }
7612 
7613 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMEMOffset() const {
7614   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
7615 }
7616 
7617 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDLiteralOffset() const {
7618   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
7619 }
7620 
7621 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultFlatOffset() const {
7622   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
7623 }
7624 
7625 //===----------------------------------------------------------------------===//
7626 // vop3
7627 //===----------------------------------------------------------------------===//
7628 
7629 static bool ConvertOmodMul(int64_t &Mul) {
7630   if (Mul != 1 && Mul != 2 && Mul != 4)
7631     return false;
7632 
7633   Mul >>= 1;
7634   return true;
7635 }
7636 
7637 static bool ConvertOmodDiv(int64_t &Div) {
7638   if (Div == 1) {
7639     Div = 0;
7640     return true;
7641   }
7642 
7643   if (Div == 2) {
7644     Div = 3;
7645     return true;
7646   }
7647 
7648   return false;
7649 }
7650 
7651 // Both bound_ctrl:0 and bound_ctrl:1 are encoded as 1.
7652 // This is intentional and ensures compatibility with sp3.
7653 // See bug 35397 for details.
7654 static bool ConvertBoundCtrl(int64_t &BoundCtrl) {
7655   if (BoundCtrl == 0 || BoundCtrl == 1) {
7656     BoundCtrl = 1;
7657     return true;
7658   }
7659   return false;
7660 }
7661 
7662 // Note: the order in this table matches the order of operands in AsmString.
7663 static const OptionalOperand AMDGPUOptionalOperandTable[] = {
7664   {"offen",   AMDGPUOperand::ImmTyOffen, true, nullptr},
7665   {"idxen",   AMDGPUOperand::ImmTyIdxen, true, nullptr},
7666   {"addr64",  AMDGPUOperand::ImmTyAddr64, true, nullptr},
7667   {"offset0", AMDGPUOperand::ImmTyOffset0, false, nullptr},
7668   {"offset1", AMDGPUOperand::ImmTyOffset1, false, nullptr},
7669   {"gds",     AMDGPUOperand::ImmTyGDS, true, nullptr},
7670   {"lds",     AMDGPUOperand::ImmTyLDS, true, nullptr},
7671   {"offset",  AMDGPUOperand::ImmTyOffset, false, nullptr},
7672   {"inst_offset", AMDGPUOperand::ImmTyInstOffset, false, nullptr},
7673   {"",        AMDGPUOperand::ImmTyCPol, false, nullptr},
7674   {"swz",     AMDGPUOperand::ImmTySWZ, true, nullptr},
7675   {"tfe",     AMDGPUOperand::ImmTyTFE, true, nullptr},
7676   {"d16",     AMDGPUOperand::ImmTyD16, true, nullptr},
7677   {"high",    AMDGPUOperand::ImmTyHigh, true, nullptr},
7678   {"clamp",   AMDGPUOperand::ImmTyClampSI, true, nullptr},
7679   {"omod",    AMDGPUOperand::ImmTyOModSI, false, ConvertOmodMul},
7680   {"unorm",   AMDGPUOperand::ImmTyUNorm, true, nullptr},
7681   {"da",      AMDGPUOperand::ImmTyDA,    true, nullptr},
7682   {"r128",    AMDGPUOperand::ImmTyR128A16,  true, nullptr},
7683   {"a16",     AMDGPUOperand::ImmTyA16,  true, nullptr},
7684   {"lwe",     AMDGPUOperand::ImmTyLWE,   true, nullptr},
7685   {"d16",     AMDGPUOperand::ImmTyD16,   true, nullptr},
7686   {"dmask",   AMDGPUOperand::ImmTyDMask, false, nullptr},
7687   {"dim",     AMDGPUOperand::ImmTyDim,   false, nullptr},
7688   {"row_mask",   AMDGPUOperand::ImmTyDppRowMask, false, nullptr},
7689   {"bank_mask",  AMDGPUOperand::ImmTyDppBankMask, false, nullptr},
7690   {"bound_ctrl", AMDGPUOperand::ImmTyDppBoundCtrl, false, ConvertBoundCtrl},
7691   {"fi",         AMDGPUOperand::ImmTyDppFi, false, nullptr},
7692   {"dst_sel",    AMDGPUOperand::ImmTySdwaDstSel, false, nullptr},
7693   {"src0_sel",   AMDGPUOperand::ImmTySdwaSrc0Sel, false, nullptr},
7694   {"src1_sel",   AMDGPUOperand::ImmTySdwaSrc1Sel, false, nullptr},
7695   {"dst_unused", AMDGPUOperand::ImmTySdwaDstUnused, false, nullptr},
7696   {"compr", AMDGPUOperand::ImmTyExpCompr, true, nullptr },
7697   {"vm", AMDGPUOperand::ImmTyExpVM, true, nullptr},
7698   {"op_sel", AMDGPUOperand::ImmTyOpSel, false, nullptr},
7699   {"op_sel_hi", AMDGPUOperand::ImmTyOpSelHi, false, nullptr},
7700   {"neg_lo", AMDGPUOperand::ImmTyNegLo, false, nullptr},
7701   {"neg_hi", AMDGPUOperand::ImmTyNegHi, false, nullptr},
7702   {"blgp", AMDGPUOperand::ImmTyBLGP, false, nullptr},
7703   {"cbsz", AMDGPUOperand::ImmTyCBSZ, false, nullptr},
7704   {"abid", AMDGPUOperand::ImmTyABID, false, nullptr}
7705 };
7706 
7707 void AMDGPUAsmParser::onBeginOfFile() {
7708   if (!getParser().getStreamer().getTargetStreamer() ||
7709       getSTI().getTargetTriple().getArch() == Triple::r600)
7710     return;
7711 
7712   if (!getTargetStreamer().getTargetID())
7713     getTargetStreamer().initializeTargetID(getSTI(), getSTI().getFeatureString());
7714 
7715   if (isHsaAbiVersion3AndAbove(&getSTI()))
7716     getTargetStreamer().EmitDirectiveAMDGCNTarget();
7717 }
7718 
7719 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOperand(OperandVector &Operands) {
7720 
7721   OperandMatchResultTy res = parseOptionalOpr(Operands);
7722 
7723   // This is a hack to enable hardcoded mandatory operands which follow
7724   // optional operands.
7725   //
7726   // Current design assumes that all operands after the first optional operand
7727   // are also optional. However, the implementation of some instructions violates
7728   // this rule (see e.g. flat/global atomics, which have hardcoded 'glc' operands).
7729   //
7730   // To alleviate this problem, we have to (implicitly) parse extra operands
7731   // to make sure autogenerated parser of custom operands never hit hardcoded
7732   // mandatory operands.
7733 
7734   for (unsigned i = 0; i < MAX_OPR_LOOKAHEAD; ++i) {
7735     if (res != MatchOperand_Success ||
7736         isToken(AsmToken::EndOfStatement))
7737       break;
7738 
7739     trySkipToken(AsmToken::Comma);
7740     res = parseOptionalOpr(Operands);
7741   }
7742 
7743   return res;
7744 }
7745 
7746 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOpr(OperandVector &Operands) {
7747   OperandMatchResultTy res;
7748   for (const OptionalOperand &Op : AMDGPUOptionalOperandTable) {
7749     // try to parse any optional operand here
7750     if (Op.IsBit) {
7751       res = parseNamedBit(Op.Name, Operands, Op.Type);
7752     } else if (Op.Type == AMDGPUOperand::ImmTyOModSI) {
7753       res = parseOModOperand(Operands);
7754     } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstSel ||
7755                Op.Type == AMDGPUOperand::ImmTySdwaSrc0Sel ||
7756                Op.Type == AMDGPUOperand::ImmTySdwaSrc1Sel) {
7757       res = parseSDWASel(Operands, Op.Name, Op.Type);
7758     } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstUnused) {
7759       res = parseSDWADstUnused(Operands);
7760     } else if (Op.Type == AMDGPUOperand::ImmTyOpSel ||
7761                Op.Type == AMDGPUOperand::ImmTyOpSelHi ||
7762                Op.Type == AMDGPUOperand::ImmTyNegLo ||
7763                Op.Type == AMDGPUOperand::ImmTyNegHi) {
7764       res = parseOperandArrayWithPrefix(Op.Name, Operands, Op.Type,
7765                                         Op.ConvertResult);
7766     } else if (Op.Type == AMDGPUOperand::ImmTyDim) {
7767       res = parseDim(Operands);
7768     } else if (Op.Type == AMDGPUOperand::ImmTyCPol) {
7769       res = parseCPol(Operands);
7770     } else {
7771       res = parseIntWithPrefix(Op.Name, Operands, Op.Type, Op.ConvertResult);
7772       if (Op.Type == AMDGPUOperand::ImmTyBLGP && res == MatchOperand_NoMatch) {
7773         res = parseOperandArrayWithPrefix("neg", Operands,
7774                                           AMDGPUOperand::ImmTyBLGP,
7775                                           nullptr);
7776       }
7777     }
7778     if (res != MatchOperand_NoMatch) {
7779       return res;
7780     }
7781   }
7782   return MatchOperand_NoMatch;
7783 }
7784 
7785 OperandMatchResultTy AMDGPUAsmParser::parseOModOperand(OperandVector &Operands) {
7786   StringRef Name = getTokenStr();
7787   if (Name == "mul") {
7788     return parseIntWithPrefix("mul", Operands,
7789                               AMDGPUOperand::ImmTyOModSI, ConvertOmodMul);
7790   }
7791 
7792   if (Name == "div") {
7793     return parseIntWithPrefix("div", Operands,
7794                               AMDGPUOperand::ImmTyOModSI, ConvertOmodDiv);
7795   }
7796 
7797   return MatchOperand_NoMatch;
7798 }
7799 
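// After the common VOP3P conversion, the op_sel bit just past the last
// source operand controls the destination; propagate it into src0_modifiers
// as DST_OP_SEL.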
7800 void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands) {
7801   cvtVOP3P(Inst, Operands);
7802 
7803   int Opc = Inst.getOpcode();
7804 
7805   int SrcNum;
7806   const int Ops[] = { AMDGPU::OpName::src0,
7807                       AMDGPU::OpName::src1,
7808                       AMDGPU::OpName::src2 };
7809   for (SrcNum = 0;
7810        SrcNum < 3 && AMDGPU::getNamedOperandIdx(Opc, Ops[SrcNum]) != -1;
7811        ++SrcNum);
7812   assert(SrcNum > 0);
7813 
7814   int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
7815   unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
7816 
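  // The op_sel bit just past the last source applies to the destination; it is
  // encoded as the DST_OP_SEL flag in src0_modifiers.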
7817   if ((OpSel & (1 << SrcNum)) != 0) {
7818     int ModIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers);
7819     uint32_t ModVal = Inst.getOperand(ModIdx).getImm();
7820     Inst.getOperand(ModIdx).setImm(ModVal | SISrcMods::DST_OP_SEL);
7821   }
7822 }
7823 
7824 static bool isRegOrImmWithInputMods(const MCInstrDesc &Desc, unsigned OpNum) {
7825   // 1. This operand holds input modifiers
7826   return Desc.OpInfo[OpNum].OperandType == AMDGPU::OPERAND_INPUT_MODS
7827       // 2. This is not the last operand
7828       && Desc.NumOperands > (OpNum + 1)
7829       // 3. The next operand has a register class
7830       && Desc.OpInfo[OpNum + 1].RegClass != -1
7831       // 4. The next operand is not tied to any other operand
7832       && Desc.getOperandConstraint(OpNum + 1, MCOI::OperandConstraint::TIED_TO) == -1;
7833 }
7834 
7835 void AMDGPUAsmParser::cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands)
7836 {
7837   OptionalImmIndexMap OptionalIdx;
7838   unsigned Opc = Inst.getOpcode();
7839 
7840   unsigned I = 1;
7841   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
7842   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
7843     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
7844   }
7845 
7846   for (unsigned E = Operands.size(); I != E; ++I) {
7847     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
7848     if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
7849       Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
7850     } else if (Op.isInterpSlot() ||
7851                Op.isInterpAttr() ||
7852                Op.isAttrChan()) {
7853       Inst.addOperand(MCOperand::createImm(Op.getImm()));
7854     } else if (Op.isImmModifier()) {
7855       OptionalIdx[Op.getImmTy()] = I;
7856     } else {
7857       llvm_unreachable("unhandled operand type");
7858     }
7859   }
7860 
7861   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::high) != -1) {
7862     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyHigh);
7863   }
7864 
7865   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) {
7866     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI);
7867   }
7868 
7869   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) {
7870     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI);
7871   }
7872 }
7873 
7874 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands,
7875                               OptionalImmIndexMap &OptionalIdx) {
7876   unsigned Opc = Inst.getOpcode();
7877 
7878   unsigned I = 1;
7879   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
7880   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
7881     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
7882   }
7883 
7884   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers) != -1) {
7885     // This instruction has src modifiers
7886     for (unsigned E = Operands.size(); I != E; ++I) {
7887       AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
7888       if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
7889         Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
7890       } else if (Op.isImmModifier()) {
7891         OptionalIdx[Op.getImmTy()] = I;
7892       } else if (Op.isRegOrImm()) {
7893         Op.addRegOrImmOperands(Inst, 1);
7894       } else {
7895         llvm_unreachable("unhandled operand type");
7896       }
7897     }
7898   } else {
7899     // No src modifiers
7900     for (unsigned E = Operands.size(); I != E; ++I) {
7901       AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
7902       if (Op.isMod()) {
7903         OptionalIdx[Op.getImmTy()] = I;
7904       } else {
7905         Op.addRegOrImmOperands(Inst, 1);
7906       }
7907     }
7908   }
7909 
7910   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) {
7911     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI);
7912   }
7913 
7914   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) {
7915     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI);
7916   }
7917 
7918   // Special case v_mac_{f16, f32} and v_fmac_{f16, f32, f64}
7919   // (gfx906/gfx90a/gfx10+): they have a src2 register operand that is tied to
7920   // the dst operand. The assembler does not allow modifiers for this operand,
7921   // so src2_modifiers must be 0.
7922   if (Opc == AMDGPU::V_MAC_F32_e64_gfx6_gfx7 ||
7923       Opc == AMDGPU::V_MAC_F32_e64_gfx10 ||
7924       Opc == AMDGPU::V_MAC_F32_e64_vi ||
7925       Opc == AMDGPU::V_MAC_LEGACY_F32_e64_gfx6_gfx7 ||
7926       Opc == AMDGPU::V_MAC_LEGACY_F32_e64_gfx10 ||
7927       Opc == AMDGPU::V_MAC_F16_e64_vi ||
7928       Opc == AMDGPU::V_FMAC_F64_e64_gfx90a ||
7929       Opc == AMDGPU::V_FMAC_F32_e64_gfx10 ||
7930       Opc == AMDGPU::V_FMAC_F32_e64_vi ||
7931       Opc == AMDGPU::V_FMAC_LEGACY_F32_e64_gfx10 ||
7932       Opc == AMDGPU::V_FMAC_F16_e64_gfx10) {
7933     auto it = Inst.begin();
7934     std::advance(it, AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2_modifiers));
7935     it = Inst.insert(it, MCOperand::createImm(0)); // no modifiers for src2
7936     ++it;
7937     // Copy the operand to ensure it's not invalidated when Inst grows.
7938     Inst.insert(it, MCOperand(Inst.getOperand(0))); // src2 = dst
7939   }
7940 }
7941 
7942 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands) {
7943   OptionalImmIndexMap OptionalIdx;
7944   cvtVOP3(Inst, Operands, OptionalIdx);
7945 }
7946 
7947 void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands,
7948                                OptionalImmIndexMap &OptIdx) {
7949   const int Opc = Inst.getOpcode();
7950   const MCInstrDesc &Desc = MII.get(Opc);
7951 
7952   const bool IsPacked = (Desc.TSFlags & SIInstrFlags::IsPacked) != 0;
7953 
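  // If the instruction has a vdst_in operand, repeat the vdst register for it.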
7954   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst_in) != -1) {
7955     assert(!IsPacked);
7956     Inst.addOperand(Inst.getOperand(0));
7957   }
7958 
7959   // FIXME: This is messy. Parse the modifiers as if it were a normal VOP3
7960   // instruction, and then figure out where to actually put the modifiers.
7961 
7962   int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
7963   if (OpSelIdx != -1) {
7964     addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSel);
7965   }
7966 
7967   int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi);
7968   if (OpSelHiIdx != -1) {
7969     int DefaultVal = IsPacked ? -1 : 0;
7970     addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSelHi,
7971                           DefaultVal);
7972   }
7973 
7974   int NegLoIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_lo);
7975   if (NegLoIdx != -1) {
7976     addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegLo);
7977     addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegHi);
7978   }
7979 
7980   const int Ops[] = { AMDGPU::OpName::src0,
7981                       AMDGPU::OpName::src1,
7982                       AMDGPU::OpName::src2 };
7983   const int ModOps[] = { AMDGPU::OpName::src0_modifiers,
7984                          AMDGPU::OpName::src1_modifiers,
7985                          AMDGPU::OpName::src2_modifiers };
7986 
7987   unsigned OpSel = 0;
7988   unsigned OpSelHi = 0;
7989   unsigned NegLo = 0;
7990   unsigned NegHi = 0;
7991 
7992   if (OpSelIdx != -1)
7993     OpSel = Inst.getOperand(OpSelIdx).getImm();
7994 
7995   if (OpSelHiIdx != -1)
7996     OpSelHi = Inst.getOperand(OpSelHiIdx).getImm();
7997 
7998   if (NegLoIdx != -1) {
7999     int NegHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_hi);
8000     NegLo = Inst.getOperand(NegLoIdx).getImm();
8001     NegHi = Inst.getOperand(NegHiIdx).getImm();
8002   }
8003 
8004   for (int J = 0; J < 3; ++J) {
8005     int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]);
8006     if (OpIdx == -1)
8007       break;
8008 
8009     uint32_t ModVal = 0;
8010 
8011     if ((OpSel & (1 << J)) != 0)
8012       ModVal |= SISrcMods::OP_SEL_0;
8013 
8014     if ((OpSelHi & (1 << J)) != 0)
8015       ModVal |= SISrcMods::OP_SEL_1;
8016 
8017     if ((NegLo & (1 << J)) != 0)
8018       ModVal |= SISrcMods::NEG;
8019 
8020     if ((NegHi & (1 << J)) != 0)
8021       ModVal |= SISrcMods::NEG_HI;
8022 
8023     int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]);
8024 
8025     Inst.getOperand(ModIdx).setImm(Inst.getOperand(ModIdx).getImm() | ModVal);
8026   }
8027 }
8028 
8029 void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands) {
8030   OptionalImmIndexMap OptIdx;
8031   cvtVOP3(Inst, Operands, OptIdx);
8032   cvtVOP3P(Inst, Operands, OptIdx);
8033 }
8034 
8035 //===----------------------------------------------------------------------===//
8036 // dpp
8037 //===----------------------------------------------------------------------===//
8038 
8039 bool AMDGPUOperand::isDPP8() const {
8040   return isImmTy(ImmTyDPP8);
8041 }
8042 
8043 bool AMDGPUOperand::isDPPCtrl() const {
8044   using namespace AMDGPU::DPP;
8045 
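  // dpp_ctrl is a 9-bit field, hence the isUInt<9> check below.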
8046   bool result = isImm() && getImmTy() == ImmTyDppCtrl && isUInt<9>(getImm());
8047   if (result) {
8048     int64_t Imm = getImm();
8049     return (Imm >= DppCtrl::QUAD_PERM_FIRST && Imm <= DppCtrl::QUAD_PERM_LAST) ||
8050            (Imm >= DppCtrl::ROW_SHL_FIRST && Imm <= DppCtrl::ROW_SHL_LAST) ||
8051            (Imm >= DppCtrl::ROW_SHR_FIRST && Imm <= DppCtrl::ROW_SHR_LAST) ||
8052            (Imm >= DppCtrl::ROW_ROR_FIRST && Imm <= DppCtrl::ROW_ROR_LAST) ||
8053            (Imm == DppCtrl::WAVE_SHL1) ||
8054            (Imm == DppCtrl::WAVE_ROL1) ||
8055            (Imm == DppCtrl::WAVE_SHR1) ||
8056            (Imm == DppCtrl::WAVE_ROR1) ||
8057            (Imm == DppCtrl::ROW_MIRROR) ||
8058            (Imm == DppCtrl::ROW_HALF_MIRROR) ||
8059            (Imm == DppCtrl::BCAST15) ||
8060            (Imm == DppCtrl::BCAST31) ||
8061            (Imm >= DppCtrl::ROW_SHARE_FIRST && Imm <= DppCtrl::ROW_SHARE_LAST) ||
8062            (Imm >= DppCtrl::ROW_XMASK_FIRST && Imm <= DppCtrl::ROW_XMASK_LAST);
8063   }
8064   return false;
8065 }
8066 
8067 //===----------------------------------------------------------------------===//
8068 // mAI
8069 //===----------------------------------------------------------------------===//
8070 
8071 bool AMDGPUOperand::isBLGP() const {
8072   return isImm() && getImmTy() == ImmTyBLGP && isUInt<3>(getImm());
8073 }
8074 
8075 bool AMDGPUOperand::isCBSZ() const {
8076   return isImm() && getImmTy() == ImmTyCBSZ && isUInt<3>(getImm());
8077 }
8078 
8079 bool AMDGPUOperand::isABID() const {
8080   return isImm() && getImmTy() == ImmTyABID && isUInt<4>(getImm());
8081 }
8082 
8083 bool AMDGPUOperand::isS16Imm() const {
8084   return isImm() && (isInt<16>(getImm()) || isUInt<16>(getImm()));
8085 }
8086 
8087 bool AMDGPUOperand::isU16Imm() const {
8088   return isImm() && isUInt<16>(getImm());
8089 }
8090 
8091 //===----------------------------------------------------------------------===//
8092 // dim
8093 //===----------------------------------------------------------------------===//
8094 
8095 bool AMDGPUAsmParser::parseDimId(unsigned &Encoding) {
8096   // We want to allow "dim:1D" etc.,
8097   // but the initial 1 is tokenized as an integer.
8098   std::string Token;
8099   if (isToken(AsmToken::Integer)) {
8100     SMLoc Loc = getToken().getEndLoc();
8101     Token = std::string(getTokenStr());
8102     lex();
8103     if (getLoc() != Loc)
8104       return false;
8105   }
8106 
8107   StringRef Suffix;
8108   if (!parseId(Suffix))
8109     return false;
8110   Token += Suffix;
8111 
8112   StringRef DimId = Token;
8113   if (DimId.startswith("SQ_RSRC_IMG_"))
8114     DimId = DimId.drop_front(12);
8115 
8116   const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByAsmSuffix(DimId);
8117   if (!DimInfo)
8118     return false;
8119 
8120   Encoding = DimInfo->Encoding;
8121   return true;
8122 }
8123 
8124 OperandMatchResultTy AMDGPUAsmParser::parseDim(OperandVector &Operands) {
8125   if (!isGFX10Plus())
8126     return MatchOperand_NoMatch;
8127 
8128   SMLoc S = getLoc();
8129 
8130   if (!trySkipId("dim", AsmToken::Colon))
8131     return MatchOperand_NoMatch;
8132 
8133   unsigned Encoding;
8134   SMLoc Loc = getLoc();
8135   if (!parseDimId(Encoding)) {
8136     Error(Loc, "invalid dim value");
8137     return MatchOperand_ParseFail;
8138   }
8139 
8140   Operands.push_back(AMDGPUOperand::CreateImm(this, Encoding, S,
8141                                               AMDGPUOperand::ImmTyDim));
8142   return MatchOperand_Success;
8143 }
8144 
8145 //===----------------------------------------------------------------------===//
8146 // dpp
8147 //===----------------------------------------------------------------------===//
8148 
8149 OperandMatchResultTy AMDGPUAsmParser::parseDPP8(OperandVector &Operands) {
8150   SMLoc S = getLoc();
8151 
8152   if (!isGFX10Plus() || !trySkipId("dpp8", AsmToken::Colon))
8153     return MatchOperand_NoMatch;
8154 
8155   // dpp8:[%d,%d,%d,%d,%d,%d,%d,%d]
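  // e.g. dpp8:[7,6,5,4,3,2,1,0]; each select is a lane index in the range 0..7.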
8156 
8157   int64_t Sels[8];
8158 
8159   if (!skipToken(AsmToken::LBrac, "expected an opening square bracket"))
8160     return MatchOperand_ParseFail;
8161 
8162   for (size_t i = 0; i < 8; ++i) {
8163     if (i > 0 && !skipToken(AsmToken::Comma, "expected a comma"))
8164       return MatchOperand_ParseFail;
8165 
8166     SMLoc Loc = getLoc();
8167     if (getParser().parseAbsoluteExpression(Sels[i]))
8168       return MatchOperand_ParseFail;
8169     if (Sels[i] < 0 || Sels[i] > 7) {
8170       Error(Loc, "expected a 3-bit value");
8171       return MatchOperand_ParseFail;
8172     }
8173   }
8174 
8175   if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
8176     return MatchOperand_ParseFail;
8177 
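  // Pack the eight 3-bit selects into one immediate, select 0 in the low bits.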
8178   unsigned DPP8 = 0;
8179   for (size_t i = 0; i < 8; ++i)
8180     DPP8 |= (Sels[i] << (i * 3));
8181 
8182   Operands.push_back(AMDGPUOperand::CreateImm(this, DPP8, S, AMDGPUOperand::ImmTyDPP8));
8183   return MatchOperand_Success;
8184 }
8185 
8186 bool
8187 AMDGPUAsmParser::isSupportedDPPCtrl(StringRef Ctrl,
8188                                     const OperandVector &Operands) {
8189   if (Ctrl == "row_newbcast")
8190     return isGFX90A();
8191 
8192   if (Ctrl == "row_share" ||
8193       Ctrl == "row_xmask")
8194     return isGFX10Plus();
8195 
8196   if (Ctrl == "wave_shl" ||
8197       Ctrl == "wave_shr" ||
8198       Ctrl == "wave_rol" ||
8199       Ctrl == "wave_ror" ||
8200       Ctrl == "row_bcast")
8201     return isVI() || isGFX9();
8202 
8203   return Ctrl == "row_mirror" ||
8204          Ctrl == "row_half_mirror" ||
8205          Ctrl == "quad_perm" ||
8206          Ctrl == "row_shl" ||
8207          Ctrl == "row_shr" ||
8208          Ctrl == "row_ror";
8209 }
8210 
8211 int64_t
8212 AMDGPUAsmParser::parseDPPCtrlPerm() {
8213   // quad_perm:[%d,%d,%d,%d]
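  // e.g. quad_perm:[0,1,2,3] (identity) or quad_perm:[3,2,1,0] (reverse within
  // each quad).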
8214 
8215   if (!skipToken(AsmToken::LBrac, "expected an opening square bracket"))
8216     return -1;
8217 
8218   int64_t Val = 0;
8219   for (int i = 0; i < 4; ++i) {
8220     if (i > 0 && !skipToken(AsmToken::Comma, "expected a comma"))
8221       return -1;
8222 
8223     int64_t Temp;
8224     SMLoc Loc = getLoc();
8225     if (getParser().parseAbsoluteExpression(Temp))
8226       return -1;
8227     if (Temp < 0 || Temp > 3) {
8228       Error(Loc, "expected a 2-bit value");
8229       return -1;
8230     }
8231 
8232     Val += (Temp << i * 2);
8233   }
8234 
8235   if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
8236     return -1;
8237 
8238   return Val;
8239 }
8240 
8241 int64_t
8242 AMDGPUAsmParser::parseDPPCtrlSel(StringRef Ctrl) {
8243   using namespace AMDGPU::DPP;
8244 
8245   // sel:%d
8246 
8247   int64_t Val;
8248   SMLoc Loc = getLoc();
8249 
8250   if (getParser().parseAbsoluteExpression(Val))
8251     return -1;
8252 
8253   struct DppCtrlCheck {
8254     int64_t Ctrl;
8255     int Lo;
8256     int Hi;
8257   };
8258 
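  // Each entry gives the base dpp_ctrl encoding and the inclusive [Lo, Hi] range
  // of accepted values; when Lo == Hi only that single value is legal and it maps
  // directly to the base encoding, otherwise the parsed value is OR'ed into it.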
8259   DppCtrlCheck Check = StringSwitch<DppCtrlCheck>(Ctrl)
8260     .Case("wave_shl",  {DppCtrl::WAVE_SHL1,       1,  1})
8261     .Case("wave_rol",  {DppCtrl::WAVE_ROL1,       1,  1})
8262     .Case("wave_shr",  {DppCtrl::WAVE_SHR1,       1,  1})
8263     .Case("wave_ror",  {DppCtrl::WAVE_ROR1,       1,  1})
8264     .Case("row_shl",   {DppCtrl::ROW_SHL0,        1, 15})
8265     .Case("row_shr",   {DppCtrl::ROW_SHR0,        1, 15})
8266     .Case("row_ror",   {DppCtrl::ROW_ROR0,        1, 15})
8267     .Case("row_share", {DppCtrl::ROW_SHARE_FIRST, 0, 15})
8268     .Case("row_xmask", {DppCtrl::ROW_XMASK_FIRST, 0, 15})
8269     .Case("row_newbcast", {DppCtrl::ROW_NEWBCAST_FIRST, 0, 15})
8270     .Default({-1, 0, 0});
8271 
8272   bool Valid;
8273   if (Check.Ctrl == -1) {
8274     Valid = (Ctrl == "row_bcast" && (Val == 15 || Val == 31));
8275     Val = (Val == 15) ? DppCtrl::BCAST15 : DppCtrl::BCAST31;
8276   } else {
8277     Valid = Check.Lo <= Val && Val <= Check.Hi;
8278     Val = (Check.Lo == Check.Hi) ? Check.Ctrl : (Check.Ctrl | Val);
8279   }
8280 
8281   if (!Valid) {
8282     Error(Loc, Twine("invalid ", Ctrl) + Twine(" value"));
8283     return -1;
8284   }
8285 
8286   return Val;
8287 }
8288 
8289 OperandMatchResultTy
8290 AMDGPUAsmParser::parseDPPCtrl(OperandVector &Operands) {
8291   using namespace AMDGPU::DPP;
8292 
8293   if (!isToken(AsmToken::Identifier) ||
8294       !isSupportedDPPCtrl(getTokenStr(), Operands))
8295     return MatchOperand_NoMatch;
8296 
8297   SMLoc S = getLoc();
8298   int64_t Val = -1;
8299   StringRef Ctrl;
8300 
8301   parseId(Ctrl);
8302 
8303   if (Ctrl == "row_mirror") {
8304     Val = DppCtrl::ROW_MIRROR;
8305   } else if (Ctrl == "row_half_mirror") {
8306     Val = DppCtrl::ROW_HALF_MIRROR;
8307   } else {
8308     if (skipToken(AsmToken::Colon, "expected a colon")) {
8309       if (Ctrl == "quad_perm") {
8310         Val = parseDPPCtrlPerm();
8311       } else {
8312         Val = parseDPPCtrlSel(Ctrl);
8313       }
8314     }
8315   }
8316 
8317   if (Val == -1)
8318     return MatchOperand_ParseFail;
8319 
8320   Operands.push_back(
8321     AMDGPUOperand::CreateImm(this, Val, S, AMDGPUOperand::ImmTyDppCtrl));
8322   return MatchOperand_Success;
8323 }
8324 
8325 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultRowMask() const {
8326   return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppRowMask);
8327 }
8328 
8329 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultEndpgmImmOperands() const {
8330   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyEndpgm);
8331 }
8332 
8333 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBankMask() const {
8334   return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppBankMask);
8335 }
8336 
8337 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBoundCtrl() const {
8338   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppBoundCtrl);
8339 }
8340 
8341 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultFI() const {
8342   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppFi);
8343 }
8344 
8345 void AMDGPUAsmParser::cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8) {
8346   OptionalImmIndexMap OptionalIdx;
8347 
8348   unsigned Opc = Inst.getOpcode();
8349   bool HasModifiers =
8350       AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers) != -1;
8351   unsigned I = 1;
8352   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
8353   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
8354     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
8355   }
8356 
8357   int Fi = 0;
8358   for (unsigned E = Operands.size(); I != E; ++I) {
8359     auto TiedTo = Desc.getOperandConstraint(Inst.getNumOperands(),
8360                                             MCOI::TIED_TO);
8361     if (TiedTo != -1) {
8362       assert((unsigned)TiedTo < Inst.getNumOperands());
8363       // Handle the tied 'old' or src2 operand for MAC instructions.
8364       Inst.addOperand(Inst.getOperand(TiedTo));
8365     }
8366     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
8367     // Add the register arguments
8368     if (Op.isReg() && validateVccOperand(Op.getReg())) {
8369       // VOP2b (v_add_u32, v_sub_u32 ...) dpp forms use the "vcc" token.
8370       // Skip it.
8371       continue;
8372     }
8373 
8374     if (IsDPP8) {
8375       if (Op.isDPP8()) {
8376         Op.addImmOperands(Inst, 1);
8377       } else if (HasModifiers &&
8378                  isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
8379         Op.addRegWithFPInputModsOperands(Inst, 2);
8380       } else if (Op.isFI()) {
8381         Fi = Op.getImm();
8382       } else if (Op.isReg()) {
8383         Op.addRegOperands(Inst, 1);
8384       } else {
8385         llvm_unreachable("Invalid operand type");
8386       }
8387     } else {
8388       if (HasModifiers &&
8389           isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
8390         Op.addRegWithFPInputModsOperands(Inst, 2);
8391       } else if (Op.isReg()) {
8392         Op.addRegOperands(Inst, 1);
8393       } else if (Op.isDPPCtrl()) {
8394         Op.addImmOperands(Inst, 1);
8395       } else if (Op.isImm()) {
8396         // Handle optional arguments
8397         OptionalIdx[Op.getImmTy()] = I;
8398       } else {
8399         llvm_unreachable("Invalid operand type");
8400       }
8401     }
8402   }
8403 
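  // dpp8 encodes only the lane selects and the FI bit, so just materialize FI;
  // regular dpp additionally takes row_mask, bank_mask, bound_ctrl and fi.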
8404   if (IsDPP8) {
8405     using namespace llvm::AMDGPU::DPP;
8406     Inst.addOperand(MCOperand::createImm(Fi ? DPP8_FI_1 : DPP8_FI_0));
8407   } else {
8408     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppRowMask, 0xf);
8409     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBankMask, 0xf);
8410     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBoundCtrl);
8411     if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::fi) != -1) {
8412       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppFi);
8413     }
8414   }
8415 }
8416 
8417 //===----------------------------------------------------------------------===//
8418 // sdwa
8419 //===----------------------------------------------------------------------===//
8420 
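// Parse an SDWA operand select such as 'dst_sel:WORD_1' or 'src0_sel:BYTE_3';
// Prefix names the operand the select applies to.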
8421 OperandMatchResultTy
8422 AMDGPUAsmParser::parseSDWASel(OperandVector &Operands, StringRef Prefix,
8423                               AMDGPUOperand::ImmTy Type) {
8424   using namespace llvm::AMDGPU::SDWA;
8425 
8426   SMLoc S = getLoc();
8427   StringRef Value;
8428   OperandMatchResultTy res;
8429 
8430   SMLoc StringLoc;
8431   res = parseStringWithPrefix(Prefix, Value, StringLoc);
8432   if (res != MatchOperand_Success) {
8433     return res;
8434   }
8435 
8436   int64_t Int;
8437   Int = StringSwitch<int64_t>(Value)
8438         .Case("BYTE_0", SdwaSel::BYTE_0)
8439         .Case("BYTE_1", SdwaSel::BYTE_1)
8440         .Case("BYTE_2", SdwaSel::BYTE_2)
8441         .Case("BYTE_3", SdwaSel::BYTE_3)
8442         .Case("WORD_0", SdwaSel::WORD_0)
8443         .Case("WORD_1", SdwaSel::WORD_1)
8444         .Case("DWORD", SdwaSel::DWORD)
8445         .Default(0xffffffff);
8446 
8447   if (Int == 0xffffffff) {
8448     Error(StringLoc, "invalid " + Twine(Prefix) + " value");
8449     return MatchOperand_ParseFail;
8450   }
8451 
8452   Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, Type));
8453   return MatchOperand_Success;
8454 }
8455 
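// Parse the SDWA dst_unused operand, e.g. 'dst_unused:UNUSED_PRESERVE'.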
8456 OperandMatchResultTy
8457 AMDGPUAsmParser::parseSDWADstUnused(OperandVector &Operands) {
8458   using namespace llvm::AMDGPU::SDWA;
8459 
8460   SMLoc S = getLoc();
8461   StringRef Value;
8462   OperandMatchResultTy res;
8463 
8464   SMLoc StringLoc;
8465   res = parseStringWithPrefix("dst_unused", Value, StringLoc);
8466   if (res != MatchOperand_Success) {
8467     return res;
8468   }
8469 
8470   int64_t Int;
8471   Int = StringSwitch<int64_t>(Value)
8472         .Case("UNUSED_PAD", DstUnused::UNUSED_PAD)
8473         .Case("UNUSED_SEXT", DstUnused::UNUSED_SEXT)
8474         .Case("UNUSED_PRESERVE", DstUnused::UNUSED_PRESERVE)
8475         .Default(0xffffffff);
8476 
8477   if (Int == 0xffffffff) {
8478     Error(StringLoc, "invalid dst_unused value");
8479     return MatchOperand_ParseFail;
8480   }
8481 
8482   Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, AMDGPUOperand::ImmTySdwaDstUnused));
8483   return MatchOperand_Success;
8484 }
8485 
8486 void AMDGPUAsmParser::cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands) {
8487   cvtSDWA(Inst, Operands, SIInstrFlags::VOP1);
8488 }
8489 
8490 void AMDGPUAsmParser::cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands) {
8491   cvtSDWA(Inst, Operands, SIInstrFlags::VOP2);
8492 }
8493 
8494 void AMDGPUAsmParser::cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands) {
8495   cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, true, true);
8496 }
8497 
8498 void AMDGPUAsmParser::cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands) {
8499   cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, false, true);
8500 }
8501 
8502 void AMDGPUAsmParser::cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands) {
8503   cvtSDWA(Inst, Operands, SIInstrFlags::VOPC, isVI());
8504 }
8505 
8506 void AMDGPUAsmParser::cvtSDWA(MCInst &Inst, const OperandVector &Operands,
8507                               uint64_t BasicInstType,
8508                               bool SkipDstVcc,
8509                               bool SkipSrcVcc) {
8510   using namespace llvm::AMDGPU::SDWA;
8511 
8512   OptionalImmIndexMap OptionalIdx;
8513   bool SkipVcc = SkipDstVcc || SkipSrcVcc;
8514   bool SkippedVcc = false;
8515 
8516   unsigned I = 1;
8517   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
8518   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
8519     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
8520   }
8521 
8522   for (unsigned E = Operands.size(); I != E; ++I) {
8523     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
8524     if (SkipVcc && !SkippedVcc && Op.isReg() &&
8525         (Op.getReg() == AMDGPU::VCC || Op.getReg() == AMDGPU::VCC_LO)) {
8526       // VOP2b (v_add_u32, v_sub_u32 ...) sdwa forms use the "vcc" token as dst.
8527       // Skip it if it's 2nd (e.g. v_add_i32_sdwa v1, vcc, v2, v3)
8528       // or 4th (v_addc_u32_sdwa v1, vcc, v2, v3, vcc) operand.
8529       // Skip VCC only if we didn't skip it on previous iteration.
8530       // Note that src0 and src1 occupy 2 slots each because of modifiers.
8531       if (BasicInstType == SIInstrFlags::VOP2 &&
8532           ((SkipDstVcc && Inst.getNumOperands() == 1) ||
8533            (SkipSrcVcc && Inst.getNumOperands() == 5))) {
8534         SkippedVcc = true;
8535         continue;
8536       } else if (BasicInstType == SIInstrFlags::VOPC &&
8537                  Inst.getNumOperands() == 0) {
8538         SkippedVcc = true;
8539         continue;
8540       }
8541     }
8542     if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
8543       Op.addRegOrImmWithInputModsOperands(Inst, 2);
8544     } else if (Op.isImm()) {
8545       // Handle optional arguments
8546       OptionalIdx[Op.getImmTy()] = I;
8547     } else {
8548       llvm_unreachable("Invalid operand type");
8549     }
8550     SkippedVcc = false;
8551   }
8552 
8553   if (Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx10 &&
8554       Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx9 &&
8555       Inst.getOpcode() != AMDGPU::V_NOP_sdwa_vi) {
8556     // V_NOP_sdwa_vi/gfx9/gfx10 has no optional sdwa arguments.
8557     switch (BasicInstType) {
8558     case SIInstrFlags::VOP1:
8559       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
8560       if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) {
8561         addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
8562       }
8563       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD);
8564       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE);
8565       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
8566       break;
8567 
8568     case SIInstrFlags::VOP2:
8569       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
8570       if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) {
8571         addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
8572       }
8573       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD);
8574       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE);
8575       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
8576       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD);
8577       break;
8578 
8579     case SIInstrFlags::VOPC:
8580       if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::clamp) != -1)
8581         addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
8582       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
8583       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD);
8584       break;
8585 
8586     default:
8587       llvm_unreachable("Invalid instruction type. Only VOP1, VOP2 and VOPC allowed");
8588     }
8589   }
8590 
8591   // Special case v_mac_{f16, f32}:
8592   // it has a src2 register operand that is tied to the dst operand.
8593   if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
8594       Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi)  {
8595     auto it = Inst.begin();
8596     std::advance(
8597       it, AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::src2));
8598     Inst.insert(it, Inst.getOperand(0)); // src2 = dst
8599   }
8600 }
8601 
8602 //===----------------------------------------------------------------------===//
8603 // mAI
8604 //===----------------------------------------------------------------------===//
8605 
8606 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBLGP() const {
8607   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyBLGP);
8608 }
8609 
8610 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultCBSZ() const {
8611   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyCBSZ);
8612 }
8613 
8614 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultABID() const {
8615   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyABID);
8616 }
8617 
8618 /// Force static initialization.
8619 extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUAsmParser() {
8620   RegisterMCAsmParser<AMDGPUAsmParser> A(getTheAMDGPUTarget());
8621   RegisterMCAsmParser<AMDGPUAsmParser> B(getTheGCNTarget());
8622 }
8623 
8624 #define GET_REGISTER_MATCHER
8625 #define GET_MATCHER_IMPLEMENTATION
8626 #define GET_MNEMONIC_SPELL_CHECKER
8627 #define GET_MNEMONIC_CHECKER
8628 #include "AMDGPUGenAsmMatcher.inc"
8629 
8630 // This function should be defined after the auto-generated include so that the
8631 // MatchClassKind enum is defined.
8632 unsigned AMDGPUAsmParser::validateTargetOperandClass(MCParsedAsmOperand &Op,
8633                                                      unsigned Kind) {
8634   // Tokens like "glc" would be parsed as immediate operands in ParseOperand().
8635   // But MatchInstructionImpl() expects a token and fails to validate the
8636   // operand. This method checks if we were given an immediate operand but are
8637   // expected to provide the corresponding token.
8638   AMDGPUOperand &Operand = (AMDGPUOperand&)Op;
8639   switch (Kind) {
8640   case MCK_addr64:
8641     return Operand.isAddr64() ? Match_Success : Match_InvalidOperand;
8642   case MCK_gds:
8643     return Operand.isGDS() ? Match_Success : Match_InvalidOperand;
8644   case MCK_lds:
8645     return Operand.isLDS() ? Match_Success : Match_InvalidOperand;
8646   case MCK_idxen:
8647     return Operand.isIdxen() ? Match_Success : Match_InvalidOperand;
8648   case MCK_offen:
8649     return Operand.isOffen() ? Match_Success : Match_InvalidOperand;
8650   case MCK_SSrcB32:
8651     // When operands have expression values, they will return true for isToken,
8652     // because it is not possible to distinguish between a token and an
8653     // expression at parse time. MatchInstructionImpl() will always try to
8654     // match an operand as a token, when isToken returns true, and when the
8655     // name of the expression is not a valid token, the match will fail,
8656     // so we need to handle it here.
8657     return Operand.isSSrcB32() ? Match_Success : Match_InvalidOperand;
8658   case MCK_SSrcF32:
8659     return Operand.isSSrcF32() ? Match_Success : Match_InvalidOperand;
8660   case MCK_SoppBrTarget:
8661     return Operand.isSoppBrTarget() ? Match_Success : Match_InvalidOperand;
8662   case MCK_VReg32OrOff:
8663     return Operand.isVReg32OrOff() ? Match_Success : Match_InvalidOperand;
8664   case MCK_InterpSlot:
8665     return Operand.isInterpSlot() ? Match_Success : Match_InvalidOperand;
8666   case MCK_Attr:
8667     return Operand.isInterpAttr() ? Match_Success : Match_InvalidOperand;
8668   case MCK_AttrChan:
8669     return Operand.isAttrChan() ? Match_Success : Match_InvalidOperand;
8670   case MCK_ImmSMEMOffset:
8671     return Operand.isSMEMOffset() ? Match_Success : Match_InvalidOperand;
8672   case MCK_SReg_64:
8673   case MCK_SReg_64_XEXEC:
8674     // Null is defined as a 32-bit register but
8675     // it should also be enabled with 64-bit operands.
8676     // The following code enables it for SReg_64 operands
8677     // used as source and destination. Remaining source
8678     // operands are handled in isInlinableImm.
8679     return Operand.isNull() ? Match_Success : Match_InvalidOperand;
8680   default:
8681     return Match_InvalidOperand;
8682   }
8683 }
8684 
8685 //===----------------------------------------------------------------------===//
8686 // endpgm
8687 //===----------------------------------------------------------------------===//
8688 
8689 OperandMatchResultTy AMDGPUAsmParser::parseEndpgmOp(OperandVector &Operands) {
8690   SMLoc S = getLoc();
8691   int64_t Imm = 0;
8692 
8693   if (!parseExpr(Imm)) {
8694     // The operand is optional; if not present, default to 0.
8695     Imm = 0;
8696   }
8697 
8698   if (!isUInt<16>(Imm)) {
8699     Error(S, "expected a 16-bit value");
8700     return MatchOperand_ParseFail;
8701   }
8702 
8703   Operands.push_back(
8704       AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyEndpgm));
8705   return MatchOperand_Success;
8706 }
8707 
8708 bool AMDGPUOperand::isEndpgm() const { return isImmTy(ImmTyEndpgm); }
8709