1 //===- AMDGPUAsmParser.cpp - Parse SI asm to MCInst instructions ----------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "AMDKernelCodeT.h"
10 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
11 #include "MCTargetDesc/AMDGPUTargetStreamer.h"
12 #include "SIDefines.h"
13 #include "SIInstrInfo.h"
14 #include "SIRegisterInfo.h"
15 #include "TargetInfo/AMDGPUTargetInfo.h"
16 #include "Utils/AMDGPUAsmUtils.h"
17 #include "Utils/AMDGPUBaseInfo.h"
18 #include "Utils/AMDKernelCodeTUtils.h"
19 #include "llvm/ADT/APFloat.h"
20 #include "llvm/ADT/SmallBitVector.h"
21 #include "llvm/ADT/StringSet.h"
22 #include "llvm/ADT/Twine.h"
23 #include "llvm/BinaryFormat/ELF.h"
24 #include "llvm/MC/MCAsmInfo.h"
25 #include "llvm/MC/MCContext.h"
26 #include "llvm/MC/MCExpr.h"
27 #include "llvm/MC/MCInst.h"
28 #include "llvm/MC/MCParser/MCAsmLexer.h"
29 #include "llvm/MC/MCParser/MCAsmParser.h"
30 #include "llvm/MC/MCParser/MCParsedAsmOperand.h"
31 #include "llvm/MC/MCParser/MCTargetAsmParser.h"
32 #include "llvm/MC/MCSymbol.h"
33 #include "llvm/MC/TargetRegistry.h"
34 #include "llvm/Support/AMDGPUMetadata.h"
35 #include "llvm/Support/AMDHSAKernelDescriptor.h"
36 #include "llvm/Support/Casting.h"
37 #include "llvm/Support/MachineValueType.h"
38 #include "llvm/Support/MathExtras.h"
39 #include "llvm/Support/TargetParser.h"
40 
41 using namespace llvm;
42 using namespace llvm::AMDGPU;
43 using namespace llvm::amdhsa;
44 
45 namespace {
46 
47 class AMDGPUAsmParser;
48 
49 enum RegisterKind { IS_UNKNOWN, IS_VGPR, IS_SGPR, IS_AGPR, IS_TTMP, IS_SPECIAL };
50 
51 //===----------------------------------------------------------------------===//
52 // Operand
53 //===----------------------------------------------------------------------===//
54 
55 class AMDGPUOperand : public MCParsedAsmOperand {
56   enum KindTy {
57     Token,
58     Immediate,
59     Register,
60     Expression
61   } Kind;
62 
63   SMLoc StartLoc, EndLoc;
64   const AMDGPUAsmParser *AsmParser;
65 
66 public:
67   AMDGPUOperand(KindTy Kind_, const AMDGPUAsmParser *AsmParser_)
68       : Kind(Kind_), AsmParser(AsmParser_) {}
69 
70   using Ptr = std::unique_ptr<AMDGPUOperand>;
71 
72   struct Modifiers {
73     bool Abs = false;
74     bool Neg = false;
75     bool Sext = false;
76 
77     bool hasFPModifiers() const { return Abs || Neg; }
78     bool hasIntModifiers() const { return Sext; }
79     bool hasModifiers() const { return hasFPModifiers() || hasIntModifiers(); }
80 
81     int64_t getFPModifiersOperand() const {
82       int64_t Operand = 0;
83       Operand |= Abs ? SISrcMods::ABS : 0u;
84       Operand |= Neg ? SISrcMods::NEG : 0u;
85       return Operand;
86     }
87 
88     int64_t getIntModifiersOperand() const {
89       int64_t Operand = 0;
90       Operand |= Sext ? SISrcMods::SEXT : 0u;
91       return Operand;
92     }
93 
94     int64_t getModifiersOperand() const {
95       assert(!(hasFPModifiers() && hasIntModifiers())
96            && "fp and int modifiers should not be used simultaneously");
97       if (hasFPModifiers()) {
98         return getFPModifiersOperand();
99       } else if (hasIntModifiers()) {
100         return getIntModifiersOperand();
101       } else {
102         return 0;
103       }
104     }
105 
106     friend raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods);
107   };
108 
109   enum ImmTy {
110     ImmTyNone,
111     ImmTyGDS,
112     ImmTyLDS,
113     ImmTyOffen,
114     ImmTyIdxen,
115     ImmTyAddr64,
116     ImmTyOffset,
117     ImmTyInstOffset,
118     ImmTyOffset0,
119     ImmTyOffset1,
120     ImmTyCPol,
121     ImmTySWZ,
122     ImmTyTFE,
123     ImmTyD16,
124     ImmTyClampSI,
125     ImmTyOModSI,
126     ImmTyDPP8,
127     ImmTyDppCtrl,
128     ImmTyDppRowMask,
129     ImmTyDppBankMask,
130     ImmTyDppBoundCtrl,
131     ImmTyDppFi,
132     ImmTySdwaDstSel,
133     ImmTySdwaSrc0Sel,
134     ImmTySdwaSrc1Sel,
135     ImmTySdwaDstUnused,
136     ImmTyDMask,
137     ImmTyDim,
138     ImmTyUNorm,
139     ImmTyDA,
140     ImmTyR128A16,
141     ImmTyA16,
142     ImmTyLWE,
143     ImmTyExpTgt,
144     ImmTyExpCompr,
145     ImmTyExpVM,
146     ImmTyFORMAT,
147     ImmTyHwreg,
148     ImmTyOff,
149     ImmTySendMsg,
150     ImmTyInterpSlot,
151     ImmTyInterpAttr,
152     ImmTyAttrChan,
153     ImmTyOpSel,
154     ImmTyOpSelHi,
155     ImmTyNegLo,
156     ImmTyNegHi,
157     ImmTySwizzle,
158     ImmTyGprIdxMode,
159     ImmTyHigh,
160     ImmTyBLGP,
161     ImmTyCBSZ,
162     ImmTyABID,
163     ImmTyEndpgm,
164   };
165 
166   enum ImmKindTy {
167     ImmKindTyNone,
168     ImmKindTyLiteral,
169     ImmKindTyConst,
170   };
171 
172 private:
173   struct TokOp {
174     const char *Data;
175     unsigned Length;
176   };
177 
178   struct ImmOp {
179     int64_t Val;
180     ImmTy Type;
181     bool IsFPImm;
182     mutable ImmKindTy Kind;
183     Modifiers Mods;
184   };
185 
186   struct RegOp {
187     unsigned RegNo;
188     Modifiers Mods;
189   };
190 
191   union {
192     TokOp Tok;
193     ImmOp Imm;
194     RegOp Reg;
195     const MCExpr *Expr;
196   };
197 
198 public:
199   bool isToken() const override {
200     if (Kind == Token)
201       return true;
202 
203     // When parsing operands, we can't always tell if something was meant to be
204     // a token, like 'gds', or an expression that references a global variable.
205     // In this case, we assume the string is an expression, and if we need to
206     // interpret is a token, then we treat the symbol name as the token.
207     return isSymbolRefExpr();
208   }
209 
210   bool isSymbolRefExpr() const {
211     return isExpr() && Expr && isa<MCSymbolRefExpr>(Expr);
212   }
213 
214   bool isImm() const override {
215     return Kind == Immediate;
216   }
217 
218   void setImmKindNone() const {
219     assert(isImm());
220     Imm.Kind = ImmKindTyNone;
221   }
222 
223   void setImmKindLiteral() const {
224     assert(isImm());
225     Imm.Kind = ImmKindTyLiteral;
226   }
227 
228   void setImmKindConst() const {
229     assert(isImm());
230     Imm.Kind = ImmKindTyConst;
231   }
232 
233   bool IsImmKindLiteral() const {
234     return isImm() && Imm.Kind == ImmKindTyLiteral;
235   }
236 
237   bool isImmKindConst() const {
238     return isImm() && Imm.Kind == ImmKindTyConst;
239   }
240 
241   bool isInlinableImm(MVT type) const;
242   bool isLiteralImm(MVT type) const;
243 
244   bool isRegKind() const {
245     return Kind == Register;
246   }
247 
248   bool isReg() const override {
249     return isRegKind() && !hasModifiers();
250   }
251 
252   bool isRegOrInline(unsigned RCID, MVT type) const {
253     return isRegClass(RCID) || isInlinableImm(type);
254   }
255 
256   bool isRegOrImmWithInputMods(unsigned RCID, MVT type) const {
257     return isRegOrInline(RCID, type) || isLiteralImm(type);
258   }
259 
260   bool isRegOrImmWithInt16InputMods() const {
261     return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i16);
262   }
263 
264   bool isRegOrImmWithInt32InputMods() const {
265     return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i32);
266   }
267 
268   bool isRegOrImmWithInt64InputMods() const {
269     return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::i64);
270   }
271 
272   bool isRegOrImmWithFP16InputMods() const {
273     return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f16);
274   }
275 
276   bool isRegOrImmWithFP32InputMods() const {
277     return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f32);
278   }
279 
280   bool isRegOrImmWithFP64InputMods() const {
281     return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::f64);
282   }
283 
284   bool isVReg() const {
285     return isRegClass(AMDGPU::VGPR_32RegClassID) ||
286            isRegClass(AMDGPU::VReg_64RegClassID) ||
287            isRegClass(AMDGPU::VReg_96RegClassID) ||
288            isRegClass(AMDGPU::VReg_128RegClassID) ||
289            isRegClass(AMDGPU::VReg_160RegClassID) ||
290            isRegClass(AMDGPU::VReg_192RegClassID) ||
291            isRegClass(AMDGPU::VReg_256RegClassID) ||
292            isRegClass(AMDGPU::VReg_512RegClassID) ||
293            isRegClass(AMDGPU::VReg_1024RegClassID);
294   }
295 
296   bool isVReg32() const {
297     return isRegClass(AMDGPU::VGPR_32RegClassID);
298   }
299 
300   bool isVReg32OrOff() const {
301     return isOff() || isVReg32();
302   }
303 
304   bool isNull() const {
305     return isRegKind() && getReg() == AMDGPU::SGPR_NULL;
306   }
307 
308   bool isVRegWithInputMods() const;
309 
310   bool isSDWAOperand(MVT type) const;
311   bool isSDWAFP16Operand() const;
312   bool isSDWAFP32Operand() const;
313   bool isSDWAInt16Operand() const;
314   bool isSDWAInt32Operand() const;
315 
316   bool isImmTy(ImmTy ImmT) const {
317     return isImm() && Imm.Type == ImmT;
318   }
319 
320   bool isImmModifier() const {
321     return isImm() && Imm.Type != ImmTyNone;
322   }
323 
324   bool isClampSI() const { return isImmTy(ImmTyClampSI); }
325   bool isOModSI() const { return isImmTy(ImmTyOModSI); }
326   bool isDMask() const { return isImmTy(ImmTyDMask); }
327   bool isDim() const { return isImmTy(ImmTyDim); }
328   bool isUNorm() const { return isImmTy(ImmTyUNorm); }
329   bool isDA() const { return isImmTy(ImmTyDA); }
330   bool isR128A16() const { return isImmTy(ImmTyR128A16); }
331   bool isGFX10A16() const { return isImmTy(ImmTyA16); }
332   bool isLWE() const { return isImmTy(ImmTyLWE); }
333   bool isOff() const { return isImmTy(ImmTyOff); }
334   bool isExpTgt() const { return isImmTy(ImmTyExpTgt); }
335   bool isExpVM() const { return isImmTy(ImmTyExpVM); }
336   bool isExpCompr() const { return isImmTy(ImmTyExpCompr); }
337   bool isOffen() const { return isImmTy(ImmTyOffen); }
338   bool isIdxen() const { return isImmTy(ImmTyIdxen); }
339   bool isAddr64() const { return isImmTy(ImmTyAddr64); }
340   bool isOffset() const { return isImmTy(ImmTyOffset) && isUInt<16>(getImm()); }
341   bool isOffset0() const { return isImmTy(ImmTyOffset0) && isUInt<8>(getImm()); }
342   bool isOffset1() const { return isImmTy(ImmTyOffset1) && isUInt<8>(getImm()); }
343 
344   bool isFlatOffset() const { return isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset); }
345   bool isGDS() const { return isImmTy(ImmTyGDS); }
346   bool isLDS() const { return isImmTy(ImmTyLDS); }
347   bool isCPol() const { return isImmTy(ImmTyCPol); }
348   bool isSWZ() const { return isImmTy(ImmTySWZ); }
349   bool isTFE() const { return isImmTy(ImmTyTFE); }
350   bool isD16() const { return isImmTy(ImmTyD16); }
351   bool isFORMAT() const { return isImmTy(ImmTyFORMAT) && isUInt<7>(getImm()); }
352   bool isBankMask() const { return isImmTy(ImmTyDppBankMask); }
353   bool isRowMask() const { return isImmTy(ImmTyDppRowMask); }
354   bool isBoundCtrl() const { return isImmTy(ImmTyDppBoundCtrl); }
355   bool isFI() const { return isImmTy(ImmTyDppFi); }
356   bool isSDWADstSel() const { return isImmTy(ImmTySdwaDstSel); }
357   bool isSDWASrc0Sel() const { return isImmTy(ImmTySdwaSrc0Sel); }
358   bool isSDWASrc1Sel() const { return isImmTy(ImmTySdwaSrc1Sel); }
359   bool isSDWADstUnused() const { return isImmTy(ImmTySdwaDstUnused); }
360   bool isInterpSlot() const { return isImmTy(ImmTyInterpSlot); }
361   bool isInterpAttr() const { return isImmTy(ImmTyInterpAttr); }
362   bool isAttrChan() const { return isImmTy(ImmTyAttrChan); }
363   bool isOpSel() const { return isImmTy(ImmTyOpSel); }
364   bool isOpSelHi() const { return isImmTy(ImmTyOpSelHi); }
365   bool isNegLo() const { return isImmTy(ImmTyNegLo); }
366   bool isNegHi() const { return isImmTy(ImmTyNegHi); }
367   bool isHigh() const { return isImmTy(ImmTyHigh); }
368 
369   bool isMod() const {
370     return isClampSI() || isOModSI();
371   }
372 
373   bool isRegOrImm() const {
374     return isReg() || isImm();
375   }
376 
377   bool isRegClass(unsigned RCID) const;
378 
379   bool isInlineValue() const;
380 
381   bool isRegOrInlineNoMods(unsigned RCID, MVT type) const {
382     return isRegOrInline(RCID, type) && !hasModifiers();
383   }
384 
385   bool isSCSrcB16() const {
386     return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i16);
387   }
388 
389   bool isSCSrcV2B16() const {
390     return isSCSrcB16();
391   }
392 
393   bool isSCSrcB32() const {
394     return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i32);
395   }
396 
397   bool isSCSrcB64() const {
398     return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::i64);
399   }
400 
401   bool isBoolReg() const;
402 
403   bool isSCSrcF16() const {
404     return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f16);
405   }
406 
407   bool isSCSrcV2F16() const {
408     return isSCSrcF16();
409   }
410 
411   bool isSCSrcF32() const {
412     return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f32);
413   }
414 
415   bool isSCSrcF64() const {
416     return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::f64);
417   }
418 
419   bool isSSrcB32() const {
420     return isSCSrcB32() || isLiteralImm(MVT::i32) || isExpr();
421   }
422 
423   bool isSSrcB16() const {
424     return isSCSrcB16() || isLiteralImm(MVT::i16);
425   }
426 
427   bool isSSrcV2B16() const {
428     llvm_unreachable("cannot happen");
429     return isSSrcB16();
430   }
431 
432   bool isSSrcB64() const {
433     // TODO: Find out how SALU supports extension of 32-bit literals to 64 bits.
434     // See isVSrc64().
435     return isSCSrcB64() || isLiteralImm(MVT::i64);
436   }
437 
438   bool isSSrcF32() const {
439     return isSCSrcB32() || isLiteralImm(MVT::f32) || isExpr();
440   }
441 
442   bool isSSrcF64() const {
443     return isSCSrcB64() || isLiteralImm(MVT::f64);
444   }
445 
446   bool isSSrcF16() const {
447     return isSCSrcB16() || isLiteralImm(MVT::f16);
448   }
449 
450   bool isSSrcV2F16() const {
451     llvm_unreachable("cannot happen");
452     return isSSrcF16();
453   }
454 
455   bool isSSrcV2FP32() const {
456     llvm_unreachable("cannot happen");
457     return isSSrcF32();
458   }
459 
460   bool isSCSrcV2FP32() const {
461     llvm_unreachable("cannot happen");
462     return isSCSrcF32();
463   }
464 
465   bool isSSrcV2INT32() const {
466     llvm_unreachable("cannot happen");
467     return isSSrcB32();
468   }
469 
470   bool isSCSrcV2INT32() const {
471     llvm_unreachable("cannot happen");
472     return isSCSrcB32();
473   }
474 
475   bool isSSrcOrLdsB32() const {
476     return isRegOrInlineNoMods(AMDGPU::SRegOrLds_32RegClassID, MVT::i32) ||
477            isLiteralImm(MVT::i32) || isExpr();
478   }
479 
480   bool isVCSrcB32() const {
481     return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i32);
482   }
483 
484   bool isVCSrcB64() const {
485     return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::i64);
486   }
487 
488   bool isVCSrcB16() const {
489     return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i16);
490   }
491 
492   bool isVCSrcV2B16() const {
493     return isVCSrcB16();
494   }
495 
496   bool isVCSrcF32() const {
497     return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f32);
498   }
499 
500   bool isVCSrcF64() const {
501     return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::f64);
502   }
503 
504   bool isVCSrcF16() const {
505     return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f16);
506   }
507 
508   bool isVCSrcV2F16() const {
509     return isVCSrcF16();
510   }
511 
512   bool isVSrcB32() const {
513     return isVCSrcF32() || isLiteralImm(MVT::i32) || isExpr();
514   }
515 
516   bool isVSrcB64() const {
517     return isVCSrcF64() || isLiteralImm(MVT::i64);
518   }
519 
520   bool isVSrcB16() const {
521     return isVCSrcB16() || isLiteralImm(MVT::i16);
522   }
523 
524   bool isVSrcV2B16() const {
525     return isVSrcB16() || isLiteralImm(MVT::v2i16);
526   }
527 
528   bool isVCSrcV2FP32() const {
529     return isVCSrcF64();
530   }
531 
532   bool isVSrcV2FP32() const {
533     return isVSrcF64() || isLiteralImm(MVT::v2f32);
534   }
535 
536   bool isVCSrcV2INT32() const {
537     return isVCSrcB64();
538   }
539 
540   bool isVSrcV2INT32() const {
541     return isVSrcB64() || isLiteralImm(MVT::v2i32);
542   }
543 
544   bool isVSrcF32() const {
545     return isVCSrcF32() || isLiteralImm(MVT::f32) || isExpr();
546   }
547 
548   bool isVSrcF64() const {
549     return isVCSrcF64() || isLiteralImm(MVT::f64);
550   }
551 
552   bool isVSrcF16() const {
553     return isVCSrcF16() || isLiteralImm(MVT::f16);
554   }
555 
556   bool isVSrcV2F16() const {
557     return isVSrcF16() || isLiteralImm(MVT::v2f16);
558   }
559 
560   bool isVISrcB32() const {
561     return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i32);
562   }
563 
564   bool isVISrcB16() const {
565     return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i16);
566   }
567 
568   bool isVISrcV2B16() const {
569     return isVISrcB16();
570   }
571 
572   bool isVISrcF32() const {
573     return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f32);
574   }
575 
576   bool isVISrcF16() const {
577     return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f16);
578   }
579 
580   bool isVISrcV2F16() const {
581     return isVISrcF16() || isVISrcB32();
582   }
583 
584   bool isVISrc_64B64() const {
585     return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i64);
586   }
587 
588   bool isVISrc_64F64() const {
589     return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f64);
590   }
591 
592   bool isVISrc_64V2FP32() const {
593     return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f32);
594   }
595 
596   bool isVISrc_64V2INT32() const {
597     return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i32);
598   }
599 
600   bool isVISrc_256B64() const {
601     return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i64);
602   }
603 
604   bool isVISrc_256F64() const {
605     return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f64);
606   }
607 
608   bool isVISrc_128B16() const {
609     return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i16);
610   }
611 
612   bool isVISrc_128V2B16() const {
613     return isVISrc_128B16();
614   }
615 
616   bool isVISrc_128B32() const {
617     return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i32);
618   }
619 
620   bool isVISrc_128F32() const {
621     return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f32);
622   }
623 
624   bool isVISrc_256V2FP32() const {
625     return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f32);
626   }
627 
628   bool isVISrc_256V2INT32() const {
629     return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i32);
630   }
631 
632   bool isVISrc_512B32() const {
633     return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i32);
634   }
635 
636   bool isVISrc_512B16() const {
637     return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i16);
638   }
639 
640   bool isVISrc_512V2B16() const {
641     return isVISrc_512B16();
642   }
643 
644   bool isVISrc_512F32() const {
645     return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f32);
646   }
647 
648   bool isVISrc_512F16() const {
649     return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f16);
650   }
651 
652   bool isVISrc_512V2F16() const {
653     return isVISrc_512F16() || isVISrc_512B32();
654   }
655 
656   bool isVISrc_1024B32() const {
657     return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i32);
658   }
659 
660   bool isVISrc_1024B16() const {
661     return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i16);
662   }
663 
664   bool isVISrc_1024V2B16() const {
665     return isVISrc_1024B16();
666   }
667 
668   bool isVISrc_1024F32() const {
669     return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f32);
670   }
671 
672   bool isVISrc_1024F16() const {
673     return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f16);
674   }
675 
676   bool isVISrc_1024V2F16() const {
677     return isVISrc_1024F16() || isVISrc_1024B32();
678   }
679 
680   bool isAISrcB32() const {
681     return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i32);
682   }
683 
684   bool isAISrcB16() const {
685     return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i16);
686   }
687 
688   bool isAISrcV2B16() const {
689     return isAISrcB16();
690   }
691 
692   bool isAISrcF32() const {
693     return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f32);
694   }
695 
696   bool isAISrcF16() const {
697     return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f16);
698   }
699 
700   bool isAISrcV2F16() const {
701     return isAISrcF16() || isAISrcB32();
702   }
703 
704   bool isAISrc_64B64() const {
705     return isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::i64);
706   }
707 
708   bool isAISrc_64F64() const {
709     return isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::f64);
710   }
711 
712   bool isAISrc_128B32() const {
713     return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i32);
714   }
715 
716   bool isAISrc_128B16() const {
717     return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i16);
718   }
719 
720   bool isAISrc_128V2B16() const {
721     return isAISrc_128B16();
722   }
723 
724   bool isAISrc_128F32() const {
725     return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f32);
726   }
727 
728   bool isAISrc_128F16() const {
729     return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f16);
730   }
731 
732   bool isAISrc_128V2F16() const {
733     return isAISrc_128F16() || isAISrc_128B32();
734   }
735 
736   bool isVISrc_128F16() const {
737     return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f16);
738   }
739 
740   bool isVISrc_128V2F16() const {
741     return isVISrc_128F16() || isVISrc_128B32();
742   }
743 
744   bool isAISrc_256B64() const {
745     return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::i64);
746   }
747 
748   bool isAISrc_256F64() const {
749     return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::f64);
750   }
751 
752   bool isAISrc_512B32() const {
753     return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i32);
754   }
755 
756   bool isAISrc_512B16() const {
757     return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i16);
758   }
759 
760   bool isAISrc_512V2B16() const {
761     return isAISrc_512B16();
762   }
763 
764   bool isAISrc_512F32() const {
765     return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f32);
766   }
767 
768   bool isAISrc_512F16() const {
769     return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f16);
770   }
771 
772   bool isAISrc_512V2F16() const {
773     return isAISrc_512F16() || isAISrc_512B32();
774   }
775 
776   bool isAISrc_1024B32() const {
777     return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i32);
778   }
779 
780   bool isAISrc_1024B16() const {
781     return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i16);
782   }
783 
784   bool isAISrc_1024V2B16() const {
785     return isAISrc_1024B16();
786   }
787 
788   bool isAISrc_1024F32() const {
789     return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f32);
790   }
791 
792   bool isAISrc_1024F16() const {
793     return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f16);
794   }
795 
796   bool isAISrc_1024V2F16() const {
797     return isAISrc_1024F16() || isAISrc_1024B32();
798   }
799 
800   bool isKImmFP32() const {
801     return isLiteralImm(MVT::f32);
802   }
803 
804   bool isKImmFP16() const {
805     return isLiteralImm(MVT::f16);
806   }
807 
808   bool isMem() const override {
809     return false;
810   }
811 
812   bool isExpr() const {
813     return Kind == Expression;
814   }
815 
816   bool isSoppBrTarget() const {
817     return isExpr() || isImm();
818   }
819 
820   bool isSWaitCnt() const;
821   bool isDepCtr() const;
822   bool isHwreg() const;
823   bool isSendMsg() const;
824   bool isSwizzle() const;
825   bool isSMRDOffset8() const;
826   bool isSMEMOffset() const;
827   bool isSMRDLiteralOffset() const;
828   bool isDPP8() const;
829   bool isDPPCtrl() const;
830   bool isBLGP() const;
831   bool isCBSZ() const;
832   bool isABID() const;
833   bool isGPRIdxMode() const;
834   bool isS16Imm() const;
835   bool isU16Imm() const;
836   bool isEndpgm() const;
837 
838   StringRef getExpressionAsToken() const {
839     assert(isExpr());
840     const MCSymbolRefExpr *S = cast<MCSymbolRefExpr>(Expr);
841     return S->getSymbol().getName();
842   }
843 
844   StringRef getToken() const {
845     assert(isToken());
846 
847     if (Kind == Expression)
848       return getExpressionAsToken();
849 
850     return StringRef(Tok.Data, Tok.Length);
851   }
852 
853   int64_t getImm() const {
854     assert(isImm());
855     return Imm.Val;
856   }
857 
858   void setImm(int64_t Val) {
859     assert(isImm());
860     Imm.Val = Val;
861   }
862 
863   ImmTy getImmTy() const {
864     assert(isImm());
865     return Imm.Type;
866   }
867 
868   unsigned getReg() const override {
869     assert(isRegKind());
870     return Reg.RegNo;
871   }
872 
873   SMLoc getStartLoc() const override {
874     return StartLoc;
875   }
876 
877   SMLoc getEndLoc() const override {
878     return EndLoc;
879   }
880 
881   SMRange getLocRange() const {
882     return SMRange(StartLoc, EndLoc);
883   }
884 
885   Modifiers getModifiers() const {
886     assert(isRegKind() || isImmTy(ImmTyNone));
887     return isRegKind() ? Reg.Mods : Imm.Mods;
888   }
889 
890   void setModifiers(Modifiers Mods) {
891     assert(isRegKind() || isImmTy(ImmTyNone));
892     if (isRegKind())
893       Reg.Mods = Mods;
894     else
895       Imm.Mods = Mods;
896   }
897 
898   bool hasModifiers() const {
899     return getModifiers().hasModifiers();
900   }
901 
902   bool hasFPModifiers() const {
903     return getModifiers().hasFPModifiers();
904   }
905 
906   bool hasIntModifiers() const {
907     return getModifiers().hasIntModifiers();
908   }
909 
910   uint64_t applyInputFPModifiers(uint64_t Val, unsigned Size) const;
911 
912   void addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers = true) const;
913 
914   void addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const;
915 
916   template <unsigned Bitwidth>
917   void addKImmFPOperands(MCInst &Inst, unsigned N) const;
918 
919   void addKImmFP16Operands(MCInst &Inst, unsigned N) const {
920     addKImmFPOperands<16>(Inst, N);
921   }
922 
923   void addKImmFP32Operands(MCInst &Inst, unsigned N) const {
924     addKImmFPOperands<32>(Inst, N);
925   }
926 
927   void addRegOperands(MCInst &Inst, unsigned N) const;
928 
929   void addBoolRegOperands(MCInst &Inst, unsigned N) const {
930     addRegOperands(Inst, N);
931   }
932 
933   void addRegOrImmOperands(MCInst &Inst, unsigned N) const {
934     if (isRegKind())
935       addRegOperands(Inst, N);
936     else if (isExpr())
937       Inst.addOperand(MCOperand::createExpr(Expr));
938     else
939       addImmOperands(Inst, N);
940   }
941 
942   void addRegOrImmWithInputModsOperands(MCInst &Inst, unsigned N) const {
943     Modifiers Mods = getModifiers();
944     Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
945     if (isRegKind()) {
946       addRegOperands(Inst, N);
947     } else {
948       addImmOperands(Inst, N, false);
949     }
950   }
951 
952   void addRegOrImmWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
953     assert(!hasIntModifiers());
954     addRegOrImmWithInputModsOperands(Inst, N);
955   }
956 
957   void addRegOrImmWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
958     assert(!hasFPModifiers());
959     addRegOrImmWithInputModsOperands(Inst, N);
960   }
961 
962   void addRegWithInputModsOperands(MCInst &Inst, unsigned N) const {
963     Modifiers Mods = getModifiers();
964     Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
965     assert(isRegKind());
966     addRegOperands(Inst, N);
967   }
968 
969   void addRegWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
970     assert(!hasIntModifiers());
971     addRegWithInputModsOperands(Inst, N);
972   }
973 
974   void addRegWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
975     assert(!hasFPModifiers());
976     addRegWithInputModsOperands(Inst, N);
977   }
978 
979   void addSoppBrTargetOperands(MCInst &Inst, unsigned N) const {
980     if (isImm())
981       addImmOperands(Inst, N);
982     else {
983       assert(isExpr());
984       Inst.addOperand(MCOperand::createExpr(Expr));
985     }
986   }
987 
988   static void printImmTy(raw_ostream& OS, ImmTy Type) {
989     switch (Type) {
990     case ImmTyNone: OS << "None"; break;
991     case ImmTyGDS: OS << "GDS"; break;
992     case ImmTyLDS: OS << "LDS"; break;
993     case ImmTyOffen: OS << "Offen"; break;
994     case ImmTyIdxen: OS << "Idxen"; break;
995     case ImmTyAddr64: OS << "Addr64"; break;
996     case ImmTyOffset: OS << "Offset"; break;
997     case ImmTyInstOffset: OS << "InstOffset"; break;
998     case ImmTyOffset0: OS << "Offset0"; break;
999     case ImmTyOffset1: OS << "Offset1"; break;
1000     case ImmTyCPol: OS << "CPol"; break;
1001     case ImmTySWZ: OS << "SWZ"; break;
1002     case ImmTyTFE: OS << "TFE"; break;
1003     case ImmTyD16: OS << "D16"; break;
1004     case ImmTyFORMAT: OS << "FORMAT"; break;
1005     case ImmTyClampSI: OS << "ClampSI"; break;
1006     case ImmTyOModSI: OS << "OModSI"; break;
1007     case ImmTyDPP8: OS << "DPP8"; break;
1008     case ImmTyDppCtrl: OS << "DppCtrl"; break;
1009     case ImmTyDppRowMask: OS << "DppRowMask"; break;
1010     case ImmTyDppBankMask: OS << "DppBankMask"; break;
1011     case ImmTyDppBoundCtrl: OS << "DppBoundCtrl"; break;
1012     case ImmTyDppFi: OS << "FI"; break;
1013     case ImmTySdwaDstSel: OS << "SdwaDstSel"; break;
1014     case ImmTySdwaSrc0Sel: OS << "SdwaSrc0Sel"; break;
1015     case ImmTySdwaSrc1Sel: OS << "SdwaSrc1Sel"; break;
1016     case ImmTySdwaDstUnused: OS << "SdwaDstUnused"; break;
1017     case ImmTyDMask: OS << "DMask"; break;
1018     case ImmTyDim: OS << "Dim"; break;
1019     case ImmTyUNorm: OS << "UNorm"; break;
1020     case ImmTyDA: OS << "DA"; break;
1021     case ImmTyR128A16: OS << "R128A16"; break;
1022     case ImmTyA16: OS << "A16"; break;
1023     case ImmTyLWE: OS << "LWE"; break;
1024     case ImmTyOff: OS << "Off"; break;
1025     case ImmTyExpTgt: OS << "ExpTgt"; break;
1026     case ImmTyExpCompr: OS << "ExpCompr"; break;
1027     case ImmTyExpVM: OS << "ExpVM"; break;
1028     case ImmTyHwreg: OS << "Hwreg"; break;
1029     case ImmTySendMsg: OS << "SendMsg"; break;
1030     case ImmTyInterpSlot: OS << "InterpSlot"; break;
1031     case ImmTyInterpAttr: OS << "InterpAttr"; break;
1032     case ImmTyAttrChan: OS << "AttrChan"; break;
1033     case ImmTyOpSel: OS << "OpSel"; break;
1034     case ImmTyOpSelHi: OS << "OpSelHi"; break;
1035     case ImmTyNegLo: OS << "NegLo"; break;
1036     case ImmTyNegHi: OS << "NegHi"; break;
1037     case ImmTySwizzle: OS << "Swizzle"; break;
1038     case ImmTyGprIdxMode: OS << "GprIdxMode"; break;
1039     case ImmTyHigh: OS << "High"; break;
1040     case ImmTyBLGP: OS << "BLGP"; break;
1041     case ImmTyCBSZ: OS << "CBSZ"; break;
1042     case ImmTyABID: OS << "ABID"; break;
1043     case ImmTyEndpgm: OS << "Endpgm"; break;
1044     }
1045   }
1046 
1047   void print(raw_ostream &OS) const override {
1048     switch (Kind) {
1049     case Register:
1050       OS << "<register " << getReg() << " mods: " << Reg.Mods << '>';
1051       break;
1052     case Immediate:
1053       OS << '<' << getImm();
1054       if (getImmTy() != ImmTyNone) {
1055         OS << " type: "; printImmTy(OS, getImmTy());
1056       }
1057       OS << " mods: " << Imm.Mods << '>';
1058       break;
1059     case Token:
1060       OS << '\'' << getToken() << '\'';
1061       break;
1062     case Expression:
1063       OS << "<expr " << *Expr << '>';
1064       break;
1065     }
1066   }
1067 
1068   static AMDGPUOperand::Ptr CreateImm(const AMDGPUAsmParser *AsmParser,
1069                                       int64_t Val, SMLoc Loc,
1070                                       ImmTy Type = ImmTyNone,
1071                                       bool IsFPImm = false) {
1072     auto Op = std::make_unique<AMDGPUOperand>(Immediate, AsmParser);
1073     Op->Imm.Val = Val;
1074     Op->Imm.IsFPImm = IsFPImm;
1075     Op->Imm.Kind = ImmKindTyNone;
1076     Op->Imm.Type = Type;
1077     Op->Imm.Mods = Modifiers();
1078     Op->StartLoc = Loc;
1079     Op->EndLoc = Loc;
1080     return Op;
1081   }
1082 
1083   static AMDGPUOperand::Ptr CreateToken(const AMDGPUAsmParser *AsmParser,
1084                                         StringRef Str, SMLoc Loc,
1085                                         bool HasExplicitEncodingSize = true) {
1086     auto Res = std::make_unique<AMDGPUOperand>(Token, AsmParser);
1087     Res->Tok.Data = Str.data();
1088     Res->Tok.Length = Str.size();
1089     Res->StartLoc = Loc;
1090     Res->EndLoc = Loc;
1091     return Res;
1092   }
1093 
1094   static AMDGPUOperand::Ptr CreateReg(const AMDGPUAsmParser *AsmParser,
1095                                       unsigned RegNo, SMLoc S,
1096                                       SMLoc E) {
1097     auto Op = std::make_unique<AMDGPUOperand>(Register, AsmParser);
1098     Op->Reg.RegNo = RegNo;
1099     Op->Reg.Mods = Modifiers();
1100     Op->StartLoc = S;
1101     Op->EndLoc = E;
1102     return Op;
1103   }
1104 
1105   static AMDGPUOperand::Ptr CreateExpr(const AMDGPUAsmParser *AsmParser,
1106                                        const class MCExpr *Expr, SMLoc S) {
1107     auto Op = std::make_unique<AMDGPUOperand>(Expression, AsmParser);
1108     Op->Expr = Expr;
1109     Op->StartLoc = S;
1110     Op->EndLoc = S;
1111     return Op;
1112   }
1113 };
1114 
1115 raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods) {
1116   OS << "abs:" << Mods.Abs << " neg: " << Mods.Neg << " sext:" << Mods.Sext;
1117   return OS;
1118 }
1119 
1120 //===----------------------------------------------------------------------===//
1121 // AsmParser
1122 //===----------------------------------------------------------------------===//
1123 
1124 // Holds info related to the current kernel, e.g. count of SGPRs used.
1125 // Kernel scope begins at .amdgpu_hsa_kernel directive, ends at next
1126 // .amdgpu_hsa_kernel or at EOF.
1127 class KernelScopeInfo {
1128   int SgprIndexUnusedMin = -1;
1129   int VgprIndexUnusedMin = -1;
1130   int AgprIndexUnusedMin = -1;
1131   MCContext *Ctx = nullptr;
1132   MCSubtargetInfo const *MSTI = nullptr;
1133 
1134   void usesSgprAt(int i) {
1135     if (i >= SgprIndexUnusedMin) {
1136       SgprIndexUnusedMin = ++i;
1137       if (Ctx) {
1138         MCSymbol* const Sym =
1139           Ctx->getOrCreateSymbol(Twine(".kernel.sgpr_count"));
1140         Sym->setVariableValue(MCConstantExpr::create(SgprIndexUnusedMin, *Ctx));
1141       }
1142     }
1143   }
1144 
1145   void usesVgprAt(int i) {
1146     if (i >= VgprIndexUnusedMin) {
1147       VgprIndexUnusedMin = ++i;
1148       if (Ctx) {
1149         MCSymbol* const Sym =
1150           Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count"));
1151         int totalVGPR = getTotalNumVGPRs(isGFX90A(*MSTI), AgprIndexUnusedMin,
1152                                          VgprIndexUnusedMin);
1153         Sym->setVariableValue(MCConstantExpr::create(totalVGPR, *Ctx));
1154       }
1155     }
1156   }
1157 
1158   void usesAgprAt(int i) {
1159     // Instruction will error in AMDGPUAsmParser::MatchAndEmitInstruction
1160     if (!hasMAIInsts(*MSTI))
1161       return;
1162 
1163     if (i >= AgprIndexUnusedMin) {
1164       AgprIndexUnusedMin = ++i;
1165       if (Ctx) {
1166         MCSymbol* const Sym =
1167           Ctx->getOrCreateSymbol(Twine(".kernel.agpr_count"));
1168         Sym->setVariableValue(MCConstantExpr::create(AgprIndexUnusedMin, *Ctx));
1169 
1170         // Also update vgpr_count (dependent on agpr_count for gfx908/gfx90a)
1171         MCSymbol* const vSym =
1172           Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count"));
1173         int totalVGPR = getTotalNumVGPRs(isGFX90A(*MSTI), AgprIndexUnusedMin,
1174                                          VgprIndexUnusedMin);
1175         vSym->setVariableValue(MCConstantExpr::create(totalVGPR, *Ctx));
1176       }
1177     }
1178   }
1179 
1180 public:
1181   KernelScopeInfo() = default;
1182 
1183   void initialize(MCContext &Context) {
1184     Ctx = &Context;
1185     MSTI = Ctx->getSubtargetInfo();
1186 
1187     usesSgprAt(SgprIndexUnusedMin = -1);
1188     usesVgprAt(VgprIndexUnusedMin = -1);
1189     if (hasMAIInsts(*MSTI)) {
1190       usesAgprAt(AgprIndexUnusedMin = -1);
1191     }
1192   }
1193 
1194   void usesRegister(RegisterKind RegKind, unsigned DwordRegIndex,
1195                     unsigned RegWidth) {
1196     switch (RegKind) {
1197     case IS_SGPR:
1198       usesSgprAt(DwordRegIndex + divideCeil(RegWidth, 32) - 1);
1199       break;
1200     case IS_AGPR:
1201       usesAgprAt(DwordRegIndex + divideCeil(RegWidth, 32) - 1);
1202       break;
1203     case IS_VGPR:
1204       usesVgprAt(DwordRegIndex + divideCeil(RegWidth, 32) - 1);
1205       break;
1206     default:
1207       break;
1208     }
1209   }
1210 };
1211 
1212 class AMDGPUAsmParser : public MCTargetAsmParser {
1213   MCAsmParser &Parser;
1214 
1215   // Number of extra operands parsed after the first optional operand.
1216   // This may be necessary to skip hardcoded mandatory operands.
1217   static const unsigned MAX_OPR_LOOKAHEAD = 8;
1218 
1219   unsigned ForcedEncodingSize = 0;
1220   bool ForcedDPP = false;
1221   bool ForcedSDWA = false;
1222   KernelScopeInfo KernelScope;
1223   unsigned CPolSeen;
1224 
1225   /// @name Auto-generated Match Functions
1226   /// {
1227 
1228 #define GET_ASSEMBLER_HEADER
1229 #include "AMDGPUGenAsmMatcher.inc"
1230 
1231   /// }
1232 
1233 private:
1234   bool ParseAsAbsoluteExpression(uint32_t &Ret);
1235   bool OutOfRangeError(SMRange Range);
1236   /// Calculate VGPR/SGPR blocks required for given target, reserved
1237   /// registers, and user-specified NextFreeXGPR values.
1238   ///
1239   /// \param Features [in] Target features, used for bug corrections.
1240   /// \param VCCUsed [in] Whether VCC special SGPR is reserved.
1241   /// \param FlatScrUsed [in] Whether FLAT_SCRATCH special SGPR is reserved.
1242   /// \param XNACKUsed [in] Whether XNACK_MASK special SGPR is reserved.
1243   /// \param EnableWavefrontSize32 [in] Value of ENABLE_WAVEFRONT_SIZE32 kernel
1244   /// descriptor field, if valid.
1245   /// \param NextFreeVGPR [in] Max VGPR number referenced, plus one.
1246   /// \param VGPRRange [in] Token range, used for VGPR diagnostics.
1247   /// \param NextFreeSGPR [in] Max SGPR number referenced, plus one.
1248   /// \param SGPRRange [in] Token range, used for SGPR diagnostics.
1249   /// \param VGPRBlocks [out] Result VGPR block count.
1250   /// \param SGPRBlocks [out] Result SGPR block count.
1251   bool calculateGPRBlocks(const FeatureBitset &Features, bool VCCUsed,
1252                           bool FlatScrUsed, bool XNACKUsed,
1253                           Optional<bool> EnableWavefrontSize32, unsigned NextFreeVGPR,
1254                           SMRange VGPRRange, unsigned NextFreeSGPR,
1255                           SMRange SGPRRange, unsigned &VGPRBlocks,
1256                           unsigned &SGPRBlocks);
1257   bool ParseDirectiveAMDGCNTarget();
1258   bool ParseDirectiveAMDHSAKernel();
1259   bool ParseDirectiveMajorMinor(uint32_t &Major, uint32_t &Minor);
1260   bool ParseDirectiveHSACodeObjectVersion();
1261   bool ParseDirectiveHSACodeObjectISA();
1262   bool ParseAMDKernelCodeTValue(StringRef ID, amd_kernel_code_t &Header);
1263   bool ParseDirectiveAMDKernelCodeT();
1264   // TODO: Possibly make subtargetHasRegister const.
1265   bool subtargetHasRegister(const MCRegisterInfo &MRI, unsigned RegNo);
1266   bool ParseDirectiveAMDGPUHsaKernel();
1267 
1268   bool ParseDirectiveISAVersion();
1269   bool ParseDirectiveHSAMetadata();
1270   bool ParseDirectivePALMetadataBegin();
1271   bool ParseDirectivePALMetadata();
1272   bool ParseDirectiveAMDGPULDS();
1273 
1274   /// Common code to parse out a block of text (typically YAML) between start and
1275   /// end directives.
1276   bool ParseToEndDirective(const char *AssemblerDirectiveBegin,
1277                            const char *AssemblerDirectiveEnd,
1278                            std::string &CollectString);
1279 
1280   bool AddNextRegisterToList(unsigned& Reg, unsigned& RegWidth,
1281                              RegisterKind RegKind, unsigned Reg1, SMLoc Loc);
1282   bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
1283                            unsigned &RegNum, unsigned &RegWidth,
1284                            bool RestoreOnFailure = false);
1285   bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
1286                            unsigned &RegNum, unsigned &RegWidth,
1287                            SmallVectorImpl<AsmToken> &Tokens);
1288   unsigned ParseRegularReg(RegisterKind &RegKind, unsigned &RegNum,
1289                            unsigned &RegWidth,
1290                            SmallVectorImpl<AsmToken> &Tokens);
1291   unsigned ParseSpecialReg(RegisterKind &RegKind, unsigned &RegNum,
1292                            unsigned &RegWidth,
1293                            SmallVectorImpl<AsmToken> &Tokens);
1294   unsigned ParseRegList(RegisterKind &RegKind, unsigned &RegNum,
1295                         unsigned &RegWidth, SmallVectorImpl<AsmToken> &Tokens);
1296   bool ParseRegRange(unsigned& Num, unsigned& Width);
1297   unsigned getRegularReg(RegisterKind RegKind,
1298                          unsigned RegNum,
1299                          unsigned RegWidth,
1300                          SMLoc Loc);
1301 
1302   bool isRegister();
1303   bool isRegister(const AsmToken &Token, const AsmToken &NextToken) const;
1304   Optional<StringRef> getGprCountSymbolName(RegisterKind RegKind);
1305   void initializeGprCountSymbol(RegisterKind RegKind);
1306   bool updateGprCountSymbols(RegisterKind RegKind, unsigned DwordRegIndex,
1307                              unsigned RegWidth);
1308   void cvtMubufImpl(MCInst &Inst, const OperandVector &Operands,
1309                     bool IsAtomic, bool IsLds = false);
1310   void cvtDSImpl(MCInst &Inst, const OperandVector &Operands,
1311                  bool IsGdsHardcoded);
1312 
1313 public:
1314   enum AMDGPUMatchResultTy {
1315     Match_PreferE32 = FIRST_TARGET_MATCH_RESULT_TY
1316   };
1317   enum OperandMode {
1318     OperandMode_Default,
1319     OperandMode_NSA,
1320   };
1321 
1322   using OptionalImmIndexMap = std::map<AMDGPUOperand::ImmTy, unsigned>;
1323 
1324   AMDGPUAsmParser(const MCSubtargetInfo &STI, MCAsmParser &_Parser,
1325                const MCInstrInfo &MII,
1326                const MCTargetOptions &Options)
1327       : MCTargetAsmParser(Options, STI, MII), Parser(_Parser) {
1328     MCAsmParserExtension::Initialize(Parser);
1329 
1330     if (getFeatureBits().none()) {
1331       // Set default features.
1332       copySTI().ToggleFeature("southern-islands");
1333     }
1334 
1335     setAvailableFeatures(ComputeAvailableFeatures(getFeatureBits()));
1336 
1337     {
1338       // TODO: make those pre-defined variables read-only.
1339       // Currently there is none suitable machinery in the core llvm-mc for this.
1340       // MCSymbol::isRedefinable is intended for another purpose, and
1341       // AsmParser::parseDirectiveSet() cannot be specialized for specific target.
1342       AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
1343       MCContext &Ctx = getContext();
1344       if (ISA.Major >= 6 && isHsaAbiVersion3AndAbove(&getSTI())) {
1345         MCSymbol *Sym =
1346             Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_number"));
1347         Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
1348         Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_minor"));
1349         Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
1350         Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_stepping"));
1351         Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
1352       } else {
1353         MCSymbol *Sym =
1354             Ctx.getOrCreateSymbol(Twine(".option.machine_version_major"));
1355         Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
1356         Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_minor"));
1357         Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
1358         Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_stepping"));
1359         Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
1360       }
1361       if (ISA.Major >= 6 && isHsaAbiVersion3AndAbove(&getSTI())) {
1362         initializeGprCountSymbol(IS_VGPR);
1363         initializeGprCountSymbol(IS_SGPR);
1364       } else
1365         KernelScope.initialize(getContext());
1366     }
1367   }
1368 
1369   bool hasMIMG_R128() const {
1370     return AMDGPU::hasMIMG_R128(getSTI());
1371   }
1372 
1373   bool hasPackedD16() const {
1374     return AMDGPU::hasPackedD16(getSTI());
1375   }
1376 
1377   bool hasGFX10A16() const {
1378     return AMDGPU::hasGFX10A16(getSTI());
1379   }
1380 
1381   bool hasG16() const { return AMDGPU::hasG16(getSTI()); }
1382 
1383   bool isSI() const {
1384     return AMDGPU::isSI(getSTI());
1385   }
1386 
1387   bool isCI() const {
1388     return AMDGPU::isCI(getSTI());
1389   }
1390 
1391   bool isVI() const {
1392     return AMDGPU::isVI(getSTI());
1393   }
1394 
1395   bool isGFX9() const {
1396     return AMDGPU::isGFX9(getSTI());
1397   }
1398 
1399   // TODO: isGFX90A is also true for GFX940. We need to clean it.
1400   bool isGFX90A() const {
1401     return AMDGPU::isGFX90A(getSTI());
1402   }
1403 
1404   bool isGFX940() const {
1405     return AMDGPU::isGFX940(getSTI());
1406   }
1407 
1408   bool isGFX9Plus() const {
1409     return AMDGPU::isGFX9Plus(getSTI());
1410   }
1411 
1412   bool isGFX10() const {
1413     return AMDGPU::isGFX10(getSTI());
1414   }
1415 
1416   bool isGFX10Plus() const { return AMDGPU::isGFX10Plus(getSTI()); }
1417 
1418   bool isGFX10_BEncoding() const {
1419     return AMDGPU::isGFX10_BEncoding(getSTI());
1420   }
1421 
1422   bool hasInv2PiInlineImm() const {
1423     return getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm];
1424   }
1425 
1426   bool hasFlatOffsets() const {
1427     return getFeatureBits()[AMDGPU::FeatureFlatInstOffsets];
1428   }
1429 
1430   bool hasArchitectedFlatScratch() const {
1431     return getFeatureBits()[AMDGPU::FeatureArchitectedFlatScratch];
1432   }
1433 
1434   bool hasSGPR102_SGPR103() const {
1435     return !isVI() && !isGFX9();
1436   }
1437 
1438   bool hasSGPR104_SGPR105() const { return isGFX10Plus(); }
1439 
1440   bool hasIntClamp() const {
1441     return getFeatureBits()[AMDGPU::FeatureIntClamp];
1442   }
1443 
1444   AMDGPUTargetStreamer &getTargetStreamer() {
1445     MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer();
1446     return static_cast<AMDGPUTargetStreamer &>(TS);
1447   }
1448 
1449   const MCRegisterInfo *getMRI() const {
1450     // We need this const_cast because for some reason getContext() is not const
1451     // in MCAsmParser.
1452     return const_cast<AMDGPUAsmParser*>(this)->getContext().getRegisterInfo();
1453   }
1454 
1455   const MCInstrInfo *getMII() const {
1456     return &MII;
1457   }
1458 
1459   const FeatureBitset &getFeatureBits() const {
1460     return getSTI().getFeatureBits();
1461   }
1462 
1463   void setForcedEncodingSize(unsigned Size) { ForcedEncodingSize = Size; }
1464   void setForcedDPP(bool ForceDPP_) { ForcedDPP = ForceDPP_; }
1465   void setForcedSDWA(bool ForceSDWA_) { ForcedSDWA = ForceSDWA_; }
1466 
1467   unsigned getForcedEncodingSize() const { return ForcedEncodingSize; }
1468   bool isForcedVOP3() const { return ForcedEncodingSize == 64; }
1469   bool isForcedDPP() const { return ForcedDPP; }
1470   bool isForcedSDWA() const { return ForcedSDWA; }
1471   ArrayRef<unsigned> getMatchedVariants() const;
1472   StringRef getMatchedVariantName() const;
1473 
1474   std::unique_ptr<AMDGPUOperand> parseRegister(bool RestoreOnFailure = false);
1475   bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc,
1476                      bool RestoreOnFailure);
1477   bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override;
1478   OperandMatchResultTy tryParseRegister(unsigned &RegNo, SMLoc &StartLoc,
1479                                         SMLoc &EndLoc) override;
1480   unsigned checkTargetMatchPredicate(MCInst &Inst) override;
1481   unsigned validateTargetOperandClass(MCParsedAsmOperand &Op,
1482                                       unsigned Kind) override;
1483   bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
1484                                OperandVector &Operands, MCStreamer &Out,
1485                                uint64_t &ErrorInfo,
1486                                bool MatchingInlineAsm) override;
1487   bool ParseDirective(AsmToken DirectiveID) override;
1488   OperandMatchResultTy parseOperand(OperandVector &Operands, StringRef Mnemonic,
1489                                     OperandMode Mode = OperandMode_Default);
1490   StringRef parseMnemonicSuffix(StringRef Name);
1491   bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
1492                         SMLoc NameLoc, OperandVector &Operands) override;
1493   //bool ProcessInstruction(MCInst &Inst);
1494 
1495   OperandMatchResultTy parseIntWithPrefix(const char *Prefix, int64_t &Int);
1496 
1497   OperandMatchResultTy
1498   parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
1499                      AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
1500                      bool (*ConvertResult)(int64_t &) = nullptr);
1501 
1502   OperandMatchResultTy
1503   parseOperandArrayWithPrefix(const char *Prefix,
1504                               OperandVector &Operands,
1505                               AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
1506                               bool (*ConvertResult)(int64_t&) = nullptr);
1507 
1508   OperandMatchResultTy
1509   parseNamedBit(StringRef Name, OperandVector &Operands,
1510                 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone);
1511   OperandMatchResultTy parseCPol(OperandVector &Operands);
1512   OperandMatchResultTy parseStringWithPrefix(StringRef Prefix,
1513                                              StringRef &Value,
1514                                              SMLoc &StringLoc);
1515 
1516   bool isModifier();
1517   bool isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1518   bool isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1519   bool isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1520   bool isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const;
1521   bool parseSP3NegModifier();
1522   OperandMatchResultTy parseImm(OperandVector &Operands, bool HasSP3AbsModifier = false);
1523   OperandMatchResultTy parseReg(OperandVector &Operands);
1524   OperandMatchResultTy parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod = false);
1525   OperandMatchResultTy parseRegOrImmWithFPInputMods(OperandVector &Operands, bool AllowImm = true);
1526   OperandMatchResultTy parseRegOrImmWithIntInputMods(OperandVector &Operands, bool AllowImm = true);
1527   OperandMatchResultTy parseRegWithFPInputMods(OperandVector &Operands);
1528   OperandMatchResultTy parseRegWithIntInputMods(OperandVector &Operands);
1529   OperandMatchResultTy parseVReg32OrOff(OperandVector &Operands);
1530   OperandMatchResultTy parseDfmtNfmt(int64_t &Format);
1531   OperandMatchResultTy parseUfmt(int64_t &Format);
1532   OperandMatchResultTy parseSymbolicSplitFormat(StringRef FormatStr, SMLoc Loc, int64_t &Format);
1533   OperandMatchResultTy parseSymbolicUnifiedFormat(StringRef FormatStr, SMLoc Loc, int64_t &Format);
1534   OperandMatchResultTy parseFORMAT(OperandVector &Operands);
1535   OperandMatchResultTy parseSymbolicOrNumericFormat(int64_t &Format);
1536   OperandMatchResultTy parseNumericFormat(int64_t &Format);
1537   bool tryParseFmt(const char *Pref, int64_t MaxVal, int64_t &Val);
1538   bool matchDfmtNfmt(int64_t &Dfmt, int64_t &Nfmt, StringRef FormatStr, SMLoc Loc);
1539 
1540   void cvtDSOffset01(MCInst &Inst, const OperandVector &Operands);
1541   void cvtDS(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, false); }
1542   void cvtDSGds(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, true); }
1543   void cvtExp(MCInst &Inst, const OperandVector &Operands);
1544 
1545   bool parseCnt(int64_t &IntVal);
1546   OperandMatchResultTy parseSWaitCntOps(OperandVector &Operands);
1547 
1548   bool parseDepCtr(int64_t &IntVal, unsigned &Mask);
1549   void depCtrError(SMLoc Loc, int ErrorId, StringRef DepCtrName);
1550   OperandMatchResultTy parseDepCtrOps(OperandVector &Operands);
1551 
1552   OperandMatchResultTy parseHwreg(OperandVector &Operands);
1553 
1554 private:
1555   struct OperandInfoTy {
1556     SMLoc Loc;
1557     int64_t Id;
1558     bool IsSymbolic = false;
1559     bool IsDefined = false;
1560 
1561     OperandInfoTy(int64_t Id_) : Id(Id_) {}
1562   };
1563 
1564   bool parseSendMsgBody(OperandInfoTy &Msg, OperandInfoTy &Op, OperandInfoTy &Stream);
1565   bool validateSendMsg(const OperandInfoTy &Msg,
1566                        const OperandInfoTy &Op,
1567                        const OperandInfoTy &Stream);
1568 
1569   bool parseHwregBody(OperandInfoTy &HwReg,
1570                       OperandInfoTy &Offset,
1571                       OperandInfoTy &Width);
1572   bool validateHwreg(const OperandInfoTy &HwReg,
1573                      const OperandInfoTy &Offset,
1574                      const OperandInfoTy &Width);
1575 
1576   SMLoc getFlatOffsetLoc(const OperandVector &Operands) const;
1577   SMLoc getSMEMOffsetLoc(const OperandVector &Operands) const;
1578   SMLoc getBLGPLoc(const OperandVector &Operands) const;
1579 
1580   SMLoc getOperandLoc(std::function<bool(const AMDGPUOperand&)> Test,
1581                       const OperandVector &Operands) const;
1582   SMLoc getImmLoc(AMDGPUOperand::ImmTy Type, const OperandVector &Operands) const;
1583   SMLoc getRegLoc(unsigned Reg, const OperandVector &Operands) const;
1584   SMLoc getLitLoc(const OperandVector &Operands) const;
1585   SMLoc getConstLoc(const OperandVector &Operands) const;
1586 
1587   bool validateInstruction(const MCInst &Inst, const SMLoc &IDLoc, const OperandVector &Operands);
1588   bool validateFlatOffset(const MCInst &Inst, const OperandVector &Operands);
1589   bool validateSMEMOffset(const MCInst &Inst, const OperandVector &Operands);
1590   bool validateSOPLiteral(const MCInst &Inst) const;
1591   bool validateConstantBusLimitations(const MCInst &Inst, const OperandVector &Operands);
1592   bool validateEarlyClobberLimitations(const MCInst &Inst, const OperandVector &Operands);
1593   bool validateIntClampSupported(const MCInst &Inst);
1594   bool validateMIMGAtomicDMask(const MCInst &Inst);
1595   bool validateMIMGGatherDMask(const MCInst &Inst);
1596   bool validateMovrels(const MCInst &Inst, const OperandVector &Operands);
1597   bool validateMIMGDataSize(const MCInst &Inst);
1598   bool validateMIMGAddrSize(const MCInst &Inst);
1599   bool validateMIMGD16(const MCInst &Inst);
1600   bool validateMIMGDim(const MCInst &Inst);
1601   bool validateMIMGMSAA(const MCInst &Inst);
1602   bool validateOpSel(const MCInst &Inst);
1603   bool validateDPP(const MCInst &Inst, const OperandVector &Operands);
1604   bool validateVccOperand(unsigned Reg) const;
1605   bool validateVOPLiteral(const MCInst &Inst, const OperandVector &Operands);
1606   bool validateMAIAccWrite(const MCInst &Inst, const OperandVector &Operands);
1607   bool validateMFMA(const MCInst &Inst, const OperandVector &Operands);
1608   bool validateAGPRLdSt(const MCInst &Inst) const;
1609   bool validateVGPRAlign(const MCInst &Inst) const;
1610   bool validateBLGP(const MCInst &Inst, const OperandVector &Operands);
1611   bool validateGWS(const MCInst &Inst, const OperandVector &Operands);
1612   bool validateDivScale(const MCInst &Inst);
1613   bool validateCoherencyBits(const MCInst &Inst, const OperandVector &Operands,
1614                              const SMLoc &IDLoc);
1615   Optional<StringRef> validateLdsDirect(const MCInst &Inst);
1616   unsigned getConstantBusLimit(unsigned Opcode) const;
1617   bool usesConstantBus(const MCInst &Inst, unsigned OpIdx);
1618   bool isInlineConstant(const MCInst &Inst, unsigned OpIdx) const;
1619   unsigned findImplicitSGPRReadInVOP(const MCInst &Inst) const;
1620 
1621   bool isSupportedMnemo(StringRef Mnemo,
1622                         const FeatureBitset &FBS);
1623   bool isSupportedMnemo(StringRef Mnemo,
1624                         const FeatureBitset &FBS,
1625                         ArrayRef<unsigned> Variants);
1626   bool checkUnsupportedInstruction(StringRef Name, const SMLoc &IDLoc);
1627 
1628   bool isId(const StringRef Id) const;
1629   bool isId(const AsmToken &Token, const StringRef Id) const;
1630   bool isToken(const AsmToken::TokenKind Kind) const;
1631   bool trySkipId(const StringRef Id);
1632   bool trySkipId(const StringRef Pref, const StringRef Id);
1633   bool trySkipId(const StringRef Id, const AsmToken::TokenKind Kind);
1634   bool trySkipToken(const AsmToken::TokenKind Kind);
1635   bool skipToken(const AsmToken::TokenKind Kind, const StringRef ErrMsg);
1636   bool parseString(StringRef &Val, const StringRef ErrMsg = "expected a string");
1637   bool parseId(StringRef &Val, const StringRef ErrMsg = "");
1638 
1639   void peekTokens(MutableArrayRef<AsmToken> Tokens);
1640   AsmToken::TokenKind getTokenKind() const;
1641   bool parseExpr(int64_t &Imm, StringRef Expected = "");
1642   bool parseExpr(OperandVector &Operands);
1643   StringRef getTokenStr() const;
1644   AsmToken peekToken();
1645   AsmToken getToken() const;
1646   SMLoc getLoc() const;
1647   void lex();
1648 
1649 public:
1650   void onBeginOfFile() override;
1651 
1652   OperandMatchResultTy parseOptionalOperand(OperandVector &Operands);
1653   OperandMatchResultTy parseOptionalOpr(OperandVector &Operands);
1654 
1655   OperandMatchResultTy parseExpTgt(OperandVector &Operands);
1656   OperandMatchResultTy parseSendMsgOp(OperandVector &Operands);
1657   OperandMatchResultTy parseInterpSlot(OperandVector &Operands);
1658   OperandMatchResultTy parseInterpAttr(OperandVector &Operands);
1659   OperandMatchResultTy parseSOppBrTarget(OperandVector &Operands);
1660   OperandMatchResultTy parseBoolReg(OperandVector &Operands);
1661 
1662   bool parseSwizzleOperand(int64_t &Op,
1663                            const unsigned MinVal,
1664                            const unsigned MaxVal,
1665                            const StringRef ErrMsg,
1666                            SMLoc &Loc);
1667   bool parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
1668                             const unsigned MinVal,
1669                             const unsigned MaxVal,
1670                             const StringRef ErrMsg);
1671   OperandMatchResultTy parseSwizzleOp(OperandVector &Operands);
1672   bool parseSwizzleOffset(int64_t &Imm);
1673   bool parseSwizzleMacro(int64_t &Imm);
1674   bool parseSwizzleQuadPerm(int64_t &Imm);
1675   bool parseSwizzleBitmaskPerm(int64_t &Imm);
1676   bool parseSwizzleBroadcast(int64_t &Imm);
1677   bool parseSwizzleSwap(int64_t &Imm);
1678   bool parseSwizzleReverse(int64_t &Imm);
1679 
1680   OperandMatchResultTy parseGPRIdxMode(OperandVector &Operands);
1681   int64_t parseGPRIdxMacro();
1682 
1683   void cvtMubuf(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false); }
1684   void cvtMubufAtomic(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true); }
1685   void cvtMubufLds(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, true); }
1686   void cvtMtbuf(MCInst &Inst, const OperandVector &Operands);
1687 
1688   AMDGPUOperand::Ptr defaultCPol() const;
1689 
1690   AMDGPUOperand::Ptr defaultSMRDOffset8() const;
1691   AMDGPUOperand::Ptr defaultSMEMOffset() const;
1692   AMDGPUOperand::Ptr defaultSMRDLiteralOffset() const;
1693   AMDGPUOperand::Ptr defaultFlatOffset() const;
1694 
1695   OperandMatchResultTy parseOModOperand(OperandVector &Operands);
1696 
1697   void cvtVOP3(MCInst &Inst, const OperandVector &Operands,
1698                OptionalImmIndexMap &OptionalIdx);
1699   void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands);
1700   void cvtVOP3(MCInst &Inst, const OperandVector &Operands);
1701   void cvtVOP3P(MCInst &Inst, const OperandVector &Operands);
1702   void cvtVOP3P(MCInst &Inst, const OperandVector &Operands,
1703                 OptionalImmIndexMap &OptionalIdx);
1704 
1705   void cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands);
1706 
1707   void cvtMIMG(MCInst &Inst, const OperandVector &Operands,
1708                bool IsAtomic = false);
1709   void cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands);
1710   void cvtIntersectRay(MCInst &Inst, const OperandVector &Operands);
1711 
1712   void cvtSMEMAtomic(MCInst &Inst, const OperandVector &Operands);
1713 
1714   bool parseDimId(unsigned &Encoding);
1715   OperandMatchResultTy parseDim(OperandVector &Operands);
1716   OperandMatchResultTy parseDPP8(OperandVector &Operands);
1717   OperandMatchResultTy parseDPPCtrl(OperandVector &Operands);
1718   bool isSupportedDPPCtrl(StringRef Ctrl, const OperandVector &Operands);
1719   int64_t parseDPPCtrlSel(StringRef Ctrl);
1720   int64_t parseDPPCtrlPerm();
1721   AMDGPUOperand::Ptr defaultRowMask() const;
1722   AMDGPUOperand::Ptr defaultBankMask() const;
1723   AMDGPUOperand::Ptr defaultBoundCtrl() const;
1724   AMDGPUOperand::Ptr defaultFI() const;
1725   void cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8 = false);
1726   void cvtDPP8(MCInst &Inst, const OperandVector &Operands) { cvtDPP(Inst, Operands, true); }
1727 
1728   OperandMatchResultTy parseSDWASel(OperandVector &Operands, StringRef Prefix,
1729                                     AMDGPUOperand::ImmTy Type);
1730   OperandMatchResultTy parseSDWADstUnused(OperandVector &Operands);
1731   void cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands);
1732   void cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands);
1733   void cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands);
1734   void cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands);
1735   void cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands);
1736   void cvtSDWA(MCInst &Inst, const OperandVector &Operands,
1737                uint64_t BasicInstType,
1738                bool SkipDstVcc = false,
1739                bool SkipSrcVcc = false);
1740 
1741   AMDGPUOperand::Ptr defaultBLGP() const;
1742   AMDGPUOperand::Ptr defaultCBSZ() const;
1743   AMDGPUOperand::Ptr defaultABID() const;
1744 
1745   OperandMatchResultTy parseEndpgmOp(OperandVector &Operands);
1746   AMDGPUOperand::Ptr defaultEndpgmImmOperands() const;
1747 };
1748 
1749 struct OptionalOperand {
1750   const char *Name;
1751   AMDGPUOperand::ImmTy Type;
1752   bool IsBit;
1753   bool (*ConvertResult)(int64_t&);
1754 };
1755 
1756 } // end anonymous namespace
1757 
1758 // May be called with integer type with equivalent bitwidth.
1759 static const fltSemantics *getFltSemantics(unsigned Size) {
1760   switch (Size) {
1761   case 4:
1762     return &APFloat::IEEEsingle();
1763   case 8:
1764     return &APFloat::IEEEdouble();
1765   case 2:
1766     return &APFloat::IEEEhalf();
1767   default:
1768     llvm_unreachable("unsupported fp type");
1769   }
1770 }
1771 
1772 static const fltSemantics *getFltSemantics(MVT VT) {
1773   return getFltSemantics(VT.getSizeInBits() / 8);
1774 }
1775 
1776 static const fltSemantics *getOpFltSemantics(uint8_t OperandType) {
1777   switch (OperandType) {
1778   case AMDGPU::OPERAND_REG_IMM_INT32:
1779   case AMDGPU::OPERAND_REG_IMM_FP32:
1780   case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED:
1781   case AMDGPU::OPERAND_REG_INLINE_C_INT32:
1782   case AMDGPU::OPERAND_REG_INLINE_C_FP32:
1783   case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
1784   case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
1785   case AMDGPU::OPERAND_REG_INLINE_C_V2FP32:
1786   case AMDGPU::OPERAND_REG_IMM_V2FP32:
1787   case AMDGPU::OPERAND_REG_INLINE_C_V2INT32:
1788   case AMDGPU::OPERAND_REG_IMM_V2INT32:
1789   case AMDGPU::OPERAND_KIMM32:
1790     return &APFloat::IEEEsingle();
1791   case AMDGPU::OPERAND_REG_IMM_INT64:
1792   case AMDGPU::OPERAND_REG_IMM_FP64:
1793   case AMDGPU::OPERAND_REG_INLINE_C_INT64:
1794   case AMDGPU::OPERAND_REG_INLINE_C_FP64:
1795   case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
1796     return &APFloat::IEEEdouble();
1797   case AMDGPU::OPERAND_REG_IMM_INT16:
1798   case AMDGPU::OPERAND_REG_IMM_FP16:
1799   case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED:
1800   case AMDGPU::OPERAND_REG_INLINE_C_INT16:
1801   case AMDGPU::OPERAND_REG_INLINE_C_FP16:
1802   case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
1803   case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
1804   case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
1805   case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
1806   case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
1807   case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
1808   case AMDGPU::OPERAND_REG_IMM_V2INT16:
1809   case AMDGPU::OPERAND_REG_IMM_V2FP16:
1810   case AMDGPU::OPERAND_KIMM16:
1811     return &APFloat::IEEEhalf();
1812   default:
1813     llvm_unreachable("unsupported fp type");
1814   }
1815 }
1816 
1817 //===----------------------------------------------------------------------===//
1818 // Operand
1819 //===----------------------------------------------------------------------===//
1820 
1821 static bool canLosslesslyConvertToFPType(APFloat &FPLiteral, MVT VT) {
1822   bool Lost;
1823 
1824   // Convert literal to single precision
1825   APFloat::opStatus Status = FPLiteral.convert(*getFltSemantics(VT),
1826                                                APFloat::rmNearestTiesToEven,
1827                                                &Lost);
1828   // We allow precision lost but not overflow or underflow
1829   if (Status != APFloat::opOK &&
1830       Lost &&
1831       ((Status & APFloat::opOverflow)  != 0 ||
1832        (Status & APFloat::opUnderflow) != 0)) {
1833     return false;
1834   }
1835 
1836   return true;
1837 }
1838 
1839 static bool isSafeTruncation(int64_t Val, unsigned Size) {
1840   return isUIntN(Size, Val) || isIntN(Size, Val);
1841 }
1842 
1843 static bool isInlineableLiteralOp16(int64_t Val, MVT VT, bool HasInv2Pi) {
1844   if (VT.getScalarType() == MVT::i16) {
1845     // FP immediate values are broken.
1846     return isInlinableIntLiteral(Val);
1847   }
1848 
1849   // f16/v2f16 operands work correctly for all values.
1850   return AMDGPU::isInlinableLiteral16(Val, HasInv2Pi);
1851 }
1852 
1853 bool AMDGPUOperand::isInlinableImm(MVT type) const {
1854 
1855   // This is a hack to enable named inline values like
1856   // shared_base with both 32-bit and 64-bit operands.
1857   // Note that these values are defined as
1858   // 32-bit operands only.
1859   if (isInlineValue()) {
1860     return true;
1861   }
1862 
1863   if (!isImmTy(ImmTyNone)) {
1864     // Only plain immediates are inlinable (e.g. "clamp" attribute is not)
1865     return false;
1866   }
1867   // TODO: We should avoid using host float here. It would be better to
1868   // check the float bit values which is what a few other places do.
1869   // We've had bot failures before due to weird NaN support on mips hosts.
1870 
1871   APInt Literal(64, Imm.Val);
1872 
1873   if (Imm.IsFPImm) { // We got fp literal token
1874     if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
1875       return AMDGPU::isInlinableLiteral64(Imm.Val,
1876                                           AsmParser->hasInv2PiInlineImm());
1877     }
1878 
1879     APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
1880     if (!canLosslesslyConvertToFPType(FPLiteral, type))
1881       return false;
1882 
1883     if (type.getScalarSizeInBits() == 16) {
1884       return isInlineableLiteralOp16(
1885         static_cast<int16_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
1886         type, AsmParser->hasInv2PiInlineImm());
1887     }
1888 
1889     // Check if single precision literal is inlinable
1890     return AMDGPU::isInlinableLiteral32(
1891       static_cast<int32_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
1892       AsmParser->hasInv2PiInlineImm());
1893   }
1894 
1895   // We got int literal token.
1896   if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
1897     return AMDGPU::isInlinableLiteral64(Imm.Val,
1898                                         AsmParser->hasInv2PiInlineImm());
1899   }
1900 
1901   if (!isSafeTruncation(Imm.Val, type.getScalarSizeInBits())) {
1902     return false;
1903   }
1904 
1905   if (type.getScalarSizeInBits() == 16) {
1906     return isInlineableLiteralOp16(
1907       static_cast<int16_t>(Literal.getLoBits(16).getSExtValue()),
1908       type, AsmParser->hasInv2PiInlineImm());
1909   }
1910 
1911   return AMDGPU::isInlinableLiteral32(
1912     static_cast<int32_t>(Literal.getLoBits(32).getZExtValue()),
1913     AsmParser->hasInv2PiInlineImm());
1914 }
1915 
1916 bool AMDGPUOperand::isLiteralImm(MVT type) const {
1917   // Check that this immediate can be added as literal
1918   if (!isImmTy(ImmTyNone)) {
1919     return false;
1920   }
1921 
1922   if (!Imm.IsFPImm) {
1923     // We got int literal token.
1924 
1925     if (type == MVT::f64 && hasFPModifiers()) {
1926       // Cannot apply fp modifiers to int literals preserving the same semantics
1927       // for VOP1/2/C and VOP3 because of integer truncation. To avoid ambiguity,
1928       // disable these cases.
1929       return false;
1930     }
1931 
1932     unsigned Size = type.getSizeInBits();
1933     if (Size == 64)
1934       Size = 32;
1935 
1936     // FIXME: 64-bit operands can zero extend, sign extend, or pad zeroes for FP
1937     // types.
1938     return isSafeTruncation(Imm.Val, Size);
1939   }
1940 
1941   // We got fp literal token
1942   if (type == MVT::f64) { // Expected 64-bit fp operand
1943     // We would set low 64-bits of literal to zeroes but we accept this literals
1944     return true;
1945   }
1946 
1947   if (type == MVT::i64) { // Expected 64-bit int operand
1948     // We don't allow fp literals in 64-bit integer instructions. It is
1949     // unclear how we should encode them.
1950     return false;
1951   }
1952 
1953   // We allow fp literals with f16x2 operands assuming that the specified
1954   // literal goes into the lower half and the upper half is zero. We also
1955   // require that the literal may be losslessly converted to f16.
1956   MVT ExpectedType = (type == MVT::v2f16)? MVT::f16 :
1957                      (type == MVT::v2i16)? MVT::i16 :
1958                      (type == MVT::v2f32)? MVT::f32 : type;
1959 
1960   APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
1961   return canLosslesslyConvertToFPType(FPLiteral, ExpectedType);
1962 }
1963 
1964 bool AMDGPUOperand::isRegClass(unsigned RCID) const {
1965   return isRegKind() && AsmParser->getMRI()->getRegClass(RCID).contains(getReg());
1966 }
1967 
1968 bool AMDGPUOperand::isVRegWithInputMods() const {
1969   return isRegClass(AMDGPU::VGPR_32RegClassID) ||
1970          // GFX90A allows DPP on 64-bit operands.
1971          (isRegClass(AMDGPU::VReg_64RegClassID) &&
1972           AsmParser->getFeatureBits()[AMDGPU::Feature64BitDPP]);
1973 }
1974 
1975 bool AMDGPUOperand::isSDWAOperand(MVT type) const {
1976   if (AsmParser->isVI())
1977     return isVReg32();
1978   else if (AsmParser->isGFX9Plus())
1979     return isRegClass(AMDGPU::VS_32RegClassID) || isInlinableImm(type);
1980   else
1981     return false;
1982 }
1983 
1984 bool AMDGPUOperand::isSDWAFP16Operand() const {
1985   return isSDWAOperand(MVT::f16);
1986 }
1987 
1988 bool AMDGPUOperand::isSDWAFP32Operand() const {
1989   return isSDWAOperand(MVT::f32);
1990 }
1991 
1992 bool AMDGPUOperand::isSDWAInt16Operand() const {
1993   return isSDWAOperand(MVT::i16);
1994 }
1995 
1996 bool AMDGPUOperand::isSDWAInt32Operand() const {
1997   return isSDWAOperand(MVT::i32);
1998 }
1999 
2000 bool AMDGPUOperand::isBoolReg() const {
2001   auto FB = AsmParser->getFeatureBits();
2002   return isReg() && ((FB[AMDGPU::FeatureWavefrontSize64] && isSCSrcB64()) ||
2003                      (FB[AMDGPU::FeatureWavefrontSize32] && isSCSrcB32()));
2004 }
2005 
2006 uint64_t AMDGPUOperand::applyInputFPModifiers(uint64_t Val, unsigned Size) const
2007 {
2008   assert(isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
2009   assert(Size == 2 || Size == 4 || Size == 8);
2010 
2011   const uint64_t FpSignMask = (1ULL << (Size * 8 - 1));
2012 
2013   if (Imm.Mods.Abs) {
2014     Val &= ~FpSignMask;
2015   }
2016   if (Imm.Mods.Neg) {
2017     Val ^= FpSignMask;
2018   }
2019 
2020   return Val;
2021 }
2022 
2023 void AMDGPUOperand::addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers) const {
2024   if (AMDGPU::isSISrcOperand(AsmParser->getMII()->get(Inst.getOpcode()),
2025                              Inst.getNumOperands())) {
2026     addLiteralImmOperand(Inst, Imm.Val,
2027                          ApplyModifiers &
2028                          isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
2029   } else {
2030     assert(!isImmTy(ImmTyNone) || !hasModifiers());
2031     Inst.addOperand(MCOperand::createImm(Imm.Val));
2032     setImmKindNone();
2033   }
2034 }
2035 
2036 void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const {
2037   const auto& InstDesc = AsmParser->getMII()->get(Inst.getOpcode());
2038   auto OpNum = Inst.getNumOperands();
2039   // Check that this operand accepts literals
2040   assert(AMDGPU::isSISrcOperand(InstDesc, OpNum));
2041 
2042   if (ApplyModifiers) {
2043     assert(AMDGPU::isSISrcFPOperand(InstDesc, OpNum));
2044     const unsigned Size = Imm.IsFPImm ? sizeof(double) : getOperandSize(InstDesc, OpNum);
2045     Val = applyInputFPModifiers(Val, Size);
2046   }
2047 
2048   APInt Literal(64, Val);
2049   uint8_t OpTy = InstDesc.OpInfo[OpNum].OperandType;
2050 
2051   if (Imm.IsFPImm) { // We got fp literal token
2052     switch (OpTy) {
2053     case AMDGPU::OPERAND_REG_IMM_INT64:
2054     case AMDGPU::OPERAND_REG_IMM_FP64:
2055     case AMDGPU::OPERAND_REG_INLINE_C_INT64:
2056     case AMDGPU::OPERAND_REG_INLINE_C_FP64:
2057     case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
2058       if (AMDGPU::isInlinableLiteral64(Literal.getZExtValue(),
2059                                        AsmParser->hasInv2PiInlineImm())) {
2060         Inst.addOperand(MCOperand::createImm(Literal.getZExtValue()));
2061         setImmKindConst();
2062         return;
2063       }
2064 
2065       // Non-inlineable
2066       if (AMDGPU::isSISrcFPOperand(InstDesc, OpNum)) { // Expected 64-bit fp operand
2067         // For fp operands we check if low 32 bits are zeros
2068         if (Literal.getLoBits(32) != 0) {
2069           const_cast<AMDGPUAsmParser *>(AsmParser)->Warning(Inst.getLoc(),
2070           "Can't encode literal as exact 64-bit floating-point operand. "
2071           "Low 32-bits will be set to zero");
2072         }
2073 
2074         Inst.addOperand(MCOperand::createImm(Literal.lshr(32).getZExtValue()));
2075         setImmKindLiteral();
2076         return;
2077       }
2078 
2079       // We don't allow fp literals in 64-bit integer instructions. It is
2080       // unclear how we should encode them. This case should be checked earlier
2081       // in predicate methods (isLiteralImm())
2082       llvm_unreachable("fp literal in 64-bit integer instruction.");
2083 
2084     case AMDGPU::OPERAND_REG_IMM_INT32:
2085     case AMDGPU::OPERAND_REG_IMM_FP32:
2086     case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED:
2087     case AMDGPU::OPERAND_REG_INLINE_C_INT32:
2088     case AMDGPU::OPERAND_REG_INLINE_C_FP32:
2089     case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
2090     case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
2091     case AMDGPU::OPERAND_REG_IMM_INT16:
2092     case AMDGPU::OPERAND_REG_IMM_FP16:
2093     case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED:
2094     case AMDGPU::OPERAND_REG_INLINE_C_INT16:
2095     case AMDGPU::OPERAND_REG_INLINE_C_FP16:
2096     case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
2097     case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
2098     case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
2099     case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
2100     case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
2101     case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
2102     case AMDGPU::OPERAND_REG_IMM_V2INT16:
2103     case AMDGPU::OPERAND_REG_IMM_V2FP16:
2104     case AMDGPU::OPERAND_REG_INLINE_C_V2FP32:
2105     case AMDGPU::OPERAND_REG_IMM_V2FP32:
2106     case AMDGPU::OPERAND_REG_INLINE_C_V2INT32:
2107     case AMDGPU::OPERAND_REG_IMM_V2INT32:
2108     case AMDGPU::OPERAND_KIMM32:
2109     case AMDGPU::OPERAND_KIMM16: {
2110       bool lost;
2111       APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
2112       // Convert literal to single precision
2113       FPLiteral.convert(*getOpFltSemantics(OpTy),
2114                         APFloat::rmNearestTiesToEven, &lost);
2115       // We allow precision lost but not overflow or underflow. This should be
2116       // checked earlier in isLiteralImm()
2117 
2118       uint64_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue();
2119       Inst.addOperand(MCOperand::createImm(ImmVal));
2120       setImmKindLiteral();
2121       return;
2122     }
2123     default:
2124       llvm_unreachable("invalid operand size");
2125     }
2126 
2127     return;
2128   }
2129 
2130   // We got int literal token.
2131   // Only sign extend inline immediates.
2132   switch (OpTy) {
2133   case AMDGPU::OPERAND_REG_IMM_INT32:
2134   case AMDGPU::OPERAND_REG_IMM_FP32:
2135   case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED:
2136   case AMDGPU::OPERAND_REG_INLINE_C_INT32:
2137   case AMDGPU::OPERAND_REG_INLINE_C_FP32:
2138   case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
2139   case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
2140   case AMDGPU::OPERAND_REG_IMM_V2INT16:
2141   case AMDGPU::OPERAND_REG_IMM_V2FP16:
2142   case AMDGPU::OPERAND_REG_IMM_V2FP32:
2143   case AMDGPU::OPERAND_REG_INLINE_C_V2FP32:
2144   case AMDGPU::OPERAND_REG_IMM_V2INT32:
2145   case AMDGPU::OPERAND_REG_INLINE_C_V2INT32:
2146     if (isSafeTruncation(Val, 32) &&
2147         AMDGPU::isInlinableLiteral32(static_cast<int32_t>(Val),
2148                                      AsmParser->hasInv2PiInlineImm())) {
2149       Inst.addOperand(MCOperand::createImm(Val));
2150       setImmKindConst();
2151       return;
2152     }
2153 
2154     Inst.addOperand(MCOperand::createImm(Val & 0xffffffff));
2155     setImmKindLiteral();
2156     return;
2157 
2158   case AMDGPU::OPERAND_REG_IMM_INT64:
2159   case AMDGPU::OPERAND_REG_IMM_FP64:
2160   case AMDGPU::OPERAND_REG_INLINE_C_INT64:
2161   case AMDGPU::OPERAND_REG_INLINE_C_FP64:
2162   case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
2163     if (AMDGPU::isInlinableLiteral64(Val, AsmParser->hasInv2PiInlineImm())) {
2164       Inst.addOperand(MCOperand::createImm(Val));
2165       setImmKindConst();
2166       return;
2167     }
2168 
2169     Inst.addOperand(MCOperand::createImm(Lo_32(Val)));
2170     setImmKindLiteral();
2171     return;
2172 
2173   case AMDGPU::OPERAND_REG_IMM_INT16:
2174   case AMDGPU::OPERAND_REG_IMM_FP16:
2175   case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED:
2176   case AMDGPU::OPERAND_REG_INLINE_C_INT16:
2177   case AMDGPU::OPERAND_REG_INLINE_C_FP16:
2178   case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
2179   case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
2180     if (isSafeTruncation(Val, 16) &&
2181         AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val),
2182                                      AsmParser->hasInv2PiInlineImm())) {
2183       Inst.addOperand(MCOperand::createImm(Val));
2184       setImmKindConst();
2185       return;
2186     }
2187 
2188     Inst.addOperand(MCOperand::createImm(Val & 0xffff));
2189     setImmKindLiteral();
2190     return;
2191 
2192   case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
2193   case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
2194   case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
2195   case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: {
2196     assert(isSafeTruncation(Val, 16));
2197     assert(AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val),
2198                                         AsmParser->hasInv2PiInlineImm()));
2199 
2200     Inst.addOperand(MCOperand::createImm(Val));
2201     return;
2202   }
2203   case AMDGPU::OPERAND_KIMM32:
2204     Inst.addOperand(MCOperand::createImm(Literal.getLoBits(32).getZExtValue()));
2205     setImmKindNone();
2206     return;
2207   case AMDGPU::OPERAND_KIMM16:
2208     Inst.addOperand(MCOperand::createImm(Literal.getLoBits(16).getZExtValue()));
2209     setImmKindNone();
2210     return;
2211   default:
2212     llvm_unreachable("invalid operand size");
2213   }
2214 }
2215 
2216 template <unsigned Bitwidth>
2217 void AMDGPUOperand::addKImmFPOperands(MCInst &Inst, unsigned N) const {
2218   APInt Literal(64, Imm.Val);
2219   setImmKindNone();
2220 
2221   if (!Imm.IsFPImm) {
2222     // We got int literal token.
2223     Inst.addOperand(MCOperand::createImm(Literal.getLoBits(Bitwidth).getZExtValue()));
2224     return;
2225   }
2226 
2227   bool Lost;
2228   APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
2229   FPLiteral.convert(*getFltSemantics(Bitwidth / 8),
2230                     APFloat::rmNearestTiesToEven, &Lost);
2231   Inst.addOperand(MCOperand::createImm(FPLiteral.bitcastToAPInt().getZExtValue()));
2232 }
2233 
2234 void AMDGPUOperand::addRegOperands(MCInst &Inst, unsigned N) const {
2235   Inst.addOperand(MCOperand::createReg(AMDGPU::getMCReg(getReg(), AsmParser->getSTI())));
2236 }
2237 
2238 static bool isInlineValue(unsigned Reg) {
2239   switch (Reg) {
2240   case AMDGPU::SRC_SHARED_BASE:
2241   case AMDGPU::SRC_SHARED_LIMIT:
2242   case AMDGPU::SRC_PRIVATE_BASE:
2243   case AMDGPU::SRC_PRIVATE_LIMIT:
2244   case AMDGPU::SRC_POPS_EXITING_WAVE_ID:
2245     return true;
2246   case AMDGPU::SRC_VCCZ:
2247   case AMDGPU::SRC_EXECZ:
2248   case AMDGPU::SRC_SCC:
2249     return true;
2250   case AMDGPU::SGPR_NULL:
2251     return true;
2252   default:
2253     return false;
2254   }
2255 }
2256 
2257 bool AMDGPUOperand::isInlineValue() const {
2258   return isRegKind() && ::isInlineValue(getReg());
2259 }
2260 
2261 //===----------------------------------------------------------------------===//
2262 // AsmParser
2263 //===----------------------------------------------------------------------===//
2264 
2265 static int getRegClass(RegisterKind Is, unsigned RegWidth) {
2266   if (Is == IS_VGPR) {
2267     switch (RegWidth) {
2268       default: return -1;
2269       case 32:
2270         return AMDGPU::VGPR_32RegClassID;
2271       case 64:
2272         return AMDGPU::VReg_64RegClassID;
2273       case 96:
2274         return AMDGPU::VReg_96RegClassID;
2275       case 128:
2276         return AMDGPU::VReg_128RegClassID;
2277       case 160:
2278         return AMDGPU::VReg_160RegClassID;
2279       case 192:
2280         return AMDGPU::VReg_192RegClassID;
2281       case 224:
2282         return AMDGPU::VReg_224RegClassID;
2283       case 256:
2284         return AMDGPU::VReg_256RegClassID;
2285       case 512:
2286         return AMDGPU::VReg_512RegClassID;
2287       case 1024:
2288         return AMDGPU::VReg_1024RegClassID;
2289     }
2290   } else if (Is == IS_TTMP) {
2291     switch (RegWidth) {
2292       default: return -1;
2293       case 32:
2294         return AMDGPU::TTMP_32RegClassID;
2295       case 64:
2296         return AMDGPU::TTMP_64RegClassID;
2297       case 128:
2298         return AMDGPU::TTMP_128RegClassID;
2299       case 256:
2300         return AMDGPU::TTMP_256RegClassID;
2301       case 512:
2302         return AMDGPU::TTMP_512RegClassID;
2303     }
2304   } else if (Is == IS_SGPR) {
2305     switch (RegWidth) {
2306       default: return -1;
2307       case 32:
2308         return AMDGPU::SGPR_32RegClassID;
2309       case 64:
2310         return AMDGPU::SGPR_64RegClassID;
2311       case 96:
2312         return AMDGPU::SGPR_96RegClassID;
2313       case 128:
2314         return AMDGPU::SGPR_128RegClassID;
2315       case 160:
2316         return AMDGPU::SGPR_160RegClassID;
2317       case 192:
2318         return AMDGPU::SGPR_192RegClassID;
2319       case 224:
2320         return AMDGPU::SGPR_224RegClassID;
2321       case 256:
2322         return AMDGPU::SGPR_256RegClassID;
2323       case 512:
2324         return AMDGPU::SGPR_512RegClassID;
2325     }
2326   } else if (Is == IS_AGPR) {
2327     switch (RegWidth) {
2328       default: return -1;
2329       case 32:
2330         return AMDGPU::AGPR_32RegClassID;
2331       case 64:
2332         return AMDGPU::AReg_64RegClassID;
2333       case 96:
2334         return AMDGPU::AReg_96RegClassID;
2335       case 128:
2336         return AMDGPU::AReg_128RegClassID;
2337       case 160:
2338         return AMDGPU::AReg_160RegClassID;
2339       case 192:
2340         return AMDGPU::AReg_192RegClassID;
2341       case 224:
2342         return AMDGPU::AReg_224RegClassID;
2343       case 256:
2344         return AMDGPU::AReg_256RegClassID;
2345       case 512:
2346         return AMDGPU::AReg_512RegClassID;
2347       case 1024:
2348         return AMDGPU::AReg_1024RegClassID;
2349     }
2350   }
2351   return -1;
2352 }
2353 
2354 static unsigned getSpecialRegForName(StringRef RegName) {
2355   return StringSwitch<unsigned>(RegName)
2356     .Case("exec", AMDGPU::EXEC)
2357     .Case("vcc", AMDGPU::VCC)
2358     .Case("flat_scratch", AMDGPU::FLAT_SCR)
2359     .Case("xnack_mask", AMDGPU::XNACK_MASK)
2360     .Case("shared_base", AMDGPU::SRC_SHARED_BASE)
2361     .Case("src_shared_base", AMDGPU::SRC_SHARED_BASE)
2362     .Case("shared_limit", AMDGPU::SRC_SHARED_LIMIT)
2363     .Case("src_shared_limit", AMDGPU::SRC_SHARED_LIMIT)
2364     .Case("private_base", AMDGPU::SRC_PRIVATE_BASE)
2365     .Case("src_private_base", AMDGPU::SRC_PRIVATE_BASE)
2366     .Case("private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
2367     .Case("src_private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
2368     .Case("pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
2369     .Case("src_pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
2370     .Case("lds_direct", AMDGPU::LDS_DIRECT)
2371     .Case("src_lds_direct", AMDGPU::LDS_DIRECT)
2372     .Case("m0", AMDGPU::M0)
2373     .Case("vccz", AMDGPU::SRC_VCCZ)
2374     .Case("src_vccz", AMDGPU::SRC_VCCZ)
2375     .Case("execz", AMDGPU::SRC_EXECZ)
2376     .Case("src_execz", AMDGPU::SRC_EXECZ)
2377     .Case("scc", AMDGPU::SRC_SCC)
2378     .Case("src_scc", AMDGPU::SRC_SCC)
2379     .Case("tba", AMDGPU::TBA)
2380     .Case("tma", AMDGPU::TMA)
2381     .Case("flat_scratch_lo", AMDGPU::FLAT_SCR_LO)
2382     .Case("flat_scratch_hi", AMDGPU::FLAT_SCR_HI)
2383     .Case("xnack_mask_lo", AMDGPU::XNACK_MASK_LO)
2384     .Case("xnack_mask_hi", AMDGPU::XNACK_MASK_HI)
2385     .Case("vcc_lo", AMDGPU::VCC_LO)
2386     .Case("vcc_hi", AMDGPU::VCC_HI)
2387     .Case("exec_lo", AMDGPU::EXEC_LO)
2388     .Case("exec_hi", AMDGPU::EXEC_HI)
2389     .Case("tma_lo", AMDGPU::TMA_LO)
2390     .Case("tma_hi", AMDGPU::TMA_HI)
2391     .Case("tba_lo", AMDGPU::TBA_LO)
2392     .Case("tba_hi", AMDGPU::TBA_HI)
2393     .Case("pc", AMDGPU::PC_REG)
2394     .Case("null", AMDGPU::SGPR_NULL)
2395     .Default(AMDGPU::NoRegister);
2396 }
2397 
2398 bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
2399                                     SMLoc &EndLoc, bool RestoreOnFailure) {
2400   auto R = parseRegister();
2401   if (!R) return true;
2402   assert(R->isReg());
2403   RegNo = R->getReg();
2404   StartLoc = R->getStartLoc();
2405   EndLoc = R->getEndLoc();
2406   return false;
2407 }
2408 
2409 bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
2410                                     SMLoc &EndLoc) {
2411   return ParseRegister(RegNo, StartLoc, EndLoc, /*RestoreOnFailure=*/false);
2412 }
2413 
2414 OperandMatchResultTy AMDGPUAsmParser::tryParseRegister(unsigned &RegNo,
2415                                                        SMLoc &StartLoc,
2416                                                        SMLoc &EndLoc) {
2417   bool Result =
2418       ParseRegister(RegNo, StartLoc, EndLoc, /*RestoreOnFailure=*/true);
2419   bool PendingErrors = getParser().hasPendingError();
2420   getParser().clearPendingErrors();
2421   if (PendingErrors)
2422     return MatchOperand_ParseFail;
2423   if (Result)
2424     return MatchOperand_NoMatch;
2425   return MatchOperand_Success;
2426 }
2427 
2428 bool AMDGPUAsmParser::AddNextRegisterToList(unsigned &Reg, unsigned &RegWidth,
2429                                             RegisterKind RegKind, unsigned Reg1,
2430                                             SMLoc Loc) {
2431   switch (RegKind) {
2432   case IS_SPECIAL:
2433     if (Reg == AMDGPU::EXEC_LO && Reg1 == AMDGPU::EXEC_HI) {
2434       Reg = AMDGPU::EXEC;
2435       RegWidth = 64;
2436       return true;
2437     }
2438     if (Reg == AMDGPU::FLAT_SCR_LO && Reg1 == AMDGPU::FLAT_SCR_HI) {
2439       Reg = AMDGPU::FLAT_SCR;
2440       RegWidth = 64;
2441       return true;
2442     }
2443     if (Reg == AMDGPU::XNACK_MASK_LO && Reg1 == AMDGPU::XNACK_MASK_HI) {
2444       Reg = AMDGPU::XNACK_MASK;
2445       RegWidth = 64;
2446       return true;
2447     }
2448     if (Reg == AMDGPU::VCC_LO && Reg1 == AMDGPU::VCC_HI) {
2449       Reg = AMDGPU::VCC;
2450       RegWidth = 64;
2451       return true;
2452     }
2453     if (Reg == AMDGPU::TBA_LO && Reg1 == AMDGPU::TBA_HI) {
2454       Reg = AMDGPU::TBA;
2455       RegWidth = 64;
2456       return true;
2457     }
2458     if (Reg == AMDGPU::TMA_LO && Reg1 == AMDGPU::TMA_HI) {
2459       Reg = AMDGPU::TMA;
2460       RegWidth = 64;
2461       return true;
2462     }
2463     Error(Loc, "register does not fit in the list");
2464     return false;
2465   case IS_VGPR:
2466   case IS_SGPR:
2467   case IS_AGPR:
2468   case IS_TTMP:
2469     if (Reg1 != Reg + RegWidth / 32) {
2470       Error(Loc, "registers in a list must have consecutive indices");
2471       return false;
2472     }
2473     RegWidth += 32;
2474     return true;
2475   default:
2476     llvm_unreachable("unexpected register kind");
2477   }
2478 }
2479 
2480 struct RegInfo {
2481   StringLiteral Name;
2482   RegisterKind Kind;
2483 };
2484 
2485 static constexpr RegInfo RegularRegisters[] = {
2486   {{"v"},    IS_VGPR},
2487   {{"s"},    IS_SGPR},
2488   {{"ttmp"}, IS_TTMP},
2489   {{"acc"},  IS_AGPR},
2490   {{"a"},    IS_AGPR},
2491 };
2492 
2493 static bool isRegularReg(RegisterKind Kind) {
2494   return Kind == IS_VGPR ||
2495          Kind == IS_SGPR ||
2496          Kind == IS_TTMP ||
2497          Kind == IS_AGPR;
2498 }
2499 
2500 static const RegInfo* getRegularRegInfo(StringRef Str) {
2501   for (const RegInfo &Reg : RegularRegisters)
2502     if (Str.startswith(Reg.Name))
2503       return &Reg;
2504   return nullptr;
2505 }
2506 
2507 static bool getRegNum(StringRef Str, unsigned& Num) {
2508   return !Str.getAsInteger(10, Num);
2509 }
2510 
2511 bool
2512 AMDGPUAsmParser::isRegister(const AsmToken &Token,
2513                             const AsmToken &NextToken) const {
2514 
2515   // A list of consecutive registers: [s0,s1,s2,s3]
2516   if (Token.is(AsmToken::LBrac))
2517     return true;
2518 
2519   if (!Token.is(AsmToken::Identifier))
2520     return false;
2521 
2522   // A single register like s0 or a range of registers like s[0:1]
2523 
2524   StringRef Str = Token.getString();
2525   const RegInfo *Reg = getRegularRegInfo(Str);
2526   if (Reg) {
2527     StringRef RegName = Reg->Name;
2528     StringRef RegSuffix = Str.substr(RegName.size());
2529     if (!RegSuffix.empty()) {
2530       unsigned Num;
2531       // A single register with an index: rXX
2532       if (getRegNum(RegSuffix, Num))
2533         return true;
2534     } else {
2535       // A range of registers: r[XX:YY].
2536       if (NextToken.is(AsmToken::LBrac))
2537         return true;
2538     }
2539   }
2540 
2541   return getSpecialRegForName(Str) != AMDGPU::NoRegister;
2542 }
2543 
2544 bool
2545 AMDGPUAsmParser::isRegister()
2546 {
2547   return isRegister(getToken(), peekToken());
2548 }
2549 
2550 unsigned
2551 AMDGPUAsmParser::getRegularReg(RegisterKind RegKind,
2552                                unsigned RegNum,
2553                                unsigned RegWidth,
2554                                SMLoc Loc) {
2555 
2556   assert(isRegularReg(RegKind));
2557 
2558   unsigned AlignSize = 1;
2559   if (RegKind == IS_SGPR || RegKind == IS_TTMP) {
2560     // SGPR and TTMP registers must be aligned.
2561     // Max required alignment is 4 dwords.
2562     AlignSize = std::min(RegWidth / 32, 4u);
2563   }
2564 
2565   if (RegNum % AlignSize != 0) {
2566     Error(Loc, "invalid register alignment");
2567     return AMDGPU::NoRegister;
2568   }
2569 
2570   unsigned RegIdx = RegNum / AlignSize;
2571   int RCID = getRegClass(RegKind, RegWidth);
2572   if (RCID == -1) {
2573     Error(Loc, "invalid or unsupported register size");
2574     return AMDGPU::NoRegister;
2575   }
2576 
2577   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
2578   const MCRegisterClass RC = TRI->getRegClass(RCID);
2579   if (RegIdx >= RC.getNumRegs()) {
2580     Error(Loc, "register index is out of range");
2581     return AMDGPU::NoRegister;
2582   }
2583 
2584   return RC.getRegister(RegIdx);
2585 }
2586 
2587 bool AMDGPUAsmParser::ParseRegRange(unsigned &Num, unsigned &RegWidth) {
2588   int64_t RegLo, RegHi;
2589   if (!skipToken(AsmToken::LBrac, "missing register index"))
2590     return false;
2591 
2592   SMLoc FirstIdxLoc = getLoc();
2593   SMLoc SecondIdxLoc;
2594 
2595   if (!parseExpr(RegLo))
2596     return false;
2597 
2598   if (trySkipToken(AsmToken::Colon)) {
2599     SecondIdxLoc = getLoc();
2600     if (!parseExpr(RegHi))
2601       return false;
2602   } else {
2603     RegHi = RegLo;
2604   }
2605 
2606   if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
2607     return false;
2608 
2609   if (!isUInt<32>(RegLo)) {
2610     Error(FirstIdxLoc, "invalid register index");
2611     return false;
2612   }
2613 
2614   if (!isUInt<32>(RegHi)) {
2615     Error(SecondIdxLoc, "invalid register index");
2616     return false;
2617   }
2618 
2619   if (RegLo > RegHi) {
2620     Error(FirstIdxLoc, "first register index should not exceed second index");
2621     return false;
2622   }
2623 
2624   Num = static_cast<unsigned>(RegLo);
2625   RegWidth = 32 * ((RegHi - RegLo) + 1);
2626   return true;
2627 }
2628 
2629 unsigned AMDGPUAsmParser::ParseSpecialReg(RegisterKind &RegKind,
2630                                           unsigned &RegNum, unsigned &RegWidth,
2631                                           SmallVectorImpl<AsmToken> &Tokens) {
2632   assert(isToken(AsmToken::Identifier));
2633   unsigned Reg = getSpecialRegForName(getTokenStr());
2634   if (Reg) {
2635     RegNum = 0;
2636     RegWidth = 32;
2637     RegKind = IS_SPECIAL;
2638     Tokens.push_back(getToken());
2639     lex(); // skip register name
2640   }
2641   return Reg;
2642 }
2643 
2644 unsigned AMDGPUAsmParser::ParseRegularReg(RegisterKind &RegKind,
2645                                           unsigned &RegNum, unsigned &RegWidth,
2646                                           SmallVectorImpl<AsmToken> &Tokens) {
2647   assert(isToken(AsmToken::Identifier));
2648   StringRef RegName = getTokenStr();
2649   auto Loc = getLoc();
2650 
2651   const RegInfo *RI = getRegularRegInfo(RegName);
2652   if (!RI) {
2653     Error(Loc, "invalid register name");
2654     return AMDGPU::NoRegister;
2655   }
2656 
2657   Tokens.push_back(getToken());
2658   lex(); // skip register name
2659 
2660   RegKind = RI->Kind;
2661   StringRef RegSuffix = RegName.substr(RI->Name.size());
2662   if (!RegSuffix.empty()) {
2663     // Single 32-bit register: vXX.
2664     if (!getRegNum(RegSuffix, RegNum)) {
2665       Error(Loc, "invalid register index");
2666       return AMDGPU::NoRegister;
2667     }
2668     RegWidth = 32;
2669   } else {
2670     // Range of registers: v[XX:YY]. ":YY" is optional.
2671     if (!ParseRegRange(RegNum, RegWidth))
2672       return AMDGPU::NoRegister;
2673   }
2674 
2675   return getRegularReg(RegKind, RegNum, RegWidth, Loc);
2676 }
2677 
2678 unsigned AMDGPUAsmParser::ParseRegList(RegisterKind &RegKind, unsigned &RegNum,
2679                                        unsigned &RegWidth,
2680                                        SmallVectorImpl<AsmToken> &Tokens) {
2681   unsigned Reg = AMDGPU::NoRegister;
2682   auto ListLoc = getLoc();
2683 
2684   if (!skipToken(AsmToken::LBrac,
2685                  "expected a register or a list of registers")) {
2686     return AMDGPU::NoRegister;
2687   }
2688 
2689   // List of consecutive registers, e.g.: [s0,s1,s2,s3]
2690 
2691   auto Loc = getLoc();
2692   if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth))
2693     return AMDGPU::NoRegister;
2694   if (RegWidth != 32) {
2695     Error(Loc, "expected a single 32-bit register");
2696     return AMDGPU::NoRegister;
2697   }
2698 
2699   for (; trySkipToken(AsmToken::Comma); ) {
2700     RegisterKind NextRegKind;
2701     unsigned NextReg, NextRegNum, NextRegWidth;
2702     Loc = getLoc();
2703 
2704     if (!ParseAMDGPURegister(NextRegKind, NextReg,
2705                              NextRegNum, NextRegWidth,
2706                              Tokens)) {
2707       return AMDGPU::NoRegister;
2708     }
2709     if (NextRegWidth != 32) {
2710       Error(Loc, "expected a single 32-bit register");
2711       return AMDGPU::NoRegister;
2712     }
2713     if (NextRegKind != RegKind) {
2714       Error(Loc, "registers in a list must be of the same kind");
2715       return AMDGPU::NoRegister;
2716     }
2717     if (!AddNextRegisterToList(Reg, RegWidth, RegKind, NextReg, Loc))
2718       return AMDGPU::NoRegister;
2719   }
2720 
2721   if (!skipToken(AsmToken::RBrac,
2722                  "expected a comma or a closing square bracket")) {
2723     return AMDGPU::NoRegister;
2724   }
2725 
2726   if (isRegularReg(RegKind))
2727     Reg = getRegularReg(RegKind, RegNum, RegWidth, ListLoc);
2728 
2729   return Reg;
2730 }
2731 
2732 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
2733                                           unsigned &RegNum, unsigned &RegWidth,
2734                                           SmallVectorImpl<AsmToken> &Tokens) {
2735   auto Loc = getLoc();
2736   Reg = AMDGPU::NoRegister;
2737 
2738   if (isToken(AsmToken::Identifier)) {
2739     Reg = ParseSpecialReg(RegKind, RegNum, RegWidth, Tokens);
2740     if (Reg == AMDGPU::NoRegister)
2741       Reg = ParseRegularReg(RegKind, RegNum, RegWidth, Tokens);
2742   } else {
2743     Reg = ParseRegList(RegKind, RegNum, RegWidth, Tokens);
2744   }
2745 
2746   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
2747   if (Reg == AMDGPU::NoRegister) {
2748     assert(Parser.hasPendingError());
2749     return false;
2750   }
2751 
2752   if (!subtargetHasRegister(*TRI, Reg)) {
2753     if (Reg == AMDGPU::SGPR_NULL) {
2754       Error(Loc, "'null' operand is not supported on this GPU");
2755     } else {
2756       Error(Loc, "register not available on this GPU");
2757     }
2758     return false;
2759   }
2760 
2761   return true;
2762 }
2763 
2764 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
2765                                           unsigned &RegNum, unsigned &RegWidth,
2766                                           bool RestoreOnFailure /*=false*/) {
2767   Reg = AMDGPU::NoRegister;
2768 
2769   SmallVector<AsmToken, 1> Tokens;
2770   if (ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, Tokens)) {
2771     if (RestoreOnFailure) {
2772       while (!Tokens.empty()) {
2773         getLexer().UnLex(Tokens.pop_back_val());
2774       }
2775     }
2776     return true;
2777   }
2778   return false;
2779 }
2780 
2781 Optional<StringRef>
2782 AMDGPUAsmParser::getGprCountSymbolName(RegisterKind RegKind) {
2783   switch (RegKind) {
2784   case IS_VGPR:
2785     return StringRef(".amdgcn.next_free_vgpr");
2786   case IS_SGPR:
2787     return StringRef(".amdgcn.next_free_sgpr");
2788   default:
2789     return None;
2790   }
2791 }
2792 
2793 void AMDGPUAsmParser::initializeGprCountSymbol(RegisterKind RegKind) {
2794   auto SymbolName = getGprCountSymbolName(RegKind);
2795   assert(SymbolName && "initializing invalid register kind");
2796   MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);
2797   Sym->setVariableValue(MCConstantExpr::create(0, getContext()));
2798 }
2799 
2800 bool AMDGPUAsmParser::updateGprCountSymbols(RegisterKind RegKind,
2801                                             unsigned DwordRegIndex,
2802                                             unsigned RegWidth) {
2803   // Symbols are only defined for GCN targets
2804   if (AMDGPU::getIsaVersion(getSTI().getCPU()).Major < 6)
2805     return true;
2806 
2807   auto SymbolName = getGprCountSymbolName(RegKind);
2808   if (!SymbolName)
2809     return true;
2810   MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);
2811 
2812   int64_t NewMax = DwordRegIndex + divideCeil(RegWidth, 32) - 1;
2813   int64_t OldCount;
2814 
2815   if (!Sym->isVariable())
2816     return !Error(getLoc(),
2817                   ".amdgcn.next_free_{v,s}gpr symbols must be variable");
2818   if (!Sym->getVariableValue(false)->evaluateAsAbsolute(OldCount))
2819     return !Error(
2820         getLoc(),
2821         ".amdgcn.next_free_{v,s}gpr symbols must be absolute expressions");
2822 
2823   if (OldCount <= NewMax)
2824     Sym->setVariableValue(MCConstantExpr::create(NewMax + 1, getContext()));
2825 
2826   return true;
2827 }
2828 
2829 std::unique_ptr<AMDGPUOperand>
2830 AMDGPUAsmParser::parseRegister(bool RestoreOnFailure) {
2831   const auto &Tok = getToken();
2832   SMLoc StartLoc = Tok.getLoc();
2833   SMLoc EndLoc = Tok.getEndLoc();
2834   RegisterKind RegKind;
2835   unsigned Reg, RegNum, RegWidth;
2836 
2837   if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth)) {
2838     return nullptr;
2839   }
2840   if (isHsaAbiVersion3AndAbove(&getSTI())) {
2841     if (!updateGprCountSymbols(RegKind, RegNum, RegWidth))
2842       return nullptr;
2843   } else
2844     KernelScope.usesRegister(RegKind, RegNum, RegWidth);
2845   return AMDGPUOperand::CreateReg(this, Reg, StartLoc, EndLoc);
2846 }
2847 
2848 OperandMatchResultTy
2849 AMDGPUAsmParser::parseImm(OperandVector &Operands, bool HasSP3AbsModifier) {
2850   // TODO: add syntactic sugar for 1/(2*PI)
2851 
2852   assert(!isRegister());
2853   assert(!isModifier());
2854 
2855   const auto& Tok = getToken();
2856   const auto& NextTok = peekToken();
2857   bool IsReal = Tok.is(AsmToken::Real);
2858   SMLoc S = getLoc();
2859   bool Negate = false;
2860 
2861   if (!IsReal && Tok.is(AsmToken::Minus) && NextTok.is(AsmToken::Real)) {
2862     lex();
2863     IsReal = true;
2864     Negate = true;
2865   }
2866 
2867   if (IsReal) {
2868     // Floating-point expressions are not supported.
2869     // Can only allow floating-point literals with an
2870     // optional sign.
2871 
2872     StringRef Num = getTokenStr();
2873     lex();
2874 
2875     APFloat RealVal(APFloat::IEEEdouble());
2876     auto roundMode = APFloat::rmNearestTiesToEven;
2877     if (errorToBool(RealVal.convertFromString(Num, roundMode).takeError())) {
2878       return MatchOperand_ParseFail;
2879     }
2880     if (Negate)
2881       RealVal.changeSign();
2882 
2883     Operands.push_back(
2884       AMDGPUOperand::CreateImm(this, RealVal.bitcastToAPInt().getZExtValue(), S,
2885                                AMDGPUOperand::ImmTyNone, true));
2886 
2887     return MatchOperand_Success;
2888 
2889   } else {
2890     int64_t IntVal;
2891     const MCExpr *Expr;
2892     SMLoc S = getLoc();
2893 
2894     if (HasSP3AbsModifier) {
2895       // This is a workaround for handling expressions
2896       // as arguments of SP3 'abs' modifier, for example:
2897       //     |1.0|
2898       //     |-1|
2899       //     |1+x|
2900       // This syntax is not compatible with syntax of standard
2901       // MC expressions (due to the trailing '|').
2902       SMLoc EndLoc;
2903       if (getParser().parsePrimaryExpr(Expr, EndLoc, nullptr))
2904         return MatchOperand_ParseFail;
2905     } else {
2906       if (Parser.parseExpression(Expr))
2907         return MatchOperand_ParseFail;
2908     }
2909 
2910     if (Expr->evaluateAsAbsolute(IntVal)) {
2911       Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S));
2912     } else {
2913       Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S));
2914     }
2915 
2916     return MatchOperand_Success;
2917   }
2918 
2919   return MatchOperand_NoMatch;
2920 }
2921 
2922 OperandMatchResultTy
2923 AMDGPUAsmParser::parseReg(OperandVector &Operands) {
2924   if (!isRegister())
2925     return MatchOperand_NoMatch;
2926 
2927   if (auto R = parseRegister()) {
2928     assert(R->isReg());
2929     Operands.push_back(std::move(R));
2930     return MatchOperand_Success;
2931   }
2932   return MatchOperand_ParseFail;
2933 }
2934 
2935 OperandMatchResultTy
2936 AMDGPUAsmParser::parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod) {
2937   auto res = parseReg(Operands);
2938   if (res != MatchOperand_NoMatch) {
2939     return res;
2940   } else if (isModifier()) {
2941     return MatchOperand_NoMatch;
2942   } else {
2943     return parseImm(Operands, HasSP3AbsMod);
2944   }
2945 }
2946 
2947 bool
2948 AMDGPUAsmParser::isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
2949   if (Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::LParen)) {
2950     const auto &str = Token.getString();
2951     return str == "abs" || str == "neg" || str == "sext";
2952   }
2953   return false;
2954 }
2955 
2956 bool
2957 AMDGPUAsmParser::isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const {
2958   return Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::Colon);
2959 }
2960 
2961 bool
2962 AMDGPUAsmParser::isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
2963   return isNamedOperandModifier(Token, NextToken) || Token.is(AsmToken::Pipe);
2964 }
2965 
2966 bool
2967 AMDGPUAsmParser::isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
2968   return isRegister(Token, NextToken) || isOperandModifier(Token, NextToken);
2969 }
2970 
2971 // Check if this is an operand modifier or an opcode modifier
2972 // which may look like an expression but it is not. We should
2973 // avoid parsing these modifiers as expressions. Currently
2974 // recognized sequences are:
2975 //   |...|
2976 //   abs(...)
2977 //   neg(...)
2978 //   sext(...)
2979 //   -reg
2980 //   -|...|
2981 //   -abs(...)
2982 //   name:...
2983 // Note that simple opcode modifiers like 'gds' may be parsed as
2984 // expressions; this is a special case. See getExpressionAsToken.
2985 //
2986 bool
2987 AMDGPUAsmParser::isModifier() {
2988 
2989   AsmToken Tok = getToken();
2990   AsmToken NextToken[2];
2991   peekTokens(NextToken);
2992 
2993   return isOperandModifier(Tok, NextToken[0]) ||
2994          (Tok.is(AsmToken::Minus) && isRegOrOperandModifier(NextToken[0], NextToken[1])) ||
2995          isOpcodeModifierWithVal(Tok, NextToken[0]);
2996 }
2997 
2998 // Check if the current token is an SP3 'neg' modifier.
2999 // Currently this modifier is allowed in the following context:
3000 //
3001 // 1. Before a register, e.g. "-v0", "-v[...]" or "-[v0,v1]".
3002 // 2. Before an 'abs' modifier: -abs(...)
3003 // 3. Before an SP3 'abs' modifier: -|...|
3004 //
3005 // In all other cases "-" is handled as a part
3006 // of an expression that follows the sign.
3007 //
3008 // Note: When "-" is followed by an integer literal,
3009 // this is interpreted as integer negation rather
3010 // than a floating-point NEG modifier applied to N.
3011 // Beside being contr-intuitive, such use of floating-point
3012 // NEG modifier would have resulted in different meaning
3013 // of integer literals used with VOP1/2/C and VOP3,
3014 // for example:
3015 //    v_exp_f32_e32 v5, -1 // VOP1: src0 = 0xFFFFFFFF
3016 //    v_exp_f32_e64 v5, -1 // VOP3: src0 = 0x80000001
3017 // Negative fp literals with preceding "-" are
3018 // handled likewise for uniformity
3019 //
3020 bool
3021 AMDGPUAsmParser::parseSP3NegModifier() {
3022 
3023   AsmToken NextToken[2];
3024   peekTokens(NextToken);
3025 
3026   if (isToken(AsmToken::Minus) &&
3027       (isRegister(NextToken[0], NextToken[1]) ||
3028        NextToken[0].is(AsmToken::Pipe) ||
3029        isId(NextToken[0], "abs"))) {
3030     lex();
3031     return true;
3032   }
3033 
3034   return false;
3035 }
3036 
3037 OperandMatchResultTy
3038 AMDGPUAsmParser::parseRegOrImmWithFPInputMods(OperandVector &Operands,
3039                                               bool AllowImm) {
3040   bool Neg, SP3Neg;
3041   bool Abs, SP3Abs;
3042   SMLoc Loc;
3043 
3044   // Disable ambiguous constructs like '--1' etc. Should use neg(-1) instead.
3045   if (isToken(AsmToken::Minus) && peekToken().is(AsmToken::Minus)) {
3046     Error(getLoc(), "invalid syntax, expected 'neg' modifier");
3047     return MatchOperand_ParseFail;
3048   }
3049 
3050   SP3Neg = parseSP3NegModifier();
3051 
3052   Loc = getLoc();
3053   Neg = trySkipId("neg");
3054   if (Neg && SP3Neg) {
3055     Error(Loc, "expected register or immediate");
3056     return MatchOperand_ParseFail;
3057   }
3058   if (Neg && !skipToken(AsmToken::LParen, "expected left paren after neg"))
3059     return MatchOperand_ParseFail;
3060 
3061   Abs = trySkipId("abs");
3062   if (Abs && !skipToken(AsmToken::LParen, "expected left paren after abs"))
3063     return MatchOperand_ParseFail;
3064 
3065   Loc = getLoc();
3066   SP3Abs = trySkipToken(AsmToken::Pipe);
3067   if (Abs && SP3Abs) {
3068     Error(Loc, "expected register or immediate");
3069     return MatchOperand_ParseFail;
3070   }
3071 
3072   OperandMatchResultTy Res;
3073   if (AllowImm) {
3074     Res = parseRegOrImm(Operands, SP3Abs);
3075   } else {
3076     Res = parseReg(Operands);
3077   }
3078   if (Res != MatchOperand_Success) {
3079     return (SP3Neg || Neg || SP3Abs || Abs)? MatchOperand_ParseFail : Res;
3080   }
3081 
3082   if (SP3Abs && !skipToken(AsmToken::Pipe, "expected vertical bar"))
3083     return MatchOperand_ParseFail;
3084   if (Abs && !skipToken(AsmToken::RParen, "expected closing parentheses"))
3085     return MatchOperand_ParseFail;
3086   if (Neg && !skipToken(AsmToken::RParen, "expected closing parentheses"))
3087     return MatchOperand_ParseFail;
3088 
3089   AMDGPUOperand::Modifiers Mods;
3090   Mods.Abs = Abs || SP3Abs;
3091   Mods.Neg = Neg || SP3Neg;
3092 
3093   if (Mods.hasFPModifiers()) {
3094     AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
3095     if (Op.isExpr()) {
3096       Error(Op.getStartLoc(), "expected an absolute expression");
3097       return MatchOperand_ParseFail;
3098     }
3099     Op.setModifiers(Mods);
3100   }
3101   return MatchOperand_Success;
3102 }
3103 
3104 OperandMatchResultTy
3105 AMDGPUAsmParser::parseRegOrImmWithIntInputMods(OperandVector &Operands,
3106                                                bool AllowImm) {
3107   bool Sext = trySkipId("sext");
3108   if (Sext && !skipToken(AsmToken::LParen, "expected left paren after sext"))
3109     return MatchOperand_ParseFail;
3110 
3111   OperandMatchResultTy Res;
3112   if (AllowImm) {
3113     Res = parseRegOrImm(Operands);
3114   } else {
3115     Res = parseReg(Operands);
3116   }
3117   if (Res != MatchOperand_Success) {
3118     return Sext? MatchOperand_ParseFail : Res;
3119   }
3120 
3121   if (Sext && !skipToken(AsmToken::RParen, "expected closing parentheses"))
3122     return MatchOperand_ParseFail;
3123 
3124   AMDGPUOperand::Modifiers Mods;
3125   Mods.Sext = Sext;
3126 
3127   if (Mods.hasIntModifiers()) {
3128     AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
3129     if (Op.isExpr()) {
3130       Error(Op.getStartLoc(), "expected an absolute expression");
3131       return MatchOperand_ParseFail;
3132     }
3133     Op.setModifiers(Mods);
3134   }
3135 
3136   return MatchOperand_Success;
3137 }
3138 
3139 OperandMatchResultTy
3140 AMDGPUAsmParser::parseRegWithFPInputMods(OperandVector &Operands) {
3141   return parseRegOrImmWithFPInputMods(Operands, false);
3142 }
3143 
3144 OperandMatchResultTy
3145 AMDGPUAsmParser::parseRegWithIntInputMods(OperandVector &Operands) {
3146   return parseRegOrImmWithIntInputMods(Operands, false);
3147 }
3148 
3149 OperandMatchResultTy AMDGPUAsmParser::parseVReg32OrOff(OperandVector &Operands) {
3150   auto Loc = getLoc();
3151   if (trySkipId("off")) {
3152     Operands.push_back(AMDGPUOperand::CreateImm(this, 0, Loc,
3153                                                 AMDGPUOperand::ImmTyOff, false));
3154     return MatchOperand_Success;
3155   }
3156 
3157   if (!isRegister())
3158     return MatchOperand_NoMatch;
3159 
3160   std::unique_ptr<AMDGPUOperand> Reg = parseRegister();
3161   if (Reg) {
3162     Operands.push_back(std::move(Reg));
3163     return MatchOperand_Success;
3164   }
3165 
3166   return MatchOperand_ParseFail;
3167 
3168 }
3169 
3170 unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) {
3171   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
3172 
3173   if ((getForcedEncodingSize() == 32 && (TSFlags & SIInstrFlags::VOP3)) ||
3174       (getForcedEncodingSize() == 64 && !(TSFlags & SIInstrFlags::VOP3)) ||
3175       (isForcedDPP() && !(TSFlags & SIInstrFlags::DPP)) ||
3176       (isForcedSDWA() && !(TSFlags & SIInstrFlags::SDWA)) )
3177     return Match_InvalidOperand;
3178 
3179   if ((TSFlags & SIInstrFlags::VOP3) &&
3180       (TSFlags & SIInstrFlags::VOPAsmPrefer32Bit) &&
3181       getForcedEncodingSize() != 64)
3182     return Match_PreferE32;
3183 
3184   if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
3185       Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) {
3186     // v_mac_f32/16 allow only dst_sel == DWORD;
3187     auto OpNum =
3188         AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::dst_sel);
3189     const auto &Op = Inst.getOperand(OpNum);
3190     if (!Op.isImm() || Op.getImm() != AMDGPU::SDWA::SdwaSel::DWORD) {
3191       return Match_InvalidOperand;
3192     }
3193   }
3194 
3195   return Match_Success;
3196 }
3197 
3198 static ArrayRef<unsigned> getAllVariants() {
3199   static const unsigned Variants[] = {
3200     AMDGPUAsmVariants::DEFAULT, AMDGPUAsmVariants::VOP3,
3201     AMDGPUAsmVariants::SDWA, AMDGPUAsmVariants::SDWA9, AMDGPUAsmVariants::DPP
3202   };
3203 
3204   return makeArrayRef(Variants);
3205 }
3206 
3207 // What asm variants we should check
3208 ArrayRef<unsigned> AMDGPUAsmParser::getMatchedVariants() const {
3209   if (getForcedEncodingSize() == 32) {
3210     static const unsigned Variants[] = {AMDGPUAsmVariants::DEFAULT};
3211     return makeArrayRef(Variants);
3212   }
3213 
3214   if (isForcedVOP3()) {
3215     static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3};
3216     return makeArrayRef(Variants);
3217   }
3218 
3219   if (isForcedSDWA()) {
3220     static const unsigned Variants[] = {AMDGPUAsmVariants::SDWA,
3221                                         AMDGPUAsmVariants::SDWA9};
3222     return makeArrayRef(Variants);
3223   }
3224 
3225   if (isForcedDPP()) {
3226     static const unsigned Variants[] = {AMDGPUAsmVariants::DPP};
3227     return makeArrayRef(Variants);
3228   }
3229 
3230   return getAllVariants();
3231 }
3232 
3233 StringRef AMDGPUAsmParser::getMatchedVariantName() const {
3234   if (getForcedEncodingSize() == 32)
3235     return "e32";
3236 
3237   if (isForcedVOP3())
3238     return "e64";
3239 
3240   if (isForcedSDWA())
3241     return "sdwa";
3242 
3243   if (isForcedDPP())
3244     return "dpp";
3245 
3246   return "";
3247 }
3248 
3249 unsigned AMDGPUAsmParser::findImplicitSGPRReadInVOP(const MCInst &Inst) const {
3250   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
3251   const unsigned Num = Desc.getNumImplicitUses();
3252   for (unsigned i = 0; i < Num; ++i) {
3253     unsigned Reg = Desc.ImplicitUses[i];
3254     switch (Reg) {
3255     case AMDGPU::FLAT_SCR:
3256     case AMDGPU::VCC:
3257     case AMDGPU::VCC_LO:
3258     case AMDGPU::VCC_HI:
3259     case AMDGPU::M0:
3260       return Reg;
3261     default:
3262       break;
3263     }
3264   }
3265   return AMDGPU::NoRegister;
3266 }
3267 
3268 // NB: This code is correct only when used to check constant
3269 // bus limitations because GFX7 support no f16 inline constants.
3270 // Note that there are no cases when a GFX7 opcode violates
3271 // constant bus limitations due to the use of an f16 constant.
3272 bool AMDGPUAsmParser::isInlineConstant(const MCInst &Inst,
3273                                        unsigned OpIdx) const {
3274   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
3275 
3276   if (!AMDGPU::isSISrcOperand(Desc, OpIdx)) {
3277     return false;
3278   }
3279 
3280   const MCOperand &MO = Inst.getOperand(OpIdx);
3281 
3282   int64_t Val = MO.getImm();
3283   auto OpSize = AMDGPU::getOperandSize(Desc, OpIdx);
3284 
3285   switch (OpSize) { // expected operand size
3286   case 8:
3287     return AMDGPU::isInlinableLiteral64(Val, hasInv2PiInlineImm());
3288   case 4:
3289     return AMDGPU::isInlinableLiteral32(Val, hasInv2PiInlineImm());
3290   case 2: {
3291     const unsigned OperandType = Desc.OpInfo[OpIdx].OperandType;
3292     if (OperandType == AMDGPU::OPERAND_REG_IMM_INT16 ||
3293         OperandType == AMDGPU::OPERAND_REG_INLINE_C_INT16 ||
3294         OperandType == AMDGPU::OPERAND_REG_INLINE_AC_INT16)
3295       return AMDGPU::isInlinableIntLiteral(Val);
3296 
3297     if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2INT16 ||
3298         OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2INT16 ||
3299         OperandType == AMDGPU::OPERAND_REG_IMM_V2INT16)
3300       return AMDGPU::isInlinableIntLiteralV216(Val);
3301 
3302     if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2FP16 ||
3303         OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2FP16 ||
3304         OperandType == AMDGPU::OPERAND_REG_IMM_V2FP16)
3305       return AMDGPU::isInlinableLiteralV216(Val, hasInv2PiInlineImm());
3306 
3307     return AMDGPU::isInlinableLiteral16(Val, hasInv2PiInlineImm());
3308   }
3309   default:
3310     llvm_unreachable("invalid operand size");
3311   }
3312 }
3313 
3314 unsigned AMDGPUAsmParser::getConstantBusLimit(unsigned Opcode) const {
3315   if (!isGFX10Plus())
3316     return 1;
3317 
3318   switch (Opcode) {
3319   // 64-bit shift instructions can use only one scalar value input
3320   case AMDGPU::V_LSHLREV_B64_e64:
3321   case AMDGPU::V_LSHLREV_B64_gfx10:
3322   case AMDGPU::V_LSHRREV_B64_e64:
3323   case AMDGPU::V_LSHRREV_B64_gfx10:
3324   case AMDGPU::V_ASHRREV_I64_e64:
3325   case AMDGPU::V_ASHRREV_I64_gfx10:
3326   case AMDGPU::V_LSHL_B64_e64:
3327   case AMDGPU::V_LSHR_B64_e64:
3328   case AMDGPU::V_ASHR_I64_e64:
3329     return 1;
3330   default:
3331     return 2;
3332   }
3333 }
3334 
3335 bool AMDGPUAsmParser::usesConstantBus(const MCInst &Inst, unsigned OpIdx) {
3336   const MCOperand &MO = Inst.getOperand(OpIdx);
3337   if (MO.isImm()) {
3338     return !isInlineConstant(Inst, OpIdx);
3339   } else if (MO.isReg()) {
3340     auto Reg = MO.getReg();
3341     const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3342     auto PReg = mc2PseudoReg(Reg);
3343     return isSGPR(PReg, TRI) && PReg != SGPR_NULL;
3344   } else {
3345     return true;
3346   }
3347 }
3348 
3349 bool
3350 AMDGPUAsmParser::validateConstantBusLimitations(const MCInst &Inst,
3351                                                 const OperandVector &Operands) {
3352   const unsigned Opcode = Inst.getOpcode();
3353   const MCInstrDesc &Desc = MII.get(Opcode);
3354   unsigned LastSGPR = AMDGPU::NoRegister;
3355   unsigned ConstantBusUseCount = 0;
3356   unsigned NumLiterals = 0;
3357   unsigned LiteralSize;
3358 
3359   if (Desc.TSFlags &
3360       (SIInstrFlags::VOPC |
3361        SIInstrFlags::VOP1 | SIInstrFlags::VOP2 |
3362        SIInstrFlags::VOP3 | SIInstrFlags::VOP3P |
3363        SIInstrFlags::SDWA)) {
3364     // Check special imm operands (used by madmk, etc)
3365     if (AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm) != -1) {
3366       ++NumLiterals;
3367       LiteralSize = 4;
3368     }
3369 
3370     SmallDenseSet<unsigned> SGPRsUsed;
3371     unsigned SGPRUsed = findImplicitSGPRReadInVOP(Inst);
3372     if (SGPRUsed != AMDGPU::NoRegister) {
3373       SGPRsUsed.insert(SGPRUsed);
3374       ++ConstantBusUseCount;
3375     }
3376 
3377     const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
3378     const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
3379     const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
3380 
3381     const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx };
3382 
3383     for (int OpIdx : OpIndices) {
3384       if (OpIdx == -1) break;
3385 
3386       const MCOperand &MO = Inst.getOperand(OpIdx);
3387       if (usesConstantBus(Inst, OpIdx)) {
3388         if (MO.isReg()) {
3389           LastSGPR = mc2PseudoReg(MO.getReg());
3390           // Pairs of registers with a partial intersections like these
3391           //   s0, s[0:1]
3392           //   flat_scratch_lo, flat_scratch
3393           //   flat_scratch_lo, flat_scratch_hi
3394           // are theoretically valid but they are disabled anyway.
3395           // Note that this code mimics SIInstrInfo::verifyInstruction
3396           if (!SGPRsUsed.count(LastSGPR)) {
3397             SGPRsUsed.insert(LastSGPR);
3398             ++ConstantBusUseCount;
3399           }
3400         } else { // Expression or a literal
3401 
3402           if (Desc.OpInfo[OpIdx].OperandType == MCOI::OPERAND_IMMEDIATE)
3403             continue; // special operand like VINTERP attr_chan
3404 
3405           // An instruction may use only one literal.
3406           // This has been validated on the previous step.
3407           // See validateVOPLiteral.
3408           // This literal may be used as more than one operand.
3409           // If all these operands are of the same size,
3410           // this literal counts as one scalar value.
3411           // Otherwise it counts as 2 scalar values.
3412           // See "GFX10 Shader Programming", section 3.6.2.3.
3413 
3414           unsigned Size = AMDGPU::getOperandSize(Desc, OpIdx);
3415           if (Size < 4) Size = 4;
3416 
3417           if (NumLiterals == 0) {
3418             NumLiterals = 1;
3419             LiteralSize = Size;
3420           } else if (LiteralSize != Size) {
3421             NumLiterals = 2;
3422           }
3423         }
3424       }
3425     }
3426   }
3427   ConstantBusUseCount += NumLiterals;
3428 
3429   if (ConstantBusUseCount <= getConstantBusLimit(Opcode))
3430     return true;
3431 
3432   SMLoc LitLoc = getLitLoc(Operands);
3433   SMLoc RegLoc = getRegLoc(LastSGPR, Operands);
3434   SMLoc Loc = (LitLoc.getPointer() < RegLoc.getPointer()) ? RegLoc : LitLoc;
3435   Error(Loc, "invalid operand (violates constant bus restrictions)");
3436   return false;
3437 }
3438 
3439 bool
3440 AMDGPUAsmParser::validateEarlyClobberLimitations(const MCInst &Inst,
3441                                                  const OperandVector &Operands) {
3442   const unsigned Opcode = Inst.getOpcode();
3443   const MCInstrDesc &Desc = MII.get(Opcode);
3444 
3445   const int DstIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vdst);
3446   if (DstIdx == -1 ||
3447       Desc.getOperandConstraint(DstIdx, MCOI::EARLY_CLOBBER) == -1) {
3448     return true;
3449   }
3450 
3451   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3452 
3453   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
3454   const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
3455   const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
3456 
3457   assert(DstIdx != -1);
3458   const MCOperand &Dst = Inst.getOperand(DstIdx);
3459   assert(Dst.isReg());
3460 
3461   const int SrcIndices[] = { Src0Idx, Src1Idx, Src2Idx };
3462 
3463   for (int SrcIdx : SrcIndices) {
3464     if (SrcIdx == -1) break;
3465     const MCOperand &Src = Inst.getOperand(SrcIdx);
3466     if (Src.isReg()) {
3467       if (TRI->regsOverlap(Dst.getReg(), Src.getReg())) {
3468         const unsigned SrcReg = mc2PseudoReg(Src.getReg());
3469         Error(getRegLoc(SrcReg, Operands),
3470           "destination must be different than all sources");
3471         return false;
3472       }
3473     }
3474   }
3475 
3476   return true;
3477 }
3478 
3479 bool AMDGPUAsmParser::validateIntClampSupported(const MCInst &Inst) {
3480 
3481   const unsigned Opc = Inst.getOpcode();
3482   const MCInstrDesc &Desc = MII.get(Opc);
3483 
3484   if ((Desc.TSFlags & SIInstrFlags::IntClamp) != 0 && !hasIntClamp()) {
3485     int ClampIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp);
3486     assert(ClampIdx != -1);
3487     return Inst.getOperand(ClampIdx).getImm() == 0;
3488   }
3489 
3490   return true;
3491 }
3492 
3493 bool AMDGPUAsmParser::validateMIMGDataSize(const MCInst &Inst) {
3494 
3495   const unsigned Opc = Inst.getOpcode();
3496   const MCInstrDesc &Desc = MII.get(Opc);
3497 
3498   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3499     return true;
3500 
3501   int VDataIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdata);
3502   int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
3503   int TFEIdx   = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::tfe);
3504 
3505   assert(VDataIdx != -1);
3506 
3507   if (DMaskIdx == -1 || TFEIdx == -1) // intersect_ray
3508     return true;
3509 
3510   unsigned VDataSize = AMDGPU::getRegOperandSize(getMRI(), Desc, VDataIdx);
3511   unsigned TFESize = (TFEIdx != -1 && Inst.getOperand(TFEIdx).getImm()) ? 1 : 0;
3512   unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
3513   if (DMask == 0)
3514     DMask = 1;
3515 
3516   unsigned DataSize =
3517     (Desc.TSFlags & SIInstrFlags::Gather4) ? 4 : countPopulation(DMask);
3518   if (hasPackedD16()) {
3519     int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
3520     if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm())
3521       DataSize = (DataSize + 1) / 2;
3522   }
3523 
3524   return (VDataSize / 4) == DataSize + TFESize;
3525 }
3526 
3527 bool AMDGPUAsmParser::validateMIMGAddrSize(const MCInst &Inst) {
3528   const unsigned Opc = Inst.getOpcode();
3529   const MCInstrDesc &Desc = MII.get(Opc);
3530 
3531   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0 || !isGFX10Plus())
3532     return true;
3533 
3534   const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc);
3535 
3536   const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
3537       AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode);
3538   int VAddr0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr0);
3539   int SrsrcIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::srsrc);
3540   int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
3541   int A16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::a16);
3542 
3543   assert(VAddr0Idx != -1);
3544   assert(SrsrcIdx != -1);
3545   assert(SrsrcIdx > VAddr0Idx);
3546 
3547   if (DimIdx == -1)
3548     return true; // intersect_ray
3549 
3550   unsigned Dim = Inst.getOperand(DimIdx).getImm();
3551   const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim);
3552   bool IsNSA = SrsrcIdx - VAddr0Idx > 1;
3553   unsigned ActualAddrSize =
3554       IsNSA ? SrsrcIdx - VAddr0Idx
3555             : AMDGPU::getRegOperandSize(getMRI(), Desc, VAddr0Idx) / 4;
3556   bool IsA16 = (A16Idx != -1 && Inst.getOperand(A16Idx).getImm());
3557 
3558   unsigned ExpectedAddrSize =
3559       AMDGPU::getAddrSizeMIMGOp(BaseOpcode, DimInfo, IsA16, hasG16());
3560 
3561   if (!IsNSA) {
3562     if (ExpectedAddrSize > 8)
3563       ExpectedAddrSize = 16;
3564 
3565     // Allow oversized 8 VGPR vaddr when only 5/6/7 VGPRs are required.
3566     // This provides backward compatibility for assembly created
3567     // before 160b/192b/224b types were directly supported.
3568     if (ActualAddrSize == 8 && (ExpectedAddrSize >= 5 && ExpectedAddrSize <= 7))
3569       return true;
3570   }
3571 
3572   return ActualAddrSize == ExpectedAddrSize;
3573 }
3574 
3575 bool AMDGPUAsmParser::validateMIMGAtomicDMask(const MCInst &Inst) {
3576 
3577   const unsigned Opc = Inst.getOpcode();
3578   const MCInstrDesc &Desc = MII.get(Opc);
3579 
3580   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3581     return true;
3582   if (!Desc.mayLoad() || !Desc.mayStore())
3583     return true; // Not atomic
3584 
3585   int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
3586   unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
3587 
3588   // This is an incomplete check because image_atomic_cmpswap
3589   // may only use 0x3 and 0xf while other atomic operations
3590   // may use 0x1 and 0x3. However these limitations are
3591   // verified when we check that dmask matches dst size.
3592   return DMask == 0x1 || DMask == 0x3 || DMask == 0xf;
3593 }
3594 
3595 bool AMDGPUAsmParser::validateMIMGGatherDMask(const MCInst &Inst) {
3596 
3597   const unsigned Opc = Inst.getOpcode();
3598   const MCInstrDesc &Desc = MII.get(Opc);
3599 
3600   if ((Desc.TSFlags & SIInstrFlags::Gather4) == 0)
3601     return true;
3602 
3603   int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
3604   unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
3605 
3606   // GATHER4 instructions use dmask in a different fashion compared to
3607   // other MIMG instructions. The only useful DMASK values are
3608   // 1=red, 2=green, 4=blue, 8=alpha. (e.g. 1 returns
3609   // (red,red,red,red) etc.) The ISA document doesn't mention
3610   // this.
3611   return DMask == 0x1 || DMask == 0x2 || DMask == 0x4 || DMask == 0x8;
3612 }
3613 
3614 bool AMDGPUAsmParser::validateMIMGMSAA(const MCInst &Inst) {
3615   const unsigned Opc = Inst.getOpcode();
3616   const MCInstrDesc &Desc = MII.get(Opc);
3617 
3618   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3619     return true;
3620 
3621   const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc);
3622   const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
3623       AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode);
3624 
3625   if (!BaseOpcode->MSAA)
3626     return true;
3627 
3628   int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
3629   assert(DimIdx != -1);
3630 
3631   unsigned Dim = Inst.getOperand(DimIdx).getImm();
3632   const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim);
3633 
3634   return DimInfo->MSAA;
3635 }
3636 
3637 static bool IsMovrelsSDWAOpcode(const unsigned Opcode)
3638 {
3639   switch (Opcode) {
3640   case AMDGPU::V_MOVRELS_B32_sdwa_gfx10:
3641   case AMDGPU::V_MOVRELSD_B32_sdwa_gfx10:
3642   case AMDGPU::V_MOVRELSD_2_B32_sdwa_gfx10:
3643     return true;
3644   default:
3645     return false;
3646   }
3647 }
3648 
3649 // movrels* opcodes should only allow VGPRS as src0.
3650 // This is specified in .td description for vop1/vop3,
3651 // but sdwa is handled differently. See isSDWAOperand.
3652 bool AMDGPUAsmParser::validateMovrels(const MCInst &Inst,
3653                                       const OperandVector &Operands) {
3654 
3655   const unsigned Opc = Inst.getOpcode();
3656   const MCInstrDesc &Desc = MII.get(Opc);
3657 
3658   if ((Desc.TSFlags & SIInstrFlags::SDWA) == 0 || !IsMovrelsSDWAOpcode(Opc))
3659     return true;
3660 
3661   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
3662   assert(Src0Idx != -1);
3663 
3664   SMLoc ErrLoc;
3665   const MCOperand &Src0 = Inst.getOperand(Src0Idx);
3666   if (Src0.isReg()) {
3667     auto Reg = mc2PseudoReg(Src0.getReg());
3668     const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3669     if (!isSGPR(Reg, TRI))
3670       return true;
3671     ErrLoc = getRegLoc(Reg, Operands);
3672   } else {
3673     ErrLoc = getConstLoc(Operands);
3674   }
3675 
3676   Error(ErrLoc, "source operand must be a VGPR");
3677   return false;
3678 }
3679 
3680 bool AMDGPUAsmParser::validateMAIAccWrite(const MCInst &Inst,
3681                                           const OperandVector &Operands) {
3682 
3683   const unsigned Opc = Inst.getOpcode();
3684 
3685   if (Opc != AMDGPU::V_ACCVGPR_WRITE_B32_vi)
3686     return true;
3687 
3688   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
3689   assert(Src0Idx != -1);
3690 
3691   const MCOperand &Src0 = Inst.getOperand(Src0Idx);
3692   if (!Src0.isReg())
3693     return true;
3694 
3695   auto Reg = mc2PseudoReg(Src0.getReg());
3696   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3697   if (!isGFX90A() && isSGPR(Reg, TRI)) {
3698     Error(getRegLoc(Reg, Operands),
3699           "source operand must be either a VGPR or an inline constant");
3700     return false;
3701   }
3702 
3703   return true;
3704 }
3705 
3706 bool AMDGPUAsmParser::validateMFMA(const MCInst &Inst,
3707                                    const OperandVector &Operands) {
3708   const unsigned Opc = Inst.getOpcode();
3709   const MCInstrDesc &Desc = MII.get(Opc);
3710 
3711   if ((Desc.TSFlags & SIInstrFlags::IsMAI) == 0)
3712     return true;
3713 
3714   const int Src2Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2);
3715   if (Src2Idx == -1)
3716     return true;
3717 
3718   const MCOperand &Src2 = Inst.getOperand(Src2Idx);
3719   if (!Src2.isReg())
3720     return true;
3721 
3722   MCRegister Src2Reg = Src2.getReg();
3723   MCRegister DstReg = Inst.getOperand(0).getReg();
3724   if (Src2Reg == DstReg)
3725     return true;
3726 
3727   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3728   if (TRI->getRegClass(Desc.OpInfo[0].RegClass).getSizeInBits() <= 128)
3729     return true;
3730 
3731   if (TRI->regsOverlap(Src2Reg, DstReg)) {
3732     Error(getRegLoc(mc2PseudoReg(Src2Reg), Operands),
3733           "source 2 operand must not partially overlap with dst");
3734     return false;
3735   }
3736 
3737   return true;
3738 }
3739 
3740 bool AMDGPUAsmParser::validateDivScale(const MCInst &Inst) {
3741   switch (Inst.getOpcode()) {
3742   default:
3743     return true;
3744   case V_DIV_SCALE_F32_gfx6_gfx7:
3745   case V_DIV_SCALE_F32_vi:
3746   case V_DIV_SCALE_F32_gfx10:
3747   case V_DIV_SCALE_F64_gfx6_gfx7:
3748   case V_DIV_SCALE_F64_vi:
3749   case V_DIV_SCALE_F64_gfx10:
3750     break;
3751   }
3752 
3753   // TODO: Check that src0 = src1 or src2.
3754 
3755   for (auto Name : {AMDGPU::OpName::src0_modifiers,
3756                     AMDGPU::OpName::src2_modifiers,
3757                     AMDGPU::OpName::src2_modifiers}) {
3758     if (Inst.getOperand(AMDGPU::getNamedOperandIdx(Inst.getOpcode(), Name))
3759             .getImm() &
3760         SISrcMods::ABS) {
3761       return false;
3762     }
3763   }
3764 
3765   return true;
3766 }
3767 
3768 bool AMDGPUAsmParser::validateMIMGD16(const MCInst &Inst) {
3769 
3770   const unsigned Opc = Inst.getOpcode();
3771   const MCInstrDesc &Desc = MII.get(Opc);
3772 
3773   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3774     return true;
3775 
3776   int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
3777   if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm()) {
3778     if (isCI() || isSI())
3779       return false;
3780   }
3781 
3782   return true;
3783 }
3784 
3785 bool AMDGPUAsmParser::validateMIMGDim(const MCInst &Inst) {
3786   const unsigned Opc = Inst.getOpcode();
3787   const MCInstrDesc &Desc = MII.get(Opc);
3788 
3789   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3790     return true;
3791 
3792   int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
3793   if (DimIdx < 0)
3794     return true;
3795 
3796   long Imm = Inst.getOperand(DimIdx).getImm();
3797   if (Imm < 0 || Imm >= 8)
3798     return false;
3799 
3800   return true;
3801 }
3802 
3803 static bool IsRevOpcode(const unsigned Opcode)
3804 {
3805   switch (Opcode) {
3806   case AMDGPU::V_SUBREV_F32_e32:
3807   case AMDGPU::V_SUBREV_F32_e64:
3808   case AMDGPU::V_SUBREV_F32_e32_gfx10:
3809   case AMDGPU::V_SUBREV_F32_e32_gfx6_gfx7:
3810   case AMDGPU::V_SUBREV_F32_e32_vi:
3811   case AMDGPU::V_SUBREV_F32_e64_gfx10:
3812   case AMDGPU::V_SUBREV_F32_e64_gfx6_gfx7:
3813   case AMDGPU::V_SUBREV_F32_e64_vi:
3814 
3815   case AMDGPU::V_SUBREV_CO_U32_e32:
3816   case AMDGPU::V_SUBREV_CO_U32_e64:
3817   case AMDGPU::V_SUBREV_I32_e32_gfx6_gfx7:
3818   case AMDGPU::V_SUBREV_I32_e64_gfx6_gfx7:
3819 
3820   case AMDGPU::V_SUBBREV_U32_e32:
3821   case AMDGPU::V_SUBBREV_U32_e64:
3822   case AMDGPU::V_SUBBREV_U32_e32_gfx6_gfx7:
3823   case AMDGPU::V_SUBBREV_U32_e32_vi:
3824   case AMDGPU::V_SUBBREV_U32_e64_gfx6_gfx7:
3825   case AMDGPU::V_SUBBREV_U32_e64_vi:
3826 
3827   case AMDGPU::V_SUBREV_U32_e32:
3828   case AMDGPU::V_SUBREV_U32_e64:
3829   case AMDGPU::V_SUBREV_U32_e32_gfx9:
3830   case AMDGPU::V_SUBREV_U32_e32_vi:
3831   case AMDGPU::V_SUBREV_U32_e64_gfx9:
3832   case AMDGPU::V_SUBREV_U32_e64_vi:
3833 
3834   case AMDGPU::V_SUBREV_F16_e32:
3835   case AMDGPU::V_SUBREV_F16_e64:
3836   case AMDGPU::V_SUBREV_F16_e32_gfx10:
3837   case AMDGPU::V_SUBREV_F16_e32_vi:
3838   case AMDGPU::V_SUBREV_F16_e64_gfx10:
3839   case AMDGPU::V_SUBREV_F16_e64_vi:
3840 
3841   case AMDGPU::V_SUBREV_U16_e32:
3842   case AMDGPU::V_SUBREV_U16_e64:
3843   case AMDGPU::V_SUBREV_U16_e32_vi:
3844   case AMDGPU::V_SUBREV_U16_e64_vi:
3845 
3846   case AMDGPU::V_SUBREV_CO_U32_e32_gfx9:
3847   case AMDGPU::V_SUBREV_CO_U32_e64_gfx10:
3848   case AMDGPU::V_SUBREV_CO_U32_e64_gfx9:
3849 
3850   case AMDGPU::V_SUBBREV_CO_U32_e32_gfx9:
3851   case AMDGPU::V_SUBBREV_CO_U32_e64_gfx9:
3852 
3853   case AMDGPU::V_SUBREV_NC_U32_e32_gfx10:
3854   case AMDGPU::V_SUBREV_NC_U32_e64_gfx10:
3855 
3856   case AMDGPU::V_SUBREV_CO_CI_U32_e32_gfx10:
3857   case AMDGPU::V_SUBREV_CO_CI_U32_e64_gfx10:
3858 
3859   case AMDGPU::V_LSHRREV_B32_e32:
3860   case AMDGPU::V_LSHRREV_B32_e64:
3861   case AMDGPU::V_LSHRREV_B32_e32_gfx6_gfx7:
3862   case AMDGPU::V_LSHRREV_B32_e64_gfx6_gfx7:
3863   case AMDGPU::V_LSHRREV_B32_e32_vi:
3864   case AMDGPU::V_LSHRREV_B32_e64_vi:
3865   case AMDGPU::V_LSHRREV_B32_e32_gfx10:
3866   case AMDGPU::V_LSHRREV_B32_e64_gfx10:
3867 
3868   case AMDGPU::V_ASHRREV_I32_e32:
3869   case AMDGPU::V_ASHRREV_I32_e64:
3870   case AMDGPU::V_ASHRREV_I32_e32_gfx10:
3871   case AMDGPU::V_ASHRREV_I32_e32_gfx6_gfx7:
3872   case AMDGPU::V_ASHRREV_I32_e32_vi:
3873   case AMDGPU::V_ASHRREV_I32_e64_gfx10:
3874   case AMDGPU::V_ASHRREV_I32_e64_gfx6_gfx7:
3875   case AMDGPU::V_ASHRREV_I32_e64_vi:
3876 
3877   case AMDGPU::V_LSHLREV_B32_e32:
3878   case AMDGPU::V_LSHLREV_B32_e64:
3879   case AMDGPU::V_LSHLREV_B32_e32_gfx10:
3880   case AMDGPU::V_LSHLREV_B32_e32_gfx6_gfx7:
3881   case AMDGPU::V_LSHLREV_B32_e32_vi:
3882   case AMDGPU::V_LSHLREV_B32_e64_gfx10:
3883   case AMDGPU::V_LSHLREV_B32_e64_gfx6_gfx7:
3884   case AMDGPU::V_LSHLREV_B32_e64_vi:
3885 
3886   case AMDGPU::V_LSHLREV_B16_e32:
3887   case AMDGPU::V_LSHLREV_B16_e64:
3888   case AMDGPU::V_LSHLREV_B16_e32_vi:
3889   case AMDGPU::V_LSHLREV_B16_e64_vi:
3890   case AMDGPU::V_LSHLREV_B16_gfx10:
3891 
3892   case AMDGPU::V_LSHRREV_B16_e32:
3893   case AMDGPU::V_LSHRREV_B16_e64:
3894   case AMDGPU::V_LSHRREV_B16_e32_vi:
3895   case AMDGPU::V_LSHRREV_B16_e64_vi:
3896   case AMDGPU::V_LSHRREV_B16_gfx10:
3897 
3898   case AMDGPU::V_ASHRREV_I16_e32:
3899   case AMDGPU::V_ASHRREV_I16_e64:
3900   case AMDGPU::V_ASHRREV_I16_e32_vi:
3901   case AMDGPU::V_ASHRREV_I16_e64_vi:
3902   case AMDGPU::V_ASHRREV_I16_gfx10:
3903 
3904   case AMDGPU::V_LSHLREV_B64_e64:
3905   case AMDGPU::V_LSHLREV_B64_gfx10:
3906   case AMDGPU::V_LSHLREV_B64_vi:
3907 
3908   case AMDGPU::V_LSHRREV_B64_e64:
3909   case AMDGPU::V_LSHRREV_B64_gfx10:
3910   case AMDGPU::V_LSHRREV_B64_vi:
3911 
3912   case AMDGPU::V_ASHRREV_I64_e64:
3913   case AMDGPU::V_ASHRREV_I64_gfx10:
3914   case AMDGPU::V_ASHRREV_I64_vi:
3915 
3916   case AMDGPU::V_PK_LSHLREV_B16:
3917   case AMDGPU::V_PK_LSHLREV_B16_gfx10:
3918   case AMDGPU::V_PK_LSHLREV_B16_vi:
3919 
3920   case AMDGPU::V_PK_LSHRREV_B16:
3921   case AMDGPU::V_PK_LSHRREV_B16_gfx10:
3922   case AMDGPU::V_PK_LSHRREV_B16_vi:
3923   case AMDGPU::V_PK_ASHRREV_I16:
3924   case AMDGPU::V_PK_ASHRREV_I16_gfx10:
3925   case AMDGPU::V_PK_ASHRREV_I16_vi:
3926     return true;
3927   default:
3928     return false;
3929   }
3930 }
3931 
3932 Optional<StringRef> AMDGPUAsmParser::validateLdsDirect(const MCInst &Inst) {
3933 
3934   using namespace SIInstrFlags;
3935   const unsigned Opcode = Inst.getOpcode();
3936   const MCInstrDesc &Desc = MII.get(Opcode);
3937 
3938   // lds_direct register is defined so that it can be used
3939   // with 9-bit operands only. Ignore encodings which do not accept these.
3940   const auto Enc = VOP1 | VOP2 | VOP3 | VOPC | VOP3P | SIInstrFlags::SDWA;
3941   if ((Desc.TSFlags & Enc) == 0)
3942     return None;
3943 
3944   for (auto SrcName : {OpName::src0, OpName::src1, OpName::src2}) {
3945     auto SrcIdx = getNamedOperandIdx(Opcode, SrcName);
3946     if (SrcIdx == -1)
3947       break;
3948     const auto &Src = Inst.getOperand(SrcIdx);
3949     if (Src.isReg() && Src.getReg() == LDS_DIRECT) {
3950 
3951       if (isGFX90A())
3952         return StringRef("lds_direct is not supported on this GPU");
3953 
3954       if (IsRevOpcode(Opcode) || (Desc.TSFlags & SIInstrFlags::SDWA))
3955         return StringRef("lds_direct cannot be used with this instruction");
3956 
3957       if (SrcName != OpName::src0)
3958         return StringRef("lds_direct may be used as src0 only");
3959     }
3960   }
3961 
3962   return None;
3963 }
3964 
3965 SMLoc AMDGPUAsmParser::getFlatOffsetLoc(const OperandVector &Operands) const {
3966   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
3967     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
3968     if (Op.isFlatOffset())
3969       return Op.getStartLoc();
3970   }
3971   return getLoc();
3972 }
3973 
3974 bool AMDGPUAsmParser::validateFlatOffset(const MCInst &Inst,
3975                                          const OperandVector &Operands) {
3976   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
3977   if ((TSFlags & SIInstrFlags::FLAT) == 0)
3978     return true;
3979 
3980   auto Opcode = Inst.getOpcode();
3981   auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
3982   assert(OpNum != -1);
3983 
3984   const auto &Op = Inst.getOperand(OpNum);
3985   if (!hasFlatOffsets() && Op.getImm() != 0) {
3986     Error(getFlatOffsetLoc(Operands),
3987           "flat offset modifier is not supported on this GPU");
3988     return false;
3989   }
3990 
3991   // For FLAT segment the offset must be positive;
3992   // MSB is ignored and forced to zero.
3993   if (TSFlags & (SIInstrFlags::FlatGlobal | SIInstrFlags::FlatScratch)) {
3994     unsigned OffsetSize = AMDGPU::getNumFlatOffsetBits(getSTI(), true);
3995     if (!isIntN(OffsetSize, Op.getImm())) {
3996       Error(getFlatOffsetLoc(Operands),
3997             Twine("expected a ") + Twine(OffsetSize) + "-bit signed offset");
3998       return false;
3999     }
4000   } else {
4001     unsigned OffsetSize = AMDGPU::getNumFlatOffsetBits(getSTI(), false);
4002     if (!isUIntN(OffsetSize, Op.getImm())) {
4003       Error(getFlatOffsetLoc(Operands),
4004             Twine("expected a ") + Twine(OffsetSize) + "-bit unsigned offset");
4005       return false;
4006     }
4007   }
4008 
4009   return true;
4010 }
4011 
4012 SMLoc AMDGPUAsmParser::getSMEMOffsetLoc(const OperandVector &Operands) const {
4013   // Start with second operand because SMEM Offset cannot be dst or src0.
4014   for (unsigned i = 2, e = Operands.size(); i != e; ++i) {
4015     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
4016     if (Op.isSMEMOffset())
4017       return Op.getStartLoc();
4018   }
4019   return getLoc();
4020 }
4021 
4022 bool AMDGPUAsmParser::validateSMEMOffset(const MCInst &Inst,
4023                                          const OperandVector &Operands) {
4024   if (isCI() || isSI())
4025     return true;
4026 
4027   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4028   if ((TSFlags & SIInstrFlags::SMRD) == 0)
4029     return true;
4030 
4031   auto Opcode = Inst.getOpcode();
4032   auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
4033   if (OpNum == -1)
4034     return true;
4035 
4036   const auto &Op = Inst.getOperand(OpNum);
4037   if (!Op.isImm())
4038     return true;
4039 
4040   uint64_t Offset = Op.getImm();
4041   bool IsBuffer = AMDGPU::getSMEMIsBuffer(Opcode);
4042   if (AMDGPU::isLegalSMRDEncodedUnsignedOffset(getSTI(), Offset) ||
4043       AMDGPU::isLegalSMRDEncodedSignedOffset(getSTI(), Offset, IsBuffer))
4044     return true;
4045 
4046   Error(getSMEMOffsetLoc(Operands),
4047         (isVI() || IsBuffer) ? "expected a 20-bit unsigned offset" :
4048                                "expected a 21-bit signed offset");
4049 
4050   return false;
4051 }
4052 
4053 bool AMDGPUAsmParser::validateSOPLiteral(const MCInst &Inst) const {
4054   unsigned Opcode = Inst.getOpcode();
4055   const MCInstrDesc &Desc = MII.get(Opcode);
4056   if (!(Desc.TSFlags & (SIInstrFlags::SOP2 | SIInstrFlags::SOPC)))
4057     return true;
4058 
4059   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
4060   const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
4061 
4062   const int OpIndices[] = { Src0Idx, Src1Idx };
4063 
4064   unsigned NumExprs = 0;
4065   unsigned NumLiterals = 0;
4066   uint32_t LiteralValue;
4067 
4068   for (int OpIdx : OpIndices) {
4069     if (OpIdx == -1) break;
4070 
4071     const MCOperand &MO = Inst.getOperand(OpIdx);
4072     // Exclude special imm operands (like that used by s_set_gpr_idx_on)
4073     if (AMDGPU::isSISrcOperand(Desc, OpIdx)) {
4074       if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) {
4075         uint32_t Value = static_cast<uint32_t>(MO.getImm());
4076         if (NumLiterals == 0 || LiteralValue != Value) {
4077           LiteralValue = Value;
4078           ++NumLiterals;
4079         }
4080       } else if (MO.isExpr()) {
4081         ++NumExprs;
4082       }
4083     }
4084   }
4085 
4086   return NumLiterals + NumExprs <= 1;
4087 }
4088 
4089 bool AMDGPUAsmParser::validateOpSel(const MCInst &Inst) {
4090   const unsigned Opc = Inst.getOpcode();
4091   if (Opc == AMDGPU::V_PERMLANE16_B32_gfx10 ||
4092       Opc == AMDGPU::V_PERMLANEX16_B32_gfx10) {
4093     int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
4094     unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
4095 
4096     if (OpSel & ~3)
4097       return false;
4098   }
4099 
4100   if (isGFX940() && (MII.get(Opc).TSFlags & SIInstrFlags::IsDOT)) {
4101     int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
4102     if (OpSelIdx != -1) {
4103       if (Inst.getOperand(OpSelIdx).getImm() != 0)
4104         return false;
4105     }
4106     int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi);
4107     if (OpSelHiIdx != -1) {
4108       if (Inst.getOperand(OpSelHiIdx).getImm() != -1)
4109         return false;
4110     }
4111   }
4112 
4113   return true;
4114 }
4115 
4116 bool AMDGPUAsmParser::validateDPP(const MCInst &Inst,
4117                                   const OperandVector &Operands) {
4118   const unsigned Opc = Inst.getOpcode();
4119   int DppCtrlIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dpp_ctrl);
4120   if (DppCtrlIdx < 0)
4121     return true;
4122   unsigned DppCtrl = Inst.getOperand(DppCtrlIdx).getImm();
4123 
4124   if (!AMDGPU::isLegal64BitDPPControl(DppCtrl)) {
4125     // DPP64 is supported for row_newbcast only.
4126     int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
4127     if (Src0Idx >= 0 &&
4128         getMRI()->getSubReg(Inst.getOperand(Src0Idx).getReg(), AMDGPU::sub1)) {
4129       SMLoc S = getImmLoc(AMDGPUOperand::ImmTyDppCtrl, Operands);
4130       Error(S, "64 bit dpp only supports row_newbcast");
4131       return false;
4132     }
4133   }
4134 
4135   return true;
4136 }
4137 
4138 // Check if VCC register matches wavefront size
4139 bool AMDGPUAsmParser::validateVccOperand(unsigned Reg) const {
4140   auto FB = getFeatureBits();
4141   return (FB[AMDGPU::FeatureWavefrontSize64] && Reg == AMDGPU::VCC) ||
4142     (FB[AMDGPU::FeatureWavefrontSize32] && Reg == AMDGPU::VCC_LO);
4143 }
4144 
4145 // One unique literal can be used. VOP3 literal is only allowed in GFX10+
4146 bool AMDGPUAsmParser::validateVOPLiteral(const MCInst &Inst,
4147                                          const OperandVector &Operands) {
4148   unsigned Opcode = Inst.getOpcode();
4149   const MCInstrDesc &Desc = MII.get(Opcode);
4150   const int ImmIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm);
4151   if (!(Desc.TSFlags & (SIInstrFlags::VOP3 | SIInstrFlags::VOP3P)) &&
4152       ImmIdx == -1)
4153     return true;
4154 
4155   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
4156   const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
4157   const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
4158 
4159   const int OpIndices[] = {Src0Idx, Src1Idx, Src2Idx, ImmIdx};
4160 
4161   unsigned NumExprs = 0;
4162   unsigned NumLiterals = 0;
4163   uint32_t LiteralValue;
4164 
4165   for (int OpIdx : OpIndices) {
4166     if (OpIdx == -1)
4167       continue;
4168 
4169     const MCOperand &MO = Inst.getOperand(OpIdx);
4170     if (!MO.isImm() && !MO.isExpr())
4171       continue;
4172     if (!AMDGPU::isSISrcOperand(Desc, OpIdx))
4173       continue;
4174 
4175     if (OpIdx == Src2Idx && (Desc.TSFlags & SIInstrFlags::IsMAI) &&
4176         getFeatureBits()[AMDGPU::FeatureMFMAInlineLiteralBug]) {
4177       Error(getConstLoc(Operands),
4178             "inline constants are not allowed for this operand");
4179       return false;
4180     }
4181 
4182     if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) {
4183       uint32_t Value = static_cast<uint32_t>(MO.getImm());
4184       if (NumLiterals == 0 || LiteralValue != Value) {
4185         LiteralValue = Value;
4186         ++NumLiterals;
4187       }
4188     } else if (MO.isExpr()) {
4189       ++NumExprs;
4190     }
4191   }
4192   NumLiterals += NumExprs;
4193 
4194   if (!NumLiterals)
4195     return true;
4196 
4197   if (ImmIdx == -1 && !getFeatureBits()[AMDGPU::FeatureVOP3Literal]) {
4198     Error(getLitLoc(Operands), "literal operands are not supported");
4199     return false;
4200   }
4201 
4202   if (NumLiterals > 1) {
4203     Error(getLitLoc(Operands), "only one literal operand is allowed");
4204     return false;
4205   }
4206 
4207   return true;
4208 }
4209 
4210 // Returns -1 if not a register, 0 if VGPR and 1 if AGPR.
4211 static int IsAGPROperand(const MCInst &Inst, uint16_t NameIdx,
4212                          const MCRegisterInfo *MRI) {
4213   int OpIdx = AMDGPU::getNamedOperandIdx(Inst.getOpcode(), NameIdx);
4214   if (OpIdx < 0)
4215     return -1;
4216 
4217   const MCOperand &Op = Inst.getOperand(OpIdx);
4218   if (!Op.isReg())
4219     return -1;
4220 
4221   unsigned Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0);
4222   auto Reg = Sub ? Sub : Op.getReg();
4223   const MCRegisterClass &AGPR32 = MRI->getRegClass(AMDGPU::AGPR_32RegClassID);
4224   return AGPR32.contains(Reg) ? 1 : 0;
4225 }
4226 
4227 bool AMDGPUAsmParser::validateAGPRLdSt(const MCInst &Inst) const {
4228   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4229   if ((TSFlags & (SIInstrFlags::FLAT | SIInstrFlags::MUBUF |
4230                   SIInstrFlags::MTBUF | SIInstrFlags::MIMG |
4231                   SIInstrFlags::DS)) == 0)
4232     return true;
4233 
4234   uint16_t DataNameIdx = (TSFlags & SIInstrFlags::DS) ? AMDGPU::OpName::data0
4235                                                       : AMDGPU::OpName::vdata;
4236 
4237   const MCRegisterInfo *MRI = getMRI();
4238   int DstAreg = IsAGPROperand(Inst, AMDGPU::OpName::vdst, MRI);
4239   int DataAreg = IsAGPROperand(Inst, DataNameIdx, MRI);
4240 
4241   if ((TSFlags & SIInstrFlags::DS) && DataAreg >= 0) {
4242     int Data2Areg = IsAGPROperand(Inst, AMDGPU::OpName::data1, MRI);
4243     if (Data2Areg >= 0 && Data2Areg != DataAreg)
4244       return false;
4245   }
4246 
4247   auto FB = getFeatureBits();
4248   if (FB[AMDGPU::FeatureGFX90AInsts]) {
4249     if (DataAreg < 0 || DstAreg < 0)
4250       return true;
4251     return DstAreg == DataAreg;
4252   }
4253 
4254   return DstAreg < 1 && DataAreg < 1;
4255 }
4256 
4257 bool AMDGPUAsmParser::validateVGPRAlign(const MCInst &Inst) const {
4258   auto FB = getFeatureBits();
4259   if (!FB[AMDGPU::FeatureGFX90AInsts])
4260     return true;
4261 
4262   const MCRegisterInfo *MRI = getMRI();
4263   const MCRegisterClass &VGPR32 = MRI->getRegClass(AMDGPU::VGPR_32RegClassID);
4264   const MCRegisterClass &AGPR32 = MRI->getRegClass(AMDGPU::AGPR_32RegClassID);
4265   for (unsigned I = 0, E = Inst.getNumOperands(); I != E; ++I) {
4266     const MCOperand &Op = Inst.getOperand(I);
4267     if (!Op.isReg())
4268       continue;
4269 
4270     unsigned Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0);
4271     if (!Sub)
4272       continue;
4273 
4274     if (VGPR32.contains(Sub) && ((Sub - AMDGPU::VGPR0) & 1))
4275       return false;
4276     if (AGPR32.contains(Sub) && ((Sub - AMDGPU::AGPR0) & 1))
4277       return false;
4278   }
4279 
4280   return true;
4281 }
4282 
4283 SMLoc AMDGPUAsmParser::getBLGPLoc(const OperandVector &Operands) const {
4284   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
4285     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
4286     if (Op.isBLGP())
4287       return Op.getStartLoc();
4288   }
4289   return SMLoc();
4290 }
4291 
4292 bool AMDGPUAsmParser::validateBLGP(const MCInst &Inst,
4293                                    const OperandVector &Operands) {
4294   unsigned Opc = Inst.getOpcode();
4295   int BlgpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::blgp);
4296   if (BlgpIdx == -1)
4297     return true;
4298   SMLoc BLGPLoc = getBLGPLoc(Operands);
4299   if (!BLGPLoc.isValid())
4300     return true;
4301   bool IsNeg = StringRef(BLGPLoc.getPointer()).startswith("neg:");
4302   auto FB = getFeatureBits();
4303   bool UsesNeg = false;
4304   if (FB[AMDGPU::FeatureGFX940Insts]) {
4305     switch (Opc) {
4306     case AMDGPU::V_MFMA_F64_16X16X4F64_gfx940_acd:
4307     case AMDGPU::V_MFMA_F64_16X16X4F64_gfx940_vcd:
4308     case AMDGPU::V_MFMA_F64_4X4X4F64_gfx940_acd:
4309     case AMDGPU::V_MFMA_F64_4X4X4F64_gfx940_vcd:
4310       UsesNeg = true;
4311     }
4312   }
4313 
4314   if (IsNeg == UsesNeg)
4315     return true;
4316 
4317   Error(BLGPLoc,
4318         UsesNeg ? "invalid modifier: blgp is not supported"
4319                 : "invalid modifier: neg is not supported");
4320 
4321   return false;
4322 }
4323 
4324 // gfx90a has an undocumented limitation:
4325 // DS_GWS opcodes must use even aligned registers.
4326 bool AMDGPUAsmParser::validateGWS(const MCInst &Inst,
4327                                   const OperandVector &Operands) {
4328   if (!getFeatureBits()[AMDGPU::FeatureGFX90AInsts])
4329     return true;
4330 
4331   int Opc = Inst.getOpcode();
4332   if (Opc != AMDGPU::DS_GWS_INIT_vi && Opc != AMDGPU::DS_GWS_BARRIER_vi &&
4333       Opc != AMDGPU::DS_GWS_SEMA_BR_vi)
4334     return true;
4335 
4336   const MCRegisterInfo *MRI = getMRI();
4337   const MCRegisterClass &VGPR32 = MRI->getRegClass(AMDGPU::VGPR_32RegClassID);
4338   int Data0Pos =
4339       AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::data0);
4340   assert(Data0Pos != -1);
4341   auto Reg = Inst.getOperand(Data0Pos).getReg();
4342   auto RegIdx = Reg - (VGPR32.contains(Reg) ? AMDGPU::VGPR0 : AMDGPU::AGPR0);
4343   if (RegIdx & 1) {
4344     SMLoc RegLoc = getRegLoc(Reg, Operands);
4345     Error(RegLoc, "vgpr must be even aligned");
4346     return false;
4347   }
4348 
4349   return true;
4350 }
4351 
4352 bool AMDGPUAsmParser::validateCoherencyBits(const MCInst &Inst,
4353                                             const OperandVector &Operands,
4354                                             const SMLoc &IDLoc) {
4355   int CPolPos = AMDGPU::getNamedOperandIdx(Inst.getOpcode(),
4356                                            AMDGPU::OpName::cpol);
4357   if (CPolPos == -1)
4358     return true;
4359 
4360   unsigned CPol = Inst.getOperand(CPolPos).getImm();
4361 
4362   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4363   if ((TSFlags & (SIInstrFlags::SMRD)) &&
4364       (CPol & ~(AMDGPU::CPol::GLC | AMDGPU::CPol::DLC))) {
4365     Error(IDLoc, "invalid cache policy for SMRD instruction");
4366     return false;
4367   }
4368 
4369   if (isGFX90A() && !isGFX940() && (CPol & CPol::SCC)) {
4370     SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
4371     StringRef CStr(S.getPointer());
4372     S = SMLoc::getFromPointer(&CStr.data()[CStr.find("scc")]);
4373     Error(S, "scc is not supported on this GPU");
4374     return false;
4375   }
4376 
4377   if (!(TSFlags & (SIInstrFlags::IsAtomicNoRet | SIInstrFlags::IsAtomicRet)))
4378     return true;
4379 
4380   if (TSFlags & SIInstrFlags::IsAtomicRet) {
4381     if (!(TSFlags & SIInstrFlags::MIMG) && !(CPol & CPol::GLC)) {
4382       Error(IDLoc, isGFX940() ? "instruction must use sc0"
4383                               : "instruction must use glc");
4384       return false;
4385     }
4386   } else {
4387     if (CPol & CPol::GLC) {
4388       SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
4389       StringRef CStr(S.getPointer());
4390       S = SMLoc::getFromPointer(&CStr.data()[CStr.find("glc")]);
4391       Error(S, isGFX940() ? "instruction must not use sc0"
4392                           : "instruction must not use glc");
4393       return false;
4394     }
4395   }
4396 
4397   return true;
4398 }
4399 
4400 bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst,
4401                                           const SMLoc &IDLoc,
4402                                           const OperandVector &Operands) {
4403   if (auto ErrMsg = validateLdsDirect(Inst)) {
4404     Error(getRegLoc(LDS_DIRECT, Operands), *ErrMsg);
4405     return false;
4406   }
4407   if (!validateSOPLiteral(Inst)) {
4408     Error(getLitLoc(Operands),
4409       "only one literal operand is allowed");
4410     return false;
4411   }
4412   if (!validateVOPLiteral(Inst, Operands)) {
4413     return false;
4414   }
4415   if (!validateConstantBusLimitations(Inst, Operands)) {
4416     return false;
4417   }
4418   if (!validateEarlyClobberLimitations(Inst, Operands)) {
4419     return false;
4420   }
4421   if (!validateIntClampSupported(Inst)) {
4422     Error(getImmLoc(AMDGPUOperand::ImmTyClampSI, Operands),
4423       "integer clamping is not supported on this GPU");
4424     return false;
4425   }
4426   if (!validateOpSel(Inst)) {
4427     Error(getImmLoc(AMDGPUOperand::ImmTyOpSel, Operands),
4428       "invalid op_sel operand");
4429     return false;
4430   }
4431   if (!validateDPP(Inst, Operands)) {
4432     return false;
4433   }
4434   // For MUBUF/MTBUF d16 is a part of opcode, so there is nothing to validate.
4435   if (!validateMIMGD16(Inst)) {
4436     Error(getImmLoc(AMDGPUOperand::ImmTyD16, Operands),
4437       "d16 modifier is not supported on this GPU");
4438     return false;
4439   }
4440   if (!validateMIMGDim(Inst)) {
4441     Error(IDLoc, "dim modifier is required on this GPU");
4442     return false;
4443   }
4444   if (!validateMIMGMSAA(Inst)) {
4445     Error(getImmLoc(AMDGPUOperand::ImmTyDim, Operands),
4446           "invalid dim; must be MSAA type");
4447     return false;
4448   }
4449   if (!validateMIMGDataSize(Inst)) {
4450     Error(IDLoc,
4451       "image data size does not match dmask and tfe");
4452     return false;
4453   }
4454   if (!validateMIMGAddrSize(Inst)) {
4455     Error(IDLoc,
4456       "image address size does not match dim and a16");
4457     return false;
4458   }
4459   if (!validateMIMGAtomicDMask(Inst)) {
4460     Error(getImmLoc(AMDGPUOperand::ImmTyDMask, Operands),
4461       "invalid atomic image dmask");
4462     return false;
4463   }
4464   if (!validateMIMGGatherDMask(Inst)) {
4465     Error(getImmLoc(AMDGPUOperand::ImmTyDMask, Operands),
4466       "invalid image_gather dmask: only one bit must be set");
4467     return false;
4468   }
4469   if (!validateMovrels(Inst, Operands)) {
4470     return false;
4471   }
4472   if (!validateFlatOffset(Inst, Operands)) {
4473     return false;
4474   }
4475   if (!validateSMEMOffset(Inst, Operands)) {
4476     return false;
4477   }
4478   if (!validateMAIAccWrite(Inst, Operands)) {
4479     return false;
4480   }
4481   if (!validateMFMA(Inst, Operands)) {
4482     return false;
4483   }
4484   if (!validateCoherencyBits(Inst, Operands, IDLoc)) {
4485     return false;
4486   }
4487 
4488   if (!validateAGPRLdSt(Inst)) {
4489     Error(IDLoc, getFeatureBits()[AMDGPU::FeatureGFX90AInsts]
4490     ? "invalid register class: data and dst should be all VGPR or AGPR"
4491     : "invalid register class: agpr loads and stores not supported on this GPU"
4492     );
4493     return false;
4494   }
4495   if (!validateVGPRAlign(Inst)) {
4496     Error(IDLoc,
4497       "invalid register class: vgpr tuples must be 64 bit aligned");
4498     return false;
4499   }
4500   if (!validateGWS(Inst, Operands)) {
4501     return false;
4502   }
4503 
4504   if (!validateBLGP(Inst, Operands)) {
4505     return false;
4506   }
4507 
4508   if (!validateDivScale(Inst)) {
4509     Error(IDLoc, "ABS not allowed in VOP3B instructions");
4510     return false;
4511   }
4512   if (!validateCoherencyBits(Inst, Operands, IDLoc)) {
4513     return false;
4514   }
4515 
4516   return true;
4517 }
4518 
4519 static std::string AMDGPUMnemonicSpellCheck(StringRef S,
4520                                             const FeatureBitset &FBS,
4521                                             unsigned VariantID = 0);
4522 
4523 static bool AMDGPUCheckMnemonic(StringRef Mnemonic,
4524                                 const FeatureBitset &AvailableFeatures,
4525                                 unsigned VariantID);
4526 
4527 bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo,
4528                                        const FeatureBitset &FBS) {
4529   return isSupportedMnemo(Mnemo, FBS, getAllVariants());
4530 }
4531 
4532 bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo,
4533                                        const FeatureBitset &FBS,
4534                                        ArrayRef<unsigned> Variants) {
4535   for (auto Variant : Variants) {
4536     if (AMDGPUCheckMnemonic(Mnemo, FBS, Variant))
4537       return true;
4538   }
4539 
4540   return false;
4541 }
4542 
4543 bool AMDGPUAsmParser::checkUnsupportedInstruction(StringRef Mnemo,
4544                                                   const SMLoc &IDLoc) {
4545   FeatureBitset FBS = ComputeAvailableFeatures(getSTI().getFeatureBits());
4546 
4547   // Check if requested instruction variant is supported.
4548   if (isSupportedMnemo(Mnemo, FBS, getMatchedVariants()))
4549     return false;
4550 
4551   // This instruction is not supported.
4552   // Clear any other pending errors because they are no longer relevant.
4553   getParser().clearPendingErrors();
4554 
4555   // Requested instruction variant is not supported.
4556   // Check if any other variants are supported.
4557   StringRef VariantName = getMatchedVariantName();
4558   if (!VariantName.empty() && isSupportedMnemo(Mnemo, FBS)) {
4559     return Error(IDLoc,
4560                  Twine(VariantName,
4561                        " variant of this instruction is not supported"));
4562   }
4563 
4564   // Finally check if this instruction is supported on any other GPU.
4565   if (isSupportedMnemo(Mnemo, FeatureBitset().set())) {
4566     return Error(IDLoc, "instruction not supported on this GPU");
4567   }
4568 
4569   // Instruction not supported on any GPU. Probably a typo.
4570   std::string Suggestion = AMDGPUMnemonicSpellCheck(Mnemo, FBS);
4571   return Error(IDLoc, "invalid instruction" + Suggestion);
4572 }
4573 
4574 bool AMDGPUAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
4575                                               OperandVector &Operands,
4576                                               MCStreamer &Out,
4577                                               uint64_t &ErrorInfo,
4578                                               bool MatchingInlineAsm) {
4579   MCInst Inst;
4580   unsigned Result = Match_Success;
4581   for (auto Variant : getMatchedVariants()) {
4582     uint64_t EI;
4583     auto R = MatchInstructionImpl(Operands, Inst, EI, MatchingInlineAsm,
4584                                   Variant);
4585     // We order match statuses from least to most specific. We use most specific
4586     // status as resulting
4587     // Match_MnemonicFail < Match_InvalidOperand < Match_MissingFeature < Match_PreferE32
4588     if ((R == Match_Success) ||
4589         (R == Match_PreferE32) ||
4590         (R == Match_MissingFeature && Result != Match_PreferE32) ||
4591         (R == Match_InvalidOperand && Result != Match_MissingFeature
4592                                    && Result != Match_PreferE32) ||
4593         (R == Match_MnemonicFail   && Result != Match_InvalidOperand
4594                                    && Result != Match_MissingFeature
4595                                    && Result != Match_PreferE32)) {
4596       Result = R;
4597       ErrorInfo = EI;
4598     }
4599     if (R == Match_Success)
4600       break;
4601   }
4602 
4603   if (Result == Match_Success) {
4604     if (!validateInstruction(Inst, IDLoc, Operands)) {
4605       return true;
4606     }
4607     Inst.setLoc(IDLoc);
4608     Out.emitInstruction(Inst, getSTI());
4609     return false;
4610   }
4611 
4612   StringRef Mnemo = ((AMDGPUOperand &)*Operands[0]).getToken();
4613   if (checkUnsupportedInstruction(Mnemo, IDLoc)) {
4614     return true;
4615   }
4616 
4617   switch (Result) {
4618   default: break;
4619   case Match_MissingFeature:
4620     // It has been verified that the specified instruction
4621     // mnemonic is valid. A match was found but it requires
4622     // features which are not supported on this GPU.
4623     return Error(IDLoc, "operands are not valid for this GPU or mode");
4624 
4625   case Match_InvalidOperand: {
4626     SMLoc ErrorLoc = IDLoc;
4627     if (ErrorInfo != ~0ULL) {
4628       if (ErrorInfo >= Operands.size()) {
4629         return Error(IDLoc, "too few operands for instruction");
4630       }
4631       ErrorLoc = ((AMDGPUOperand &)*Operands[ErrorInfo]).getStartLoc();
4632       if (ErrorLoc == SMLoc())
4633         ErrorLoc = IDLoc;
4634     }
4635     return Error(ErrorLoc, "invalid operand for instruction");
4636   }
4637 
4638   case Match_PreferE32:
4639     return Error(IDLoc, "internal error: instruction without _e64 suffix "
4640                         "should be encoded as e32");
4641   case Match_MnemonicFail:
4642     llvm_unreachable("Invalid instructions should have been handled already");
4643   }
4644   llvm_unreachable("Implement any new match types added!");
4645 }
4646 
4647 bool AMDGPUAsmParser::ParseAsAbsoluteExpression(uint32_t &Ret) {
4648   int64_t Tmp = -1;
4649   if (!isToken(AsmToken::Integer) && !isToken(AsmToken::Identifier)) {
4650     return true;
4651   }
4652   if (getParser().parseAbsoluteExpression(Tmp)) {
4653     return true;
4654   }
4655   Ret = static_cast<uint32_t>(Tmp);
4656   return false;
4657 }
4658 
4659 bool AMDGPUAsmParser::ParseDirectiveMajorMinor(uint32_t &Major,
4660                                                uint32_t &Minor) {
4661   if (ParseAsAbsoluteExpression(Major))
4662     return TokError("invalid major version");
4663 
4664   if (!trySkipToken(AsmToken::Comma))
4665     return TokError("minor version number required, comma expected");
4666 
4667   if (ParseAsAbsoluteExpression(Minor))
4668     return TokError("invalid minor version");
4669 
4670   return false;
4671 }
4672 
4673 bool AMDGPUAsmParser::ParseDirectiveAMDGCNTarget() {
4674   if (getSTI().getTargetTriple().getArch() != Triple::amdgcn)
4675     return TokError("directive only supported for amdgcn architecture");
4676 
4677   std::string TargetIDDirective;
4678   SMLoc TargetStart = getTok().getLoc();
4679   if (getParser().parseEscapedString(TargetIDDirective))
4680     return true;
4681 
4682   SMRange TargetRange = SMRange(TargetStart, getTok().getLoc());
4683   if (getTargetStreamer().getTargetID()->toString() != TargetIDDirective)
4684     return getParser().Error(TargetRange.Start,
4685         (Twine(".amdgcn_target directive's target id ") +
4686          Twine(TargetIDDirective) +
4687          Twine(" does not match the specified target id ") +
4688          Twine(getTargetStreamer().getTargetID()->toString())).str());
4689 
4690   return false;
4691 }
4692 
4693 bool AMDGPUAsmParser::OutOfRangeError(SMRange Range) {
4694   return Error(Range.Start, "value out of range", Range);
4695 }
4696 
4697 bool AMDGPUAsmParser::calculateGPRBlocks(
4698     const FeatureBitset &Features, bool VCCUsed, bool FlatScrUsed,
4699     bool XNACKUsed, Optional<bool> EnableWavefrontSize32, unsigned NextFreeVGPR,
4700     SMRange VGPRRange, unsigned NextFreeSGPR, SMRange SGPRRange,
4701     unsigned &VGPRBlocks, unsigned &SGPRBlocks) {
4702   // TODO(scott.linder): These calculations are duplicated from
4703   // AMDGPUAsmPrinter::getSIProgramInfo and could be unified.
4704   IsaVersion Version = getIsaVersion(getSTI().getCPU());
4705 
4706   unsigned NumVGPRs = NextFreeVGPR;
4707   unsigned NumSGPRs = NextFreeSGPR;
4708 
4709   if (Version.Major >= 10)
4710     NumSGPRs = 0;
4711   else {
4712     unsigned MaxAddressableNumSGPRs =
4713         IsaInfo::getAddressableNumSGPRs(&getSTI());
4714 
4715     if (Version.Major >= 8 && !Features.test(FeatureSGPRInitBug) &&
4716         NumSGPRs > MaxAddressableNumSGPRs)
4717       return OutOfRangeError(SGPRRange);
4718 
4719     NumSGPRs +=
4720         IsaInfo::getNumExtraSGPRs(&getSTI(), VCCUsed, FlatScrUsed, XNACKUsed);
4721 
4722     if ((Version.Major <= 7 || Features.test(FeatureSGPRInitBug)) &&
4723         NumSGPRs > MaxAddressableNumSGPRs)
4724       return OutOfRangeError(SGPRRange);
4725 
4726     if (Features.test(FeatureSGPRInitBug))
4727       NumSGPRs = IsaInfo::FIXED_NUM_SGPRS_FOR_INIT_BUG;
4728   }
4729 
4730   VGPRBlocks =
4731       IsaInfo::getNumVGPRBlocks(&getSTI(), NumVGPRs, EnableWavefrontSize32);
4732   SGPRBlocks = IsaInfo::getNumSGPRBlocks(&getSTI(), NumSGPRs);
4733 
4734   return false;
4735 }
4736 
4737 bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
4738   if (getSTI().getTargetTriple().getArch() != Triple::amdgcn)
4739     return TokError("directive only supported for amdgcn architecture");
4740 
4741   if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA)
4742     return TokError("directive only supported for amdhsa OS");
4743 
4744   StringRef KernelName;
4745   if (getParser().parseIdentifier(KernelName))
4746     return true;
4747 
4748   kernel_descriptor_t KD = getDefaultAmdhsaKernelDescriptor(&getSTI());
4749 
4750   StringSet<> Seen;
4751 
4752   IsaVersion IVersion = getIsaVersion(getSTI().getCPU());
4753 
4754   SMRange VGPRRange;
4755   uint64_t NextFreeVGPR = 0;
4756   uint64_t AccumOffset = 0;
4757   uint64_t SharedVGPRCount = 0;
4758   SMRange SGPRRange;
4759   uint64_t NextFreeSGPR = 0;
4760 
4761   // Count the number of user SGPRs implied from the enabled feature bits.
4762   unsigned ImpliedUserSGPRCount = 0;
4763 
4764   // Track if the asm explicitly contains the directive for the user SGPR
4765   // count.
4766   Optional<unsigned> ExplicitUserSGPRCount;
4767   bool ReserveVCC = true;
4768   bool ReserveFlatScr = true;
4769   Optional<bool> EnableWavefrontSize32;
4770 
4771   while (true) {
4772     while (trySkipToken(AsmToken::EndOfStatement));
4773 
4774     StringRef ID;
4775     SMRange IDRange = getTok().getLocRange();
4776     if (!parseId(ID, "expected .amdhsa_ directive or .end_amdhsa_kernel"))
4777       return true;
4778 
4779     if (ID == ".end_amdhsa_kernel")
4780       break;
4781 
4782     if (Seen.find(ID) != Seen.end())
4783       return TokError(".amdhsa_ directives cannot be repeated");
4784     Seen.insert(ID);
4785 
4786     SMLoc ValStart = getLoc();
4787     int64_t IVal;
4788     if (getParser().parseAbsoluteExpression(IVal))
4789       return true;
4790     SMLoc ValEnd = getLoc();
4791     SMRange ValRange = SMRange(ValStart, ValEnd);
4792 
4793     if (IVal < 0)
4794       return OutOfRangeError(ValRange);
4795 
4796     uint64_t Val = IVal;
4797 
4798 #define PARSE_BITS_ENTRY(FIELD, ENTRY, VALUE, RANGE)                           \
4799   if (!isUInt<ENTRY##_WIDTH>(VALUE))                                           \
4800     return OutOfRangeError(RANGE);                                             \
4801   AMDHSA_BITS_SET(FIELD, ENTRY, VALUE);
4802 
4803     if (ID == ".amdhsa_group_segment_fixed_size") {
4804       if (!isUInt<sizeof(KD.group_segment_fixed_size) * CHAR_BIT>(Val))
4805         return OutOfRangeError(ValRange);
4806       KD.group_segment_fixed_size = Val;
4807     } else if (ID == ".amdhsa_private_segment_fixed_size") {
4808       if (!isUInt<sizeof(KD.private_segment_fixed_size) * CHAR_BIT>(Val))
4809         return OutOfRangeError(ValRange);
4810       KD.private_segment_fixed_size = Val;
4811     } else if (ID == ".amdhsa_kernarg_size") {
4812       if (!isUInt<sizeof(KD.kernarg_size) * CHAR_BIT>(Val))
4813         return OutOfRangeError(ValRange);
4814       KD.kernarg_size = Val;
4815     } else if (ID == ".amdhsa_user_sgpr_count") {
4816       ExplicitUserSGPRCount = Val;
4817     } else if (ID == ".amdhsa_user_sgpr_private_segment_buffer") {
4818       if (hasArchitectedFlatScratch())
4819         return Error(IDRange.Start,
4820                      "directive is not supported with architected flat scratch",
4821                      IDRange);
4822       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4823                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER,
4824                        Val, ValRange);
4825       if (Val)
4826         ImpliedUserSGPRCount += 4;
4827     } else if (ID == ".amdhsa_user_sgpr_dispatch_ptr") {
4828       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4829                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR, Val,
4830                        ValRange);
4831       if (Val)
4832         ImpliedUserSGPRCount += 2;
4833     } else if (ID == ".amdhsa_user_sgpr_queue_ptr") {
4834       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4835                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR, Val,
4836                        ValRange);
4837       if (Val)
4838         ImpliedUserSGPRCount += 2;
4839     } else if (ID == ".amdhsa_user_sgpr_kernarg_segment_ptr") {
4840       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4841                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR,
4842                        Val, ValRange);
4843       if (Val)
4844         ImpliedUserSGPRCount += 2;
4845     } else if (ID == ".amdhsa_user_sgpr_dispatch_id") {
4846       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4847                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID, Val,
4848                        ValRange);
4849       if (Val)
4850         ImpliedUserSGPRCount += 2;
4851     } else if (ID == ".amdhsa_user_sgpr_flat_scratch_init") {
4852       if (hasArchitectedFlatScratch())
4853         return Error(IDRange.Start,
4854                      "directive is not supported with architected flat scratch",
4855                      IDRange);
4856       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4857                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT, Val,
4858                        ValRange);
4859       if (Val)
4860         ImpliedUserSGPRCount += 2;
4861     } else if (ID == ".amdhsa_user_sgpr_private_segment_size") {
4862       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4863                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE,
4864                        Val, ValRange);
4865       if (Val)
4866         ImpliedUserSGPRCount += 1;
4867     } else if (ID == ".amdhsa_wavefront_size32") {
4868       if (IVersion.Major < 10)
4869         return Error(IDRange.Start, "directive requires gfx10+", IDRange);
4870       EnableWavefrontSize32 = Val;
4871       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4872                        KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32,
4873                        Val, ValRange);
4874     } else if (ID == ".amdhsa_system_sgpr_private_segment_wavefront_offset") {
4875       if (hasArchitectedFlatScratch())
4876         return Error(IDRange.Start,
4877                      "directive is not supported with architected flat scratch",
4878                      IDRange);
4879       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4880                        COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, Val, ValRange);
4881     } else if (ID == ".amdhsa_enable_private_segment") {
4882       if (!hasArchitectedFlatScratch())
4883         return Error(
4884             IDRange.Start,
4885             "directive is not supported without architected flat scratch",
4886             IDRange);
4887       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4888                        COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, Val, ValRange);
4889     } else if (ID == ".amdhsa_system_sgpr_workgroup_id_x") {
4890       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4891                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, Val,
4892                        ValRange);
4893     } else if (ID == ".amdhsa_system_sgpr_workgroup_id_y") {
4894       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4895                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y, Val,
4896                        ValRange);
4897     } else if (ID == ".amdhsa_system_sgpr_workgroup_id_z") {
4898       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4899                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z, Val,
4900                        ValRange);
4901     } else if (ID == ".amdhsa_system_sgpr_workgroup_info") {
4902       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4903                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO, Val,
4904                        ValRange);
4905     } else if (ID == ".amdhsa_system_vgpr_workitem_id") {
4906       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4907                        COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID, Val,
4908                        ValRange);
4909     } else if (ID == ".amdhsa_next_free_vgpr") {
4910       VGPRRange = ValRange;
4911       NextFreeVGPR = Val;
4912     } else if (ID == ".amdhsa_next_free_sgpr") {
4913       SGPRRange = ValRange;
4914       NextFreeSGPR = Val;
4915     } else if (ID == ".amdhsa_accum_offset") {
4916       if (!isGFX90A())
4917         return Error(IDRange.Start, "directive requires gfx90a+", IDRange);
4918       AccumOffset = Val;
4919     } else if (ID == ".amdhsa_reserve_vcc") {
4920       if (!isUInt<1>(Val))
4921         return OutOfRangeError(ValRange);
4922       ReserveVCC = Val;
4923     } else if (ID == ".amdhsa_reserve_flat_scratch") {
4924       if (IVersion.Major < 7)
4925         return Error(IDRange.Start, "directive requires gfx7+", IDRange);
4926       if (hasArchitectedFlatScratch())
4927         return Error(IDRange.Start,
4928                      "directive is not supported with architected flat scratch",
4929                      IDRange);
4930       if (!isUInt<1>(Val))
4931         return OutOfRangeError(ValRange);
4932       ReserveFlatScr = Val;
4933     } else if (ID == ".amdhsa_reserve_xnack_mask") {
4934       if (IVersion.Major < 8)
4935         return Error(IDRange.Start, "directive requires gfx8+", IDRange);
4936       if (!isUInt<1>(Val))
4937         return OutOfRangeError(ValRange);
4938       if (Val != getTargetStreamer().getTargetID()->isXnackOnOrAny())
4939         return getParser().Error(IDRange.Start, ".amdhsa_reserve_xnack_mask does not match target id",
4940                                  IDRange);
4941     } else if (ID == ".amdhsa_float_round_mode_32") {
4942       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
4943                        COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32, Val, ValRange);
4944     } else if (ID == ".amdhsa_float_round_mode_16_64") {
4945       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
4946                        COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64, Val, ValRange);
4947     } else if (ID == ".amdhsa_float_denorm_mode_32") {
4948       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
4949                        COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32, Val, ValRange);
4950     } else if (ID == ".amdhsa_float_denorm_mode_16_64") {
4951       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
4952                        COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64, Val,
4953                        ValRange);
4954     } else if (ID == ".amdhsa_dx10_clamp") {
4955       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
4956                        COMPUTE_PGM_RSRC1_ENABLE_DX10_CLAMP, Val, ValRange);
4957     } else if (ID == ".amdhsa_ieee_mode") {
4958       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_ENABLE_IEEE_MODE,
4959                        Val, ValRange);
4960     } else if (ID == ".amdhsa_fp16_overflow") {
4961       if (IVersion.Major < 9)
4962         return Error(IDRange.Start, "directive requires gfx9+", IDRange);
4963       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FP16_OVFL, Val,
4964                        ValRange);
4965     } else if (ID == ".amdhsa_tg_split") {
4966       if (!isGFX90A())
4967         return Error(IDRange.Start, "directive requires gfx90a+", IDRange);
4968       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc3, COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT, Val,
4969                        ValRange);
4970     } else if (ID == ".amdhsa_workgroup_processor_mode") {
4971       if (IVersion.Major < 10)
4972         return Error(IDRange.Start, "directive requires gfx10+", IDRange);
4973       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_WGP_MODE, Val,
4974                        ValRange);
4975     } else if (ID == ".amdhsa_memory_ordered") {
4976       if (IVersion.Major < 10)
4977         return Error(IDRange.Start, "directive requires gfx10+", IDRange);
4978       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_MEM_ORDERED, Val,
4979                        ValRange);
4980     } else if (ID == ".amdhsa_forward_progress") {
4981       if (IVersion.Major < 10)
4982         return Error(IDRange.Start, "directive requires gfx10+", IDRange);
4983       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FWD_PROGRESS, Val,
4984                        ValRange);
4985     } else if (ID == ".amdhsa_shared_vgpr_count") {
4986       if (IVersion.Major < 10)
4987         return Error(IDRange.Start, "directive requires gfx10+", IDRange);
4988       SharedVGPRCount = Val;
4989       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc3,
4990                        COMPUTE_PGM_RSRC3_GFX10_SHARED_VGPR_COUNT, Val,
4991                        ValRange);
4992     } else if (ID == ".amdhsa_exception_fp_ieee_invalid_op") {
4993       PARSE_BITS_ENTRY(
4994           KD.compute_pgm_rsrc2,
4995           COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION, Val,
4996           ValRange);
4997     } else if (ID == ".amdhsa_exception_fp_denorm_src") {
4998       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4999                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE,
5000                        Val, ValRange);
5001     } else if (ID == ".amdhsa_exception_fp_ieee_div_zero") {
5002       PARSE_BITS_ENTRY(
5003           KD.compute_pgm_rsrc2,
5004           COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO, Val,
5005           ValRange);
5006     } else if (ID == ".amdhsa_exception_fp_ieee_overflow") {
5007       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5008                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW,
5009                        Val, ValRange);
5010     } else if (ID == ".amdhsa_exception_fp_ieee_underflow") {
5011       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5012                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW,
5013                        Val, ValRange);
5014     } else if (ID == ".amdhsa_exception_fp_ieee_inexact") {
5015       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5016                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT,
5017                        Val, ValRange);
5018     } else if (ID == ".amdhsa_exception_int_div_zero") {
5019       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5020                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO,
5021                        Val, ValRange);
5022     } else {
5023       return Error(IDRange.Start, "unknown .amdhsa_kernel directive", IDRange);
5024     }
5025 
5026 #undef PARSE_BITS_ENTRY
5027   }
5028 
5029   if (Seen.find(".amdhsa_next_free_vgpr") == Seen.end())
5030     return TokError(".amdhsa_next_free_vgpr directive is required");
5031 
5032   if (Seen.find(".amdhsa_next_free_sgpr") == Seen.end())
5033     return TokError(".amdhsa_next_free_sgpr directive is required");
5034 
5035   unsigned VGPRBlocks;
5036   unsigned SGPRBlocks;
5037   if (calculateGPRBlocks(getFeatureBits(), ReserveVCC, ReserveFlatScr,
5038                          getTargetStreamer().getTargetID()->isXnackOnOrAny(),
5039                          EnableWavefrontSize32, NextFreeVGPR,
5040                          VGPRRange, NextFreeSGPR, SGPRRange, VGPRBlocks,
5041                          SGPRBlocks))
5042     return true;
5043 
5044   if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_WIDTH>(
5045           VGPRBlocks))
5046     return OutOfRangeError(VGPRRange);
5047   AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
5048                   COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT, VGPRBlocks);
5049 
5050   if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_WIDTH>(
5051           SGPRBlocks))
5052     return OutOfRangeError(SGPRRange);
5053   AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
5054                   COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT,
5055                   SGPRBlocks);
5056 
5057   if (ExplicitUserSGPRCount && ImpliedUserSGPRCount > *ExplicitUserSGPRCount)
5058     return TokError("amdgpu_user_sgpr_count smaller than than implied by "
5059                     "enabled user SGPRs");
5060 
5061   unsigned UserSGPRCount =
5062       ExplicitUserSGPRCount ? *ExplicitUserSGPRCount : ImpliedUserSGPRCount;
5063 
5064   if (!isUInt<COMPUTE_PGM_RSRC2_USER_SGPR_COUNT_WIDTH>(UserSGPRCount))
5065     return TokError("too many user SGPRs enabled");
5066   AMDHSA_BITS_SET(KD.compute_pgm_rsrc2, COMPUTE_PGM_RSRC2_USER_SGPR_COUNT,
5067                   UserSGPRCount);
5068 
5069   if (isGFX90A()) {
5070     if (Seen.find(".amdhsa_accum_offset") == Seen.end())
5071       return TokError(".amdhsa_accum_offset directive is required");
5072     if (AccumOffset < 4 || AccumOffset > 256 || (AccumOffset & 3))
5073       return TokError("accum_offset should be in range [4..256] in "
5074                       "increments of 4");
5075     if (AccumOffset > alignTo(std::max((uint64_t)1, NextFreeVGPR), 4))
5076       return TokError("accum_offset exceeds total VGPR allocation");
5077     AMDHSA_BITS_SET(KD.compute_pgm_rsrc3, COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET,
5078                     (AccumOffset / 4 - 1));
5079   }
5080 
5081   if (IVersion.Major == 10) {
5082     // SharedVGPRCount < 16 checked by PARSE_ENTRY_BITS
5083     if (SharedVGPRCount && EnableWavefrontSize32) {
5084       return TokError("shared_vgpr_count directive not valid on "
5085                       "wavefront size 32");
5086     }
5087     if (SharedVGPRCount * 2 + VGPRBlocks > 63) {
5088       return TokError("shared_vgpr_count*2 + "
5089                       "compute_pgm_rsrc1.GRANULATED_WORKITEM_VGPR_COUNT cannot "
5090                       "exceed 63\n");
5091     }
5092   }
5093 
5094   getTargetStreamer().EmitAmdhsaKernelDescriptor(
5095       getSTI(), KernelName, KD, NextFreeVGPR, NextFreeSGPR, ReserveVCC,
5096       ReserveFlatScr);
5097   return false;
5098 }
5099 
5100 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectVersion() {
5101   uint32_t Major;
5102   uint32_t Minor;
5103 
5104   if (ParseDirectiveMajorMinor(Major, Minor))
5105     return true;
5106 
5107   getTargetStreamer().EmitDirectiveHSACodeObjectVersion(Major, Minor);
5108   return false;
5109 }
5110 
5111 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectISA() {
5112   uint32_t Major;
5113   uint32_t Minor;
5114   uint32_t Stepping;
5115   StringRef VendorName;
5116   StringRef ArchName;
5117 
5118   // If this directive has no arguments, then use the ISA version for the
5119   // targeted GPU.
5120   if (isToken(AsmToken::EndOfStatement)) {
5121     AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
5122     getTargetStreamer().EmitDirectiveHSACodeObjectISAV2(ISA.Major, ISA.Minor,
5123                                                         ISA.Stepping,
5124                                                         "AMD", "AMDGPU");
5125     return false;
5126   }
5127 
5128   if (ParseDirectiveMajorMinor(Major, Minor))
5129     return true;
5130 
5131   if (!trySkipToken(AsmToken::Comma))
5132     return TokError("stepping version number required, comma expected");
5133 
5134   if (ParseAsAbsoluteExpression(Stepping))
5135     return TokError("invalid stepping version");
5136 
5137   if (!trySkipToken(AsmToken::Comma))
5138     return TokError("vendor name required, comma expected");
5139 
5140   if (!parseString(VendorName, "invalid vendor name"))
5141     return true;
5142 
5143   if (!trySkipToken(AsmToken::Comma))
5144     return TokError("arch name required, comma expected");
5145 
5146   if (!parseString(ArchName, "invalid arch name"))
5147     return true;
5148 
5149   getTargetStreamer().EmitDirectiveHSACodeObjectISAV2(Major, Minor, Stepping,
5150                                                       VendorName, ArchName);
5151   return false;
5152 }
5153 
5154 bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID,
5155                                                amd_kernel_code_t &Header) {
5156   // max_scratch_backing_memory_byte_size is deprecated. Ignore it while parsing
5157   // assembly for backwards compatibility.
5158   if (ID == "max_scratch_backing_memory_byte_size") {
5159     Parser.eatToEndOfStatement();
5160     return false;
5161   }
5162 
5163   SmallString<40> ErrStr;
5164   raw_svector_ostream Err(ErrStr);
5165   if (!parseAmdKernelCodeField(ID, getParser(), Header, Err)) {
5166     return TokError(Err.str());
5167   }
5168   Lex();
5169 
5170   if (ID == "enable_wavefront_size32") {
5171     if (Header.code_properties & AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32) {
5172       if (!isGFX10Plus())
5173         return TokError("enable_wavefront_size32=1 is only allowed on GFX10+");
5174       if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32])
5175         return TokError("enable_wavefront_size32=1 requires +WavefrontSize32");
5176     } else {
5177       if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64])
5178         return TokError("enable_wavefront_size32=0 requires +WavefrontSize64");
5179     }
5180   }
5181 
5182   if (ID == "wavefront_size") {
5183     if (Header.wavefront_size == 5) {
5184       if (!isGFX10Plus())
5185         return TokError("wavefront_size=5 is only allowed on GFX10+");
5186       if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32])
5187         return TokError("wavefront_size=5 requires +WavefrontSize32");
5188     } else if (Header.wavefront_size == 6) {
5189       if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64])
5190         return TokError("wavefront_size=6 requires +WavefrontSize64");
5191     }
5192   }
5193 
5194   if (ID == "enable_wgp_mode") {
5195     if (G_00B848_WGP_MODE(Header.compute_pgm_resource_registers) &&
5196         !isGFX10Plus())
5197       return TokError("enable_wgp_mode=1 is only allowed on GFX10+");
5198   }
5199 
5200   if (ID == "enable_mem_ordered") {
5201     if (G_00B848_MEM_ORDERED(Header.compute_pgm_resource_registers) &&
5202         !isGFX10Plus())
5203       return TokError("enable_mem_ordered=1 is only allowed on GFX10+");
5204   }
5205 
5206   if (ID == "enable_fwd_progress") {
5207     if (G_00B848_FWD_PROGRESS(Header.compute_pgm_resource_registers) &&
5208         !isGFX10Plus())
5209       return TokError("enable_fwd_progress=1 is only allowed on GFX10+");
5210   }
5211 
5212   return false;
5213 }
5214 
5215 bool AMDGPUAsmParser::ParseDirectiveAMDKernelCodeT() {
5216   amd_kernel_code_t Header;
5217   AMDGPU::initDefaultAMDKernelCodeT(Header, &getSTI());
5218 
5219   while (true) {
5220     // Lex EndOfStatement.  This is in a while loop, because lexing a comment
5221     // will set the current token to EndOfStatement.
5222     while(trySkipToken(AsmToken::EndOfStatement));
5223 
5224     StringRef ID;
5225     if (!parseId(ID, "expected value identifier or .end_amd_kernel_code_t"))
5226       return true;
5227 
5228     if (ID == ".end_amd_kernel_code_t")
5229       break;
5230 
5231     if (ParseAMDKernelCodeTValue(ID, Header))
5232       return true;
5233   }
5234 
5235   getTargetStreamer().EmitAMDKernelCodeT(Header);
5236 
5237   return false;
5238 }
5239 
5240 bool AMDGPUAsmParser::ParseDirectiveAMDGPUHsaKernel() {
5241   StringRef KernelName;
5242   if (!parseId(KernelName, "expected symbol name"))
5243     return true;
5244 
5245   getTargetStreamer().EmitAMDGPUSymbolType(KernelName,
5246                                            ELF::STT_AMDGPU_HSA_KERNEL);
5247 
5248   KernelScope.initialize(getContext());
5249   return false;
5250 }
5251 
5252 bool AMDGPUAsmParser::ParseDirectiveISAVersion() {
5253   if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) {
5254     return Error(getLoc(),
5255                  ".amd_amdgpu_isa directive is not available on non-amdgcn "
5256                  "architectures");
5257   }
5258 
5259   auto TargetIDDirective = getLexer().getTok().getStringContents();
5260   if (getTargetStreamer().getTargetID()->toString() != TargetIDDirective)
5261     return Error(getParser().getTok().getLoc(), "target id must match options");
5262 
5263   getTargetStreamer().EmitISAVersion();
5264   Lex();
5265 
5266   return false;
5267 }
5268 
5269 bool AMDGPUAsmParser::ParseDirectiveHSAMetadata() {
5270   const char *AssemblerDirectiveBegin;
5271   const char *AssemblerDirectiveEnd;
5272   std::tie(AssemblerDirectiveBegin, AssemblerDirectiveEnd) =
5273       isHsaAbiVersion3AndAbove(&getSTI())
5274           ? std::make_tuple(HSAMD::V3::AssemblerDirectiveBegin,
5275                             HSAMD::V3::AssemblerDirectiveEnd)
5276           : std::make_tuple(HSAMD::AssemblerDirectiveBegin,
5277                             HSAMD::AssemblerDirectiveEnd);
5278 
5279   if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA) {
5280     return Error(getLoc(),
5281                  (Twine(AssemblerDirectiveBegin) + Twine(" directive is "
5282                  "not available on non-amdhsa OSes")).str());
5283   }
5284 
5285   std::string HSAMetadataString;
5286   if (ParseToEndDirective(AssemblerDirectiveBegin, AssemblerDirectiveEnd,
5287                           HSAMetadataString))
5288     return true;
5289 
5290   if (isHsaAbiVersion3AndAbove(&getSTI())) {
5291     if (!getTargetStreamer().EmitHSAMetadataV3(HSAMetadataString))
5292       return Error(getLoc(), "invalid HSA metadata");
5293   } else {
5294     if (!getTargetStreamer().EmitHSAMetadataV2(HSAMetadataString))
5295       return Error(getLoc(), "invalid HSA metadata");
5296   }
5297 
5298   return false;
5299 }
5300 
5301 /// Common code to parse out a block of text (typically YAML) between start and
5302 /// end directives.
5303 bool AMDGPUAsmParser::ParseToEndDirective(const char *AssemblerDirectiveBegin,
5304                                           const char *AssemblerDirectiveEnd,
5305                                           std::string &CollectString) {
5306 
5307   raw_string_ostream CollectStream(CollectString);
5308 
5309   getLexer().setSkipSpace(false);
5310 
5311   bool FoundEnd = false;
5312   while (!isToken(AsmToken::Eof)) {
5313     while (isToken(AsmToken::Space)) {
5314       CollectStream << getTokenStr();
5315       Lex();
5316     }
5317 
5318     if (trySkipId(AssemblerDirectiveEnd)) {
5319       FoundEnd = true;
5320       break;
5321     }
5322 
5323     CollectStream << Parser.parseStringToEndOfStatement()
5324                   << getContext().getAsmInfo()->getSeparatorString();
5325 
5326     Parser.eatToEndOfStatement();
5327   }
5328 
5329   getLexer().setSkipSpace(true);
5330 
5331   if (isToken(AsmToken::Eof) && !FoundEnd) {
5332     return TokError(Twine("expected directive ") +
5333                     Twine(AssemblerDirectiveEnd) + Twine(" not found"));
5334   }
5335 
5336   CollectStream.flush();
5337   return false;
5338 }
5339 
5340 /// Parse the assembler directive for new MsgPack-format PAL metadata.
5341 bool AMDGPUAsmParser::ParseDirectivePALMetadataBegin() {
5342   std::string String;
5343   if (ParseToEndDirective(AMDGPU::PALMD::AssemblerDirectiveBegin,
5344                           AMDGPU::PALMD::AssemblerDirectiveEnd, String))
5345     return true;
5346 
5347   auto PALMetadata = getTargetStreamer().getPALMetadata();
5348   if (!PALMetadata->setFromString(String))
5349     return Error(getLoc(), "invalid PAL metadata");
5350   return false;
5351 }
5352 
5353 /// Parse the assembler directive for old linear-format PAL metadata.
5354 bool AMDGPUAsmParser::ParseDirectivePALMetadata() {
5355   if (getSTI().getTargetTriple().getOS() != Triple::AMDPAL) {
5356     return Error(getLoc(),
5357                  (Twine(PALMD::AssemblerDirective) + Twine(" directive is "
5358                  "not available on non-amdpal OSes")).str());
5359   }
5360 
5361   auto PALMetadata = getTargetStreamer().getPALMetadata();
5362   PALMetadata->setLegacy();
5363   for (;;) {
5364     uint32_t Key, Value;
5365     if (ParseAsAbsoluteExpression(Key)) {
5366       return TokError(Twine("invalid value in ") +
5367                       Twine(PALMD::AssemblerDirective));
5368     }
5369     if (!trySkipToken(AsmToken::Comma)) {
5370       return TokError(Twine("expected an even number of values in ") +
5371                       Twine(PALMD::AssemblerDirective));
5372     }
5373     if (ParseAsAbsoluteExpression(Value)) {
5374       return TokError(Twine("invalid value in ") +
5375                       Twine(PALMD::AssemblerDirective));
5376     }
5377     PALMetadata->setRegister(Key, Value);
5378     if (!trySkipToken(AsmToken::Comma))
5379       break;
5380   }
5381   return false;
5382 }
5383 
5384 /// ParseDirectiveAMDGPULDS
5385 ///  ::= .amdgpu_lds identifier ',' size_expression [',' align_expression]
5386 bool AMDGPUAsmParser::ParseDirectiveAMDGPULDS() {
5387   if (getParser().checkForValidSection())
5388     return true;
5389 
5390   StringRef Name;
5391   SMLoc NameLoc = getLoc();
5392   if (getParser().parseIdentifier(Name))
5393     return TokError("expected identifier in directive");
5394 
5395   MCSymbol *Symbol = getContext().getOrCreateSymbol(Name);
5396   if (parseToken(AsmToken::Comma, "expected ','"))
5397     return true;
5398 
5399   unsigned LocalMemorySize = AMDGPU::IsaInfo::getLocalMemorySize(&getSTI());
5400 
5401   int64_t Size;
5402   SMLoc SizeLoc = getLoc();
5403   if (getParser().parseAbsoluteExpression(Size))
5404     return true;
5405   if (Size < 0)
5406     return Error(SizeLoc, "size must be non-negative");
5407   if (Size > LocalMemorySize)
5408     return Error(SizeLoc, "size is too large");
5409 
5410   int64_t Alignment = 4;
5411   if (trySkipToken(AsmToken::Comma)) {
5412     SMLoc AlignLoc = getLoc();
5413     if (getParser().parseAbsoluteExpression(Alignment))
5414       return true;
5415     if (Alignment < 0 || !isPowerOf2_64(Alignment))
5416       return Error(AlignLoc, "alignment must be a power of two");
5417 
5418     // Alignment larger than the size of LDS is possible in theory, as long
5419     // as the linker manages to place to symbol at address 0, but we do want
5420     // to make sure the alignment fits nicely into a 32-bit integer.
5421     if (Alignment >= 1u << 31)
5422       return Error(AlignLoc, "alignment is too large");
5423   }
5424 
5425   if (parseToken(AsmToken::EndOfStatement,
5426                  "unexpected token in '.amdgpu_lds' directive"))
5427     return true;
5428 
5429   Symbol->redefineIfPossible();
5430   if (!Symbol->isUndefined())
5431     return Error(NameLoc, "invalid symbol redefinition");
5432 
5433   getTargetStreamer().emitAMDGPULDS(Symbol, Size, Align(Alignment));
5434   return false;
5435 }
5436 
5437 bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) {
5438   StringRef IDVal = DirectiveID.getString();
5439 
5440   if (isHsaAbiVersion3AndAbove(&getSTI())) {
5441     if (IDVal == ".amdhsa_kernel")
5442      return ParseDirectiveAMDHSAKernel();
5443 
5444     // TODO: Restructure/combine with PAL metadata directive.
5445     if (IDVal == AMDGPU::HSAMD::V3::AssemblerDirectiveBegin)
5446       return ParseDirectiveHSAMetadata();
5447   } else {
5448     if (IDVal == ".hsa_code_object_version")
5449       return ParseDirectiveHSACodeObjectVersion();
5450 
5451     if (IDVal == ".hsa_code_object_isa")
5452       return ParseDirectiveHSACodeObjectISA();
5453 
5454     if (IDVal == ".amd_kernel_code_t")
5455       return ParseDirectiveAMDKernelCodeT();
5456 
5457     if (IDVal == ".amdgpu_hsa_kernel")
5458       return ParseDirectiveAMDGPUHsaKernel();
5459 
5460     if (IDVal == ".amd_amdgpu_isa")
5461       return ParseDirectiveISAVersion();
5462 
5463     if (IDVal == AMDGPU::HSAMD::AssemblerDirectiveBegin)
5464       return ParseDirectiveHSAMetadata();
5465   }
5466 
5467   if (IDVal == ".amdgcn_target")
5468     return ParseDirectiveAMDGCNTarget();
5469 
5470   if (IDVal == ".amdgpu_lds")
5471     return ParseDirectiveAMDGPULDS();
5472 
5473   if (IDVal == PALMD::AssemblerDirectiveBegin)
5474     return ParseDirectivePALMetadataBegin();
5475 
5476   if (IDVal == PALMD::AssemblerDirective)
5477     return ParseDirectivePALMetadata();
5478 
5479   return true;
5480 }
5481 
5482 bool AMDGPUAsmParser::subtargetHasRegister(const MCRegisterInfo &MRI,
5483                                            unsigned RegNo) {
5484 
5485   if (MRI.regsOverlap(AMDGPU::TTMP12_TTMP13_TTMP14_TTMP15, RegNo))
5486     return isGFX9Plus();
5487 
5488   // GFX10 has 2 more SGPRs 104 and 105.
5489   if (MRI.regsOverlap(AMDGPU::SGPR104_SGPR105, RegNo))
5490     return hasSGPR104_SGPR105();
5491 
5492   switch (RegNo) {
5493   case AMDGPU::SRC_SHARED_BASE:
5494   case AMDGPU::SRC_SHARED_LIMIT:
5495   case AMDGPU::SRC_PRIVATE_BASE:
5496   case AMDGPU::SRC_PRIVATE_LIMIT:
5497   case AMDGPU::SRC_POPS_EXITING_WAVE_ID:
5498     return isGFX9Plus();
5499   case AMDGPU::TBA:
5500   case AMDGPU::TBA_LO:
5501   case AMDGPU::TBA_HI:
5502   case AMDGPU::TMA:
5503   case AMDGPU::TMA_LO:
5504   case AMDGPU::TMA_HI:
5505     return !isGFX9Plus();
5506   case AMDGPU::XNACK_MASK:
5507   case AMDGPU::XNACK_MASK_LO:
5508   case AMDGPU::XNACK_MASK_HI:
5509     return (isVI() || isGFX9()) && getTargetStreamer().getTargetID()->isXnackSupported();
5510   case AMDGPU::SGPR_NULL:
5511     return isGFX10Plus();
5512   default:
5513     break;
5514   }
5515 
5516   if (isCI())
5517     return true;
5518 
5519   if (isSI() || isGFX10Plus()) {
5520     // No flat_scr on SI.
5521     // On GFX10 flat scratch is not a valid register operand and can only be
5522     // accessed with s_setreg/s_getreg.
5523     switch (RegNo) {
5524     case AMDGPU::FLAT_SCR:
5525     case AMDGPU::FLAT_SCR_LO:
5526     case AMDGPU::FLAT_SCR_HI:
5527       return false;
5528     default:
5529       return true;
5530     }
5531   }
5532 
5533   // VI only has 102 SGPRs, so make sure we aren't trying to use the 2 more that
5534   // SI/CI have.
5535   if (MRI.regsOverlap(AMDGPU::SGPR102_SGPR103, RegNo))
5536     return hasSGPR102_SGPR103();
5537 
5538   return true;
5539 }
5540 
5541 OperandMatchResultTy
5542 AMDGPUAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic,
5543                               OperandMode Mode) {
5544   // Try to parse with a custom parser
5545   OperandMatchResultTy ResTy = MatchOperandParserImpl(Operands, Mnemonic);
5546 
5547   // If we successfully parsed the operand or if there as an error parsing,
5548   // we are done.
5549   //
5550   // If we are parsing after we reach EndOfStatement then this means we
5551   // are appending default values to the Operands list.  This is only done
5552   // by custom parser, so we shouldn't continue on to the generic parsing.
5553   if (ResTy == MatchOperand_Success || ResTy == MatchOperand_ParseFail ||
5554       isToken(AsmToken::EndOfStatement))
5555     return ResTy;
5556 
5557   SMLoc RBraceLoc;
5558   SMLoc LBraceLoc = getLoc();
5559   if (Mode == OperandMode_NSA && trySkipToken(AsmToken::LBrac)) {
5560     unsigned Prefix = Operands.size();
5561 
5562     for (;;) {
5563       auto Loc = getLoc();
5564       ResTy = parseReg(Operands);
5565       if (ResTy == MatchOperand_NoMatch)
5566         Error(Loc, "expected a register");
5567       if (ResTy != MatchOperand_Success)
5568         return MatchOperand_ParseFail;
5569 
5570       RBraceLoc = getLoc();
5571       if (trySkipToken(AsmToken::RBrac))
5572         break;
5573 
5574       if (!skipToken(AsmToken::Comma,
5575                      "expected a comma or a closing square bracket")) {
5576         return MatchOperand_ParseFail;
5577       }
5578     }
5579 
5580     if (Operands.size() - Prefix > 1) {
5581       Operands.insert(Operands.begin() + Prefix,
5582                       AMDGPUOperand::CreateToken(this, "[", LBraceLoc));
5583       Operands.push_back(AMDGPUOperand::CreateToken(this, "]", RBraceLoc));
5584     }
5585 
5586     return MatchOperand_Success;
5587   }
5588 
5589   return parseRegOrImm(Operands);
5590 }
5591 
5592 StringRef AMDGPUAsmParser::parseMnemonicSuffix(StringRef Name) {
5593   // Clear any forced encodings from the previous instruction.
5594   setForcedEncodingSize(0);
5595   setForcedDPP(false);
5596   setForcedSDWA(false);
5597 
5598   if (Name.endswith("_e64")) {
5599     setForcedEncodingSize(64);
5600     return Name.substr(0, Name.size() - 4);
5601   } else if (Name.endswith("_e32")) {
5602     setForcedEncodingSize(32);
5603     return Name.substr(0, Name.size() - 4);
5604   } else if (Name.endswith("_dpp")) {
5605     setForcedDPP(true);
5606     return Name.substr(0, Name.size() - 4);
5607   } else if (Name.endswith("_sdwa")) {
5608     setForcedSDWA(true);
5609     return Name.substr(0, Name.size() - 5);
5610   }
5611   return Name;
5612 }
5613 
5614 bool AMDGPUAsmParser::ParseInstruction(ParseInstructionInfo &Info,
5615                                        StringRef Name,
5616                                        SMLoc NameLoc, OperandVector &Operands) {
5617   // Add the instruction mnemonic
5618   Name = parseMnemonicSuffix(Name);
5619   Operands.push_back(AMDGPUOperand::CreateToken(this, Name, NameLoc));
5620 
5621   bool IsMIMG = Name.startswith("image_");
5622 
5623   while (!trySkipToken(AsmToken::EndOfStatement)) {
5624     OperandMode Mode = OperandMode_Default;
5625     if (IsMIMG && isGFX10Plus() && Operands.size() == 2)
5626       Mode = OperandMode_NSA;
5627     CPolSeen = 0;
5628     OperandMatchResultTy Res = parseOperand(Operands, Name, Mode);
5629 
5630     if (Res != MatchOperand_Success) {
5631       checkUnsupportedInstruction(Name, NameLoc);
5632       if (!Parser.hasPendingError()) {
5633         // FIXME: use real operand location rather than the current location.
5634         StringRef Msg =
5635           (Res == MatchOperand_ParseFail) ? "failed parsing operand." :
5636                                             "not a valid operand.";
5637         Error(getLoc(), Msg);
5638       }
5639       while (!trySkipToken(AsmToken::EndOfStatement)) {
5640         lex();
5641       }
5642       return true;
5643     }
5644 
5645     // Eat the comma or space if there is one.
5646     trySkipToken(AsmToken::Comma);
5647   }
5648 
5649   return false;
5650 }
5651 
5652 //===----------------------------------------------------------------------===//
5653 // Utility functions
5654 //===----------------------------------------------------------------------===//
5655 
5656 OperandMatchResultTy
5657 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, int64_t &IntVal) {
5658 
5659   if (!trySkipId(Prefix, AsmToken::Colon))
5660     return MatchOperand_NoMatch;
5661 
5662   return parseExpr(IntVal) ? MatchOperand_Success : MatchOperand_ParseFail;
5663 }
5664 
5665 OperandMatchResultTy
5666 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
5667                                     AMDGPUOperand::ImmTy ImmTy,
5668                                     bool (*ConvertResult)(int64_t&)) {
5669   SMLoc S = getLoc();
5670   int64_t Value = 0;
5671 
5672   OperandMatchResultTy Res = parseIntWithPrefix(Prefix, Value);
5673   if (Res != MatchOperand_Success)
5674     return Res;
5675 
5676   if (ConvertResult && !ConvertResult(Value)) {
5677     Error(S, "invalid " + StringRef(Prefix) + " value.");
5678   }
5679 
5680   Operands.push_back(AMDGPUOperand::CreateImm(this, Value, S, ImmTy));
5681   return MatchOperand_Success;
5682 }
5683 
5684 OperandMatchResultTy
5685 AMDGPUAsmParser::parseOperandArrayWithPrefix(const char *Prefix,
5686                                              OperandVector &Operands,
5687                                              AMDGPUOperand::ImmTy ImmTy,
5688                                              bool (*ConvertResult)(int64_t&)) {
5689   SMLoc S = getLoc();
5690   if (!trySkipId(Prefix, AsmToken::Colon))
5691     return MatchOperand_NoMatch;
5692 
5693   if (!skipToken(AsmToken::LBrac, "expected a left square bracket"))
5694     return MatchOperand_ParseFail;
5695 
5696   unsigned Val = 0;
5697   const unsigned MaxSize = 4;
5698 
5699   // FIXME: How to verify the number of elements matches the number of src
5700   // operands?
5701   for (int I = 0; ; ++I) {
5702     int64_t Op;
5703     SMLoc Loc = getLoc();
5704     if (!parseExpr(Op))
5705       return MatchOperand_ParseFail;
5706 
5707     if (Op != 0 && Op != 1) {
5708       Error(Loc, "invalid " + StringRef(Prefix) + " value.");
5709       return MatchOperand_ParseFail;
5710     }
5711 
5712     Val |= (Op << I);
5713 
5714     if (trySkipToken(AsmToken::RBrac))
5715       break;
5716 
5717     if (I + 1 == MaxSize) {
5718       Error(getLoc(), "expected a closing square bracket");
5719       return MatchOperand_ParseFail;
5720     }
5721 
5722     if (!skipToken(AsmToken::Comma, "expected a comma"))
5723       return MatchOperand_ParseFail;
5724   }
5725 
5726   Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S, ImmTy));
5727   return MatchOperand_Success;
5728 }
5729 
5730 OperandMatchResultTy
5731 AMDGPUAsmParser::parseNamedBit(StringRef Name, OperandVector &Operands,
5732                                AMDGPUOperand::ImmTy ImmTy) {
5733   int64_t Bit;
5734   SMLoc S = getLoc();
5735 
5736   if (trySkipId(Name)) {
5737     Bit = 1;
5738   } else if (trySkipId("no", Name)) {
5739     Bit = 0;
5740   } else {
5741     return MatchOperand_NoMatch;
5742   }
5743 
5744   if (Name == "r128" && !hasMIMG_R128()) {
5745     Error(S, "r128 modifier is not supported on this GPU");
5746     return MatchOperand_ParseFail;
5747   }
5748   if (Name == "a16" && !isGFX9() && !hasGFX10A16()) {
5749     Error(S, "a16 modifier is not supported on this GPU");
5750     return MatchOperand_ParseFail;
5751   }
5752 
5753   if (isGFX9() && ImmTy == AMDGPUOperand::ImmTyA16)
5754     ImmTy = AMDGPUOperand::ImmTyR128A16;
5755 
5756   Operands.push_back(AMDGPUOperand::CreateImm(this, Bit, S, ImmTy));
5757   return MatchOperand_Success;
5758 }
5759 
5760 OperandMatchResultTy
5761 AMDGPUAsmParser::parseCPol(OperandVector &Operands) {
5762   unsigned CPolOn = 0;
5763   unsigned CPolOff = 0;
5764   SMLoc S = getLoc();
5765 
5766   StringRef Mnemo = ((AMDGPUOperand &)*Operands[0]).getToken();
5767   if (isGFX940() && !Mnemo.startswith("s_")) {
5768     if (trySkipId("sc0"))
5769       CPolOn = AMDGPU::CPol::SC0;
5770     else if (trySkipId("nosc0"))
5771       CPolOff = AMDGPU::CPol::SC0;
5772     else if (trySkipId("nt"))
5773       CPolOn = AMDGPU::CPol::NT;
5774     else if (trySkipId("nont"))
5775       CPolOff = AMDGPU::CPol::NT;
5776     else if (trySkipId("sc1"))
5777       CPolOn = AMDGPU::CPol::SC1;
5778     else if (trySkipId("nosc1"))
5779       CPolOff = AMDGPU::CPol::SC1;
5780     else
5781       return MatchOperand_NoMatch;
5782   }
5783   else if (trySkipId("glc"))
5784     CPolOn = AMDGPU::CPol::GLC;
5785   else if (trySkipId("noglc"))
5786     CPolOff = AMDGPU::CPol::GLC;
5787   else if (trySkipId("slc"))
5788     CPolOn = AMDGPU::CPol::SLC;
5789   else if (trySkipId("noslc"))
5790     CPolOff = AMDGPU::CPol::SLC;
5791   else if (trySkipId("dlc"))
5792     CPolOn = AMDGPU::CPol::DLC;
5793   else if (trySkipId("nodlc"))
5794     CPolOff = AMDGPU::CPol::DLC;
5795   else if (trySkipId("scc"))
5796     CPolOn = AMDGPU::CPol::SCC;
5797   else if (trySkipId("noscc"))
5798     CPolOff = AMDGPU::CPol::SCC;
5799   else
5800     return MatchOperand_NoMatch;
5801 
5802   if (!isGFX10Plus() && ((CPolOn | CPolOff) & AMDGPU::CPol::DLC)) {
5803     Error(S, "dlc modifier is not supported on this GPU");
5804     return MatchOperand_ParseFail;
5805   }
5806 
5807   if (!isGFX90A() && ((CPolOn | CPolOff) & AMDGPU::CPol::SCC)) {
5808     Error(S, "scc modifier is not supported on this GPU");
5809     return MatchOperand_ParseFail;
5810   }
5811 
5812   if (CPolSeen & (CPolOn | CPolOff)) {
5813     Error(S, "duplicate cache policy modifier");
5814     return MatchOperand_ParseFail;
5815   }
5816 
5817   CPolSeen |= (CPolOn | CPolOff);
5818 
5819   for (unsigned I = 1; I != Operands.size(); ++I) {
5820     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
5821     if (Op.isCPol()) {
5822       Op.setImm((Op.getImm() | CPolOn) & ~CPolOff);
5823       return MatchOperand_Success;
5824     }
5825   }
5826 
5827   Operands.push_back(AMDGPUOperand::CreateImm(this, CPolOn, S,
5828                                               AMDGPUOperand::ImmTyCPol));
5829 
5830   return MatchOperand_Success;
5831 }
5832 
5833 static void addOptionalImmOperand(
5834   MCInst& Inst, const OperandVector& Operands,
5835   AMDGPUAsmParser::OptionalImmIndexMap& OptionalIdx,
5836   AMDGPUOperand::ImmTy ImmT,
5837   int64_t Default = 0) {
5838   auto i = OptionalIdx.find(ImmT);
5839   if (i != OptionalIdx.end()) {
5840     unsigned Idx = i->second;
5841     ((AMDGPUOperand &)*Operands[Idx]).addImmOperands(Inst, 1);
5842   } else {
5843     Inst.addOperand(MCOperand::createImm(Default));
5844   }
5845 }
5846 
5847 OperandMatchResultTy
5848 AMDGPUAsmParser::parseStringWithPrefix(StringRef Prefix,
5849                                        StringRef &Value,
5850                                        SMLoc &StringLoc) {
5851   if (!trySkipId(Prefix, AsmToken::Colon))
5852     return MatchOperand_NoMatch;
5853 
5854   StringLoc = getLoc();
5855   return parseId(Value, "expected an identifier") ? MatchOperand_Success
5856                                                   : MatchOperand_ParseFail;
5857 }
5858 
5859 //===----------------------------------------------------------------------===//
5860 // MTBUF format
5861 //===----------------------------------------------------------------------===//
5862 
5863 bool AMDGPUAsmParser::tryParseFmt(const char *Pref,
5864                                   int64_t MaxVal,
5865                                   int64_t &Fmt) {
5866   int64_t Val;
5867   SMLoc Loc = getLoc();
5868 
5869   auto Res = parseIntWithPrefix(Pref, Val);
5870   if (Res == MatchOperand_ParseFail)
5871     return false;
5872   if (Res == MatchOperand_NoMatch)
5873     return true;
5874 
5875   if (Val < 0 || Val > MaxVal) {
5876     Error(Loc, Twine("out of range ", StringRef(Pref)));
5877     return false;
5878   }
5879 
5880   Fmt = Val;
5881   return true;
5882 }
5883 
5884 // dfmt and nfmt (in a tbuffer instruction) are parsed as one to allow their
5885 // values to live in a joint format operand in the MCInst encoding.
5886 OperandMatchResultTy
5887 AMDGPUAsmParser::parseDfmtNfmt(int64_t &Format) {
5888   using namespace llvm::AMDGPU::MTBUFFormat;
5889 
5890   int64_t Dfmt = DFMT_UNDEF;
5891   int64_t Nfmt = NFMT_UNDEF;
5892 
5893   // dfmt and nfmt can appear in either order, and each is optional.
5894   for (int I = 0; I < 2; ++I) {
5895     if (Dfmt == DFMT_UNDEF && !tryParseFmt("dfmt", DFMT_MAX, Dfmt))
5896       return MatchOperand_ParseFail;
5897 
5898     if (Nfmt == NFMT_UNDEF && !tryParseFmt("nfmt", NFMT_MAX, Nfmt)) {
5899       return MatchOperand_ParseFail;
5900     }
5901     // Skip optional comma between dfmt/nfmt
5902     // but guard against 2 commas following each other.
5903     if ((Dfmt == DFMT_UNDEF) != (Nfmt == NFMT_UNDEF) &&
5904         !peekToken().is(AsmToken::Comma)) {
5905       trySkipToken(AsmToken::Comma);
5906     }
5907   }
5908 
5909   if (Dfmt == DFMT_UNDEF && Nfmt == NFMT_UNDEF)
5910     return MatchOperand_NoMatch;
5911 
5912   Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt;
5913   Nfmt = (Nfmt == NFMT_UNDEF) ? NFMT_DEFAULT : Nfmt;
5914 
5915   Format = encodeDfmtNfmt(Dfmt, Nfmt);
5916   return MatchOperand_Success;
5917 }
5918 
5919 OperandMatchResultTy
5920 AMDGPUAsmParser::parseUfmt(int64_t &Format) {
5921   using namespace llvm::AMDGPU::MTBUFFormat;
5922 
5923   int64_t Fmt = UFMT_UNDEF;
5924 
5925   if (!tryParseFmt("format", UFMT_MAX, Fmt))
5926     return MatchOperand_ParseFail;
5927 
5928   if (Fmt == UFMT_UNDEF)
5929     return MatchOperand_NoMatch;
5930 
5931   Format = Fmt;
5932   return MatchOperand_Success;
5933 }
5934 
5935 bool AMDGPUAsmParser::matchDfmtNfmt(int64_t &Dfmt,
5936                                     int64_t &Nfmt,
5937                                     StringRef FormatStr,
5938                                     SMLoc Loc) {
5939   using namespace llvm::AMDGPU::MTBUFFormat;
5940   int64_t Format;
5941 
5942   Format = getDfmt(FormatStr);
5943   if (Format != DFMT_UNDEF) {
5944     Dfmt = Format;
5945     return true;
5946   }
5947 
5948   Format = getNfmt(FormatStr, getSTI());
5949   if (Format != NFMT_UNDEF) {
5950     Nfmt = Format;
5951     return true;
5952   }
5953 
5954   Error(Loc, "unsupported format");
5955   return false;
5956 }
5957 
5958 OperandMatchResultTy
5959 AMDGPUAsmParser::parseSymbolicSplitFormat(StringRef FormatStr,
5960                                           SMLoc FormatLoc,
5961                                           int64_t &Format) {
5962   using namespace llvm::AMDGPU::MTBUFFormat;
5963 
5964   int64_t Dfmt = DFMT_UNDEF;
5965   int64_t Nfmt = NFMT_UNDEF;
5966   if (!matchDfmtNfmt(Dfmt, Nfmt, FormatStr, FormatLoc))
5967     return MatchOperand_ParseFail;
5968 
5969   if (trySkipToken(AsmToken::Comma)) {
5970     StringRef Str;
5971     SMLoc Loc = getLoc();
5972     if (!parseId(Str, "expected a format string") ||
5973         !matchDfmtNfmt(Dfmt, Nfmt, Str, Loc)) {
5974       return MatchOperand_ParseFail;
5975     }
5976     if (Dfmt == DFMT_UNDEF) {
5977       Error(Loc, "duplicate numeric format");
5978       return MatchOperand_ParseFail;
5979     } else if (Nfmt == NFMT_UNDEF) {
5980       Error(Loc, "duplicate data format");
5981       return MatchOperand_ParseFail;
5982     }
5983   }
5984 
5985   Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt;
5986   Nfmt = (Nfmt == NFMT_UNDEF) ? NFMT_DEFAULT : Nfmt;
5987 
5988   if (isGFX10Plus()) {
5989     auto Ufmt = convertDfmtNfmt2Ufmt(Dfmt, Nfmt);
5990     if (Ufmt == UFMT_UNDEF) {
5991       Error(FormatLoc, "unsupported format");
5992       return MatchOperand_ParseFail;
5993     }
5994     Format = Ufmt;
5995   } else {
5996     Format = encodeDfmtNfmt(Dfmt, Nfmt);
5997   }
5998 
5999   return MatchOperand_Success;
6000 }
6001 
6002 OperandMatchResultTy
6003 AMDGPUAsmParser::parseSymbolicUnifiedFormat(StringRef FormatStr,
6004                                             SMLoc Loc,
6005                                             int64_t &Format) {
6006   using namespace llvm::AMDGPU::MTBUFFormat;
6007 
6008   auto Id = getUnifiedFormat(FormatStr);
6009   if (Id == UFMT_UNDEF)
6010     return MatchOperand_NoMatch;
6011 
6012   if (!isGFX10Plus()) {
6013     Error(Loc, "unified format is not supported on this GPU");
6014     return MatchOperand_ParseFail;
6015   }
6016 
6017   Format = Id;
6018   return MatchOperand_Success;
6019 }
6020 
6021 OperandMatchResultTy
6022 AMDGPUAsmParser::parseNumericFormat(int64_t &Format) {
6023   using namespace llvm::AMDGPU::MTBUFFormat;
6024   SMLoc Loc = getLoc();
6025 
6026   if (!parseExpr(Format))
6027     return MatchOperand_ParseFail;
6028   if (!isValidFormatEncoding(Format, getSTI())) {
6029     Error(Loc, "out of range format");
6030     return MatchOperand_ParseFail;
6031   }
6032 
6033   return MatchOperand_Success;
6034 }
6035 
6036 OperandMatchResultTy
6037 AMDGPUAsmParser::parseSymbolicOrNumericFormat(int64_t &Format) {
6038   using namespace llvm::AMDGPU::MTBUFFormat;
6039 
6040   if (!trySkipId("format", AsmToken::Colon))
6041     return MatchOperand_NoMatch;
6042 
6043   if (trySkipToken(AsmToken::LBrac)) {
6044     StringRef FormatStr;
6045     SMLoc Loc = getLoc();
6046     if (!parseId(FormatStr, "expected a format string"))
6047       return MatchOperand_ParseFail;
6048 
6049     auto Res = parseSymbolicUnifiedFormat(FormatStr, Loc, Format);
6050     if (Res == MatchOperand_NoMatch)
6051       Res = parseSymbolicSplitFormat(FormatStr, Loc, Format);
6052     if (Res != MatchOperand_Success)
6053       return Res;
6054 
6055     if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
6056       return MatchOperand_ParseFail;
6057 
6058     return MatchOperand_Success;
6059   }
6060 
6061   return parseNumericFormat(Format);
6062 }
6063 
6064 OperandMatchResultTy
6065 AMDGPUAsmParser::parseFORMAT(OperandVector &Operands) {
6066   using namespace llvm::AMDGPU::MTBUFFormat;
6067 
6068   int64_t Format = getDefaultFormatEncoding(getSTI());
6069   OperandMatchResultTy Res;
6070   SMLoc Loc = getLoc();
6071 
6072   // Parse legacy format syntax.
6073   Res = isGFX10Plus() ? parseUfmt(Format) : parseDfmtNfmt(Format);
6074   if (Res == MatchOperand_ParseFail)
6075     return Res;
6076 
6077   bool FormatFound = (Res == MatchOperand_Success);
6078 
6079   Operands.push_back(
6080     AMDGPUOperand::CreateImm(this, Format, Loc, AMDGPUOperand::ImmTyFORMAT));
6081 
6082   if (FormatFound)
6083     trySkipToken(AsmToken::Comma);
6084 
6085   if (isToken(AsmToken::EndOfStatement)) {
6086     // We are expecting an soffset operand,
6087     // but let matcher handle the error.
6088     return MatchOperand_Success;
6089   }
6090 
6091   // Parse soffset.
6092   Res = parseRegOrImm(Operands);
6093   if (Res != MatchOperand_Success)
6094     return Res;
6095 
6096   trySkipToken(AsmToken::Comma);
6097 
6098   if (!FormatFound) {
6099     Res = parseSymbolicOrNumericFormat(Format);
6100     if (Res == MatchOperand_ParseFail)
6101       return Res;
6102     if (Res == MatchOperand_Success) {
6103       auto Size = Operands.size();
6104       AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands[Size - 2]);
6105       assert(Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyFORMAT);
6106       Op.setImm(Format);
6107     }
6108     return MatchOperand_Success;
6109   }
6110 
6111   if (isId("format") && peekToken().is(AsmToken::Colon)) {
6112     Error(getLoc(), "duplicate format");
6113     return MatchOperand_ParseFail;
6114   }
6115   return MatchOperand_Success;
6116 }
6117 
6118 //===----------------------------------------------------------------------===//
6119 // ds
6120 //===----------------------------------------------------------------------===//
6121 
6122 void AMDGPUAsmParser::cvtDSOffset01(MCInst &Inst,
6123                                     const OperandVector &Operands) {
6124   OptionalImmIndexMap OptionalIdx;
6125 
6126   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
6127     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
6128 
6129     // Add the register arguments
6130     if (Op.isReg()) {
6131       Op.addRegOperands(Inst, 1);
6132       continue;
6133     }
6134 
6135     // Handle optional arguments
6136     OptionalIdx[Op.getImmTy()] = i;
6137   }
6138 
6139   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset0);
6140   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset1);
6141   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS);
6142 
6143   Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0
6144 }
6145 
6146 void AMDGPUAsmParser::cvtDSImpl(MCInst &Inst, const OperandVector &Operands,
6147                                 bool IsGdsHardcoded) {
6148   OptionalImmIndexMap OptionalIdx;
6149 
6150   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
6151     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
6152 
6153     // Add the register arguments
6154     if (Op.isReg()) {
6155       Op.addRegOperands(Inst, 1);
6156       continue;
6157     }
6158 
6159     if (Op.isToken() && Op.getToken() == "gds") {
6160       IsGdsHardcoded = true;
6161       continue;
6162     }
6163 
6164     // Handle optional arguments
6165     OptionalIdx[Op.getImmTy()] = i;
6166   }
6167 
6168   AMDGPUOperand::ImmTy OffsetType =
6169     (Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_gfx10 ||
6170      Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_gfx6_gfx7 ||
6171      Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_vi) ? AMDGPUOperand::ImmTySwizzle :
6172                                                       AMDGPUOperand::ImmTyOffset;
6173 
6174   addOptionalImmOperand(Inst, Operands, OptionalIdx, OffsetType);
6175 
6176   if (!IsGdsHardcoded) {
6177     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS);
6178   }
6179   Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0
6180 }
6181 
6182 void AMDGPUAsmParser::cvtExp(MCInst &Inst, const OperandVector &Operands) {
6183   OptionalImmIndexMap OptionalIdx;
6184 
6185   unsigned OperandIdx[4];
6186   unsigned EnMask = 0;
6187   int SrcIdx = 0;
6188 
6189   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
6190     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
6191 
6192     // Add the register arguments
6193     if (Op.isReg()) {
6194       assert(SrcIdx < 4);
6195       OperandIdx[SrcIdx] = Inst.size();
6196       Op.addRegOperands(Inst, 1);
6197       ++SrcIdx;
6198       continue;
6199     }
6200 
6201     if (Op.isOff()) {
6202       assert(SrcIdx < 4);
6203       OperandIdx[SrcIdx] = Inst.size();
6204       Inst.addOperand(MCOperand::createReg(AMDGPU::NoRegister));
6205       ++SrcIdx;
6206       continue;
6207     }
6208 
6209     if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyExpTgt) {
6210       Op.addImmOperands(Inst, 1);
6211       continue;
6212     }
6213 
6214     if (Op.isToken() && Op.getToken() == "done")
6215       continue;
6216 
6217     // Handle optional arguments
6218     OptionalIdx[Op.getImmTy()] = i;
6219   }
6220 
6221   assert(SrcIdx == 4);
6222 
6223   bool Compr = false;
6224   if (OptionalIdx.find(AMDGPUOperand::ImmTyExpCompr) != OptionalIdx.end()) {
6225     Compr = true;
6226     Inst.getOperand(OperandIdx[1]) = Inst.getOperand(OperandIdx[2]);
6227     Inst.getOperand(OperandIdx[2]).setReg(AMDGPU::NoRegister);
6228     Inst.getOperand(OperandIdx[3]).setReg(AMDGPU::NoRegister);
6229   }
6230 
6231   for (auto i = 0; i < SrcIdx; ++i) {
6232     if (Inst.getOperand(OperandIdx[i]).getReg() != AMDGPU::NoRegister) {
6233       EnMask |= Compr? (0x3 << i * 2) : (0x1 << i);
6234     }
6235   }
6236 
6237   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpVM);
6238   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpCompr);
6239 
6240   Inst.addOperand(MCOperand::createImm(EnMask));
6241 }
6242 
6243 //===----------------------------------------------------------------------===//
6244 // s_waitcnt
6245 //===----------------------------------------------------------------------===//
6246 
6247 static bool
6248 encodeCnt(
6249   const AMDGPU::IsaVersion ISA,
6250   int64_t &IntVal,
6251   int64_t CntVal,
6252   bool Saturate,
6253   unsigned (*encode)(const IsaVersion &Version, unsigned, unsigned),
6254   unsigned (*decode)(const IsaVersion &Version, unsigned))
6255 {
6256   bool Failed = false;
6257 
6258   IntVal = encode(ISA, IntVal, CntVal);
6259   if (CntVal != decode(ISA, IntVal)) {
6260     if (Saturate) {
6261       IntVal = encode(ISA, IntVal, -1);
6262     } else {
6263       Failed = true;
6264     }
6265   }
6266   return Failed;
6267 }
6268 
6269 bool AMDGPUAsmParser::parseCnt(int64_t &IntVal) {
6270 
6271   SMLoc CntLoc = getLoc();
6272   StringRef CntName = getTokenStr();
6273 
6274   if (!skipToken(AsmToken::Identifier, "expected a counter name") ||
6275       !skipToken(AsmToken::LParen, "expected a left parenthesis"))
6276     return false;
6277 
6278   int64_t CntVal;
6279   SMLoc ValLoc = getLoc();
6280   if (!parseExpr(CntVal))
6281     return false;
6282 
6283   AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
6284 
6285   bool Failed = true;
6286   bool Sat = CntName.endswith("_sat");
6287 
6288   if (CntName == "vmcnt" || CntName == "vmcnt_sat") {
6289     Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeVmcnt, decodeVmcnt);
6290   } else if (CntName == "expcnt" || CntName == "expcnt_sat") {
6291     Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeExpcnt, decodeExpcnt);
6292   } else if (CntName == "lgkmcnt" || CntName == "lgkmcnt_sat") {
6293     Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeLgkmcnt, decodeLgkmcnt);
6294   } else {
6295     Error(CntLoc, "invalid counter name " + CntName);
6296     return false;
6297   }
6298 
6299   if (Failed) {
6300     Error(ValLoc, "too large value for " + CntName);
6301     return false;
6302   }
6303 
6304   if (!skipToken(AsmToken::RParen, "expected a closing parenthesis"))
6305     return false;
6306 
6307   if (trySkipToken(AsmToken::Amp) || trySkipToken(AsmToken::Comma)) {
6308     if (isToken(AsmToken::EndOfStatement)) {
6309       Error(getLoc(), "expected a counter name");
6310       return false;
6311     }
6312   }
6313 
6314   return true;
6315 }
6316 
6317 OperandMatchResultTy
6318 AMDGPUAsmParser::parseSWaitCntOps(OperandVector &Operands) {
6319   AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
6320   int64_t Waitcnt = getWaitcntBitMask(ISA);
6321   SMLoc S = getLoc();
6322 
6323   if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) {
6324     while (!isToken(AsmToken::EndOfStatement)) {
6325       if (!parseCnt(Waitcnt))
6326         return MatchOperand_ParseFail;
6327     }
6328   } else {
6329     if (!parseExpr(Waitcnt))
6330       return MatchOperand_ParseFail;
6331   }
6332 
6333   Operands.push_back(AMDGPUOperand::CreateImm(this, Waitcnt, S));
6334   return MatchOperand_Success;
6335 }
6336 
6337 bool
6338 AMDGPUOperand::isSWaitCnt() const {
6339   return isImm();
6340 }
6341 
6342 //===----------------------------------------------------------------------===//
6343 // DepCtr
6344 //===----------------------------------------------------------------------===//
6345 
6346 void AMDGPUAsmParser::depCtrError(SMLoc Loc, int ErrorId,
6347                                   StringRef DepCtrName) {
6348   switch (ErrorId) {
6349   case OPR_ID_UNKNOWN:
6350     Error(Loc, Twine("invalid counter name ", DepCtrName));
6351     return;
6352   case OPR_ID_UNSUPPORTED:
6353     Error(Loc, Twine(DepCtrName, " is not supported on this GPU"));
6354     return;
6355   case OPR_ID_DUPLICATE:
6356     Error(Loc, Twine("duplicate counter name ", DepCtrName));
6357     return;
6358   case OPR_VAL_INVALID:
6359     Error(Loc, Twine("invalid value for ", DepCtrName));
6360     return;
6361   default:
6362     assert(false);
6363   }
6364 }
6365 
6366 bool AMDGPUAsmParser::parseDepCtr(int64_t &DepCtr, unsigned &UsedOprMask) {
6367 
6368   using namespace llvm::AMDGPU::DepCtr;
6369 
6370   SMLoc DepCtrLoc = getLoc();
6371   StringRef DepCtrName = getTokenStr();
6372 
6373   if (!skipToken(AsmToken::Identifier, "expected a counter name") ||
6374       !skipToken(AsmToken::LParen, "expected a left parenthesis"))
6375     return false;
6376 
6377   int64_t ExprVal;
6378   if (!parseExpr(ExprVal))
6379     return false;
6380 
6381   unsigned PrevOprMask = UsedOprMask;
6382   int CntVal = encodeDepCtr(DepCtrName, ExprVal, UsedOprMask, getSTI());
6383 
6384   if (CntVal < 0) {
6385     depCtrError(DepCtrLoc, CntVal, DepCtrName);
6386     return false;
6387   }
6388 
6389   if (!skipToken(AsmToken::RParen, "expected a closing parenthesis"))
6390     return false;
6391 
6392   if (trySkipToken(AsmToken::Amp) || trySkipToken(AsmToken::Comma)) {
6393     if (isToken(AsmToken::EndOfStatement)) {
6394       Error(getLoc(), "expected a counter name");
6395       return false;
6396     }
6397   }
6398 
6399   unsigned CntValMask = PrevOprMask ^ UsedOprMask;
6400   DepCtr = (DepCtr & ~CntValMask) | CntVal;
6401   return true;
6402 }
6403 
6404 OperandMatchResultTy AMDGPUAsmParser::parseDepCtrOps(OperandVector &Operands) {
6405   using namespace llvm::AMDGPU::DepCtr;
6406 
6407   int64_t DepCtr = getDefaultDepCtrEncoding(getSTI());
6408   SMLoc Loc = getLoc();
6409 
6410   if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) {
6411     unsigned UsedOprMask = 0;
6412     while (!isToken(AsmToken::EndOfStatement)) {
6413       if (!parseDepCtr(DepCtr, UsedOprMask))
6414         return MatchOperand_ParseFail;
6415     }
6416   } else {
6417     if (!parseExpr(DepCtr))
6418       return MatchOperand_ParseFail;
6419   }
6420 
6421   Operands.push_back(AMDGPUOperand::CreateImm(this, DepCtr, Loc));
6422   return MatchOperand_Success;
6423 }
6424 
6425 bool AMDGPUOperand::isDepCtr() const { return isS16Imm(); }
6426 
6427 //===----------------------------------------------------------------------===//
6428 // hwreg
6429 //===----------------------------------------------------------------------===//
6430 
6431 bool
6432 AMDGPUAsmParser::parseHwregBody(OperandInfoTy &HwReg,
6433                                 OperandInfoTy &Offset,
6434                                 OperandInfoTy &Width) {
6435   using namespace llvm::AMDGPU::Hwreg;
6436 
6437   // The register may be specified by name or using a numeric code
6438   HwReg.Loc = getLoc();
6439   if (isToken(AsmToken::Identifier) &&
6440       (HwReg.Id = getHwregId(getTokenStr(), getSTI())) != OPR_ID_UNKNOWN) {
6441     HwReg.IsSymbolic = true;
6442     lex(); // skip register name
6443   } else if (!parseExpr(HwReg.Id, "a register name")) {
6444     return false;
6445   }
6446 
6447   if (trySkipToken(AsmToken::RParen))
6448     return true;
6449 
6450   // parse optional params
6451   if (!skipToken(AsmToken::Comma, "expected a comma or a closing parenthesis"))
6452     return false;
6453 
6454   Offset.Loc = getLoc();
6455   if (!parseExpr(Offset.Id))
6456     return false;
6457 
6458   if (!skipToken(AsmToken::Comma, "expected a comma"))
6459     return false;
6460 
6461   Width.Loc = getLoc();
6462   return parseExpr(Width.Id) &&
6463          skipToken(AsmToken::RParen, "expected a closing parenthesis");
6464 }
6465 
6466 bool
6467 AMDGPUAsmParser::validateHwreg(const OperandInfoTy &HwReg,
6468                                const OperandInfoTy &Offset,
6469                                const OperandInfoTy &Width) {
6470 
6471   using namespace llvm::AMDGPU::Hwreg;
6472 
6473   if (HwReg.IsSymbolic) {
6474     if (HwReg.Id == OPR_ID_UNSUPPORTED) {
6475       Error(HwReg.Loc,
6476             "specified hardware register is not supported on this GPU");
6477       return false;
6478     }
6479   } else {
6480     if (!isValidHwreg(HwReg.Id)) {
6481       Error(HwReg.Loc,
6482             "invalid code of hardware register: only 6-bit values are legal");
6483       return false;
6484     }
6485   }
6486   if (!isValidHwregOffset(Offset.Id)) {
6487     Error(Offset.Loc, "invalid bit offset: only 5-bit values are legal");
6488     return false;
6489   }
6490   if (!isValidHwregWidth(Width.Id)) {
6491     Error(Width.Loc,
6492           "invalid bitfield width: only values from 1 to 32 are legal");
6493     return false;
6494   }
6495   return true;
6496 }
6497 
6498 OperandMatchResultTy
6499 AMDGPUAsmParser::parseHwreg(OperandVector &Operands) {
6500   using namespace llvm::AMDGPU::Hwreg;
6501 
6502   int64_t ImmVal = 0;
6503   SMLoc Loc = getLoc();
6504 
6505   if (trySkipId("hwreg", AsmToken::LParen)) {
6506     OperandInfoTy HwReg(OPR_ID_UNKNOWN);
6507     OperandInfoTy Offset(OFFSET_DEFAULT_);
6508     OperandInfoTy Width(WIDTH_DEFAULT_);
6509     if (parseHwregBody(HwReg, Offset, Width) &&
6510         validateHwreg(HwReg, Offset, Width)) {
6511       ImmVal = encodeHwreg(HwReg.Id, Offset.Id, Width.Id);
6512     } else {
6513       return MatchOperand_ParseFail;
6514     }
6515   } else if (parseExpr(ImmVal, "a hwreg macro")) {
6516     if (ImmVal < 0 || !isUInt<16>(ImmVal)) {
6517       Error(Loc, "invalid immediate: only 16-bit values are legal");
6518       return MatchOperand_ParseFail;
6519     }
6520   } else {
6521     return MatchOperand_ParseFail;
6522   }
6523 
6524   Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTyHwreg));
6525   return MatchOperand_Success;
6526 }
6527 
6528 bool AMDGPUOperand::isHwreg() const {
6529   return isImmTy(ImmTyHwreg);
6530 }
6531 
6532 //===----------------------------------------------------------------------===//
6533 // sendmsg
6534 //===----------------------------------------------------------------------===//
6535 
6536 bool
6537 AMDGPUAsmParser::parseSendMsgBody(OperandInfoTy &Msg,
6538                                   OperandInfoTy &Op,
6539                                   OperandInfoTy &Stream) {
6540   using namespace llvm::AMDGPU::SendMsg;
6541 
6542   Msg.Loc = getLoc();
6543   if (isToken(AsmToken::Identifier) &&
6544       (Msg.Id = getMsgId(getTokenStr(), getSTI())) != OPR_ID_UNKNOWN) {
6545     Msg.IsSymbolic = true;
6546     lex(); // skip message name
6547   } else if (!parseExpr(Msg.Id, "a message name")) {
6548     return false;
6549   }
6550 
6551   if (trySkipToken(AsmToken::Comma)) {
6552     Op.IsDefined = true;
6553     Op.Loc = getLoc();
6554     if (isToken(AsmToken::Identifier) &&
6555         (Op.Id = getMsgOpId(Msg.Id, getTokenStr())) >= 0) {
6556       lex(); // skip operation name
6557     } else if (!parseExpr(Op.Id, "an operation name")) {
6558       return false;
6559     }
6560 
6561     if (trySkipToken(AsmToken::Comma)) {
6562       Stream.IsDefined = true;
6563       Stream.Loc = getLoc();
6564       if (!parseExpr(Stream.Id))
6565         return false;
6566     }
6567   }
6568 
6569   return skipToken(AsmToken::RParen, "expected a closing parenthesis");
6570 }
6571 
6572 bool
6573 AMDGPUAsmParser::validateSendMsg(const OperandInfoTy &Msg,
6574                                  const OperandInfoTy &Op,
6575                                  const OperandInfoTy &Stream) {
6576   using namespace llvm::AMDGPU::SendMsg;
6577 
6578   // Validation strictness depends on whether message is specified
6579   // in a symbolic or in a numeric form. In the latter case
6580   // only encoding possibility is checked.
6581   bool Strict = Msg.IsSymbolic;
6582 
6583   if (Strict) {
6584     if (Msg.Id == OPR_ID_UNSUPPORTED) {
6585       Error(Msg.Loc, "specified message id is not supported on this GPU");
6586       return false;
6587     }
6588   } else {
6589     if (!isValidMsgId(Msg.Id)) {
6590       Error(Msg.Loc, "invalid message id");
6591       return false;
6592     }
6593   }
6594   if (Strict && (msgRequiresOp(Msg.Id) != Op.IsDefined)) {
6595     if (Op.IsDefined) {
6596       Error(Op.Loc, "message does not support operations");
6597     } else {
6598       Error(Msg.Loc, "missing message operation");
6599     }
6600     return false;
6601   }
6602   if (!isValidMsgOp(Msg.Id, Op.Id, getSTI(), Strict)) {
6603     Error(Op.Loc, "invalid operation id");
6604     return false;
6605   }
6606   if (Strict && !msgSupportsStream(Msg.Id, Op.Id) && Stream.IsDefined) {
6607     Error(Stream.Loc, "message operation does not support streams");
6608     return false;
6609   }
6610   if (!isValidMsgStream(Msg.Id, Op.Id, Stream.Id, getSTI(), Strict)) {
6611     Error(Stream.Loc, "invalid message stream id");
6612     return false;
6613   }
6614   return true;
6615 }
6616 
6617 OperandMatchResultTy
6618 AMDGPUAsmParser::parseSendMsgOp(OperandVector &Operands) {
6619   using namespace llvm::AMDGPU::SendMsg;
6620 
6621   int64_t ImmVal = 0;
6622   SMLoc Loc = getLoc();
6623 
6624   if (trySkipId("sendmsg", AsmToken::LParen)) {
6625     OperandInfoTy Msg(OPR_ID_UNKNOWN);
6626     OperandInfoTy Op(OP_NONE_);
6627     OperandInfoTy Stream(STREAM_ID_NONE_);
6628     if (parseSendMsgBody(Msg, Op, Stream) &&
6629         validateSendMsg(Msg, Op, Stream)) {
6630       ImmVal = encodeMsg(Msg.Id, Op.Id, Stream.Id);
6631     } else {
6632       return MatchOperand_ParseFail;
6633     }
6634   } else if (parseExpr(ImmVal, "a sendmsg macro")) {
6635     if (ImmVal < 0 || !isUInt<16>(ImmVal)) {
6636       Error(Loc, "invalid immediate: only 16-bit values are legal");
6637       return MatchOperand_ParseFail;
6638     }
6639   } else {
6640     return MatchOperand_ParseFail;
6641   }
6642 
6643   Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTySendMsg));
6644   return MatchOperand_Success;
6645 }
6646 
6647 bool AMDGPUOperand::isSendMsg() const {
6648   return isImmTy(ImmTySendMsg);
6649 }
6650 
6651 //===----------------------------------------------------------------------===//
6652 // v_interp
6653 //===----------------------------------------------------------------------===//
6654 
6655 OperandMatchResultTy AMDGPUAsmParser::parseInterpSlot(OperandVector &Operands) {
6656   StringRef Str;
6657   SMLoc S = getLoc();
6658 
6659   if (!parseId(Str))
6660     return MatchOperand_NoMatch;
6661 
6662   int Slot = StringSwitch<int>(Str)
6663     .Case("p10", 0)
6664     .Case("p20", 1)
6665     .Case("p0", 2)
6666     .Default(-1);
6667 
6668   if (Slot == -1) {
6669     Error(S, "invalid interpolation slot");
6670     return MatchOperand_ParseFail;
6671   }
6672 
6673   Operands.push_back(AMDGPUOperand::CreateImm(this, Slot, S,
6674                                               AMDGPUOperand::ImmTyInterpSlot));
6675   return MatchOperand_Success;
6676 }
6677 
6678 OperandMatchResultTy AMDGPUAsmParser::parseInterpAttr(OperandVector &Operands) {
6679   StringRef Str;
6680   SMLoc S = getLoc();
6681 
6682   if (!parseId(Str))
6683     return MatchOperand_NoMatch;
6684 
6685   if (!Str.startswith("attr")) {
6686     Error(S, "invalid interpolation attribute");
6687     return MatchOperand_ParseFail;
6688   }
6689 
6690   StringRef Chan = Str.take_back(2);
6691   int AttrChan = StringSwitch<int>(Chan)
6692     .Case(".x", 0)
6693     .Case(".y", 1)
6694     .Case(".z", 2)
6695     .Case(".w", 3)
6696     .Default(-1);
6697   if (AttrChan == -1) {
6698     Error(S, "invalid or missing interpolation attribute channel");
6699     return MatchOperand_ParseFail;
6700   }
6701 
6702   Str = Str.drop_back(2).drop_front(4);
6703 
6704   uint8_t Attr;
6705   if (Str.getAsInteger(10, Attr)) {
6706     Error(S, "invalid or missing interpolation attribute number");
6707     return MatchOperand_ParseFail;
6708   }
6709 
6710   if (Attr > 63) {
6711     Error(S, "out of bounds interpolation attribute number");
6712     return MatchOperand_ParseFail;
6713   }
6714 
6715   SMLoc SChan = SMLoc::getFromPointer(Chan.data());
6716 
6717   Operands.push_back(AMDGPUOperand::CreateImm(this, Attr, S,
6718                                               AMDGPUOperand::ImmTyInterpAttr));
6719   Operands.push_back(AMDGPUOperand::CreateImm(this, AttrChan, SChan,
6720                                               AMDGPUOperand::ImmTyAttrChan));
6721   return MatchOperand_Success;
6722 }
6723 
6724 //===----------------------------------------------------------------------===//
6725 // exp
6726 //===----------------------------------------------------------------------===//
6727 
6728 OperandMatchResultTy AMDGPUAsmParser::parseExpTgt(OperandVector &Operands) {
6729   using namespace llvm::AMDGPU::Exp;
6730 
6731   StringRef Str;
6732   SMLoc S = getLoc();
6733 
6734   if (!parseId(Str))
6735     return MatchOperand_NoMatch;
6736 
6737   unsigned Id = getTgtId(Str);
6738   if (Id == ET_INVALID || !isSupportedTgtId(Id, getSTI())) {
6739     Error(S, (Id == ET_INVALID) ?
6740                 "invalid exp target" :
6741                 "exp target is not supported on this GPU");
6742     return MatchOperand_ParseFail;
6743   }
6744 
6745   Operands.push_back(AMDGPUOperand::CreateImm(this, Id, S,
6746                                               AMDGPUOperand::ImmTyExpTgt));
6747   return MatchOperand_Success;
6748 }
6749 
6750 //===----------------------------------------------------------------------===//
6751 // parser helpers
6752 //===----------------------------------------------------------------------===//
6753 
6754 bool
6755 AMDGPUAsmParser::isId(const AsmToken &Token, const StringRef Id) const {
6756   return Token.is(AsmToken::Identifier) && Token.getString() == Id;
6757 }
6758 
6759 bool
6760 AMDGPUAsmParser::isId(const StringRef Id) const {
6761   return isId(getToken(), Id);
6762 }
6763 
6764 bool
6765 AMDGPUAsmParser::isToken(const AsmToken::TokenKind Kind) const {
6766   return getTokenKind() == Kind;
6767 }
6768 
6769 bool
6770 AMDGPUAsmParser::trySkipId(const StringRef Id) {
6771   if (isId(Id)) {
6772     lex();
6773     return true;
6774   }
6775   return false;
6776 }
6777 
6778 bool
6779 AMDGPUAsmParser::trySkipId(const StringRef Pref, const StringRef Id) {
6780   if (isToken(AsmToken::Identifier)) {
6781     StringRef Tok = getTokenStr();
6782     if (Tok.startswith(Pref) && Tok.drop_front(Pref.size()) == Id) {
6783       lex();
6784       return true;
6785     }
6786   }
6787   return false;
6788 }
6789 
6790 bool
6791 AMDGPUAsmParser::trySkipId(const StringRef Id, const AsmToken::TokenKind Kind) {
6792   if (isId(Id) && peekToken().is(Kind)) {
6793     lex();
6794     lex();
6795     return true;
6796   }
6797   return false;
6798 }
6799 
6800 bool
6801 AMDGPUAsmParser::trySkipToken(const AsmToken::TokenKind Kind) {
6802   if (isToken(Kind)) {
6803     lex();
6804     return true;
6805   }
6806   return false;
6807 }
6808 
6809 bool
6810 AMDGPUAsmParser::skipToken(const AsmToken::TokenKind Kind,
6811                            const StringRef ErrMsg) {
6812   if (!trySkipToken(Kind)) {
6813     Error(getLoc(), ErrMsg);
6814     return false;
6815   }
6816   return true;
6817 }
6818 
6819 bool
6820 AMDGPUAsmParser::parseExpr(int64_t &Imm, StringRef Expected) {
6821   SMLoc S = getLoc();
6822 
6823   const MCExpr *Expr;
6824   if (Parser.parseExpression(Expr))
6825     return false;
6826 
6827   if (Expr->evaluateAsAbsolute(Imm))
6828     return true;
6829 
6830   if (Expected.empty()) {
6831     Error(S, "expected absolute expression");
6832   } else {
6833     Error(S, Twine("expected ", Expected) +
6834              Twine(" or an absolute expression"));
6835   }
6836   return false;
6837 }
6838 
6839 bool
6840 AMDGPUAsmParser::parseExpr(OperandVector &Operands) {
6841   SMLoc S = getLoc();
6842 
6843   const MCExpr *Expr;
6844   if (Parser.parseExpression(Expr))
6845     return false;
6846 
6847   int64_t IntVal;
6848   if (Expr->evaluateAsAbsolute(IntVal)) {
6849     Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S));
6850   } else {
6851     Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S));
6852   }
6853   return true;
6854 }
6855 
6856 bool
6857 AMDGPUAsmParser::parseString(StringRef &Val, const StringRef ErrMsg) {
6858   if (isToken(AsmToken::String)) {
6859     Val = getToken().getStringContents();
6860     lex();
6861     return true;
6862   } else {
6863     Error(getLoc(), ErrMsg);
6864     return false;
6865   }
6866 }
6867 
6868 bool
6869 AMDGPUAsmParser::parseId(StringRef &Val, const StringRef ErrMsg) {
6870   if (isToken(AsmToken::Identifier)) {
6871     Val = getTokenStr();
6872     lex();
6873     return true;
6874   } else {
6875     if (!ErrMsg.empty())
6876       Error(getLoc(), ErrMsg);
6877     return false;
6878   }
6879 }
6880 
6881 AsmToken
6882 AMDGPUAsmParser::getToken() const {
6883   return Parser.getTok();
6884 }
6885 
6886 AsmToken
6887 AMDGPUAsmParser::peekToken() {
6888   return isToken(AsmToken::EndOfStatement) ? getToken() : getLexer().peekTok();
6889 }
6890 
6891 void
6892 AMDGPUAsmParser::peekTokens(MutableArrayRef<AsmToken> Tokens) {
6893   auto TokCount = getLexer().peekTokens(Tokens);
6894 
6895   for (auto Idx = TokCount; Idx < Tokens.size(); ++Idx)
6896     Tokens[Idx] = AsmToken(AsmToken::Error, "");
6897 }
6898 
6899 AsmToken::TokenKind
6900 AMDGPUAsmParser::getTokenKind() const {
6901   return getLexer().getKind();
6902 }
6903 
6904 SMLoc
6905 AMDGPUAsmParser::getLoc() const {
6906   return getToken().getLoc();
6907 }
6908 
6909 StringRef
6910 AMDGPUAsmParser::getTokenStr() const {
6911   return getToken().getString();
6912 }
6913 
6914 void
6915 AMDGPUAsmParser::lex() {
6916   Parser.Lex();
6917 }
6918 
6919 SMLoc
6920 AMDGPUAsmParser::getOperandLoc(std::function<bool(const AMDGPUOperand&)> Test,
6921                                const OperandVector &Operands) const {
6922   for (unsigned i = Operands.size() - 1; i > 0; --i) {
6923     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
6924     if (Test(Op))
6925       return Op.getStartLoc();
6926   }
6927   return ((AMDGPUOperand &)*Operands[0]).getStartLoc();
6928 }
6929 
6930 SMLoc
6931 AMDGPUAsmParser::getImmLoc(AMDGPUOperand::ImmTy Type,
6932                            const OperandVector &Operands) const {
6933   auto Test = [=](const AMDGPUOperand& Op) { return Op.isImmTy(Type); };
6934   return getOperandLoc(Test, Operands);
6935 }
6936 
6937 SMLoc
6938 AMDGPUAsmParser::getRegLoc(unsigned Reg,
6939                            const OperandVector &Operands) const {
6940   auto Test = [=](const AMDGPUOperand& Op) {
6941     return Op.isRegKind() && Op.getReg() == Reg;
6942   };
6943   return getOperandLoc(Test, Operands);
6944 }
6945 
6946 SMLoc
6947 AMDGPUAsmParser::getLitLoc(const OperandVector &Operands) const {
6948   auto Test = [](const AMDGPUOperand& Op) {
6949     return Op.IsImmKindLiteral() || Op.isExpr();
6950   };
6951   return getOperandLoc(Test, Operands);
6952 }
6953 
6954 SMLoc
6955 AMDGPUAsmParser::getConstLoc(const OperandVector &Operands) const {
6956   auto Test = [](const AMDGPUOperand& Op) {
6957     return Op.isImmKindConst();
6958   };
6959   return getOperandLoc(Test, Operands);
6960 }
6961 
6962 //===----------------------------------------------------------------------===//
6963 // swizzle
6964 //===----------------------------------------------------------------------===//
6965 
6966 LLVM_READNONE
6967 static unsigned
6968 encodeBitmaskPerm(const unsigned AndMask,
6969                   const unsigned OrMask,
6970                   const unsigned XorMask) {
6971   using namespace llvm::AMDGPU::Swizzle;
6972 
6973   return BITMASK_PERM_ENC |
6974          (AndMask << BITMASK_AND_SHIFT) |
6975          (OrMask  << BITMASK_OR_SHIFT)  |
6976          (XorMask << BITMASK_XOR_SHIFT);
6977 }
6978 
6979 bool
6980 AMDGPUAsmParser::parseSwizzleOperand(int64_t &Op,
6981                                      const unsigned MinVal,
6982                                      const unsigned MaxVal,
6983                                      const StringRef ErrMsg,
6984                                      SMLoc &Loc) {
6985   if (!skipToken(AsmToken::Comma, "expected a comma")) {
6986     return false;
6987   }
6988   Loc = getLoc();
6989   if (!parseExpr(Op)) {
6990     return false;
6991   }
6992   if (Op < MinVal || Op > MaxVal) {
6993     Error(Loc, ErrMsg);
6994     return false;
6995   }
6996 
6997   return true;
6998 }
6999 
7000 bool
7001 AMDGPUAsmParser::parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
7002                                       const unsigned MinVal,
7003                                       const unsigned MaxVal,
7004                                       const StringRef ErrMsg) {
7005   SMLoc Loc;
7006   for (unsigned i = 0; i < OpNum; ++i) {
7007     if (!parseSwizzleOperand(Op[i], MinVal, MaxVal, ErrMsg, Loc))
7008       return false;
7009   }
7010 
7011   return true;
7012 }
7013 
7014 bool
7015 AMDGPUAsmParser::parseSwizzleQuadPerm(int64_t &Imm) {
7016   using namespace llvm::AMDGPU::Swizzle;
7017 
7018   int64_t Lane[LANE_NUM];
7019   if (parseSwizzleOperands(LANE_NUM, Lane, 0, LANE_MAX,
7020                            "expected a 2-bit lane id")) {
7021     Imm = QUAD_PERM_ENC;
7022     for (unsigned I = 0; I < LANE_NUM; ++I) {
7023       Imm |= Lane[I] << (LANE_SHIFT * I);
7024     }
7025     return true;
7026   }
7027   return false;
7028 }
7029 
7030 bool
7031 AMDGPUAsmParser::parseSwizzleBroadcast(int64_t &Imm) {
7032   using namespace llvm::AMDGPU::Swizzle;
7033 
7034   SMLoc Loc;
7035   int64_t GroupSize;
7036   int64_t LaneIdx;
7037 
7038   if (!parseSwizzleOperand(GroupSize,
7039                            2, 32,
7040                            "group size must be in the interval [2,32]",
7041                            Loc)) {
7042     return false;
7043   }
7044   if (!isPowerOf2_64(GroupSize)) {
7045     Error(Loc, "group size must be a power of two");
7046     return false;
7047   }
7048   if (parseSwizzleOperand(LaneIdx,
7049                           0, GroupSize - 1,
7050                           "lane id must be in the interval [0,group size - 1]",
7051                           Loc)) {
7052     Imm = encodeBitmaskPerm(BITMASK_MAX - GroupSize + 1, LaneIdx, 0);
7053     return true;
7054   }
7055   return false;
7056 }
7057 
7058 bool
7059 AMDGPUAsmParser::parseSwizzleReverse(int64_t &Imm) {
7060   using namespace llvm::AMDGPU::Swizzle;
7061 
7062   SMLoc Loc;
7063   int64_t GroupSize;
7064 
7065   if (!parseSwizzleOperand(GroupSize,
7066                            2, 32,
7067                            "group size must be in the interval [2,32]",
7068                            Loc)) {
7069     return false;
7070   }
7071   if (!isPowerOf2_64(GroupSize)) {
7072     Error(Loc, "group size must be a power of two");
7073     return false;
7074   }
7075 
7076   Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize - 1);
7077   return true;
7078 }
7079 
7080 bool
7081 AMDGPUAsmParser::parseSwizzleSwap(int64_t &Imm) {
7082   using namespace llvm::AMDGPU::Swizzle;
7083 
7084   SMLoc Loc;
7085   int64_t GroupSize;
7086 
7087   if (!parseSwizzleOperand(GroupSize,
7088                            1, 16,
7089                            "group size must be in the interval [1,16]",
7090                            Loc)) {
7091     return false;
7092   }
7093   if (!isPowerOf2_64(GroupSize)) {
7094     Error(Loc, "group size must be a power of two");
7095     return false;
7096   }
7097 
7098   Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize);
7099   return true;
7100 }
7101 
7102 bool
7103 AMDGPUAsmParser::parseSwizzleBitmaskPerm(int64_t &Imm) {
7104   using namespace llvm::AMDGPU::Swizzle;
7105 
7106   if (!skipToken(AsmToken::Comma, "expected a comma")) {
7107     return false;
7108   }
7109 
7110   StringRef Ctl;
7111   SMLoc StrLoc = getLoc();
7112   if (!parseString(Ctl)) {
7113     return false;
7114   }
7115   if (Ctl.size() != BITMASK_WIDTH) {
7116     Error(StrLoc, "expected a 5-character mask");
7117     return false;
7118   }
7119 
7120   unsigned AndMask = 0;
7121   unsigned OrMask = 0;
7122   unsigned XorMask = 0;
7123 
7124   for (size_t i = 0; i < Ctl.size(); ++i) {
7125     unsigned Mask = 1 << (BITMASK_WIDTH - 1 - i);
7126     switch(Ctl[i]) {
7127     default:
7128       Error(StrLoc, "invalid mask");
7129       return false;
7130     case '0':
7131       break;
7132     case '1':
7133       OrMask |= Mask;
7134       break;
7135     case 'p':
7136       AndMask |= Mask;
7137       break;
7138     case 'i':
7139       AndMask |= Mask;
7140       XorMask |= Mask;
7141       break;
7142     }
7143   }
7144 
7145   Imm = encodeBitmaskPerm(AndMask, OrMask, XorMask);
7146   return true;
7147 }
7148 
7149 bool
7150 AMDGPUAsmParser::parseSwizzleOffset(int64_t &Imm) {
7151 
7152   SMLoc OffsetLoc = getLoc();
7153 
7154   if (!parseExpr(Imm, "a swizzle macro")) {
7155     return false;
7156   }
7157   if (!isUInt<16>(Imm)) {
7158     Error(OffsetLoc, "expected a 16-bit offset");
7159     return false;
7160   }
7161   return true;
7162 }
7163 
7164 bool
7165 AMDGPUAsmParser::parseSwizzleMacro(int64_t &Imm) {
7166   using namespace llvm::AMDGPU::Swizzle;
7167 
7168   if (skipToken(AsmToken::LParen, "expected a left parentheses")) {
7169 
7170     SMLoc ModeLoc = getLoc();
7171     bool Ok = false;
7172 
7173     if (trySkipId(IdSymbolic[ID_QUAD_PERM])) {
7174       Ok = parseSwizzleQuadPerm(Imm);
7175     } else if (trySkipId(IdSymbolic[ID_BITMASK_PERM])) {
7176       Ok = parseSwizzleBitmaskPerm(Imm);
7177     } else if (trySkipId(IdSymbolic[ID_BROADCAST])) {
7178       Ok = parseSwizzleBroadcast(Imm);
7179     } else if (trySkipId(IdSymbolic[ID_SWAP])) {
7180       Ok = parseSwizzleSwap(Imm);
7181     } else if (trySkipId(IdSymbolic[ID_REVERSE])) {
7182       Ok = parseSwizzleReverse(Imm);
7183     } else {
7184       Error(ModeLoc, "expected a swizzle mode");
7185     }
7186 
7187     return Ok && skipToken(AsmToken::RParen, "expected a closing parentheses");
7188   }
7189 
7190   return false;
7191 }
7192 
7193 OperandMatchResultTy
7194 AMDGPUAsmParser::parseSwizzleOp(OperandVector &Operands) {
7195   SMLoc S = getLoc();
7196   int64_t Imm = 0;
7197 
7198   if (trySkipId("offset")) {
7199 
7200     bool Ok = false;
7201     if (skipToken(AsmToken::Colon, "expected a colon")) {
7202       if (trySkipId("swizzle")) {
7203         Ok = parseSwizzleMacro(Imm);
7204       } else {
7205         Ok = parseSwizzleOffset(Imm);
7206       }
7207     }
7208 
7209     Operands.push_back(AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTySwizzle));
7210 
7211     return Ok? MatchOperand_Success : MatchOperand_ParseFail;
7212   } else {
7213     // Swizzle "offset" operand is optional.
7214     // If it is omitted, try parsing other optional operands.
7215     return parseOptionalOpr(Operands);
7216   }
7217 }
7218 
7219 bool
7220 AMDGPUOperand::isSwizzle() const {
7221   return isImmTy(ImmTySwizzle);
7222 }
7223 
7224 //===----------------------------------------------------------------------===//
7225 // VGPR Index Mode
7226 //===----------------------------------------------------------------------===//
7227 
7228 int64_t AMDGPUAsmParser::parseGPRIdxMacro() {
7229 
7230   using namespace llvm::AMDGPU::VGPRIndexMode;
7231 
7232   if (trySkipToken(AsmToken::RParen)) {
7233     return OFF;
7234   }
7235 
7236   int64_t Imm = 0;
7237 
7238   while (true) {
7239     unsigned Mode = 0;
7240     SMLoc S = getLoc();
7241 
7242     for (unsigned ModeId = ID_MIN; ModeId <= ID_MAX; ++ModeId) {
7243       if (trySkipId(IdSymbolic[ModeId])) {
7244         Mode = 1 << ModeId;
7245         break;
7246       }
7247     }
7248 
7249     if (Mode == 0) {
7250       Error(S, (Imm == 0)?
7251                "expected a VGPR index mode or a closing parenthesis" :
7252                "expected a VGPR index mode");
7253       return UNDEF;
7254     }
7255 
7256     if (Imm & Mode) {
7257       Error(S, "duplicate VGPR index mode");
7258       return UNDEF;
7259     }
7260     Imm |= Mode;
7261 
7262     if (trySkipToken(AsmToken::RParen))
7263       break;
7264     if (!skipToken(AsmToken::Comma,
7265                    "expected a comma or a closing parenthesis"))
7266       return UNDEF;
7267   }
7268 
7269   return Imm;
7270 }
7271 
7272 OperandMatchResultTy
7273 AMDGPUAsmParser::parseGPRIdxMode(OperandVector &Operands) {
7274 
7275   using namespace llvm::AMDGPU::VGPRIndexMode;
7276 
7277   int64_t Imm = 0;
7278   SMLoc S = getLoc();
7279 
7280   if (trySkipId("gpr_idx", AsmToken::LParen)) {
7281     Imm = parseGPRIdxMacro();
7282     if (Imm == UNDEF)
7283       return MatchOperand_ParseFail;
7284   } else {
7285     if (getParser().parseAbsoluteExpression(Imm))
7286       return MatchOperand_ParseFail;
7287     if (Imm < 0 || !isUInt<4>(Imm)) {
7288       Error(S, "invalid immediate: only 4-bit values are legal");
7289       return MatchOperand_ParseFail;
7290     }
7291   }
7292 
7293   Operands.push_back(
7294       AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyGprIdxMode));
7295   return MatchOperand_Success;
7296 }
7297 
7298 bool AMDGPUOperand::isGPRIdxMode() const {
7299   return isImmTy(ImmTyGprIdxMode);
7300 }
7301 
7302 //===----------------------------------------------------------------------===//
7303 // sopp branch targets
7304 //===----------------------------------------------------------------------===//
7305 
7306 OperandMatchResultTy
7307 AMDGPUAsmParser::parseSOppBrTarget(OperandVector &Operands) {
7308 
7309   // Make sure we are not parsing something
7310   // that looks like a label or an expression but is not.
7311   // This will improve error messages.
7312   if (isRegister() || isModifier())
7313     return MatchOperand_NoMatch;
7314 
7315   if (!parseExpr(Operands))
7316     return MatchOperand_ParseFail;
7317 
7318   AMDGPUOperand &Opr = ((AMDGPUOperand &)*Operands[Operands.size() - 1]);
7319   assert(Opr.isImm() || Opr.isExpr());
7320   SMLoc Loc = Opr.getStartLoc();
7321 
7322   // Currently we do not support arbitrary expressions as branch targets.
7323   // Only labels and absolute expressions are accepted.
7324   if (Opr.isExpr() && !Opr.isSymbolRefExpr()) {
7325     Error(Loc, "expected an absolute expression or a label");
7326   } else if (Opr.isImm() && !Opr.isS16Imm()) {
7327     Error(Loc, "expected a 16-bit signed jump offset");
7328   }
7329 
7330   return MatchOperand_Success;
7331 }
7332 
7333 //===----------------------------------------------------------------------===//
7334 // Boolean holding registers
7335 //===----------------------------------------------------------------------===//
7336 
7337 OperandMatchResultTy
7338 AMDGPUAsmParser::parseBoolReg(OperandVector &Operands) {
7339   return parseReg(Operands);
7340 }
7341 
7342 //===----------------------------------------------------------------------===//
7343 // mubuf
7344 //===----------------------------------------------------------------------===//
7345 
7346 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultCPol() const {
7347   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyCPol);
7348 }
7349 
7350 void AMDGPUAsmParser::cvtMubufImpl(MCInst &Inst,
7351                                    const OperandVector &Operands,
7352                                    bool IsAtomic,
7353                                    bool IsLds) {
7354   bool IsLdsOpcode = IsLds;
7355   bool HasLdsModifier = false;
7356   OptionalImmIndexMap OptionalIdx;
7357   unsigned FirstOperandIdx = 1;
7358   bool IsAtomicReturn = false;
7359 
7360   if (IsAtomic) {
7361     for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) {
7362       AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
7363       if (!Op.isCPol())
7364         continue;
7365       IsAtomicReturn = Op.getImm() & AMDGPU::CPol::GLC;
7366       break;
7367     }
7368 
7369     if (!IsAtomicReturn) {
7370       int NewOpc = AMDGPU::getAtomicNoRetOp(Inst.getOpcode());
7371       if (NewOpc != -1)
7372         Inst.setOpcode(NewOpc);
7373     }
7374 
7375     IsAtomicReturn =  MII.get(Inst.getOpcode()).TSFlags &
7376                       SIInstrFlags::IsAtomicRet;
7377   }
7378 
7379   for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) {
7380     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
7381 
7382     // Add the register arguments
7383     if (Op.isReg()) {
7384       Op.addRegOperands(Inst, 1);
7385       // Insert a tied src for atomic return dst.
7386       // This cannot be postponed as subsequent calls to
7387       // addImmOperands rely on correct number of MC operands.
7388       if (IsAtomicReturn && i == FirstOperandIdx)
7389         Op.addRegOperands(Inst, 1);
7390       continue;
7391     }
7392 
7393     // Handle the case where soffset is an immediate
7394     if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
7395       Op.addImmOperands(Inst, 1);
7396       continue;
7397     }
7398 
7399     HasLdsModifier |= Op.isLDS();
7400 
7401     // Handle tokens like 'offen' which are sometimes hard-coded into the
7402     // asm string.  There are no MCInst operands for these.
7403     if (Op.isToken()) {
7404       continue;
7405     }
7406     assert(Op.isImm());
7407 
7408     // Handle optional arguments
7409     OptionalIdx[Op.getImmTy()] = i;
7410   }
7411 
7412   // This is a workaround for an llvm quirk which may result in an
7413   // incorrect instruction selection. Lds and non-lds versions of
7414   // MUBUF instructions are identical except that lds versions
7415   // have mandatory 'lds' modifier. However this modifier follows
7416   // optional modifiers and llvm asm matcher regards this 'lds'
7417   // modifier as an optional one. As a result, an lds version
7418   // of opcode may be selected even if it has no 'lds' modifier.
7419   if (IsLdsOpcode && !HasLdsModifier) {
7420     int NoLdsOpcode = AMDGPU::getMUBUFNoLdsInst(Inst.getOpcode());
7421     if (NoLdsOpcode != -1) { // Got lds version - correct it.
7422       Inst.setOpcode(NoLdsOpcode);
7423       IsLdsOpcode = false;
7424     }
7425   }
7426 
7427   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset);
7428   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol, 0);
7429 
7430   if (!IsLdsOpcode) { // tfe is not legal with lds opcodes
7431     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
7432   }
7433   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySWZ);
7434 }
7435 
7436 void AMDGPUAsmParser::cvtMtbuf(MCInst &Inst, const OperandVector &Operands) {
7437   OptionalImmIndexMap OptionalIdx;
7438 
7439   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
7440     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
7441 
7442     // Add the register arguments
7443     if (Op.isReg()) {
7444       Op.addRegOperands(Inst, 1);
7445       continue;
7446     }
7447 
7448     // Handle the case where soffset is an immediate
7449     if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
7450       Op.addImmOperands(Inst, 1);
7451       continue;
7452     }
7453 
7454     // Handle tokens like 'offen' which are sometimes hard-coded into the
7455     // asm string.  There are no MCInst operands for these.
7456     if (Op.isToken()) {
7457       continue;
7458     }
7459     assert(Op.isImm());
7460 
7461     // Handle optional arguments
7462     OptionalIdx[Op.getImmTy()] = i;
7463   }
7464 
7465   addOptionalImmOperand(Inst, Operands, OptionalIdx,
7466                         AMDGPUOperand::ImmTyOffset);
7467   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyFORMAT);
7468   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol, 0);
7469   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
7470   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySWZ);
7471 }
7472 
7473 //===----------------------------------------------------------------------===//
7474 // mimg
7475 //===----------------------------------------------------------------------===//
7476 
7477 void AMDGPUAsmParser::cvtMIMG(MCInst &Inst, const OperandVector &Operands,
7478                               bool IsAtomic) {
7479   unsigned I = 1;
7480   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
7481   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
7482     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
7483   }
7484 
7485   if (IsAtomic) {
7486     // Add src, same as dst
7487     assert(Desc.getNumDefs() == 1);
7488     ((AMDGPUOperand &)*Operands[I - 1]).addRegOperands(Inst, 1);
7489   }
7490 
7491   OptionalImmIndexMap OptionalIdx;
7492 
7493   for (unsigned E = Operands.size(); I != E; ++I) {
7494     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
7495 
7496     // Add the register arguments
7497     if (Op.isReg()) {
7498       Op.addRegOperands(Inst, 1);
7499     } else if (Op.isImmModifier()) {
7500       OptionalIdx[Op.getImmTy()] = I;
7501     } else if (!Op.isToken()) {
7502       llvm_unreachable("unexpected operand type");
7503     }
7504   }
7505 
7506   bool IsGFX10Plus = isGFX10Plus();
7507 
7508   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDMask);
7509   if (IsGFX10Plus)
7510     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDim, -1);
7511   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyUNorm);
7512   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol);
7513   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyR128A16);
7514   if (IsGFX10Plus)
7515     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyA16);
7516   if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::tfe) != -1)
7517     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
7518   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyLWE);
7519   if (!IsGFX10Plus)
7520     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDA);
7521   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyD16);
7522 }
7523 
7524 void AMDGPUAsmParser::cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands) {
7525   cvtMIMG(Inst, Operands, true);
7526 }
7527 
7528 void AMDGPUAsmParser::cvtSMEMAtomic(MCInst &Inst, const OperandVector &Operands) {
7529   OptionalImmIndexMap OptionalIdx;
7530   bool IsAtomicReturn = false;
7531 
7532   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
7533     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
7534     if (!Op.isCPol())
7535       continue;
7536     IsAtomicReturn = Op.getImm() & AMDGPU::CPol::GLC;
7537     break;
7538   }
7539 
7540   if (!IsAtomicReturn) {
7541     int NewOpc = AMDGPU::getAtomicNoRetOp(Inst.getOpcode());
7542     if (NewOpc != -1)
7543       Inst.setOpcode(NewOpc);
7544   }
7545 
7546   IsAtomicReturn =  MII.get(Inst.getOpcode()).TSFlags &
7547                     SIInstrFlags::IsAtomicRet;
7548 
7549   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
7550     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
7551 
7552     // Add the register arguments
7553     if (Op.isReg()) {
7554       Op.addRegOperands(Inst, 1);
7555       if (IsAtomicReturn && i == 1)
7556         Op.addRegOperands(Inst, 1);
7557       continue;
7558     }
7559 
7560     // Handle the case where soffset is an immediate
7561     if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
7562       Op.addImmOperands(Inst, 1);
7563       continue;
7564     }
7565 
7566     // Handle tokens like 'offen' which are sometimes hard-coded into the
7567     // asm string.  There are no MCInst operands for these.
7568     if (Op.isToken()) {
7569       continue;
7570     }
7571     assert(Op.isImm());
7572 
7573     // Handle optional arguments
7574     OptionalIdx[Op.getImmTy()] = i;
7575   }
7576 
7577   if ((int)Inst.getNumOperands() <=
7578       AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::offset))
7579     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset);
7580   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol, 0);
7581 }
7582 
7583 void AMDGPUAsmParser::cvtIntersectRay(MCInst &Inst,
7584                                       const OperandVector &Operands) {
7585   for (unsigned I = 1; I < Operands.size(); ++I) {
7586     auto &Operand = (AMDGPUOperand &)*Operands[I];
7587     if (Operand.isReg())
7588       Operand.addRegOperands(Inst, 1);
7589   }
7590 
7591   Inst.addOperand(MCOperand::createImm(1)); // a16
7592 }
7593 
7594 //===----------------------------------------------------------------------===//
7595 // smrd
7596 //===----------------------------------------------------------------------===//
7597 
7598 bool AMDGPUOperand::isSMRDOffset8() const {
7599   return isImm() && isUInt<8>(getImm());
7600 }
7601 
7602 bool AMDGPUOperand::isSMEMOffset() const {
7603   return isImm(); // Offset range is checked later by validator.
7604 }
7605 
7606 bool AMDGPUOperand::isSMRDLiteralOffset() const {
7607   // 32-bit literals are only supported on CI and we only want to use them
7608   // when the offset is > 8-bits.
7609   return isImm() && !isUInt<8>(getImm()) && isUInt<32>(getImm());
7610 }
7611 
7612 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDOffset8() const {
7613   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
7614 }
7615 
7616 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMEMOffset() const {
7617   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
7618 }
7619 
7620 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDLiteralOffset() const {
7621   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
7622 }
7623 
7624 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultFlatOffset() const {
7625   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
7626 }
7627 
7628 //===----------------------------------------------------------------------===//
7629 // vop3
7630 //===----------------------------------------------------------------------===//
7631 
7632 static bool ConvertOmodMul(int64_t &Mul) {
7633   if (Mul != 1 && Mul != 2 && Mul != 4)
7634     return false;
7635 
7636   Mul >>= 1;
7637   return true;
7638 }
7639 
7640 static bool ConvertOmodDiv(int64_t &Div) {
7641   if (Div == 1) {
7642     Div = 0;
7643     return true;
7644   }
7645 
7646   if (Div == 2) {
7647     Div = 3;
7648     return true;
7649   }
7650 
7651   return false;
7652 }
7653 
7654 // Both bound_ctrl:0 and bound_ctrl:1 are encoded as 1.
7655 // This is intentional and ensures compatibility with sp3.
7656 // See bug 35397 for details.
7657 static bool ConvertBoundCtrl(int64_t &BoundCtrl) {
7658   if (BoundCtrl == 0 || BoundCtrl == 1) {
7659     BoundCtrl = 1;
7660     return true;
7661   }
7662   return false;
7663 }
7664 
7665 // Note: the order in this table matches the order of operands in AsmString.
7666 static const OptionalOperand AMDGPUOptionalOperandTable[] = {
7667   {"offen",   AMDGPUOperand::ImmTyOffen, true, nullptr},
7668   {"idxen",   AMDGPUOperand::ImmTyIdxen, true, nullptr},
7669   {"addr64",  AMDGPUOperand::ImmTyAddr64, true, nullptr},
7670   {"offset0", AMDGPUOperand::ImmTyOffset0, false, nullptr},
7671   {"offset1", AMDGPUOperand::ImmTyOffset1, false, nullptr},
7672   {"gds",     AMDGPUOperand::ImmTyGDS, true, nullptr},
7673   {"lds",     AMDGPUOperand::ImmTyLDS, true, nullptr},
7674   {"offset",  AMDGPUOperand::ImmTyOffset, false, nullptr},
7675   {"inst_offset", AMDGPUOperand::ImmTyInstOffset, false, nullptr},
7676   {"",        AMDGPUOperand::ImmTyCPol, false, nullptr},
7677   {"swz",     AMDGPUOperand::ImmTySWZ, true, nullptr},
7678   {"tfe",     AMDGPUOperand::ImmTyTFE, true, nullptr},
7679   {"d16",     AMDGPUOperand::ImmTyD16, true, nullptr},
7680   {"high",    AMDGPUOperand::ImmTyHigh, true, nullptr},
7681   {"clamp",   AMDGPUOperand::ImmTyClampSI, true, nullptr},
7682   {"omod",    AMDGPUOperand::ImmTyOModSI, false, ConvertOmodMul},
7683   {"unorm",   AMDGPUOperand::ImmTyUNorm, true, nullptr},
7684   {"da",      AMDGPUOperand::ImmTyDA,    true, nullptr},
7685   {"r128",    AMDGPUOperand::ImmTyR128A16,  true, nullptr},
7686   {"a16",     AMDGPUOperand::ImmTyA16,  true, nullptr},
7687   {"lwe",     AMDGPUOperand::ImmTyLWE,   true, nullptr},
7688   {"d16",     AMDGPUOperand::ImmTyD16,   true, nullptr},
7689   {"dmask",   AMDGPUOperand::ImmTyDMask, false, nullptr},
7690   {"dim",     AMDGPUOperand::ImmTyDim,   false, nullptr},
7691   {"row_mask",   AMDGPUOperand::ImmTyDppRowMask, false, nullptr},
7692   {"bank_mask",  AMDGPUOperand::ImmTyDppBankMask, false, nullptr},
7693   {"bound_ctrl", AMDGPUOperand::ImmTyDppBoundCtrl, false, ConvertBoundCtrl},
7694   {"fi",         AMDGPUOperand::ImmTyDppFi, false, nullptr},
7695   {"dst_sel",    AMDGPUOperand::ImmTySdwaDstSel, false, nullptr},
7696   {"src0_sel",   AMDGPUOperand::ImmTySdwaSrc0Sel, false, nullptr},
7697   {"src1_sel",   AMDGPUOperand::ImmTySdwaSrc1Sel, false, nullptr},
7698   {"dst_unused", AMDGPUOperand::ImmTySdwaDstUnused, false, nullptr},
7699   {"compr", AMDGPUOperand::ImmTyExpCompr, true, nullptr },
7700   {"vm", AMDGPUOperand::ImmTyExpVM, true, nullptr},
7701   {"op_sel", AMDGPUOperand::ImmTyOpSel, false, nullptr},
7702   {"op_sel_hi", AMDGPUOperand::ImmTyOpSelHi, false, nullptr},
7703   {"neg_lo", AMDGPUOperand::ImmTyNegLo, false, nullptr},
7704   {"neg_hi", AMDGPUOperand::ImmTyNegHi, false, nullptr},
7705   {"blgp", AMDGPUOperand::ImmTyBLGP, false, nullptr},
7706   {"cbsz", AMDGPUOperand::ImmTyCBSZ, false, nullptr},
7707   {"abid", AMDGPUOperand::ImmTyABID, false, nullptr}
7708 };
7709 
7710 void AMDGPUAsmParser::onBeginOfFile() {
7711   if (!getParser().getStreamer().getTargetStreamer() ||
7712       getSTI().getTargetTriple().getArch() == Triple::r600)
7713     return;
7714 
7715   if (!getTargetStreamer().getTargetID())
7716     getTargetStreamer().initializeTargetID(getSTI(), getSTI().getFeatureString());
7717 
7718   if (isHsaAbiVersion3AndAbove(&getSTI()))
7719     getTargetStreamer().EmitDirectiveAMDGCNTarget();
7720 }
7721 
7722 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOperand(OperandVector &Operands) {
7723 
7724   OperandMatchResultTy res = parseOptionalOpr(Operands);
7725 
7726   // This is a hack to enable hardcoded mandatory operands which follow
7727   // optional operands.
7728   //
7729   // Current design assumes that all operands after the first optional operand
7730   // are also optional. However implementation of some instructions violates
7731   // this rule (see e.g. flat/global atomic which have hardcoded 'glc' operands).
7732   //
7733   // To alleviate this problem, we have to (implicitly) parse extra operands
7734   // to make sure autogenerated parser of custom operands never hit hardcoded
7735   // mandatory operands.
7736 
7737   for (unsigned i = 0; i < MAX_OPR_LOOKAHEAD; ++i) {
7738     if (res != MatchOperand_Success ||
7739         isToken(AsmToken::EndOfStatement))
7740       break;
7741 
7742     trySkipToken(AsmToken::Comma);
7743     res = parseOptionalOpr(Operands);
7744   }
7745 
7746   return res;
7747 }
7748 
7749 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOpr(OperandVector &Operands) {
7750   OperandMatchResultTy res;
7751   for (const OptionalOperand &Op : AMDGPUOptionalOperandTable) {
7752     // try to parse any optional operand here
7753     if (Op.IsBit) {
7754       res = parseNamedBit(Op.Name, Operands, Op.Type);
7755     } else if (Op.Type == AMDGPUOperand::ImmTyOModSI) {
7756       res = parseOModOperand(Operands);
7757     } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstSel ||
7758                Op.Type == AMDGPUOperand::ImmTySdwaSrc0Sel ||
7759                Op.Type == AMDGPUOperand::ImmTySdwaSrc1Sel) {
7760       res = parseSDWASel(Operands, Op.Name, Op.Type);
7761     } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstUnused) {
7762       res = parseSDWADstUnused(Operands);
7763     } else if (Op.Type == AMDGPUOperand::ImmTyOpSel ||
7764                Op.Type == AMDGPUOperand::ImmTyOpSelHi ||
7765                Op.Type == AMDGPUOperand::ImmTyNegLo ||
7766                Op.Type == AMDGPUOperand::ImmTyNegHi) {
7767       res = parseOperandArrayWithPrefix(Op.Name, Operands, Op.Type,
7768                                         Op.ConvertResult);
7769     } else if (Op.Type == AMDGPUOperand::ImmTyDim) {
7770       res = parseDim(Operands);
7771     } else if (Op.Type == AMDGPUOperand::ImmTyCPol) {
7772       res = parseCPol(Operands);
7773     } else {
7774       res = parseIntWithPrefix(Op.Name, Operands, Op.Type, Op.ConvertResult);
7775       if (Op.Type == AMDGPUOperand::ImmTyBLGP && res == MatchOperand_NoMatch) {
7776         res = parseOperandArrayWithPrefix("neg", Operands,
7777                                           AMDGPUOperand::ImmTyBLGP,
7778                                           nullptr);
7779       }
7780     }
7781     if (res != MatchOperand_NoMatch) {
7782       return res;
7783     }
7784   }
7785   return MatchOperand_NoMatch;
7786 }
7787 
7788 OperandMatchResultTy AMDGPUAsmParser::parseOModOperand(OperandVector &Operands) {
7789   StringRef Name = getTokenStr();
7790   if (Name == "mul") {
7791     return parseIntWithPrefix("mul", Operands,
7792                               AMDGPUOperand::ImmTyOModSI, ConvertOmodMul);
7793   }
7794 
7795   if (Name == "div") {
7796     return parseIntWithPrefix("div", Operands,
7797                               AMDGPUOperand::ImmTyOModSI, ConvertOmodDiv);
7798   }
7799 
7800   return MatchOperand_NoMatch;
7801 }
7802 
7803 void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands) {
7804   cvtVOP3P(Inst, Operands);
7805 
7806   int Opc = Inst.getOpcode();
7807 
7808   int SrcNum;
7809   const int Ops[] = { AMDGPU::OpName::src0,
7810                       AMDGPU::OpName::src1,
7811                       AMDGPU::OpName::src2 };
7812   for (SrcNum = 0;
7813        SrcNum < 3 && AMDGPU::getNamedOperandIdx(Opc, Ops[SrcNum]) != -1;
7814        ++SrcNum);
7815   assert(SrcNum > 0);
7816 
7817   int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
7818   unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
7819 
7820   if ((OpSel & (1 << SrcNum)) != 0) {
7821     int ModIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers);
7822     uint32_t ModVal = Inst.getOperand(ModIdx).getImm();
7823     Inst.getOperand(ModIdx).setImm(ModVal | SISrcMods::DST_OP_SEL);
7824   }
7825 }
7826 
7827 static bool isRegOrImmWithInputMods(const MCInstrDesc &Desc, unsigned OpNum) {
7828       // 1. This operand is input modifiers
7829   return Desc.OpInfo[OpNum].OperandType == AMDGPU::OPERAND_INPUT_MODS
7830       // 2. This is not last operand
7831       && Desc.NumOperands > (OpNum + 1)
7832       // 3. Next operand is register class
7833       && Desc.OpInfo[OpNum + 1].RegClass != -1
7834       // 4. Next register is not tied to any other operand
7835       && Desc.getOperandConstraint(OpNum + 1, MCOI::OperandConstraint::TIED_TO) == -1;
7836 }
7837 
7838 void AMDGPUAsmParser::cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands)
7839 {
7840   OptionalImmIndexMap OptionalIdx;
7841   unsigned Opc = Inst.getOpcode();
7842 
7843   unsigned I = 1;
7844   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
7845   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
7846     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
7847   }
7848 
7849   for (unsigned E = Operands.size(); I != E; ++I) {
7850     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
7851     if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
7852       Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
7853     } else if (Op.isInterpSlot() ||
7854                Op.isInterpAttr() ||
7855                Op.isAttrChan()) {
7856       Inst.addOperand(MCOperand::createImm(Op.getImm()));
7857     } else if (Op.isImmModifier()) {
7858       OptionalIdx[Op.getImmTy()] = I;
7859     } else {
7860       llvm_unreachable("unhandled operand type");
7861     }
7862   }
7863 
7864   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::high) != -1) {
7865     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyHigh);
7866   }
7867 
7868   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) {
7869     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI);
7870   }
7871 
7872   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) {
7873     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI);
7874   }
7875 }
7876 
7877 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands,
7878                               OptionalImmIndexMap &OptionalIdx) {
7879   unsigned Opc = Inst.getOpcode();
7880 
7881   unsigned I = 1;
7882   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
7883   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
7884     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
7885   }
7886 
7887   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers) != -1) {
7888     // This instruction has src modifiers
7889     for (unsigned E = Operands.size(); I != E; ++I) {
7890       AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
7891       if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
7892         Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
7893       } else if (Op.isImmModifier()) {
7894         OptionalIdx[Op.getImmTy()] = I;
7895       } else if (Op.isRegOrImm()) {
7896         Op.addRegOrImmOperands(Inst, 1);
7897       } else {
7898         llvm_unreachable("unhandled operand type");
7899       }
7900     }
7901   } else {
7902     // No src modifiers
7903     for (unsigned E = Operands.size(); I != E; ++I) {
7904       AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
7905       if (Op.isMod()) {
7906         OptionalIdx[Op.getImmTy()] = I;
7907       } else {
7908         Op.addRegOrImmOperands(Inst, 1);
7909       }
7910     }
7911   }
7912 
7913   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) {
7914     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI);
7915   }
7916 
7917   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) {
7918     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI);
7919   }
7920 
7921   // Special case v_mac_{f16, f32} and v_fmac_{f16, f32} (gfx906/gfx10+):
7922   // it has src2 register operand that is tied to dst operand
7923   // we don't allow modifiers for this operand in assembler so src2_modifiers
7924   // should be 0.
7925   if (Opc == AMDGPU::V_MAC_F32_e64_gfx6_gfx7 ||
7926       Opc == AMDGPU::V_MAC_F32_e64_gfx10 ||
7927       Opc == AMDGPU::V_MAC_F32_e64_vi ||
7928       Opc == AMDGPU::V_MAC_LEGACY_F32_e64_gfx6_gfx7 ||
7929       Opc == AMDGPU::V_MAC_LEGACY_F32_e64_gfx10 ||
7930       Opc == AMDGPU::V_MAC_F16_e64_vi ||
7931       Opc == AMDGPU::V_FMAC_F64_e64_gfx90a ||
7932       Opc == AMDGPU::V_FMAC_F32_e64_gfx10 ||
7933       Opc == AMDGPU::V_FMAC_F32_e64_vi ||
7934       Opc == AMDGPU::V_FMAC_LEGACY_F32_e64_gfx10 ||
7935       Opc == AMDGPU::V_FMAC_F16_e64_gfx10) {
7936     auto it = Inst.begin();
7937     std::advance(it, AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2_modifiers));
7938     it = Inst.insert(it, MCOperand::createImm(0)); // no modifiers for src2
7939     ++it;
7940     // Copy the operand to ensure it's not invalidated when Inst grows.
7941     Inst.insert(it, MCOperand(Inst.getOperand(0))); // src2 = dst
7942   }
7943 }
7944 
7945 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands) {
7946   OptionalImmIndexMap OptionalIdx;
7947   cvtVOP3(Inst, Operands, OptionalIdx);
7948 }
7949 
7950 void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands,
7951                                OptionalImmIndexMap &OptIdx) {
7952   const int Opc = Inst.getOpcode();
7953   const MCInstrDesc &Desc = MII.get(Opc);
7954 
7955   const bool IsPacked = (Desc.TSFlags & SIInstrFlags::IsPacked) != 0;
7956 
7957   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst_in) != -1) {
7958     assert(!IsPacked);
7959     Inst.addOperand(Inst.getOperand(0));
7960   }
7961 
7962   // FIXME: This is messy. Parse the modifiers as if it was a normal VOP3
7963   // instruction, and then figure out where to actually put the modifiers
7964 
7965   int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
7966   if (OpSelIdx != -1) {
7967     addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSel);
7968   }
7969 
7970   int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi);
7971   if (OpSelHiIdx != -1) {
7972     int DefaultVal = IsPacked ? -1 : 0;
7973     addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSelHi,
7974                           DefaultVal);
7975   }
7976 
7977   int NegLoIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_lo);
7978   if (NegLoIdx != -1) {
7979     addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegLo);
7980     addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegHi);
7981   }
7982 
7983   const int Ops[] = { AMDGPU::OpName::src0,
7984                       AMDGPU::OpName::src1,
7985                       AMDGPU::OpName::src2 };
7986   const int ModOps[] = { AMDGPU::OpName::src0_modifiers,
7987                          AMDGPU::OpName::src1_modifiers,
7988                          AMDGPU::OpName::src2_modifiers };
7989 
7990   unsigned OpSel = 0;
7991   unsigned OpSelHi = 0;
7992   unsigned NegLo = 0;
7993   unsigned NegHi = 0;
7994 
7995   if (OpSelIdx != -1)
7996     OpSel = Inst.getOperand(OpSelIdx).getImm();
7997 
7998   if (OpSelHiIdx != -1)
7999     OpSelHi = Inst.getOperand(OpSelHiIdx).getImm();
8000 
8001   if (NegLoIdx != -1) {
8002     int NegHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_hi);
8003     NegLo = Inst.getOperand(NegLoIdx).getImm();
8004     NegHi = Inst.getOperand(NegHiIdx).getImm();
8005   }
8006 
8007   for (int J = 0; J < 3; ++J) {
8008     int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]);
8009     if (OpIdx == -1)
8010       break;
8011 
8012     uint32_t ModVal = 0;
8013 
8014     if ((OpSel & (1 << J)) != 0)
8015       ModVal |= SISrcMods::OP_SEL_0;
8016 
8017     if ((OpSelHi & (1 << J)) != 0)
8018       ModVal |= SISrcMods::OP_SEL_1;
8019 
8020     if ((NegLo & (1 << J)) != 0)
8021       ModVal |= SISrcMods::NEG;
8022 
8023     if ((NegHi & (1 << J)) != 0)
8024       ModVal |= SISrcMods::NEG_HI;
8025 
8026     int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]);
8027 
8028     Inst.getOperand(ModIdx).setImm(Inst.getOperand(ModIdx).getImm() | ModVal);
8029   }
8030 }
8031 
8032 void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands) {
8033   OptionalImmIndexMap OptIdx;
8034   cvtVOP3(Inst, Operands, OptIdx);
8035   cvtVOP3P(Inst, Operands, OptIdx);
8036 }
8037 
8038 //===----------------------------------------------------------------------===//
8039 // dpp
8040 //===----------------------------------------------------------------------===//
8041 
8042 bool AMDGPUOperand::isDPP8() const {
8043   return isImmTy(ImmTyDPP8);
8044 }
8045 
8046 bool AMDGPUOperand::isDPPCtrl() const {
8047   using namespace AMDGPU::DPP;
8048 
8049   bool result = isImm() && getImmTy() == ImmTyDppCtrl && isUInt<9>(getImm());
8050   if (result) {
8051     int64_t Imm = getImm();
8052     return (Imm >= DppCtrl::QUAD_PERM_FIRST && Imm <= DppCtrl::QUAD_PERM_LAST) ||
8053            (Imm >= DppCtrl::ROW_SHL_FIRST && Imm <= DppCtrl::ROW_SHL_LAST) ||
8054            (Imm >= DppCtrl::ROW_SHR_FIRST && Imm <= DppCtrl::ROW_SHR_LAST) ||
8055            (Imm >= DppCtrl::ROW_ROR_FIRST && Imm <= DppCtrl::ROW_ROR_LAST) ||
8056            (Imm == DppCtrl::WAVE_SHL1) ||
8057            (Imm == DppCtrl::WAVE_ROL1) ||
8058            (Imm == DppCtrl::WAVE_SHR1) ||
8059            (Imm == DppCtrl::WAVE_ROR1) ||
8060            (Imm == DppCtrl::ROW_MIRROR) ||
8061            (Imm == DppCtrl::ROW_HALF_MIRROR) ||
8062            (Imm == DppCtrl::BCAST15) ||
8063            (Imm == DppCtrl::BCAST31) ||
8064            (Imm >= DppCtrl::ROW_SHARE_FIRST && Imm <= DppCtrl::ROW_SHARE_LAST) ||
8065            (Imm >= DppCtrl::ROW_XMASK_FIRST && Imm <= DppCtrl::ROW_XMASK_LAST);
8066   }
8067   return false;
8068 }
8069 
8070 //===----------------------------------------------------------------------===//
8071 // mAI
8072 //===----------------------------------------------------------------------===//
8073 
8074 bool AMDGPUOperand::isBLGP() const {
8075   return isImm() && getImmTy() == ImmTyBLGP && isUInt<3>(getImm());
8076 }
8077 
8078 bool AMDGPUOperand::isCBSZ() const {
8079   return isImm() && getImmTy() == ImmTyCBSZ && isUInt<3>(getImm());
8080 }
8081 
8082 bool AMDGPUOperand::isABID() const {
8083   return isImm() && getImmTy() == ImmTyABID && isUInt<4>(getImm());
8084 }
8085 
8086 bool AMDGPUOperand::isS16Imm() const {
8087   return isImm() && (isInt<16>(getImm()) || isUInt<16>(getImm()));
8088 }
8089 
8090 bool AMDGPUOperand::isU16Imm() const {
8091   return isImm() && isUInt<16>(getImm());
8092 }
8093 
8094 //===----------------------------------------------------------------------===//
8095 // dim
8096 //===----------------------------------------------------------------------===//
8097 
8098 bool AMDGPUAsmParser::parseDimId(unsigned &Encoding) {
8099   // We want to allow "dim:1D" etc.,
8100   // but the initial 1 is tokenized as an integer.
8101   std::string Token;
8102   if (isToken(AsmToken::Integer)) {
8103     SMLoc Loc = getToken().getEndLoc();
8104     Token = std::string(getTokenStr());
8105     lex();
8106     if (getLoc() != Loc)
8107       return false;
8108   }
8109 
8110   StringRef Suffix;
8111   if (!parseId(Suffix))
8112     return false;
8113   Token += Suffix;
8114 
8115   StringRef DimId = Token;
8116   if (DimId.startswith("SQ_RSRC_IMG_"))
8117     DimId = DimId.drop_front(12);
8118 
8119   const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByAsmSuffix(DimId);
8120   if (!DimInfo)
8121     return false;
8122 
8123   Encoding = DimInfo->Encoding;
8124   return true;
8125 }
8126 
8127 OperandMatchResultTy AMDGPUAsmParser::parseDim(OperandVector &Operands) {
8128   if (!isGFX10Plus())
8129     return MatchOperand_NoMatch;
8130 
8131   SMLoc S = getLoc();
8132 
8133   if (!trySkipId("dim", AsmToken::Colon))
8134     return MatchOperand_NoMatch;
8135 
8136   unsigned Encoding;
8137   SMLoc Loc = getLoc();
8138   if (!parseDimId(Encoding)) {
8139     Error(Loc, "invalid dim value");
8140     return MatchOperand_ParseFail;
8141   }
8142 
8143   Operands.push_back(AMDGPUOperand::CreateImm(this, Encoding, S,
8144                                               AMDGPUOperand::ImmTyDim));
8145   return MatchOperand_Success;
8146 }
8147 
8148 //===----------------------------------------------------------------------===//
8149 // dpp
8150 //===----------------------------------------------------------------------===//
8151 
8152 OperandMatchResultTy AMDGPUAsmParser::parseDPP8(OperandVector &Operands) {
8153   SMLoc S = getLoc();
8154 
8155   if (!isGFX10Plus() || !trySkipId("dpp8", AsmToken::Colon))
8156     return MatchOperand_NoMatch;
8157 
8158   // dpp8:[%d,%d,%d,%d,%d,%d,%d,%d]
8159 
8160   int64_t Sels[8];
8161 
8162   if (!skipToken(AsmToken::LBrac, "expected an opening square bracket"))
8163     return MatchOperand_ParseFail;
8164 
8165   for (size_t i = 0; i < 8; ++i) {
8166     if (i > 0 && !skipToken(AsmToken::Comma, "expected a comma"))
8167       return MatchOperand_ParseFail;
8168 
8169     SMLoc Loc = getLoc();
8170     if (getParser().parseAbsoluteExpression(Sels[i]))
8171       return MatchOperand_ParseFail;
8172     if (0 > Sels[i] || 7 < Sels[i]) {
8173       Error(Loc, "expected a 3-bit value");
8174       return MatchOperand_ParseFail;
8175     }
8176   }
8177 
8178   if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
8179     return MatchOperand_ParseFail;
8180 
8181   unsigned DPP8 = 0;
8182   for (size_t i = 0; i < 8; ++i)
8183     DPP8 |= (Sels[i] << (i * 3));
8184 
8185   Operands.push_back(AMDGPUOperand::CreateImm(this, DPP8, S, AMDGPUOperand::ImmTyDPP8));
8186   return MatchOperand_Success;
8187 }
8188 
8189 bool
8190 AMDGPUAsmParser::isSupportedDPPCtrl(StringRef Ctrl,
8191                                     const OperandVector &Operands) {
8192   if (Ctrl == "row_newbcast")
8193     return isGFX90A();
8194 
8195   if (Ctrl == "row_share" ||
8196       Ctrl == "row_xmask")
8197     return isGFX10Plus();
8198 
8199   if (Ctrl == "wave_shl" ||
8200       Ctrl == "wave_shr" ||
8201       Ctrl == "wave_rol" ||
8202       Ctrl == "wave_ror" ||
8203       Ctrl == "row_bcast")
8204     return isVI() || isGFX9();
8205 
8206   return Ctrl == "row_mirror" ||
8207          Ctrl == "row_half_mirror" ||
8208          Ctrl == "quad_perm" ||
8209          Ctrl == "row_shl" ||
8210          Ctrl == "row_shr" ||
8211          Ctrl == "row_ror";
8212 }
8213 
8214 int64_t
8215 AMDGPUAsmParser::parseDPPCtrlPerm() {
8216   // quad_perm:[%d,%d,%d,%d]
8217 
8218   if (!skipToken(AsmToken::LBrac, "expected an opening square bracket"))
8219     return -1;
8220 
8221   int64_t Val = 0;
8222   for (int i = 0; i < 4; ++i) {
8223     if (i > 0 && !skipToken(AsmToken::Comma, "expected a comma"))
8224       return -1;
8225 
8226     int64_t Temp;
8227     SMLoc Loc = getLoc();
8228     if (getParser().parseAbsoluteExpression(Temp))
8229       return -1;
8230     if (Temp < 0 || Temp > 3) {
8231       Error(Loc, "expected a 2-bit value");
8232       return -1;
8233     }
8234 
8235     Val += (Temp << i * 2);
8236   }
8237 
8238   if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
8239     return -1;
8240 
8241   return Val;
8242 }
8243 
8244 int64_t
8245 AMDGPUAsmParser::parseDPPCtrlSel(StringRef Ctrl) {
8246   using namespace AMDGPU::DPP;
8247 
8248   // sel:%d
8249 
8250   int64_t Val;
8251   SMLoc Loc = getLoc();
8252 
8253   if (getParser().parseAbsoluteExpression(Val))
8254     return -1;
8255 
8256   struct DppCtrlCheck {
8257     int64_t Ctrl;
8258     int Lo;
8259     int Hi;
8260   };
8261 
8262   DppCtrlCheck Check = StringSwitch<DppCtrlCheck>(Ctrl)
8263     .Case("wave_shl",  {DppCtrl::WAVE_SHL1,       1,  1})
8264     .Case("wave_rol",  {DppCtrl::WAVE_ROL1,       1,  1})
8265     .Case("wave_shr",  {DppCtrl::WAVE_SHR1,       1,  1})
8266     .Case("wave_ror",  {DppCtrl::WAVE_ROR1,       1,  1})
8267     .Case("row_shl",   {DppCtrl::ROW_SHL0,        1, 15})
8268     .Case("row_shr",   {DppCtrl::ROW_SHR0,        1, 15})
8269     .Case("row_ror",   {DppCtrl::ROW_ROR0,        1, 15})
8270     .Case("row_share", {DppCtrl::ROW_SHARE_FIRST, 0, 15})
8271     .Case("row_xmask", {DppCtrl::ROW_XMASK_FIRST, 0, 15})
8272     .Case("row_newbcast", {DppCtrl::ROW_NEWBCAST_FIRST, 0, 15})
8273     .Default({-1, 0, 0});
8274 
8275   bool Valid;
8276   if (Check.Ctrl == -1) {
8277     Valid = (Ctrl == "row_bcast" && (Val == 15 || Val == 31));
8278     Val = (Val == 15)? DppCtrl::BCAST15 : DppCtrl::BCAST31;
8279   } else {
8280     Valid = Check.Lo <= Val && Val <= Check.Hi;
8281     Val = (Check.Lo == Check.Hi) ? Check.Ctrl : (Check.Ctrl | Val);
8282   }
8283 
8284   if (!Valid) {
8285     Error(Loc, Twine("invalid ", Ctrl) + Twine(" value"));
8286     return -1;
8287   }
8288 
8289   return Val;
8290 }
8291 
8292 OperandMatchResultTy
8293 AMDGPUAsmParser::parseDPPCtrl(OperandVector &Operands) {
8294   using namespace AMDGPU::DPP;
8295 
8296   if (!isToken(AsmToken::Identifier) ||
8297       !isSupportedDPPCtrl(getTokenStr(), Operands))
8298     return MatchOperand_NoMatch;
8299 
8300   SMLoc S = getLoc();
8301   int64_t Val = -1;
8302   StringRef Ctrl;
8303 
8304   parseId(Ctrl);
8305 
8306   if (Ctrl == "row_mirror") {
8307     Val = DppCtrl::ROW_MIRROR;
8308   } else if (Ctrl == "row_half_mirror") {
8309     Val = DppCtrl::ROW_HALF_MIRROR;
8310   } else {
8311     if (skipToken(AsmToken::Colon, "expected a colon")) {
8312       if (Ctrl == "quad_perm") {
8313         Val = parseDPPCtrlPerm();
8314       } else {
8315         Val = parseDPPCtrlSel(Ctrl);
8316       }
8317     }
8318   }
8319 
8320   if (Val == -1)
8321     return MatchOperand_ParseFail;
8322 
8323   Operands.push_back(
8324     AMDGPUOperand::CreateImm(this, Val, S, AMDGPUOperand::ImmTyDppCtrl));
8325   return MatchOperand_Success;
8326 }
8327 
8328 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultRowMask() const {
8329   return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppRowMask);
8330 }
8331 
8332 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultEndpgmImmOperands() const {
8333   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyEndpgm);
8334 }
8335 
8336 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBankMask() const {
8337   return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppBankMask);
8338 }
8339 
8340 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBoundCtrl() const {
8341   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppBoundCtrl);
8342 }
8343 
8344 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultFI() const {
8345   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppFi);
8346 }
8347 
8348 void AMDGPUAsmParser::cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8) {
8349   OptionalImmIndexMap OptionalIdx;
8350 
8351   unsigned Opc = Inst.getOpcode();
8352   bool HasModifiers =
8353       AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers) != -1;
8354   unsigned I = 1;
8355   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
8356   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
8357     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
8358   }
8359 
8360   int Fi = 0;
8361   for (unsigned E = Operands.size(); I != E; ++I) {
8362     auto TiedTo = Desc.getOperandConstraint(Inst.getNumOperands(),
8363                                             MCOI::TIED_TO);
8364     if (TiedTo != -1) {
8365       assert((unsigned)TiedTo < Inst.getNumOperands());
8366       // handle tied old or src2 for MAC instructions
8367       Inst.addOperand(Inst.getOperand(TiedTo));
8368     }
8369     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
8370     // Add the register arguments
8371     if (Op.isReg() && validateVccOperand(Op.getReg())) {
8372       // VOP2b (v_add_u32, v_sub_u32 ...) dpp use "vcc" token.
8373       // Skip it.
8374       continue;
8375     }
8376 
8377     if (IsDPP8) {
8378       if (Op.isDPP8()) {
8379         Op.addImmOperands(Inst, 1);
8380       } else if (HasModifiers &&
8381                  isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
8382         Op.addRegWithFPInputModsOperands(Inst, 2);
8383       } else if (Op.isFI()) {
8384         Fi = Op.getImm();
8385       } else if (Op.isReg()) {
8386         Op.addRegOperands(Inst, 1);
8387       } else {
8388         llvm_unreachable("Invalid operand type");
8389       }
8390     } else {
8391       if (HasModifiers &&
8392           isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
8393         Op.addRegWithFPInputModsOperands(Inst, 2);
8394       } else if (Op.isReg()) {
8395         Op.addRegOperands(Inst, 1);
8396       } else if (Op.isDPPCtrl()) {
8397         Op.addImmOperands(Inst, 1);
8398       } else if (Op.isImm()) {
8399         // Handle optional arguments
8400         OptionalIdx[Op.getImmTy()] = I;
8401       } else {
8402         llvm_unreachable("Invalid operand type");
8403       }
8404     }
8405   }
8406 
8407   if (IsDPP8) {
8408     using namespace llvm::AMDGPU::DPP;
8409     Inst.addOperand(MCOperand::createImm(Fi? DPP8_FI_1 : DPP8_FI_0));
8410   } else {
8411     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppRowMask, 0xf);
8412     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBankMask, 0xf);
8413     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBoundCtrl);
8414     if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::fi) != -1) {
8415       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppFi);
8416     }
8417   }
8418 }
8419 
8420 //===----------------------------------------------------------------------===//
8421 // sdwa
8422 //===----------------------------------------------------------------------===//
8423 
8424 OperandMatchResultTy
8425 AMDGPUAsmParser::parseSDWASel(OperandVector &Operands, StringRef Prefix,
8426                               AMDGPUOperand::ImmTy Type) {
8427   using namespace llvm::AMDGPU::SDWA;
8428 
8429   SMLoc S = getLoc();
8430   StringRef Value;
8431   OperandMatchResultTy res;
8432 
8433   SMLoc StringLoc;
8434   res = parseStringWithPrefix(Prefix, Value, StringLoc);
8435   if (res != MatchOperand_Success) {
8436     return res;
8437   }
8438 
8439   int64_t Int;
8440   Int = StringSwitch<int64_t>(Value)
8441         .Case("BYTE_0", SdwaSel::BYTE_0)
8442         .Case("BYTE_1", SdwaSel::BYTE_1)
8443         .Case("BYTE_2", SdwaSel::BYTE_2)
8444         .Case("BYTE_3", SdwaSel::BYTE_3)
8445         .Case("WORD_0", SdwaSel::WORD_0)
8446         .Case("WORD_1", SdwaSel::WORD_1)
8447         .Case("DWORD", SdwaSel::DWORD)
8448         .Default(0xffffffff);
8449 
8450   if (Int == 0xffffffff) {
8451     Error(StringLoc, "invalid " + Twine(Prefix) + " value");
8452     return MatchOperand_ParseFail;
8453   }
8454 
8455   Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, Type));
8456   return MatchOperand_Success;
8457 }
8458 
8459 OperandMatchResultTy
8460 AMDGPUAsmParser::parseSDWADstUnused(OperandVector &Operands) {
8461   using namespace llvm::AMDGPU::SDWA;
8462 
8463   SMLoc S = getLoc();
8464   StringRef Value;
8465   OperandMatchResultTy res;
8466 
8467   SMLoc StringLoc;
8468   res = parseStringWithPrefix("dst_unused", Value, StringLoc);
8469   if (res != MatchOperand_Success) {
8470     return res;
8471   }
8472 
8473   int64_t Int;
8474   Int = StringSwitch<int64_t>(Value)
8475         .Case("UNUSED_PAD", DstUnused::UNUSED_PAD)
8476         .Case("UNUSED_SEXT", DstUnused::UNUSED_SEXT)
8477         .Case("UNUSED_PRESERVE", DstUnused::UNUSED_PRESERVE)
8478         .Default(0xffffffff);
8479 
8480   if (Int == 0xffffffff) {
8481     Error(StringLoc, "invalid dst_unused value");
8482     return MatchOperand_ParseFail;
8483   }
8484 
8485   Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, AMDGPUOperand::ImmTySdwaDstUnused));
8486   return MatchOperand_Success;
8487 }
8488 
8489 void AMDGPUAsmParser::cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands) {
8490   cvtSDWA(Inst, Operands, SIInstrFlags::VOP1);
8491 }
8492 
8493 void AMDGPUAsmParser::cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands) {
8494   cvtSDWA(Inst, Operands, SIInstrFlags::VOP2);
8495 }
8496 
8497 void AMDGPUAsmParser::cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands) {
8498   cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, true, true);
8499 }
8500 
8501 void AMDGPUAsmParser::cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands) {
8502   cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, false, true);
8503 }
8504 
8505 void AMDGPUAsmParser::cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands) {
8506   cvtSDWA(Inst, Operands, SIInstrFlags::VOPC, isVI());
8507 }
8508 
8509 void AMDGPUAsmParser::cvtSDWA(MCInst &Inst, const OperandVector &Operands,
8510                               uint64_t BasicInstType,
8511                               bool SkipDstVcc,
8512                               bool SkipSrcVcc) {
8513   using namespace llvm::AMDGPU::SDWA;
8514 
8515   OptionalImmIndexMap OptionalIdx;
8516   bool SkipVcc = SkipDstVcc || SkipSrcVcc;
8517   bool SkippedVcc = false;
8518 
8519   unsigned I = 1;
8520   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
8521   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
8522     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
8523   }
8524 
8525   for (unsigned E = Operands.size(); I != E; ++I) {
8526     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
8527     if (SkipVcc && !SkippedVcc && Op.isReg() &&
8528         (Op.getReg() == AMDGPU::VCC || Op.getReg() == AMDGPU::VCC_LO)) {
8529       // VOP2b (v_add_u32, v_sub_u32 ...) sdwa use "vcc" token as dst.
8530       // Skip it if it's 2nd (e.g. v_add_i32_sdwa v1, vcc, v2, v3)
8531       // or 4th (v_addc_u32_sdwa v1, vcc, v2, v3, vcc) operand.
8532       // Skip VCC only if we didn't skip it on previous iteration.
8533       // Note that src0 and src1 occupy 2 slots each because of modifiers.
8534       if (BasicInstType == SIInstrFlags::VOP2 &&
8535           ((SkipDstVcc && Inst.getNumOperands() == 1) ||
8536            (SkipSrcVcc && Inst.getNumOperands() == 5))) {
8537         SkippedVcc = true;
8538         continue;
8539       } else if (BasicInstType == SIInstrFlags::VOPC &&
8540                  Inst.getNumOperands() == 0) {
8541         SkippedVcc = true;
8542         continue;
8543       }
8544     }
8545     if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
8546       Op.addRegOrImmWithInputModsOperands(Inst, 2);
8547     } else if (Op.isImm()) {
8548       // Handle optional arguments
8549       OptionalIdx[Op.getImmTy()] = I;
8550     } else {
8551       llvm_unreachable("Invalid operand type");
8552     }
8553     SkippedVcc = false;
8554   }
8555 
8556   if (Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx10 &&
8557       Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx9 &&
8558       Inst.getOpcode() != AMDGPU::V_NOP_sdwa_vi) {
8559     // v_nop_sdwa_sdwa_vi/gfx9 has no optional sdwa arguments
8560     switch (BasicInstType) {
8561     case SIInstrFlags::VOP1:
8562       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
8563       if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) {
8564         addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
8565       }
8566       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD);
8567       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE);
8568       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
8569       break;
8570 
8571     case SIInstrFlags::VOP2:
8572       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
8573       if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) {
8574         addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
8575       }
8576       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD);
8577       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE);
8578       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
8579       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD);
8580       break;
8581 
8582     case SIInstrFlags::VOPC:
8583       if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::clamp) != -1)
8584         addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
8585       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
8586       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD);
8587       break;
8588 
8589     default:
8590       llvm_unreachable("Invalid instruction type. Only VOP1, VOP2 and VOPC allowed");
8591     }
8592   }
8593 
8594   // special case v_mac_{f16, f32}:
8595   // it has src2 register operand that is tied to dst operand
8596   if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
8597       Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi)  {
8598     auto it = Inst.begin();
8599     std::advance(
8600       it, AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::src2));
8601     Inst.insert(it, Inst.getOperand(0)); // src2 = dst
8602   }
8603 }
8604 
8605 //===----------------------------------------------------------------------===//
8606 // mAI
8607 //===----------------------------------------------------------------------===//
8608 
8609 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBLGP() const {
8610   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyBLGP);
8611 }
8612 
8613 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultCBSZ() const {
8614   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyCBSZ);
8615 }
8616 
8617 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultABID() const {
8618   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyABID);
8619 }
8620 
8621 /// Force static initialization.
8622 extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUAsmParser() {
8623   RegisterMCAsmParser<AMDGPUAsmParser> A(getTheAMDGPUTarget());
8624   RegisterMCAsmParser<AMDGPUAsmParser> B(getTheGCNTarget());
8625 }
8626 
8627 #define GET_REGISTER_MATCHER
8628 #define GET_MATCHER_IMPLEMENTATION
8629 #define GET_MNEMONIC_SPELL_CHECKER
8630 #define GET_MNEMONIC_CHECKER
8631 #include "AMDGPUGenAsmMatcher.inc"
8632 
8633 // This function should be defined after auto-generated include so that we have
8634 // MatchClassKind enum defined
8635 unsigned AMDGPUAsmParser::validateTargetOperandClass(MCParsedAsmOperand &Op,
8636                                                      unsigned Kind) {
8637   // Tokens like "glc" would be parsed as immediate operands in ParseOperand().
8638   // But MatchInstructionImpl() expects to meet token and fails to validate
8639   // operand. This method checks if we are given immediate operand but expect to
8640   // get corresponding token.
8641   AMDGPUOperand &Operand = (AMDGPUOperand&)Op;
8642   switch (Kind) {
8643   case MCK_addr64:
8644     return Operand.isAddr64() ? Match_Success : Match_InvalidOperand;
8645   case MCK_gds:
8646     return Operand.isGDS() ? Match_Success : Match_InvalidOperand;
8647   case MCK_lds:
8648     return Operand.isLDS() ? Match_Success : Match_InvalidOperand;
8649   case MCK_idxen:
8650     return Operand.isIdxen() ? Match_Success : Match_InvalidOperand;
8651   case MCK_offen:
8652     return Operand.isOffen() ? Match_Success : Match_InvalidOperand;
8653   case MCK_SSrcB32:
8654     // When operands have expression values, they will return true for isToken,
8655     // because it is not possible to distinguish between a token and an
8656     // expression at parse time. MatchInstructionImpl() will always try to
8657     // match an operand as a token, when isToken returns true, and when the
8658     // name of the expression is not a valid token, the match will fail,
8659     // so we need to handle it here.
8660     return Operand.isSSrcB32() ? Match_Success : Match_InvalidOperand;
8661   case MCK_SSrcF32:
8662     return Operand.isSSrcF32() ? Match_Success : Match_InvalidOperand;
8663   case MCK_SoppBrTarget:
8664     return Operand.isSoppBrTarget() ? Match_Success : Match_InvalidOperand;
8665   case MCK_VReg32OrOff:
8666     return Operand.isVReg32OrOff() ? Match_Success : Match_InvalidOperand;
8667   case MCK_InterpSlot:
8668     return Operand.isInterpSlot() ? Match_Success : Match_InvalidOperand;
8669   case MCK_Attr:
8670     return Operand.isInterpAttr() ? Match_Success : Match_InvalidOperand;
8671   case MCK_AttrChan:
8672     return Operand.isAttrChan() ? Match_Success : Match_InvalidOperand;
8673   case MCK_ImmSMEMOffset:
8674     return Operand.isSMEMOffset() ? Match_Success : Match_InvalidOperand;
8675   case MCK_SReg_64:
8676   case MCK_SReg_64_XEXEC:
8677     // Null is defined as a 32-bit register but
8678     // it should also be enabled with 64-bit operands.
8679     // The following code enables it for SReg_64 operands
8680     // used as source and destination. Remaining source
8681     // operands are handled in isInlinableImm.
8682     return Operand.isNull() ? Match_Success : Match_InvalidOperand;
8683   default:
8684     return Match_InvalidOperand;
8685   }
8686 }
8687 
8688 //===----------------------------------------------------------------------===//
8689 // endpgm
8690 //===----------------------------------------------------------------------===//
8691 
8692 OperandMatchResultTy AMDGPUAsmParser::parseEndpgmOp(OperandVector &Operands) {
8693   SMLoc S = getLoc();
8694   int64_t Imm = 0;
8695 
8696   if (!parseExpr(Imm)) {
8697     // The operand is optional, if not present default to 0
8698     Imm = 0;
8699   }
8700 
8701   if (!isUInt<16>(Imm)) {
8702     Error(S, "expected a 16-bit value");
8703     return MatchOperand_ParseFail;
8704   }
8705 
8706   Operands.push_back(
8707       AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyEndpgm));
8708   return MatchOperand_Success;
8709 }
8710 
8711 bool AMDGPUOperand::isEndpgm() const { return isImmTy(ImmTyEndpgm); }
8712