1 //===- AMDGPUAsmParser.cpp - Parse SI asm to MCInst instructions ----------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "AMDKernelCodeT.h"
10 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
11 #include "MCTargetDesc/AMDGPUTargetStreamer.h"
12 #include "SIDefines.h"
13 #include "SIInstrInfo.h"
14 #include "SIRegisterInfo.h"
15 #include "TargetInfo/AMDGPUTargetInfo.h"
16 #include "Utils/AMDGPUAsmUtils.h"
17 #include "Utils/AMDGPUBaseInfo.h"
18 #include "Utils/AMDKernelCodeTUtils.h"
19 #include "llvm/ADT/APFloat.h"
20 #include "llvm/ADT/SmallBitVector.h"
21 #include "llvm/ADT/StringSet.h"
22 #include "llvm/ADT/Twine.h"
23 #include "llvm/BinaryFormat/ELF.h"
24 #include "llvm/MC/MCAsmInfo.h"
25 #include "llvm/MC/MCContext.h"
26 #include "llvm/MC/MCExpr.h"
27 #include "llvm/MC/MCInst.h"
28 #include "llvm/MC/MCParser/MCAsmLexer.h"
29 #include "llvm/MC/MCParser/MCAsmParser.h"
30 #include "llvm/MC/MCParser/MCParsedAsmOperand.h"
31 #include "llvm/MC/MCParser/MCTargetAsmParser.h"
32 #include "llvm/MC/MCSymbol.h"
33 #include "llvm/MC/TargetRegistry.h"
34 #include "llvm/Support/AMDGPUMetadata.h"
35 #include "llvm/Support/AMDHSAKernelDescriptor.h"
36 #include "llvm/Support/Casting.h"
37 #include "llvm/Support/MachineValueType.h"
38 #include "llvm/Support/TargetParser.h"
39 
40 using namespace llvm;
41 using namespace llvm::AMDGPU;
42 using namespace llvm::amdhsa;
43 
44 namespace {
45 
46 class AMDGPUAsmParser;
47 
48 enum RegisterKind { IS_UNKNOWN, IS_VGPR, IS_SGPR, IS_AGPR, IS_TTMP, IS_SPECIAL };
49 
50 //===----------------------------------------------------------------------===//
51 // Operand
52 //===----------------------------------------------------------------------===//
53 
54 class AMDGPUOperand : public MCParsedAsmOperand {
55   enum KindTy {
56     Token,
57     Immediate,
58     Register,
59     Expression
60   } Kind;
61 
62   SMLoc StartLoc, EndLoc;
63   const AMDGPUAsmParser *AsmParser;
64 
65 public:
66   AMDGPUOperand(KindTy Kind_, const AMDGPUAsmParser *AsmParser_)
67       : Kind(Kind_), AsmParser(AsmParser_) {}
68 
69   using Ptr = std::unique_ptr<AMDGPUOperand>;
70 
71   struct Modifiers {
72     bool Abs = false;
73     bool Neg = false;
74     bool Sext = false;
75 
76     bool hasFPModifiers() const { return Abs || Neg; }
77     bool hasIntModifiers() const { return Sext; }
78     bool hasModifiers() const { return hasFPModifiers() || hasIntModifiers(); }
79 
80     int64_t getFPModifiersOperand() const {
81       int64_t Operand = 0;
82       Operand |= Abs ? SISrcMods::ABS : 0u;
83       Operand |= Neg ? SISrcMods::NEG : 0u;
84       return Operand;
85     }
86 
87     int64_t getIntModifiersOperand() const {
88       int64_t Operand = 0;
89       Operand |= Sext ? SISrcMods::SEXT : 0u;
90       return Operand;
91     }
92 
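    // Pack the active modifier flags into the immediate value used for a
    // source-modifiers operand. FP (abs/neg) and integer (sext) modifiers
    // are mutually exclusive, as asserted below.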
93     int64_t getModifiersOperand() const {
94       assert(!(hasFPModifiers() && hasIntModifiers())
95            && "fp and int modifiers should not be used simultaneously");
96       if (hasFPModifiers()) {
97         return getFPModifiersOperand();
98       } else if (hasIntModifiers()) {
99         return getIntModifiersOperand();
100       } else {
101         return 0;
102       }
103     }
104 
105     friend raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods);
106   };
107 
108   enum ImmTy {
109     ImmTyNone,
110     ImmTyGDS,
111     ImmTyLDS,
112     ImmTyOffen,
113     ImmTyIdxen,
114     ImmTyAddr64,
115     ImmTyOffset,
116     ImmTyInstOffset,
117     ImmTyOffset0,
118     ImmTyOffset1,
119     ImmTyCPol,
120     ImmTySWZ,
121     ImmTyTFE,
122     ImmTyD16,
123     ImmTyClampSI,
124     ImmTyOModSI,
125     ImmTyDPP8,
126     ImmTyDppCtrl,
127     ImmTyDppRowMask,
128     ImmTyDppBankMask,
129     ImmTyDppBoundCtrl,
130     ImmTyDppFi,
131     ImmTySdwaDstSel,
132     ImmTySdwaSrc0Sel,
133     ImmTySdwaSrc1Sel,
134     ImmTySdwaDstUnused,
135     ImmTyDMask,
136     ImmTyDim,
137     ImmTyUNorm,
138     ImmTyDA,
139     ImmTyR128A16,
140     ImmTyA16,
141     ImmTyLWE,
142     ImmTyExpTgt,
143     ImmTyExpCompr,
144     ImmTyExpVM,
145     ImmTyFORMAT,
146     ImmTyHwreg,
147     ImmTyOff,
148     ImmTySendMsg,
149     ImmTyInterpSlot,
150     ImmTyInterpAttr,
151     ImmTyAttrChan,
152     ImmTyOpSel,
153     ImmTyOpSelHi,
154     ImmTyNegLo,
155     ImmTyNegHi,
156     ImmTySwizzle,
157     ImmTyGprIdxMode,
158     ImmTyHigh,
159     ImmTyBLGP,
160     ImmTyCBSZ,
161     ImmTyABID,
162     ImmTyEndpgm,
163   };
164 
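  // Records how an immediate ended up being encoded: not yet decided, as a
  // literal, or as an inline constant.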
165   enum ImmKindTy {
166     ImmKindTyNone,
167     ImmKindTyLiteral,
168     ImmKindTyConst,
169   };
170 
171 private:
172   struct TokOp {
173     const char *Data;
174     unsigned Length;
175   };
176 
177   struct ImmOp {
178     int64_t Val;
179     ImmTy Type;
180     bool IsFPImm;
181     mutable ImmKindTy Kind;
182     Modifiers Mods;
183   };
184 
185   struct RegOp {
186     unsigned RegNo;
187     Modifiers Mods;
188   };
189 
190   union {
191     TokOp Tok;
192     ImmOp Imm;
193     RegOp Reg;
194     const MCExpr *Expr;
195   };
196 
197 public:
198   bool isToken() const override {
199     if (Kind == Token)
200       return true;
201 
202     // When parsing operands, we can't always tell if something was meant to be
203     // a token, like 'gds', or an expression that references a global variable.
204     // In this case, we assume the string is an expression, and if we need to
    // interpret it as a token, then we treat the symbol name as the token.
206     return isSymbolRefExpr();
207   }
208 
209   bool isSymbolRefExpr() const {
210     return isExpr() && Expr && isa<MCSymbolRefExpr>(Expr);
211   }
212 
213   bool isImm() const override {
214     return Kind == Immediate;
215   }
216 
217   void setImmKindNone() const {
218     assert(isImm());
219     Imm.Kind = ImmKindTyNone;
220   }
221 
222   void setImmKindLiteral() const {
223     assert(isImm());
224     Imm.Kind = ImmKindTyLiteral;
225   }
226 
227   void setImmKindConst() const {
228     assert(isImm());
229     Imm.Kind = ImmKindTyConst;
230   }
231 
232   bool IsImmKindLiteral() const {
233     return isImm() && Imm.Kind == ImmKindTyLiteral;
234   }
235 
236   bool isImmKindConst() const {
237     return isImm() && Imm.Kind == ImmKindTyConst;
238   }
239 
240   bool isInlinableImm(MVT type) const;
241   bool isLiteralImm(MVT type) const;
242 
243   bool isRegKind() const {
244     return Kind == Register;
245   }
246 
247   bool isReg() const override {
248     return isRegKind() && !hasModifiers();
249   }
250 
251   bool isRegOrInline(unsigned RCID, MVT type) const {
252     return isRegClass(RCID) || isInlinableImm(type);
253   }
254 
255   bool isRegOrImmWithInputMods(unsigned RCID, MVT type) const {
256     return isRegOrInline(RCID, type) || isLiteralImm(type);
257   }
258 
259   bool isRegOrImmWithInt16InputMods() const {
260     return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i16);
261   }
262 
263   bool isRegOrImmWithInt32InputMods() const {
264     return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i32);
265   }
266 
267   bool isRegOrImmWithInt64InputMods() const {
268     return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::i64);
269   }
270 
271   bool isRegOrImmWithFP16InputMods() const {
272     return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f16);
273   }
274 
275   bool isRegOrImmWithFP32InputMods() const {
276     return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f32);
277   }
278 
279   bool isRegOrImmWithFP64InputMods() const {
280     return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::f64);
281   }
282 
283   bool isVReg() const {
284     return isRegClass(AMDGPU::VGPR_32RegClassID) ||
285            isRegClass(AMDGPU::VReg_64RegClassID) ||
286            isRegClass(AMDGPU::VReg_96RegClassID) ||
287            isRegClass(AMDGPU::VReg_128RegClassID) ||
288            isRegClass(AMDGPU::VReg_160RegClassID) ||
289            isRegClass(AMDGPU::VReg_192RegClassID) ||
290            isRegClass(AMDGPU::VReg_256RegClassID) ||
291            isRegClass(AMDGPU::VReg_512RegClassID) ||
292            isRegClass(AMDGPU::VReg_1024RegClassID);
293   }
294 
295   bool isVReg32() const {
296     return isRegClass(AMDGPU::VGPR_32RegClassID);
297   }
298 
299   bool isVReg32OrOff() const {
300     return isOff() || isVReg32();
301   }
302 
303   bool isNull() const {
304     return isRegKind() && getReg() == AMDGPU::SGPR_NULL;
305   }
306 
307   bool isVRegWithInputMods() const;
308 
309   bool isSDWAOperand(MVT type) const;
310   bool isSDWAFP16Operand() const;
311   bool isSDWAFP32Operand() const;
312   bool isSDWAInt16Operand() const;
313   bool isSDWAInt32Operand() const;
314 
315   bool isImmTy(ImmTy ImmT) const {
316     return isImm() && Imm.Type == ImmT;
317   }
318 
319   bool isImmModifier() const {
320     return isImm() && Imm.Type != ImmTyNone;
321   }
322 
323   bool isClampSI() const { return isImmTy(ImmTyClampSI); }
324   bool isOModSI() const { return isImmTy(ImmTyOModSI); }
325   bool isDMask() const { return isImmTy(ImmTyDMask); }
326   bool isDim() const { return isImmTy(ImmTyDim); }
327   bool isUNorm() const { return isImmTy(ImmTyUNorm); }
328   bool isDA() const { return isImmTy(ImmTyDA); }
329   bool isR128A16() const { return isImmTy(ImmTyR128A16); }
330   bool isGFX10A16() const { return isImmTy(ImmTyA16); }
331   bool isLWE() const { return isImmTy(ImmTyLWE); }
332   bool isOff() const { return isImmTy(ImmTyOff); }
333   bool isExpTgt() const { return isImmTy(ImmTyExpTgt); }
334   bool isExpVM() const { return isImmTy(ImmTyExpVM); }
335   bool isExpCompr() const { return isImmTy(ImmTyExpCompr); }
336   bool isOffen() const { return isImmTy(ImmTyOffen); }
337   bool isIdxen() const { return isImmTy(ImmTyIdxen); }
338   bool isAddr64() const { return isImmTy(ImmTyAddr64); }
339   bool isOffset() const { return isImmTy(ImmTyOffset) && isUInt<16>(getImm()); }
340   bool isOffset0() const { return isImmTy(ImmTyOffset0) && isUInt<8>(getImm()); }
341   bool isOffset1() const { return isImmTy(ImmTyOffset1) && isUInt<8>(getImm()); }
342 
343   bool isFlatOffset() const { return isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset); }
344   bool isGDS() const { return isImmTy(ImmTyGDS); }
345   bool isLDS() const { return isImmTy(ImmTyLDS); }
346   bool isCPol() const { return isImmTy(ImmTyCPol); }
347   bool isSWZ() const { return isImmTy(ImmTySWZ); }
348   bool isTFE() const { return isImmTy(ImmTyTFE); }
349   bool isD16() const { return isImmTy(ImmTyD16); }
350   bool isFORMAT() const { return isImmTy(ImmTyFORMAT) && isUInt<7>(getImm()); }
351   bool isBankMask() const { return isImmTy(ImmTyDppBankMask); }
352   bool isRowMask() const { return isImmTy(ImmTyDppRowMask); }
353   bool isBoundCtrl() const { return isImmTy(ImmTyDppBoundCtrl); }
354   bool isFI() const { return isImmTy(ImmTyDppFi); }
355   bool isSDWADstSel() const { return isImmTy(ImmTySdwaDstSel); }
356   bool isSDWASrc0Sel() const { return isImmTy(ImmTySdwaSrc0Sel); }
357   bool isSDWASrc1Sel() const { return isImmTy(ImmTySdwaSrc1Sel); }
358   bool isSDWADstUnused() const { return isImmTy(ImmTySdwaDstUnused); }
359   bool isInterpSlot() const { return isImmTy(ImmTyInterpSlot); }
360   bool isInterpAttr() const { return isImmTy(ImmTyInterpAttr); }
361   bool isAttrChan() const { return isImmTy(ImmTyAttrChan); }
362   bool isOpSel() const { return isImmTy(ImmTyOpSel); }
363   bool isOpSelHi() const { return isImmTy(ImmTyOpSelHi); }
364   bool isNegLo() const { return isImmTy(ImmTyNegLo); }
365   bool isNegHi() const { return isImmTy(ImmTyNegHi); }
366   bool isHigh() const { return isImmTy(ImmTyHigh); }
367 
368   bool isMod() const {
369     return isClampSI() || isOModSI();
370   }
371 
372   bool isRegOrImm() const {
373     return isReg() || isImm();
374   }
375 
376   bool isRegClass(unsigned RCID) const;
377 
378   bool isInlineValue() const;
379 
380   bool isRegOrInlineNoMods(unsigned RCID, MVT type) const {
381     return isRegOrInline(RCID, type) && !hasModifiers();
382   }
383 
384   bool isSCSrcB16() const {
385     return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i16);
386   }
387 
388   bool isSCSrcV2B16() const {
389     return isSCSrcB16();
390   }
391 
392   bool isSCSrcB32() const {
393     return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i32);
394   }
395 
396   bool isSCSrcB64() const {
397     return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::i64);
398   }
399 
400   bool isBoolReg() const;
401 
402   bool isSCSrcF16() const {
403     return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f16);
404   }
405 
406   bool isSCSrcV2F16() const {
407     return isSCSrcF16();
408   }
409 
410   bool isSCSrcF32() const {
411     return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f32);
412   }
413 
414   bool isSCSrcF64() const {
415     return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::f64);
416   }
417 
418   bool isSSrcB32() const {
419     return isSCSrcB32() || isLiteralImm(MVT::i32) || isExpr();
420   }
421 
422   bool isSSrcB16() const {
423     return isSCSrcB16() || isLiteralImm(MVT::i16);
424   }
425 
426   bool isSSrcV2B16() const {
427     llvm_unreachable("cannot happen");
428     return isSSrcB16();
429   }
430 
431   bool isSSrcB64() const {
432     // TODO: Find out how SALU supports extension of 32-bit literals to 64 bits.
433     // See isVSrc64().
434     return isSCSrcB64() || isLiteralImm(MVT::i64);
435   }
436 
437   bool isSSrcF32() const {
438     return isSCSrcB32() || isLiteralImm(MVT::f32) || isExpr();
439   }
440 
441   bool isSSrcF64() const {
442     return isSCSrcB64() || isLiteralImm(MVT::f64);
443   }
444 
445   bool isSSrcF16() const {
446     return isSCSrcB16() || isLiteralImm(MVT::f16);
447   }
448 
449   bool isSSrcV2F16() const {
450     llvm_unreachable("cannot happen");
451     return isSSrcF16();
452   }
453 
454   bool isSSrcV2FP32() const {
455     llvm_unreachable("cannot happen");
456     return isSSrcF32();
457   }
458 
459   bool isSCSrcV2FP32() const {
460     llvm_unreachable("cannot happen");
461     return isSCSrcF32();
462   }
463 
464   bool isSSrcV2INT32() const {
465     llvm_unreachable("cannot happen");
466     return isSSrcB32();
467   }
468 
469   bool isSCSrcV2INT32() const {
470     llvm_unreachable("cannot happen");
471     return isSCSrcB32();
472   }
473 
474   bool isSSrcOrLdsB32() const {
475     return isRegOrInlineNoMods(AMDGPU::SRegOrLds_32RegClassID, MVT::i32) ||
476            isLiteralImm(MVT::i32) || isExpr();
477   }
478 
479   bool isVCSrcB32() const {
480     return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i32);
481   }
482 
483   bool isVCSrcB64() const {
484     return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::i64);
485   }
486 
487   bool isVCSrcB16() const {
488     return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i16);
489   }
490 
491   bool isVCSrcV2B16() const {
492     return isVCSrcB16();
493   }
494 
495   bool isVCSrcF32() const {
496     return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f32);
497   }
498 
499   bool isVCSrcF64() const {
500     return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::f64);
501   }
502 
503   bool isVCSrcF16() const {
504     return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f16);
505   }
506 
507   bool isVCSrcV2F16() const {
508     return isVCSrcF16();
509   }
510 
511   bool isVSrcB32() const {
512     return isVCSrcF32() || isLiteralImm(MVT::i32) || isExpr();
513   }
514 
515   bool isVSrcB64() const {
516     return isVCSrcF64() || isLiteralImm(MVT::i64);
517   }
518 
519   bool isVSrcB16() const {
520     return isVCSrcB16() || isLiteralImm(MVT::i16);
521   }
522 
523   bool isVSrcV2B16() const {
524     return isVSrcB16() || isLiteralImm(MVT::v2i16);
525   }
526 
527   bool isVCSrcV2FP32() const {
528     return isVCSrcF64();
529   }
530 
531   bool isVSrcV2FP32() const {
532     return isVSrcF64() || isLiteralImm(MVT::v2f32);
533   }
534 
535   bool isVCSrcV2INT32() const {
536     return isVCSrcB64();
537   }
538 
539   bool isVSrcV2INT32() const {
540     return isVSrcB64() || isLiteralImm(MVT::v2i32);
541   }
542 
543   bool isVSrcF32() const {
544     return isVCSrcF32() || isLiteralImm(MVT::f32) || isExpr();
545   }
546 
547   bool isVSrcF64() const {
548     return isVCSrcF64() || isLiteralImm(MVT::f64);
549   }
550 
551   bool isVSrcF16() const {
552     return isVCSrcF16() || isLiteralImm(MVT::f16);
553   }
554 
555   bool isVSrcV2F16() const {
556     return isVSrcF16() || isLiteralImm(MVT::v2f16);
557   }
558 
559   bool isVISrcB32() const {
560     return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i32);
561   }
562 
563   bool isVISrcB16() const {
564     return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i16);
565   }
566 
567   bool isVISrcV2B16() const {
568     return isVISrcB16();
569   }
570 
571   bool isVISrcF32() const {
572     return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f32);
573   }
574 
575   bool isVISrcF16() const {
576     return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f16);
577   }
578 
579   bool isVISrcV2F16() const {
580     return isVISrcF16() || isVISrcB32();
581   }
582 
583   bool isVISrc_64B64() const {
584     return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i64);
585   }
586 
587   bool isVISrc_64F64() const {
588     return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f64);
589   }
590 
591   bool isVISrc_64V2FP32() const {
592     return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f32);
593   }
594 
595   bool isVISrc_64V2INT32() const {
596     return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i32);
597   }
598 
599   bool isVISrc_256B64() const {
600     return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i64);
601   }
602 
603   bool isVISrc_256F64() const {
604     return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f64);
605   }
606 
607   bool isVISrc_128B16() const {
608     return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i16);
609   }
610 
611   bool isVISrc_128V2B16() const {
612     return isVISrc_128B16();
613   }
614 
615   bool isVISrc_128B32() const {
616     return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i32);
617   }
618 
619   bool isVISrc_128F32() const {
620     return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f32);
621   }
622 
623   bool isVISrc_256V2FP32() const {
624     return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f32);
625   }
626 
627   bool isVISrc_256V2INT32() const {
628     return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i32);
629   }
630 
631   bool isVISrc_512B32() const {
632     return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i32);
633   }
634 
635   bool isVISrc_512B16() const {
636     return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i16);
637   }
638 
639   bool isVISrc_512V2B16() const {
640     return isVISrc_512B16();
641   }
642 
643   bool isVISrc_512F32() const {
644     return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f32);
645   }
646 
647   bool isVISrc_512F16() const {
648     return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f16);
649   }
650 
651   bool isVISrc_512V2F16() const {
652     return isVISrc_512F16() || isVISrc_512B32();
653   }
654 
655   bool isVISrc_1024B32() const {
656     return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i32);
657   }
658 
659   bool isVISrc_1024B16() const {
660     return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i16);
661   }
662 
663   bool isVISrc_1024V2B16() const {
664     return isVISrc_1024B16();
665   }
666 
667   bool isVISrc_1024F32() const {
668     return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f32);
669   }
670 
671   bool isVISrc_1024F16() const {
672     return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f16);
673   }
674 
675   bool isVISrc_1024V2F16() const {
676     return isVISrc_1024F16() || isVISrc_1024B32();
677   }
678 
679   bool isAISrcB32() const {
680     return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i32);
681   }
682 
683   bool isAISrcB16() const {
684     return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i16);
685   }
686 
687   bool isAISrcV2B16() const {
688     return isAISrcB16();
689   }
690 
691   bool isAISrcF32() const {
692     return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f32);
693   }
694 
695   bool isAISrcF16() const {
696     return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f16);
697   }
698 
699   bool isAISrcV2F16() const {
700     return isAISrcF16() || isAISrcB32();
701   }
702 
703   bool isAISrc_64B64() const {
704     return isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::i64);
705   }
706 
707   bool isAISrc_64F64() const {
708     return isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::f64);
709   }
710 
711   bool isAISrc_128B32() const {
712     return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i32);
713   }
714 
715   bool isAISrc_128B16() const {
716     return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i16);
717   }
718 
719   bool isAISrc_128V2B16() const {
720     return isAISrc_128B16();
721   }
722 
723   bool isAISrc_128F32() const {
724     return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f32);
725   }
726 
727   bool isAISrc_128F16() const {
728     return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f16);
729   }
730 
731   bool isAISrc_128V2F16() const {
732     return isAISrc_128F16() || isAISrc_128B32();
733   }
734 
735   bool isVISrc_128F16() const {
736     return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f16);
737   }
738 
739   bool isVISrc_128V2F16() const {
740     return isVISrc_128F16() || isVISrc_128B32();
741   }
742 
743   bool isAISrc_256B64() const {
744     return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::i64);
745   }
746 
747   bool isAISrc_256F64() const {
748     return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::f64);
749   }
750 
751   bool isAISrc_512B32() const {
752     return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i32);
753   }
754 
755   bool isAISrc_512B16() const {
756     return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i16);
757   }
758 
759   bool isAISrc_512V2B16() const {
760     return isAISrc_512B16();
761   }
762 
763   bool isAISrc_512F32() const {
764     return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f32);
765   }
766 
767   bool isAISrc_512F16() const {
768     return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f16);
769   }
770 
771   bool isAISrc_512V2F16() const {
772     return isAISrc_512F16() || isAISrc_512B32();
773   }
774 
775   bool isAISrc_1024B32() const {
776     return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i32);
777   }
778 
779   bool isAISrc_1024B16() const {
780     return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i16);
781   }
782 
783   bool isAISrc_1024V2B16() const {
784     return isAISrc_1024B16();
785   }
786 
787   bool isAISrc_1024F32() const {
788     return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f32);
789   }
790 
791   bool isAISrc_1024F16() const {
792     return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f16);
793   }
794 
795   bool isAISrc_1024V2F16() const {
796     return isAISrc_1024F16() || isAISrc_1024B32();
797   }
798 
799   bool isKImmFP32() const {
800     return isLiteralImm(MVT::f32);
801   }
802 
803   bool isKImmFP16() const {
804     return isLiteralImm(MVT::f16);
805   }
806 
807   bool isMem() const override {
808     return false;
809   }
810 
811   bool isExpr() const {
812     return Kind == Expression;
813   }
814 
815   bool isSoppBrTarget() const {
816     return isExpr() || isImm();
817   }
818 
819   bool isSWaitCnt() const;
820   bool isHwreg() const;
821   bool isSendMsg() const;
822   bool isSwizzle() const;
823   bool isSMRDOffset8() const;
824   bool isSMEMOffset() const;
825   bool isSMRDLiteralOffset() const;
826   bool isDPP8() const;
827   bool isDPPCtrl() const;
828   bool isBLGP() const;
829   bool isCBSZ() const;
830   bool isABID() const;
831   bool isGPRIdxMode() const;
832   bool isS16Imm() const;
833   bool isU16Imm() const;
834   bool isEndpgm() const;
835 
836   StringRef getExpressionAsToken() const {
837     assert(isExpr());
838     const MCSymbolRefExpr *S = cast<MCSymbolRefExpr>(Expr);
839     return S->getSymbol().getName();
840   }
841 
842   StringRef getToken() const {
843     assert(isToken());
844 
845     if (Kind == Expression)
846       return getExpressionAsToken();
847 
848     return StringRef(Tok.Data, Tok.Length);
849   }
850 
851   int64_t getImm() const {
852     assert(isImm());
853     return Imm.Val;
854   }
855 
856   void setImm(int64_t Val) {
857     assert(isImm());
858     Imm.Val = Val;
859   }
860 
861   ImmTy getImmTy() const {
862     assert(isImm());
863     return Imm.Type;
864   }
865 
866   unsigned getReg() const override {
867     assert(isRegKind());
868     return Reg.RegNo;
869   }
870 
871   SMLoc getStartLoc() const override {
872     return StartLoc;
873   }
874 
875   SMLoc getEndLoc() const override {
876     return EndLoc;
877   }
878 
879   SMRange getLocRange() const {
880     return SMRange(StartLoc, EndLoc);
881   }
882 
883   Modifiers getModifiers() const {
884     assert(isRegKind() || isImmTy(ImmTyNone));
885     return isRegKind() ? Reg.Mods : Imm.Mods;
886   }
887 
888   void setModifiers(Modifiers Mods) {
889     assert(isRegKind() || isImmTy(ImmTyNone));
890     if (isRegKind())
891       Reg.Mods = Mods;
892     else
893       Imm.Mods = Mods;
894   }
895 
896   bool hasModifiers() const {
897     return getModifiers().hasModifiers();
898   }
899 
900   bool hasFPModifiers() const {
901     return getModifiers().hasFPModifiers();
902   }
903 
904   bool hasIntModifiers() const {
905     return getModifiers().hasIntModifiers();
906   }
907 
908   uint64_t applyInputFPModifiers(uint64_t Val, unsigned Size) const;
909 
910   void addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers = true) const;
911 
912   void addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const;
913 
914   template <unsigned Bitwidth>
915   void addKImmFPOperands(MCInst &Inst, unsigned N) const;
916 
917   void addKImmFP16Operands(MCInst &Inst, unsigned N) const {
918     addKImmFPOperands<16>(Inst, N);
919   }
920 
921   void addKImmFP32Operands(MCInst &Inst, unsigned N) const {
922     addKImmFPOperands<32>(Inst, N);
923   }
924 
925   void addRegOperands(MCInst &Inst, unsigned N) const;
926 
927   void addBoolRegOperands(MCInst &Inst, unsigned N) const {
928     addRegOperands(Inst, N);
929   }
930 
931   void addRegOrImmOperands(MCInst &Inst, unsigned N) const {
932     if (isRegKind())
933       addRegOperands(Inst, N);
934     else if (isExpr())
935       Inst.addOperand(MCOperand::createExpr(Expr));
936     else
937       addImmOperands(Inst, N);
938   }
939 
940   void addRegOrImmWithInputModsOperands(MCInst &Inst, unsigned N) const {
941     Modifiers Mods = getModifiers();
942     Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
943     if (isRegKind()) {
944       addRegOperands(Inst, N);
945     } else {
946       addImmOperands(Inst, N, false);
947     }
948   }
949 
950   void addRegOrImmWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
951     assert(!hasIntModifiers());
952     addRegOrImmWithInputModsOperands(Inst, N);
953   }
954 
955   void addRegOrImmWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
956     assert(!hasFPModifiers());
957     addRegOrImmWithInputModsOperands(Inst, N);
958   }
959 
960   void addRegWithInputModsOperands(MCInst &Inst, unsigned N) const {
961     Modifiers Mods = getModifiers();
962     Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
963     assert(isRegKind());
964     addRegOperands(Inst, N);
965   }
966 
967   void addRegWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
968     assert(!hasIntModifiers());
969     addRegWithInputModsOperands(Inst, N);
970   }
971 
972   void addRegWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
973     assert(!hasFPModifiers());
974     addRegWithInputModsOperands(Inst, N);
975   }
976 
977   void addSoppBrTargetOperands(MCInst &Inst, unsigned N) const {
978     if (isImm())
979       addImmOperands(Inst, N);
980     else {
981       assert(isExpr());
982       Inst.addOperand(MCOperand::createExpr(Expr));
983     }
984   }
985 
986   static void printImmTy(raw_ostream& OS, ImmTy Type) {
987     switch (Type) {
988     case ImmTyNone: OS << "None"; break;
989     case ImmTyGDS: OS << "GDS"; break;
990     case ImmTyLDS: OS << "LDS"; break;
991     case ImmTyOffen: OS << "Offen"; break;
992     case ImmTyIdxen: OS << "Idxen"; break;
993     case ImmTyAddr64: OS << "Addr64"; break;
994     case ImmTyOffset: OS << "Offset"; break;
995     case ImmTyInstOffset: OS << "InstOffset"; break;
996     case ImmTyOffset0: OS << "Offset0"; break;
997     case ImmTyOffset1: OS << "Offset1"; break;
998     case ImmTyCPol: OS << "CPol"; break;
999     case ImmTySWZ: OS << "SWZ"; break;
1000     case ImmTyTFE: OS << "TFE"; break;
1001     case ImmTyD16: OS << "D16"; break;
1002     case ImmTyFORMAT: OS << "FORMAT"; break;
1003     case ImmTyClampSI: OS << "ClampSI"; break;
1004     case ImmTyOModSI: OS << "OModSI"; break;
1005     case ImmTyDPP8: OS << "DPP8"; break;
1006     case ImmTyDppCtrl: OS << "DppCtrl"; break;
1007     case ImmTyDppRowMask: OS << "DppRowMask"; break;
1008     case ImmTyDppBankMask: OS << "DppBankMask"; break;
1009     case ImmTyDppBoundCtrl: OS << "DppBoundCtrl"; break;
1010     case ImmTyDppFi: OS << "FI"; break;
1011     case ImmTySdwaDstSel: OS << "SdwaDstSel"; break;
1012     case ImmTySdwaSrc0Sel: OS << "SdwaSrc0Sel"; break;
1013     case ImmTySdwaSrc1Sel: OS << "SdwaSrc1Sel"; break;
1014     case ImmTySdwaDstUnused: OS << "SdwaDstUnused"; break;
1015     case ImmTyDMask: OS << "DMask"; break;
1016     case ImmTyDim: OS << "Dim"; break;
1017     case ImmTyUNorm: OS << "UNorm"; break;
1018     case ImmTyDA: OS << "DA"; break;
1019     case ImmTyR128A16: OS << "R128A16"; break;
1020     case ImmTyA16: OS << "A16"; break;
1021     case ImmTyLWE: OS << "LWE"; break;
1022     case ImmTyOff: OS << "Off"; break;
1023     case ImmTyExpTgt: OS << "ExpTgt"; break;
1024     case ImmTyExpCompr: OS << "ExpCompr"; break;
1025     case ImmTyExpVM: OS << "ExpVM"; break;
1026     case ImmTyHwreg: OS << "Hwreg"; break;
1027     case ImmTySendMsg: OS << "SendMsg"; break;
1028     case ImmTyInterpSlot: OS << "InterpSlot"; break;
1029     case ImmTyInterpAttr: OS << "InterpAttr"; break;
1030     case ImmTyAttrChan: OS << "AttrChan"; break;
1031     case ImmTyOpSel: OS << "OpSel"; break;
1032     case ImmTyOpSelHi: OS << "OpSelHi"; break;
1033     case ImmTyNegLo: OS << "NegLo"; break;
1034     case ImmTyNegHi: OS << "NegHi"; break;
1035     case ImmTySwizzle: OS << "Swizzle"; break;
1036     case ImmTyGprIdxMode: OS << "GprIdxMode"; break;
1037     case ImmTyHigh: OS << "High"; break;
1038     case ImmTyBLGP: OS << "BLGP"; break;
1039     case ImmTyCBSZ: OS << "CBSZ"; break;
1040     case ImmTyABID: OS << "ABID"; break;
1041     case ImmTyEndpgm: OS << "Endpgm"; break;
1042     }
1043   }
1044 
1045   void print(raw_ostream &OS) const override {
1046     switch (Kind) {
1047     case Register:
1048       OS << "<register " << getReg() << " mods: " << Reg.Mods << '>';
1049       break;
1050     case Immediate:
1051       OS << '<' << getImm();
1052       if (getImmTy() != ImmTyNone) {
1053         OS << " type: "; printImmTy(OS, getImmTy());
1054       }
1055       OS << " mods: " << Imm.Mods << '>';
1056       break;
1057     case Token:
1058       OS << '\'' << getToken() << '\'';
1059       break;
1060     case Expression:
1061       OS << "<expr " << *Expr << '>';
1062       break;
1063     }
1064   }
1065 
1066   static AMDGPUOperand::Ptr CreateImm(const AMDGPUAsmParser *AsmParser,
1067                                       int64_t Val, SMLoc Loc,
1068                                       ImmTy Type = ImmTyNone,
1069                                       bool IsFPImm = false) {
1070     auto Op = std::make_unique<AMDGPUOperand>(Immediate, AsmParser);
1071     Op->Imm.Val = Val;
1072     Op->Imm.IsFPImm = IsFPImm;
1073     Op->Imm.Kind = ImmKindTyNone;
1074     Op->Imm.Type = Type;
1075     Op->Imm.Mods = Modifiers();
1076     Op->StartLoc = Loc;
1077     Op->EndLoc = Loc;
1078     return Op;
1079   }
1080 
1081   static AMDGPUOperand::Ptr CreateToken(const AMDGPUAsmParser *AsmParser,
1082                                         StringRef Str, SMLoc Loc,
1083                                         bool HasExplicitEncodingSize = true) {
1084     auto Res = std::make_unique<AMDGPUOperand>(Token, AsmParser);
1085     Res->Tok.Data = Str.data();
1086     Res->Tok.Length = Str.size();
1087     Res->StartLoc = Loc;
1088     Res->EndLoc = Loc;
1089     return Res;
1090   }
1091 
1092   static AMDGPUOperand::Ptr CreateReg(const AMDGPUAsmParser *AsmParser,
1093                                       unsigned RegNo, SMLoc S,
1094                                       SMLoc E) {
1095     auto Op = std::make_unique<AMDGPUOperand>(Register, AsmParser);
1096     Op->Reg.RegNo = RegNo;
1097     Op->Reg.Mods = Modifiers();
1098     Op->StartLoc = S;
1099     Op->EndLoc = E;
1100     return Op;
1101   }
1102 
1103   static AMDGPUOperand::Ptr CreateExpr(const AMDGPUAsmParser *AsmParser,
1104                                        const class MCExpr *Expr, SMLoc S) {
1105     auto Op = std::make_unique<AMDGPUOperand>(Expression, AsmParser);
1106     Op->Expr = Expr;
1107     Op->StartLoc = S;
1108     Op->EndLoc = S;
1109     return Op;
1110   }
1111 };
1112 
1113 raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods) {
1114   OS << "abs:" << Mods.Abs << " neg: " << Mods.Neg << " sext:" << Mods.Sext;
1115   return OS;
1116 }
1117 
1118 //===----------------------------------------------------------------------===//
1119 // AsmParser
1120 //===----------------------------------------------------------------------===//
1121 
// Holds info related to the current kernel, e.g. the count of SGPRs used.
// A kernel scope begins at an .amdgpu_hsa_kernel directive and ends at the
// next .amdgpu_hsa_kernel directive or at EOF.
1125 class KernelScopeInfo {
1126   int SgprIndexUnusedMin = -1;
1127   int VgprIndexUnusedMin = -1;
1128   MCContext *Ctx = nullptr;
1129 
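  // Record that SGPR index 'i' is in use and publish the running count
  // through the .kernel.sgpr_count assembler symbol.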
1130   void usesSgprAt(int i) {
1131     if (i >= SgprIndexUnusedMin) {
1132       SgprIndexUnusedMin = ++i;
1133       if (Ctx) {
1134         MCSymbol* const Sym =
1135           Ctx->getOrCreateSymbol(Twine(".kernel.sgpr_count"));
1136         Sym->setVariableValue(MCConstantExpr::create(SgprIndexUnusedMin, *Ctx));
1137       }
1138     }
1139   }
1140 
1141   void usesVgprAt(int i) {
1142     if (i >= VgprIndexUnusedMin) {
1143       VgprIndexUnusedMin = ++i;
1144       if (Ctx) {
1145         MCSymbol* const Sym =
1146           Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count"));
1147         Sym->setVariableValue(MCConstantExpr::create(VgprIndexUnusedMin, *Ctx));
1148       }
1149     }
1150   }
1151 
1152 public:
1153   KernelScopeInfo() = default;
1154 
1155   void initialize(MCContext &Context) {
1156     Ctx = &Context;
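    // "Using" index -1 resets the counters and (re)creates the count symbols
    // with an initial value of 0.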
1157     usesSgprAt(SgprIndexUnusedMin = -1);
1158     usesVgprAt(VgprIndexUnusedMin = -1);
1159   }
1160 
1161   void usesRegister(RegisterKind RegKind, unsigned DwordRegIndex, unsigned RegWidth) {
1162     switch (RegKind) {
1163       case IS_SGPR: usesSgprAt(DwordRegIndex + RegWidth - 1); break;
1164       case IS_AGPR: // fall through
1165       case IS_VGPR: usesVgprAt(DwordRegIndex + RegWidth - 1); break;
1166       default: break;
1167     }
1168   }
1169 };
1170 
1171 class AMDGPUAsmParser : public MCTargetAsmParser {
1172   MCAsmParser &Parser;
1173 
1174   // Number of extra operands parsed after the first optional operand.
1175   // This may be necessary to skip hardcoded mandatory operands.
1176   static const unsigned MAX_OPR_LOOKAHEAD = 8;
1177 
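  // Encoding explicitly requested via a mnemonic suffix ("_e32", "_e64",
  // "_dpp", "_sdwa"), if any; see parseMnemonicSuffix().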
1178   unsigned ForcedEncodingSize = 0;
1179   bool ForcedDPP = false;
1180   bool ForcedSDWA = false;
1181   KernelScopeInfo KernelScope;
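  // Cache policy modifier bits already parsed for the current instruction,
  // used to diagnose duplicated cache policy modifiers.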
1182   unsigned CPolSeen;
1183 
1184   /// @name Auto-generated Match Functions
1185   /// {
1186 
1187 #define GET_ASSEMBLER_HEADER
1188 #include "AMDGPUGenAsmMatcher.inc"
1189 
1190   /// }
1191 
1192 private:
1193   bool ParseAsAbsoluteExpression(uint32_t &Ret);
1194   bool OutOfRangeError(SMRange Range);
  /// Calculate the VGPR/SGPR blocks required for the given target, reserved
  /// registers, and user-specified NextFreeXGPR values.
1197   ///
1198   /// \param Features [in] Target features, used for bug corrections.
1199   /// \param VCCUsed [in] Whether VCC special SGPR is reserved.
1200   /// \param FlatScrUsed [in] Whether FLAT_SCRATCH special SGPR is reserved.
1201   /// \param XNACKUsed [in] Whether XNACK_MASK special SGPR is reserved.
1202   /// \param EnableWavefrontSize32 [in] Value of ENABLE_WAVEFRONT_SIZE32 kernel
1203   /// descriptor field, if valid.
1204   /// \param NextFreeVGPR [in] Max VGPR number referenced, plus one.
1205   /// \param VGPRRange [in] Token range, used for VGPR diagnostics.
1206   /// \param NextFreeSGPR [in] Max SGPR number referenced, plus one.
1207   /// \param SGPRRange [in] Token range, used for SGPR diagnostics.
1208   /// \param VGPRBlocks [out] Result VGPR block count.
1209   /// \param SGPRBlocks [out] Result SGPR block count.
1210   bool calculateGPRBlocks(const FeatureBitset &Features, bool VCCUsed,
1211                           bool FlatScrUsed, bool XNACKUsed,
1212                           Optional<bool> EnableWavefrontSize32, unsigned NextFreeVGPR,
1213                           SMRange VGPRRange, unsigned NextFreeSGPR,
1214                           SMRange SGPRRange, unsigned &VGPRBlocks,
1215                           unsigned &SGPRBlocks);
1216   bool ParseDirectiveAMDGCNTarget();
1217   bool ParseDirectiveAMDHSAKernel();
1218   bool ParseDirectiveMajorMinor(uint32_t &Major, uint32_t &Minor);
1219   bool ParseDirectiveHSACodeObjectVersion();
1220   bool ParseDirectiveHSACodeObjectISA();
1221   bool ParseAMDKernelCodeTValue(StringRef ID, amd_kernel_code_t &Header);
1222   bool ParseDirectiveAMDKernelCodeT();
1223   // TODO: Possibly make subtargetHasRegister const.
1224   bool subtargetHasRegister(const MCRegisterInfo &MRI, unsigned RegNo);
1225   bool ParseDirectiveAMDGPUHsaKernel();
1226 
1227   bool ParseDirectiveISAVersion();
1228   bool ParseDirectiveHSAMetadata();
1229   bool ParseDirectivePALMetadataBegin();
1230   bool ParseDirectivePALMetadata();
1231   bool ParseDirectiveAMDGPULDS();
1232 
1233   /// Common code to parse out a block of text (typically YAML) between start and
1234   /// end directives.
1235   bool ParseToEndDirective(const char *AssemblerDirectiveBegin,
1236                            const char *AssemblerDirectiveEnd,
1237                            std::string &CollectString);
1238 
1239   bool AddNextRegisterToList(unsigned& Reg, unsigned& RegWidth,
1240                              RegisterKind RegKind, unsigned Reg1, SMLoc Loc);
1241   bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
1242                            unsigned &RegNum, unsigned &RegWidth,
1243                            bool RestoreOnFailure = false);
1244   bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
1245                            unsigned &RegNum, unsigned &RegWidth,
1246                            SmallVectorImpl<AsmToken> &Tokens);
1247   unsigned ParseRegularReg(RegisterKind &RegKind, unsigned &RegNum,
1248                            unsigned &RegWidth,
1249                            SmallVectorImpl<AsmToken> &Tokens);
1250   unsigned ParseSpecialReg(RegisterKind &RegKind, unsigned &RegNum,
1251                            unsigned &RegWidth,
1252                            SmallVectorImpl<AsmToken> &Tokens);
1253   unsigned ParseRegList(RegisterKind &RegKind, unsigned &RegNum,
1254                         unsigned &RegWidth, SmallVectorImpl<AsmToken> &Tokens);
1255   bool ParseRegRange(unsigned& Num, unsigned& Width);
1256   unsigned getRegularReg(RegisterKind RegKind,
1257                          unsigned RegNum,
1258                          unsigned RegWidth,
1259                          SMLoc Loc);
1260 
1261   bool isRegister();
1262   bool isRegister(const AsmToken &Token, const AsmToken &NextToken) const;
1263   Optional<StringRef> getGprCountSymbolName(RegisterKind RegKind);
1264   void initializeGprCountSymbol(RegisterKind RegKind);
1265   bool updateGprCountSymbols(RegisterKind RegKind, unsigned DwordRegIndex,
1266                              unsigned RegWidth);
1267   void cvtMubufImpl(MCInst &Inst, const OperandVector &Operands,
1268                     bool IsAtomic, bool IsLds = false);
1269   void cvtDSImpl(MCInst &Inst, const OperandVector &Operands,
1270                  bool IsGdsHardcoded);
1271 
1272 public:
1273   enum AMDGPUMatchResultTy {
1274     Match_PreferE32 = FIRST_TARGET_MATCH_RESULT_TY
1275   };
1276   enum OperandMode {
1277     OperandMode_Default,
1278     OperandMode_NSA,
1279   };
1280 
1281   using OptionalImmIndexMap = std::map<AMDGPUOperand::ImmTy, unsigned>;
1282 
1283   AMDGPUAsmParser(const MCSubtargetInfo &STI, MCAsmParser &_Parser,
1284                const MCInstrInfo &MII,
1285                const MCTargetOptions &Options)
1286       : MCTargetAsmParser(Options, STI, MII), Parser(_Parser) {
1287     MCAsmParserExtension::Initialize(Parser);
1288 
1289     if (getFeatureBits().none()) {
1290       // Set default features.
1291       copySTI().ToggleFeature("southern-islands");
1292     }
1293 
1294     setAvailableFeatures(ComputeAvailableFeatures(getFeatureBits()));
1295 
1296     {
      // TODO: make these predefined variables read-only.
      // Currently there is no suitable machinery in the core llvm-mc for this.
      // MCSymbol::isRedefinable is intended for another purpose, and
      // AsmParser::parseDirectiveSet() cannot be specialized for a specific
      // target.
1301       AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
1302       MCContext &Ctx = getContext();
1303       if (ISA.Major >= 6 && isHsaAbiVersion3AndAbove(&getSTI())) {
1304         MCSymbol *Sym =
1305             Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_number"));
1306         Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
1307         Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_minor"));
1308         Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
1309         Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_stepping"));
1310         Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
1311       } else {
1312         MCSymbol *Sym =
1313             Ctx.getOrCreateSymbol(Twine(".option.machine_version_major"));
1314         Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
1315         Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_minor"));
1316         Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
1317         Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_stepping"));
1318         Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
1319       }
1320       if (ISA.Major >= 6 && isHsaAbiVersion3AndAbove(&getSTI())) {
1321         initializeGprCountSymbol(IS_VGPR);
1322         initializeGprCountSymbol(IS_SGPR);
1323       } else
1324         KernelScope.initialize(getContext());
1325     }
1326   }
1327 
1328   bool hasMIMG_R128() const {
1329     return AMDGPU::hasMIMG_R128(getSTI());
1330   }
1331 
1332   bool hasPackedD16() const {
1333     return AMDGPU::hasPackedD16(getSTI());
1334   }
1335 
1336   bool hasGFX10A16() const {
1337     return AMDGPU::hasGFX10A16(getSTI());
1338   }
1339 
1340   bool hasG16() const { return AMDGPU::hasG16(getSTI()); }
1341 
1342   bool isSI() const {
1343     return AMDGPU::isSI(getSTI());
1344   }
1345 
1346   bool isCI() const {
1347     return AMDGPU::isCI(getSTI());
1348   }
1349 
1350   bool isVI() const {
1351     return AMDGPU::isVI(getSTI());
1352   }
1353 
1354   bool isGFX9() const {
1355     return AMDGPU::isGFX9(getSTI());
1356   }
1357 
1358   bool isGFX90A() const {
1359     return AMDGPU::isGFX90A(getSTI());
1360   }
1361 
1362   bool isGFX9Plus() const {
1363     return AMDGPU::isGFX9Plus(getSTI());
1364   }
1365 
1366   bool isGFX10() const {
1367     return AMDGPU::isGFX10(getSTI());
1368   }
1369 
1370   bool isGFX10Plus() const { return AMDGPU::isGFX10Plus(getSTI()); }
1371 
1372   bool isGFX10_BEncoding() const {
1373     return AMDGPU::isGFX10_BEncoding(getSTI());
1374   }
1375 
1376   bool hasInv2PiInlineImm() const {
1377     return getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm];
1378   }
1379 
1380   bool hasFlatOffsets() const {
1381     return getFeatureBits()[AMDGPU::FeatureFlatInstOffsets];
1382   }
1383 
1384   bool hasArchitectedFlatScratch() const {
1385     return getFeatureBits()[AMDGPU::FeatureArchitectedFlatScratch];
1386   }
1387 
1388   bool hasSGPR102_SGPR103() const {
1389     return !isVI() && !isGFX9();
1390   }
1391 
1392   bool hasSGPR104_SGPR105() const { return isGFX10Plus(); }
1393 
1394   bool hasIntClamp() const {
1395     return getFeatureBits()[AMDGPU::FeatureIntClamp];
1396   }
1397 
1398   AMDGPUTargetStreamer &getTargetStreamer() {
1399     MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer();
1400     return static_cast<AMDGPUTargetStreamer &>(TS);
1401   }
1402 
1403   const MCRegisterInfo *getMRI() const {
1404     // We need this const_cast because for some reason getContext() is not const
1405     // in MCAsmParser.
1406     return const_cast<AMDGPUAsmParser*>(this)->getContext().getRegisterInfo();
1407   }
1408 
1409   const MCInstrInfo *getMII() const {
1410     return &MII;
1411   }
1412 
1413   const FeatureBitset &getFeatureBits() const {
1414     return getSTI().getFeatureBits();
1415   }
1416 
1417   void setForcedEncodingSize(unsigned Size) { ForcedEncodingSize = Size; }
1418   void setForcedDPP(bool ForceDPP_) { ForcedDPP = ForceDPP_; }
1419   void setForcedSDWA(bool ForceSDWA_) { ForcedSDWA = ForceSDWA_; }
1420 
1421   unsigned getForcedEncodingSize() const { return ForcedEncodingSize; }
1422   bool isForcedVOP3() const { return ForcedEncodingSize == 64; }
1423   bool isForcedDPP() const { return ForcedDPP; }
1424   bool isForcedSDWA() const { return ForcedSDWA; }
1425   ArrayRef<unsigned> getMatchedVariants() const;
1426   StringRef getMatchedVariantName() const;
1427 
1428   std::unique_ptr<AMDGPUOperand> parseRegister(bool RestoreOnFailure = false);
1429   bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc,
1430                      bool RestoreOnFailure);
1431   bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override;
1432   OperandMatchResultTy tryParseRegister(unsigned &RegNo, SMLoc &StartLoc,
1433                                         SMLoc &EndLoc) override;
1434   unsigned checkTargetMatchPredicate(MCInst &Inst) override;
1435   unsigned validateTargetOperandClass(MCParsedAsmOperand &Op,
1436                                       unsigned Kind) override;
1437   bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
1438                                OperandVector &Operands, MCStreamer &Out,
1439                                uint64_t &ErrorInfo,
1440                                bool MatchingInlineAsm) override;
1441   bool ParseDirective(AsmToken DirectiveID) override;
1442   OperandMatchResultTy parseOperand(OperandVector &Operands, StringRef Mnemonic,
1443                                     OperandMode Mode = OperandMode_Default);
1444   StringRef parseMnemonicSuffix(StringRef Name);
1445   bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
1446                         SMLoc NameLoc, OperandVector &Operands) override;
1447   //bool ProcessInstruction(MCInst &Inst);
1448 
1449   OperandMatchResultTy parseIntWithPrefix(const char *Prefix, int64_t &Int);
1450 
1451   OperandMatchResultTy
1452   parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
1453                      AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
1454                      bool (*ConvertResult)(int64_t &) = nullptr);
1455 
1456   OperandMatchResultTy
1457   parseOperandArrayWithPrefix(const char *Prefix,
1458                               OperandVector &Operands,
1459                               AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
1460                               bool (*ConvertResult)(int64_t&) = nullptr);
1461 
1462   OperandMatchResultTy
1463   parseNamedBit(StringRef Name, OperandVector &Operands,
1464                 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone);
1465   OperandMatchResultTy parseCPol(OperandVector &Operands);
1466   OperandMatchResultTy parseStringWithPrefix(StringRef Prefix,
1467                                              StringRef &Value,
1468                                              SMLoc &StringLoc);
1469 
1470   bool isModifier();
1471   bool isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1472   bool isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1473   bool isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1474   bool isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const;
1475   bool parseSP3NegModifier();
1476   OperandMatchResultTy parseImm(OperandVector &Operands, bool HasSP3AbsModifier = false);
1477   OperandMatchResultTy parseReg(OperandVector &Operands);
1478   OperandMatchResultTy parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod = false);
1479   OperandMatchResultTy parseRegOrImmWithFPInputMods(OperandVector &Operands, bool AllowImm = true);
1480   OperandMatchResultTy parseRegOrImmWithIntInputMods(OperandVector &Operands, bool AllowImm = true);
1481   OperandMatchResultTy parseRegWithFPInputMods(OperandVector &Operands);
1482   OperandMatchResultTy parseRegWithIntInputMods(OperandVector &Operands);
1483   OperandMatchResultTy parseVReg32OrOff(OperandVector &Operands);
1484   OperandMatchResultTy parseDfmtNfmt(int64_t &Format);
1485   OperandMatchResultTy parseUfmt(int64_t &Format);
1486   OperandMatchResultTy parseSymbolicSplitFormat(StringRef FormatStr, SMLoc Loc, int64_t &Format);
1487   OperandMatchResultTy parseSymbolicUnifiedFormat(StringRef FormatStr, SMLoc Loc, int64_t &Format);
1488   OperandMatchResultTy parseFORMAT(OperandVector &Operands);
1489   OperandMatchResultTy parseSymbolicOrNumericFormat(int64_t &Format);
1490   OperandMatchResultTy parseNumericFormat(int64_t &Format);
1491   bool tryParseFmt(const char *Pref, int64_t MaxVal, int64_t &Val);
1492   bool matchDfmtNfmt(int64_t &Dfmt, int64_t &Nfmt, StringRef FormatStr, SMLoc Loc);
1493 
1494   void cvtDSOffset01(MCInst &Inst, const OperandVector &Operands);
1495   void cvtDS(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, false); }
1496   void cvtDSGds(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, true); }
1497   void cvtExp(MCInst &Inst, const OperandVector &Operands);
1498 
1499   bool parseCnt(int64_t &IntVal);
1500   OperandMatchResultTy parseSWaitCntOps(OperandVector &Operands);
1501   OperandMatchResultTy parseHwreg(OperandVector &Operands);
1502 
1503 private:
1504   struct OperandInfoTy {
1505     SMLoc Loc;
1506     int64_t Id;
1507     bool IsSymbolic = false;
1508     bool IsDefined = false;
1509 
1510     OperandInfoTy(int64_t Id_) : Id(Id_) {}
1511   };
1512 
1513   bool parseSendMsgBody(OperandInfoTy &Msg, OperandInfoTy &Op, OperandInfoTy &Stream);
1514   bool validateSendMsg(const OperandInfoTy &Msg,
1515                        const OperandInfoTy &Op,
1516                        const OperandInfoTy &Stream);
1517 
1518   bool parseHwregBody(OperandInfoTy &HwReg,
1519                       OperandInfoTy &Offset,
1520                       OperandInfoTy &Width);
1521   bool validateHwreg(const OperandInfoTy &HwReg,
1522                      const OperandInfoTy &Offset,
1523                      const OperandInfoTy &Width);
1524 
1525   SMLoc getFlatOffsetLoc(const OperandVector &Operands) const;
1526   SMLoc getSMEMOffsetLoc(const OperandVector &Operands) const;
1527 
1528   SMLoc getOperandLoc(std::function<bool(const AMDGPUOperand&)> Test,
1529                       const OperandVector &Operands) const;
1530   SMLoc getImmLoc(AMDGPUOperand::ImmTy Type, const OperandVector &Operands) const;
1531   SMLoc getRegLoc(unsigned Reg, const OperandVector &Operands) const;
1532   SMLoc getLitLoc(const OperandVector &Operands) const;
1533   SMLoc getConstLoc(const OperandVector &Operands) const;
1534 
1535   bool validateInstruction(const MCInst &Inst, const SMLoc &IDLoc, const OperandVector &Operands);
1536   bool validateFlatOffset(const MCInst &Inst, const OperandVector &Operands);
1537   bool validateSMEMOffset(const MCInst &Inst, const OperandVector &Operands);
1538   bool validateSOPLiteral(const MCInst &Inst) const;
1539   bool validateConstantBusLimitations(const MCInst &Inst, const OperandVector &Operands);
1540   bool validateEarlyClobberLimitations(const MCInst &Inst, const OperandVector &Operands);
1541   bool validateIntClampSupported(const MCInst &Inst);
1542   bool validateMIMGAtomicDMask(const MCInst &Inst);
1543   bool validateMIMGGatherDMask(const MCInst &Inst);
1544   bool validateMovrels(const MCInst &Inst, const OperandVector &Operands);
1545   bool validateMIMGDataSize(const MCInst &Inst);
1546   bool validateMIMGAddrSize(const MCInst &Inst);
1547   bool validateMIMGD16(const MCInst &Inst);
1548   bool validateMIMGDim(const MCInst &Inst);
1549   bool validateMIMGMSAA(const MCInst &Inst);
1550   bool validateOpSel(const MCInst &Inst);
1551   bool validateDPP(const MCInst &Inst, const OperandVector &Operands);
1552   bool validateVccOperand(unsigned Reg) const;
1553   bool validateVOPLiteral(const MCInst &Inst, const OperandVector &Operands);
1554   bool validateMAIAccWrite(const MCInst &Inst, const OperandVector &Operands);
1555   bool validateMFMA(const MCInst &Inst, const OperandVector &Operands);
1556   bool validateAGPRLdSt(const MCInst &Inst) const;
1557   bool validateVGPRAlign(const MCInst &Inst) const;
1558   bool validateGWS(const MCInst &Inst, const OperandVector &Operands);
1559   bool validateDivScale(const MCInst &Inst);
1560   bool validateCoherencyBits(const MCInst &Inst, const OperandVector &Operands,
1561                              const SMLoc &IDLoc);
1562   Optional<StringRef> validateLdsDirect(const MCInst &Inst);
1563   unsigned getConstantBusLimit(unsigned Opcode) const;
1564   bool usesConstantBus(const MCInst &Inst, unsigned OpIdx);
1565   bool isInlineConstant(const MCInst &Inst, unsigned OpIdx) const;
1566   unsigned findImplicitSGPRReadInVOP(const MCInst &Inst) const;
1567 
1568   bool isSupportedMnemo(StringRef Mnemo,
1569                         const FeatureBitset &FBS);
1570   bool isSupportedMnemo(StringRef Mnemo,
1571                         const FeatureBitset &FBS,
1572                         ArrayRef<unsigned> Variants);
1573   bool checkUnsupportedInstruction(StringRef Name, const SMLoc &IDLoc);
1574 
1575   bool isId(const StringRef Id) const;
1576   bool isId(const AsmToken &Token, const StringRef Id) const;
1577   bool isToken(const AsmToken::TokenKind Kind) const;
1578   bool trySkipId(const StringRef Id);
1579   bool trySkipId(const StringRef Pref, const StringRef Id);
1580   bool trySkipId(const StringRef Id, const AsmToken::TokenKind Kind);
1581   bool trySkipToken(const AsmToken::TokenKind Kind);
1582   bool skipToken(const AsmToken::TokenKind Kind, const StringRef ErrMsg);
1583   bool parseString(StringRef &Val, const StringRef ErrMsg = "expected a string");
1584   bool parseId(StringRef &Val, const StringRef ErrMsg = "");
1585 
1586   void peekTokens(MutableArrayRef<AsmToken> Tokens);
1587   AsmToken::TokenKind getTokenKind() const;
1588   bool parseExpr(int64_t &Imm, StringRef Expected = "");
1589   bool parseExpr(OperandVector &Operands);
1590   StringRef getTokenStr() const;
1591   AsmToken peekToken();
1592   AsmToken getToken() const;
1593   SMLoc getLoc() const;
1594   void lex();
1595 
1596 public:
1597   void onBeginOfFile() override;
1598 
1599   OperandMatchResultTy parseOptionalOperand(OperandVector &Operands);
1600   OperandMatchResultTy parseOptionalOpr(OperandVector &Operands);
1601 
1602   OperandMatchResultTy parseExpTgt(OperandVector &Operands);
1603   OperandMatchResultTy parseSendMsgOp(OperandVector &Operands);
1604   OperandMatchResultTy parseInterpSlot(OperandVector &Operands);
1605   OperandMatchResultTy parseInterpAttr(OperandVector &Operands);
1606   OperandMatchResultTy parseSOppBrTarget(OperandVector &Operands);
1607   OperandMatchResultTy parseBoolReg(OperandVector &Operands);
1608 
1609   bool parseSwizzleOperand(int64_t &Op,
1610                            const unsigned MinVal,
1611                            const unsigned MaxVal,
1612                            const StringRef ErrMsg,
1613                            SMLoc &Loc);
1614   bool parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
1615                             const unsigned MinVal,
1616                             const unsigned MaxVal,
1617                             const StringRef ErrMsg);
1618   OperandMatchResultTy parseSwizzleOp(OperandVector &Operands);
1619   bool parseSwizzleOffset(int64_t &Imm);
1620   bool parseSwizzleMacro(int64_t &Imm);
1621   bool parseSwizzleQuadPerm(int64_t &Imm);
1622   bool parseSwizzleBitmaskPerm(int64_t &Imm);
1623   bool parseSwizzleBroadcast(int64_t &Imm);
1624   bool parseSwizzleSwap(int64_t &Imm);
1625   bool parseSwizzleReverse(int64_t &Imm);
1626 
1627   OperandMatchResultTy parseGPRIdxMode(OperandVector &Operands);
1628   int64_t parseGPRIdxMacro();
1629 
1630   void cvtMubuf(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false); }
1631   void cvtMubufAtomic(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true); }
1632   void cvtMubufLds(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, true); }
1633   void cvtMtbuf(MCInst &Inst, const OperandVector &Operands);
1634 
1635   AMDGPUOperand::Ptr defaultCPol() const;
1636 
1637   AMDGPUOperand::Ptr defaultSMRDOffset8() const;
1638   AMDGPUOperand::Ptr defaultSMEMOffset() const;
1639   AMDGPUOperand::Ptr defaultSMRDLiteralOffset() const;
1640   AMDGPUOperand::Ptr defaultFlatOffset() const;
1641 
1642   OperandMatchResultTy parseOModOperand(OperandVector &Operands);
1643 
1644   void cvtVOP3(MCInst &Inst, const OperandVector &Operands,
1645                OptionalImmIndexMap &OptionalIdx);
1646   void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands);
1647   void cvtVOP3(MCInst &Inst, const OperandVector &Operands);
1648   void cvtVOP3P(MCInst &Inst, const OperandVector &Operands);
1649   void cvtVOP3P(MCInst &Inst, const OperandVector &Operands,
1650                 OptionalImmIndexMap &OptionalIdx);
1651 
1652   void cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands);
1653 
1654   void cvtMIMG(MCInst &Inst, const OperandVector &Operands,
1655                bool IsAtomic = false);
1656   void cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands);
1657   void cvtIntersectRay(MCInst &Inst, const OperandVector &Operands);
1658 
1659   void cvtSMEMAtomic(MCInst &Inst, const OperandVector &Operands);
1660 
1661   bool parseDimId(unsigned &Encoding);
1662   OperandMatchResultTy parseDim(OperandVector &Operands);
1663   OperandMatchResultTy parseDPP8(OperandVector &Operands);
1664   OperandMatchResultTy parseDPPCtrl(OperandVector &Operands);
1665   bool isSupportedDPPCtrl(StringRef Ctrl, const OperandVector &Operands);
1666   int64_t parseDPPCtrlSel(StringRef Ctrl);
1667   int64_t parseDPPCtrlPerm();
1668   AMDGPUOperand::Ptr defaultRowMask() const;
1669   AMDGPUOperand::Ptr defaultBankMask() const;
1670   AMDGPUOperand::Ptr defaultBoundCtrl() const;
1671   AMDGPUOperand::Ptr defaultFI() const;
1672   void cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8 = false);
1673   void cvtDPP8(MCInst &Inst, const OperandVector &Operands) { cvtDPP(Inst, Operands, true); }
1674 
1675   OperandMatchResultTy parseSDWASel(OperandVector &Operands, StringRef Prefix,
1676                                     AMDGPUOperand::ImmTy Type);
1677   OperandMatchResultTy parseSDWADstUnused(OperandVector &Operands);
1678   void cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands);
1679   void cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands);
1680   void cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands);
1681   void cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands);
1682   void cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands);
1683   void cvtSDWA(MCInst &Inst, const OperandVector &Operands,
1684                uint64_t BasicInstType,
1685                bool SkipDstVcc = false,
1686                bool SkipSrcVcc = false);
1687 
1688   AMDGPUOperand::Ptr defaultBLGP() const;
1689   AMDGPUOperand::Ptr defaultCBSZ() const;
1690   AMDGPUOperand::Ptr defaultABID() const;
1691 
1692   OperandMatchResultTy parseEndpgmOp(OperandVector &Operands);
1693   AMDGPUOperand::Ptr defaultEndpgmImmOperands() const;
1694 };
1695 
1696 struct OptionalOperand {
1697   const char *Name;
1698   AMDGPUOperand::ImmTy Type;
1699   bool IsBit;
1700   bool (*ConvertResult)(int64_t&);
1701 };
1702 
1703 } // end anonymous namespace
1704 
1705 // May be called with an integer type of equivalent bitwidth.
1706 static const fltSemantics *getFltSemantics(unsigned Size) {
1707   switch (Size) {
1708   case 4:
1709     return &APFloat::IEEEsingle();
1710   case 8:
1711     return &APFloat::IEEEdouble();
1712   case 2:
1713     return &APFloat::IEEEhalf();
1714   default:
1715     llvm_unreachable("unsupported fp type");
1716   }
1717 }
1718 
1719 static const fltSemantics *getFltSemantics(MVT VT) {
1720   return getFltSemantics(VT.getSizeInBits() / 8);
1721 }
1722 
1723 static const fltSemantics *getOpFltSemantics(uint8_t OperandType) {
1724   switch (OperandType) {
1725   case AMDGPU::OPERAND_REG_IMM_INT32:
1726   case AMDGPU::OPERAND_REG_IMM_FP32:
1727   case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED:
1728   case AMDGPU::OPERAND_REG_INLINE_C_INT32:
1729   case AMDGPU::OPERAND_REG_INLINE_C_FP32:
1730   case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
1731   case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
1732   case AMDGPU::OPERAND_REG_INLINE_C_V2FP32:
1733   case AMDGPU::OPERAND_REG_IMM_V2FP32:
1734   case AMDGPU::OPERAND_REG_INLINE_C_V2INT32:
1735   case AMDGPU::OPERAND_REG_IMM_V2INT32:
1736   case AMDGPU::OPERAND_KIMM32:
1737     return &APFloat::IEEEsingle();
1738   case AMDGPU::OPERAND_REG_IMM_INT64:
1739   case AMDGPU::OPERAND_REG_IMM_FP64:
1740   case AMDGPU::OPERAND_REG_INLINE_C_INT64:
1741   case AMDGPU::OPERAND_REG_INLINE_C_FP64:
1742   case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
1743     return &APFloat::IEEEdouble();
1744   case AMDGPU::OPERAND_REG_IMM_INT16:
1745   case AMDGPU::OPERAND_REG_IMM_FP16:
1746   case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED:
1747   case AMDGPU::OPERAND_REG_INLINE_C_INT16:
1748   case AMDGPU::OPERAND_REG_INLINE_C_FP16:
1749   case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
1750   case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
1751   case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
1752   case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
1753   case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
1754   case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
1755   case AMDGPU::OPERAND_REG_IMM_V2INT16:
1756   case AMDGPU::OPERAND_REG_IMM_V2FP16:
1757   case AMDGPU::OPERAND_KIMM16:
1758     return &APFloat::IEEEhalf();
1759   default:
1760     llvm_unreachable("unsupported fp type");
1761   }
1762 }
1763 
1764 //===----------------------------------------------------------------------===//
1765 // Operand
1766 //===----------------------------------------------------------------------===//
1767 
1768 static bool canLosslesslyConvertToFPType(APFloat &FPLiteral, MVT VT) {
1769   bool Lost;
1770 
1771   // Convert literal to the fp semantics of the requested type
1772   APFloat::opStatus Status = FPLiteral.convert(*getFltSemantics(VT),
1773                                                APFloat::rmNearestTiesToEven,
1774                                                &Lost);
1775   // We allow precision loss but not overflow or underflow
1776   if (Status != APFloat::opOK &&
1777       Lost &&
1778       ((Status & APFloat::opOverflow)  != 0 ||
1779        (Status & APFloat::opUnderflow) != 0)) {
1780     return false;
1781   }
1782 
1783   return true;
1784 }
1785 
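// Check whether Val fits into Size bits either as an unsigned or as a
// signed (sign-extended) value, i.e. truncation to Size bits is lossless.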
1786 static bool isSafeTruncation(int64_t Val, unsigned Size) {
1787   return isUIntN(Size, Val) || isIntN(Size, Val);
1788 }
1789 
1790 static bool isInlineableLiteralOp16(int64_t Val, MVT VT, bool HasInv2Pi) {
1791   if (VT.getScalarType() == MVT::i16) {
1792     // FP immediate values are broken.
1793     return isInlinableIntLiteral(Val);
1794   }
1795 
1796   // f16/v2f16 operands work correctly for all values.
1797   return AMDGPU::isInlinableLiteral16(Val, HasInv2Pi);
1798 }
1799 
1800 bool AMDGPUOperand::isInlinableImm(MVT type) const {
1801 
1802   // This is a hack to enable named inline values like
1803   // shared_base with both 32-bit and 64-bit operands.
1804   // Note that these values are defined as
1805   // 32-bit operands only.
1806   if (isInlineValue()) {
1807     return true;
1808   }
1809 
1810   if (!isImmTy(ImmTyNone)) {
1811     // Only plain immediates are inlinable (e.g. "clamp" attribute is not)
1812     return false;
1813   }
1814   // TODO: We should avoid using host float here. It would be better to
1815   // check the float bit values which is what a few other places do.
1816   // We've had bot failures before due to weird NaN support on mips hosts.
1817 
1818   APInt Literal(64, Imm.Val);
1819 
1820   if (Imm.IsFPImm) { // We got fp literal token
1821     if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
1822       return AMDGPU::isInlinableLiteral64(Imm.Val,
1823                                           AsmParser->hasInv2PiInlineImm());
1824     }
1825 
1826     APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
1827     if (!canLosslesslyConvertToFPType(FPLiteral, type))
1828       return false;
1829 
1830     if (type.getScalarSizeInBits() == 16) {
1831       return isInlineableLiteralOp16(
1832         static_cast<int16_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
1833         type, AsmParser->hasInv2PiInlineImm());
1834     }
1835 
1836     // Check if single precision literal is inlinable
1837     return AMDGPU::isInlinableLiteral32(
1838       static_cast<int32_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
1839       AsmParser->hasInv2PiInlineImm());
1840   }
1841 
1842   // We got int literal token.
1843   if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
1844     return AMDGPU::isInlinableLiteral64(Imm.Val,
1845                                         AsmParser->hasInv2PiInlineImm());
1846   }
1847 
1848   if (!isSafeTruncation(Imm.Val, type.getScalarSizeInBits())) {
1849     return false;
1850   }
1851 
1852   if (type.getScalarSizeInBits() == 16) {
1853     return isInlineableLiteralOp16(
1854       static_cast<int16_t>(Literal.getLoBits(16).getSExtValue()),
1855       type, AsmParser->hasInv2PiInlineImm());
1856   }
1857 
1858   return AMDGPU::isInlinableLiteral32(
1859     static_cast<int32_t>(Literal.getLoBits(32).getZExtValue()),
1860     AsmParser->hasInv2PiInlineImm());
1861 }
1862 
1863 bool AMDGPUOperand::isLiteralImm(MVT type) const {
1864   // Check that this immediate can be added as literal
1865   if (!isImmTy(ImmTyNone)) {
1866     return false;
1867   }
1868 
1869   if (!Imm.IsFPImm) {
1870     // We got int literal token.
1871 
1872     if (type == MVT::f64 && hasFPModifiers()) {
1873       // FP modifiers cannot be applied to int literals while preserving the same
1874       // semantics for VOP1/2/C and VOP3 because of integer truncation. To avoid
1875       // ambiguity, reject these cases.
1876       return false;
1877     }
1878 
1879     unsigned Size = type.getSizeInBits();
1880     if (Size == 64)
1881       Size = 32;
1882 
1883     // FIXME: 64-bit operands can zero extend, sign extend, or pad zeroes for FP
1884     // types.
1885     return isSafeTruncation(Imm.Val, Size);
1886   }
1887 
1888   // We got fp literal token
1889   if (type == MVT::f64) { // Expected 64-bit fp operand
1890     // We would set the low 32 bits of the literal to zeroes, but we accept such literals
1891     return true;
1892   }
1893 
1894   if (type == MVT::i64) { // Expected 64-bit int operand
1895     // We don't allow fp literals in 64-bit integer instructions. It is
1896     // unclear how we should encode them.
1897     return false;
1898   }
1899 
1900   // We allow fp literals with f16x2 operands assuming that the specified
1901   // literal goes into the lower half and the upper half is zero. We also
1902   // require that the literal can be losslessly converted to f16.
1903   MVT ExpectedType = (type == MVT::v2f16)? MVT::f16 :
1904                      (type == MVT::v2i16)? MVT::i16 :
1905                      (type == MVT::v2f32)? MVT::f32 : type;
1906 
1907   APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
1908   return canLosslesslyConvertToFPType(FPLiteral, ExpectedType);
1909 }
1910 
1911 bool AMDGPUOperand::isRegClass(unsigned RCID) const {
1912   return isRegKind() && AsmParser->getMRI()->getRegClass(RCID).contains(getReg());
1913 }
1914 
1915 bool AMDGPUOperand::isVRegWithInputMods() const {
1916   return isRegClass(AMDGPU::VGPR_32RegClassID) ||
1917          // GFX90A allows DPP on 64-bit operands.
1918          (isRegClass(AMDGPU::VReg_64RegClassID) &&
1919           AsmParser->getFeatureBits()[AMDGPU::Feature64BitDPP]);
1920 }
1921 
1922 bool AMDGPUOperand::isSDWAOperand(MVT type) const {
1923   if (AsmParser->isVI())
1924     return isVReg32();
1925   else if (AsmParser->isGFX9Plus())
1926     return isRegClass(AMDGPU::VS_32RegClassID) || isInlinableImm(type);
1927   else
1928     return false;
1929 }
1930 
1931 bool AMDGPUOperand::isSDWAFP16Operand() const {
1932   return isSDWAOperand(MVT::f16);
1933 }
1934 
1935 bool AMDGPUOperand::isSDWAFP32Operand() const {
1936   return isSDWAOperand(MVT::f32);
1937 }
1938 
1939 bool AMDGPUOperand::isSDWAInt16Operand() const {
1940   return isSDWAOperand(MVT::i16);
1941 }
1942 
1943 bool AMDGPUOperand::isSDWAInt32Operand() const {
1944   return isSDWAOperand(MVT::i32);
1945 }
1946 
1947 bool AMDGPUOperand::isBoolReg() const {
1948   auto FB = AsmParser->getFeatureBits();
1949   return isReg() && ((FB[AMDGPU::FeatureWavefrontSize64] && isSCSrcB64()) ||
1950                      (FB[AMDGPU::FeatureWavefrontSize32] && isSCSrcB32()));
1951 }
1952 
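// Apply the parsed 'abs'/'neg' source modifiers directly to the raw bit
// pattern of an fp literal: 'abs' clears the sign bit, 'neg' flips it.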
1953 uint64_t AMDGPUOperand::applyInputFPModifiers(uint64_t Val, unsigned Size) const
1954 {
1955   assert(isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
1956   assert(Size == 2 || Size == 4 || Size == 8);
1957 
1958   const uint64_t FpSignMask = (1ULL << (Size * 8 - 1));
1959 
1960   if (Imm.Mods.Abs) {
1961     Val &= ~FpSignMask;
1962   }
1963   if (Imm.Mods.Neg) {
1964     Val ^= FpSignMask;
1965   }
1966 
1967   return Val;
1968 }
1969 
1970 void AMDGPUOperand::addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers) const {
1971   if (AMDGPU::isSISrcOperand(AsmParser->getMII()->get(Inst.getOpcode()),
1972                              Inst.getNumOperands())) {
1973     addLiteralImmOperand(Inst, Imm.Val,
1974                          ApplyModifiers &
1975                          isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
1976   } else {
1977     assert(!isImmTy(ImmTyNone) || !hasModifiers());
1978     Inst.addOperand(MCOperand::createImm(Imm.Val));
1979     setImmKindNone();
1980   }
1981 }
1982 
1983 void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const {
1984   const auto& InstDesc = AsmParser->getMII()->get(Inst.getOpcode());
1985   auto OpNum = Inst.getNumOperands();
1986   // Check that this operand accepts literals
1987   assert(AMDGPU::isSISrcOperand(InstDesc, OpNum));
1988 
1989   if (ApplyModifiers) {
1990     assert(AMDGPU::isSISrcFPOperand(InstDesc, OpNum));
1991     const unsigned Size = Imm.IsFPImm ? sizeof(double) : getOperandSize(InstDesc, OpNum);
1992     Val = applyInputFPModifiers(Val, Size);
1993   }
1994 
1995   APInt Literal(64, Val);
1996   uint8_t OpTy = InstDesc.OpInfo[OpNum].OperandType;
1997 
1998   if (Imm.IsFPImm) { // We got fp literal token
1999     switch (OpTy) {
2000     case AMDGPU::OPERAND_REG_IMM_INT64:
2001     case AMDGPU::OPERAND_REG_IMM_FP64:
2002     case AMDGPU::OPERAND_REG_INLINE_C_INT64:
2003     case AMDGPU::OPERAND_REG_INLINE_C_FP64:
2004     case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
2005       if (AMDGPU::isInlinableLiteral64(Literal.getZExtValue(),
2006                                        AsmParser->hasInv2PiInlineImm())) {
2007         Inst.addOperand(MCOperand::createImm(Literal.getZExtValue()));
2008         setImmKindConst();
2009         return;
2010       }
2011 
2012       // Non-inlineable
2013       if (AMDGPU::isSISrcFPOperand(InstDesc, OpNum)) { // Expected 64-bit fp operand
2014         // For fp operands we check whether the low 32 bits are zero
2015         if (Literal.getLoBits(32) != 0) {
2016           const_cast<AMDGPUAsmParser *>(AsmParser)->Warning(Inst.getLoc(),
2017           "Can't encode literal as exact 64-bit floating-point operand. "
2018           "Low 32-bits will be set to zero");
2019         }
2020 
2021         Inst.addOperand(MCOperand::createImm(Literal.lshr(32).getZExtValue()));
2022         setImmKindLiteral();
2023         return;
2024       }
2025 
2026       // We don't allow fp literals in 64-bit integer instructions. It is
2027       // unclear how we should encode them. This case should be checked earlier
2028       // in predicate methods (isLiteralImm())
2029       llvm_unreachable("fp literal in 64-bit integer instruction.");
2030 
2031     case AMDGPU::OPERAND_REG_IMM_INT32:
2032     case AMDGPU::OPERAND_REG_IMM_FP32:
2033     case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED:
2034     case AMDGPU::OPERAND_REG_INLINE_C_INT32:
2035     case AMDGPU::OPERAND_REG_INLINE_C_FP32:
2036     case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
2037     case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
2038     case AMDGPU::OPERAND_REG_IMM_INT16:
2039     case AMDGPU::OPERAND_REG_IMM_FP16:
2040     case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED:
2041     case AMDGPU::OPERAND_REG_INLINE_C_INT16:
2042     case AMDGPU::OPERAND_REG_INLINE_C_FP16:
2043     case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
2044     case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
2045     case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
2046     case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
2047     case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
2048     case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
2049     case AMDGPU::OPERAND_REG_IMM_V2INT16:
2050     case AMDGPU::OPERAND_REG_IMM_V2FP16:
2051     case AMDGPU::OPERAND_REG_INLINE_C_V2FP32:
2052     case AMDGPU::OPERAND_REG_IMM_V2FP32:
2053     case AMDGPU::OPERAND_REG_INLINE_C_V2INT32:
2054     case AMDGPU::OPERAND_REG_IMM_V2INT32:
2055     case AMDGPU::OPERAND_KIMM32:
2056     case AMDGPU::OPERAND_KIMM16: {
2057       bool lost;
2058       APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
2059       // Convert literal to the fp semantics of this operand type
2060       FPLiteral.convert(*getOpFltSemantics(OpTy),
2061                         APFloat::rmNearestTiesToEven, &lost);
2062       // We allow precision loss but not overflow or underflow. This should be
2063       // checked earlier in isLiteralImm()
2064 
2065       uint64_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue();
2066       Inst.addOperand(MCOperand::createImm(ImmVal));
2067       setImmKindLiteral();
2068       return;
2069     }
2070     default:
2071       llvm_unreachable("invalid operand size");
2072     }
2073 
2074     return;
2075   }
2076 
2077   // We got int literal token.
2078   // Only sign extend inline immediates.
2079   switch (OpTy) {
2080   case AMDGPU::OPERAND_REG_IMM_INT32:
2081   case AMDGPU::OPERAND_REG_IMM_FP32:
2082   case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED:
2083   case AMDGPU::OPERAND_REG_INLINE_C_INT32:
2084   case AMDGPU::OPERAND_REG_INLINE_C_FP32:
2085   case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
2086   case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
2087   case AMDGPU::OPERAND_REG_IMM_V2INT16:
2088   case AMDGPU::OPERAND_REG_IMM_V2FP16:
2089   case AMDGPU::OPERAND_REG_IMM_V2FP32:
2090   case AMDGPU::OPERAND_REG_INLINE_C_V2FP32:
2091   case AMDGPU::OPERAND_REG_IMM_V2INT32:
2092   case AMDGPU::OPERAND_REG_INLINE_C_V2INT32:
2093     if (isSafeTruncation(Val, 32) &&
2094         AMDGPU::isInlinableLiteral32(static_cast<int32_t>(Val),
2095                                      AsmParser->hasInv2PiInlineImm())) {
2096       Inst.addOperand(MCOperand::createImm(Val));
2097       setImmKindConst();
2098       return;
2099     }
2100 
2101     Inst.addOperand(MCOperand::createImm(Val & 0xffffffff));
2102     setImmKindLiteral();
2103     return;
2104 
2105   case AMDGPU::OPERAND_REG_IMM_INT64:
2106   case AMDGPU::OPERAND_REG_IMM_FP64:
2107   case AMDGPU::OPERAND_REG_INLINE_C_INT64:
2108   case AMDGPU::OPERAND_REG_INLINE_C_FP64:
2109   case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
2110     if (AMDGPU::isInlinableLiteral64(Val, AsmParser->hasInv2PiInlineImm())) {
2111       Inst.addOperand(MCOperand::createImm(Val));
2112       setImmKindConst();
2113       return;
2114     }
2115 
2116     Inst.addOperand(MCOperand::createImm(Lo_32(Val)));
2117     setImmKindLiteral();
2118     return;
2119 
2120   case AMDGPU::OPERAND_REG_IMM_INT16:
2121   case AMDGPU::OPERAND_REG_IMM_FP16:
2122   case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED:
2123   case AMDGPU::OPERAND_REG_INLINE_C_INT16:
2124   case AMDGPU::OPERAND_REG_INLINE_C_FP16:
2125   case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
2126   case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
2127     if (isSafeTruncation(Val, 16) &&
2128         AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val),
2129                                      AsmParser->hasInv2PiInlineImm())) {
2130       Inst.addOperand(MCOperand::createImm(Val));
2131       setImmKindConst();
2132       return;
2133     }
2134 
2135     Inst.addOperand(MCOperand::createImm(Val & 0xffff));
2136     setImmKindLiteral();
2137     return;
2138 
2139   case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
2140   case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
2141   case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
2142   case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: {
2143     assert(isSafeTruncation(Val, 16));
2144     assert(AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val),
2145                                         AsmParser->hasInv2PiInlineImm()));
2146 
2147     Inst.addOperand(MCOperand::createImm(Val));
2148     return;
2149   }
2150   case AMDGPU::OPERAND_KIMM32:
2151     Inst.addOperand(MCOperand::createImm(Literal.getLoBits(32).getZExtValue()));
2152     setImmKindNone();
2153     return;
2154   case AMDGPU::OPERAND_KIMM16:
2155     Inst.addOperand(MCOperand::createImm(Literal.getLoBits(16).getZExtValue()));
2156     setImmKindNone();
2157     return;
2158   default:
2159     llvm_unreachable("invalid operand size");
2160   }
2161 }
2162 
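// Add a KImm operand of the given bit width. Integer literal tokens are
// truncated to Bitwidth bits; fp literal tokens are first converted from
// double to the fp format of the corresponding width and encoded as bits.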
2163 template <unsigned Bitwidth>
2164 void AMDGPUOperand::addKImmFPOperands(MCInst &Inst, unsigned N) const {
2165   APInt Literal(64, Imm.Val);
2166   setImmKindNone();
2167 
2168   if (!Imm.IsFPImm) {
2169     // We got int literal token.
2170     Inst.addOperand(MCOperand::createImm(Literal.getLoBits(Bitwidth).getZExtValue()));
2171     return;
2172   }
2173 
2174   bool Lost;
2175   APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
2176   FPLiteral.convert(*getFltSemantics(Bitwidth / 8),
2177                     APFloat::rmNearestTiesToEven, &Lost);
2178   Inst.addOperand(MCOperand::createImm(FPLiteral.bitcastToAPInt().getZExtValue()));
2179 }
2180 
2181 void AMDGPUOperand::addRegOperands(MCInst &Inst, unsigned N) const {
2182   Inst.addOperand(MCOperand::createReg(AMDGPU::getMCReg(getReg(), AsmParser->getSTI())));
2183 }
2184 
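// Registers that denote named inline values (e.g. shared_base, vccz, null)
// rather than ordinary register operands.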
2185 static bool isInlineValue(unsigned Reg) {
2186   switch (Reg) {
2187   case AMDGPU::SRC_SHARED_BASE:
2188   case AMDGPU::SRC_SHARED_LIMIT:
2189   case AMDGPU::SRC_PRIVATE_BASE:
2190   case AMDGPU::SRC_PRIVATE_LIMIT:
2191   case AMDGPU::SRC_POPS_EXITING_WAVE_ID:
2192     return true;
2193   case AMDGPU::SRC_VCCZ:
2194   case AMDGPU::SRC_EXECZ:
2195   case AMDGPU::SRC_SCC:
2196     return true;
2197   case AMDGPU::SGPR_NULL:
2198     return true;
2199   default:
2200     return false;
2201   }
2202 }
2203 
2204 bool AMDGPUOperand::isInlineValue() const {
2205   return isRegKind() && ::isInlineValue(getReg());
2206 }
2207 
2208 //===----------------------------------------------------------------------===//
2209 // AsmParser
2210 //===----------------------------------------------------------------------===//
2211 
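// Map a register kind and width (in 32-bit registers) to the matching
// register class ID, or -1 if no class of that width exists for the kind.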
2212 static int getRegClass(RegisterKind Is, unsigned RegWidth) {
2213   if (Is == IS_VGPR) {
2214     switch (RegWidth) {
2215       default: return -1;
2216       case 1: return AMDGPU::VGPR_32RegClassID;
2217       case 2: return AMDGPU::VReg_64RegClassID;
2218       case 3: return AMDGPU::VReg_96RegClassID;
2219       case 4: return AMDGPU::VReg_128RegClassID;
2220       case 5: return AMDGPU::VReg_160RegClassID;
2221       case 6: return AMDGPU::VReg_192RegClassID;
2222       case 7: return AMDGPU::VReg_224RegClassID;
2223       case 8: return AMDGPU::VReg_256RegClassID;
2224       case 16: return AMDGPU::VReg_512RegClassID;
2225       case 32: return AMDGPU::VReg_1024RegClassID;
2226     }
2227   } else if (Is == IS_TTMP) {
2228     switch (RegWidth) {
2229       default: return -1;
2230       case 1: return AMDGPU::TTMP_32RegClassID;
2231       case 2: return AMDGPU::TTMP_64RegClassID;
2232       case 4: return AMDGPU::TTMP_128RegClassID;
2233       case 8: return AMDGPU::TTMP_256RegClassID;
2234       case 16: return AMDGPU::TTMP_512RegClassID;
2235     }
2236   } else if (Is == IS_SGPR) {
2237     switch (RegWidth) {
2238       default: return -1;
2239       case 1: return AMDGPU::SGPR_32RegClassID;
2240       case 2: return AMDGPU::SGPR_64RegClassID;
2241       case 3: return AMDGPU::SGPR_96RegClassID;
2242       case 4: return AMDGPU::SGPR_128RegClassID;
2243       case 5: return AMDGPU::SGPR_160RegClassID;
2244       case 6: return AMDGPU::SGPR_192RegClassID;
2245       case 7: return AMDGPU::SGPR_224RegClassID;
2246       case 8: return AMDGPU::SGPR_256RegClassID;
2247       case 16: return AMDGPU::SGPR_512RegClassID;
2248     }
2249   } else if (Is == IS_AGPR) {
2250     switch (RegWidth) {
2251       default: return -1;
2252       case 1: return AMDGPU::AGPR_32RegClassID;
2253       case 2: return AMDGPU::AReg_64RegClassID;
2254       case 3: return AMDGPU::AReg_96RegClassID;
2255       case 4: return AMDGPU::AReg_128RegClassID;
2256       case 5: return AMDGPU::AReg_160RegClassID;
2257       case 6: return AMDGPU::AReg_192RegClassID;
2258       case 7: return AMDGPU::AReg_224RegClassID;
2259       case 8: return AMDGPU::AReg_256RegClassID;
2260       case 16: return AMDGPU::AReg_512RegClassID;
2261       case 32: return AMDGPU::AReg_1024RegClassID;
2262     }
2263   }
2264   return -1;
2265 }
2266 
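// Map special register names and their "src_"-prefixed aliases to MC
// registers; returns AMDGPU::NoRegister for unknown names.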
2267 static unsigned getSpecialRegForName(StringRef RegName) {
2268   return StringSwitch<unsigned>(RegName)
2269     .Case("exec", AMDGPU::EXEC)
2270     .Case("vcc", AMDGPU::VCC)
2271     .Case("flat_scratch", AMDGPU::FLAT_SCR)
2272     .Case("xnack_mask", AMDGPU::XNACK_MASK)
2273     .Case("shared_base", AMDGPU::SRC_SHARED_BASE)
2274     .Case("src_shared_base", AMDGPU::SRC_SHARED_BASE)
2275     .Case("shared_limit", AMDGPU::SRC_SHARED_LIMIT)
2276     .Case("src_shared_limit", AMDGPU::SRC_SHARED_LIMIT)
2277     .Case("private_base", AMDGPU::SRC_PRIVATE_BASE)
2278     .Case("src_private_base", AMDGPU::SRC_PRIVATE_BASE)
2279     .Case("private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
2280     .Case("src_private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
2281     .Case("pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
2282     .Case("src_pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
2283     .Case("lds_direct", AMDGPU::LDS_DIRECT)
2284     .Case("src_lds_direct", AMDGPU::LDS_DIRECT)
2285     .Case("m0", AMDGPU::M0)
2286     .Case("vccz", AMDGPU::SRC_VCCZ)
2287     .Case("src_vccz", AMDGPU::SRC_VCCZ)
2288     .Case("execz", AMDGPU::SRC_EXECZ)
2289     .Case("src_execz", AMDGPU::SRC_EXECZ)
2290     .Case("scc", AMDGPU::SRC_SCC)
2291     .Case("src_scc", AMDGPU::SRC_SCC)
2292     .Case("tba", AMDGPU::TBA)
2293     .Case("tma", AMDGPU::TMA)
2294     .Case("flat_scratch_lo", AMDGPU::FLAT_SCR_LO)
2295     .Case("flat_scratch_hi", AMDGPU::FLAT_SCR_HI)
2296     .Case("xnack_mask_lo", AMDGPU::XNACK_MASK_LO)
2297     .Case("xnack_mask_hi", AMDGPU::XNACK_MASK_HI)
2298     .Case("vcc_lo", AMDGPU::VCC_LO)
2299     .Case("vcc_hi", AMDGPU::VCC_HI)
2300     .Case("exec_lo", AMDGPU::EXEC_LO)
2301     .Case("exec_hi", AMDGPU::EXEC_HI)
2302     .Case("tma_lo", AMDGPU::TMA_LO)
2303     .Case("tma_hi", AMDGPU::TMA_HI)
2304     .Case("tba_lo", AMDGPU::TBA_LO)
2305     .Case("tba_hi", AMDGPU::TBA_HI)
2306     .Case("pc", AMDGPU::PC_REG)
2307     .Case("null", AMDGPU::SGPR_NULL)
2308     .Default(AMDGPU::NoRegister);
2309 }
2310 
2311 bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
2312                                     SMLoc &EndLoc, bool RestoreOnFailure) {
2313   auto R = parseRegister();
2314   if (!R) return true;
2315   assert(R->isReg());
2316   RegNo = R->getReg();
2317   StartLoc = R->getStartLoc();
2318   EndLoc = R->getEndLoc();
2319   return false;
2320 }
2321 
2322 bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
2323                                     SMLoc &EndLoc) {
2324   return ParseRegister(RegNo, StartLoc, EndLoc, /*RestoreOnFailure=*/false);
2325 }
2326 
2327 OperandMatchResultTy AMDGPUAsmParser::tryParseRegister(unsigned &RegNo,
2328                                                        SMLoc &StartLoc,
2329                                                        SMLoc &EndLoc) {
2330   bool Result =
2331       ParseRegister(RegNo, StartLoc, EndLoc, /*RestoreOnFailure=*/true);
2332   bool PendingErrors = getParser().hasPendingError();
2333   getParser().clearPendingErrors();
2334   if (PendingErrors)
2335     return MatchOperand_ParseFail;
2336   if (Result)
2337     return MatchOperand_NoMatch;
2338   return MatchOperand_Success;
2339 }
2340 
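// Fold the next register of a bracketed list (e.g. [s0,s1,s2,s3]) into the
// accumulated register and width. Special register pairs such as
// exec_lo/exec_hi combine into their 64-bit aliases; regular registers must
// have consecutive indices.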
2341 bool AMDGPUAsmParser::AddNextRegisterToList(unsigned &Reg, unsigned &RegWidth,
2342                                             RegisterKind RegKind, unsigned Reg1,
2343                                             SMLoc Loc) {
2344   switch (RegKind) {
2345   case IS_SPECIAL:
2346     if (Reg == AMDGPU::EXEC_LO && Reg1 == AMDGPU::EXEC_HI) {
2347       Reg = AMDGPU::EXEC;
2348       RegWidth = 2;
2349       return true;
2350     }
2351     if (Reg == AMDGPU::FLAT_SCR_LO && Reg1 == AMDGPU::FLAT_SCR_HI) {
2352       Reg = AMDGPU::FLAT_SCR;
2353       RegWidth = 2;
2354       return true;
2355     }
2356     if (Reg == AMDGPU::XNACK_MASK_LO && Reg1 == AMDGPU::XNACK_MASK_HI) {
2357       Reg = AMDGPU::XNACK_MASK;
2358       RegWidth = 2;
2359       return true;
2360     }
2361     if (Reg == AMDGPU::VCC_LO && Reg1 == AMDGPU::VCC_HI) {
2362       Reg = AMDGPU::VCC;
2363       RegWidth = 2;
2364       return true;
2365     }
2366     if (Reg == AMDGPU::TBA_LO && Reg1 == AMDGPU::TBA_HI) {
2367       Reg = AMDGPU::TBA;
2368       RegWidth = 2;
2369       return true;
2370     }
2371     if (Reg == AMDGPU::TMA_LO && Reg1 == AMDGPU::TMA_HI) {
2372       Reg = AMDGPU::TMA;
2373       RegWidth = 2;
2374       return true;
2375     }
2376     Error(Loc, "register does not fit in the list");
2377     return false;
2378   case IS_VGPR:
2379   case IS_SGPR:
2380   case IS_AGPR:
2381   case IS_TTMP:
2382     if (Reg1 != Reg + RegWidth) {
2383       Error(Loc, "registers in a list must have consecutive indices");
2384       return false;
2385     }
2386     RegWidth++;
2387     return true;
2388   default:
2389     llvm_unreachable("unexpected register kind");
2390   }
2391 }
2392 
2393 struct RegInfo {
2394   StringLiteral Name;
2395   RegisterKind Kind;
2396 };
2397 
2398 static constexpr RegInfo RegularRegisters[] = {
2399   {{"v"},    IS_VGPR},
2400   {{"s"},    IS_SGPR},
2401   {{"ttmp"}, IS_TTMP},
2402   {{"acc"},  IS_AGPR},
2403   {{"a"},    IS_AGPR},
2404 };
2405 
2406 static bool isRegularReg(RegisterKind Kind) {
2407   return Kind == IS_VGPR ||
2408          Kind == IS_SGPR ||
2409          Kind == IS_TTMP ||
2410          Kind == IS_AGPR;
2411 }
2412 
2413 static const RegInfo* getRegularRegInfo(StringRef Str) {
2414   for (const RegInfo &Reg : RegularRegisters)
2415     if (Str.startswith(Reg.Name))
2416       return &Reg;
2417   return nullptr;
2418 }
2419 
2420 static bool getRegNum(StringRef Str, unsigned& Num) {
2421   return !Str.getAsInteger(10, Num);
2422 }
2423 
2424 bool
2425 AMDGPUAsmParser::isRegister(const AsmToken &Token,
2426                             const AsmToken &NextToken) const {
2427 
2428   // A list of consecutive registers: [s0,s1,s2,s3]
2429   if (Token.is(AsmToken::LBrac))
2430     return true;
2431 
2432   if (!Token.is(AsmToken::Identifier))
2433     return false;
2434 
2435   // A single register like s0 or a range of registers like s[0:1]
2436 
2437   StringRef Str = Token.getString();
2438   const RegInfo *Reg = getRegularRegInfo(Str);
2439   if (Reg) {
2440     StringRef RegName = Reg->Name;
2441     StringRef RegSuffix = Str.substr(RegName.size());
2442     if (!RegSuffix.empty()) {
2443       unsigned Num;
2444       // A single register with an index: rXX
2445       if (getRegNum(RegSuffix, Num))
2446         return true;
2447     } else {
2448       // A range of registers: r[XX:YY].
2449       if (NextToken.is(AsmToken::LBrac))
2450         return true;
2451     }
2452   }
2453 
2454   return getSpecialRegForName(Str) != AMDGPU::NoRegister;
2455 }
2456 
2457 bool
2458 AMDGPUAsmParser::isRegister()
2459 {
2460   return isRegister(getToken(), peekToken());
2461 }
2462 
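// Translate a parsed register kind, first index and width into an MC
// register, enforcing the SGPR/TTMP alignment rules; e.g. s[4:7]
// (RegNum = 4, RegWidth = 4) maps to index 1 of the SGPR_128 class.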
2463 unsigned
2464 AMDGPUAsmParser::getRegularReg(RegisterKind RegKind,
2465                                unsigned RegNum,
2466                                unsigned RegWidth,
2467                                SMLoc Loc) {
2468 
2469   assert(isRegularReg(RegKind));
2470 
2471   unsigned AlignSize = 1;
2472   if (RegKind == IS_SGPR || RegKind == IS_TTMP) {
2473     // SGPR and TTMP registers must be aligned.
2474     // Max required alignment is 4 dwords.
2475     AlignSize = std::min(RegWidth, 4u);
2476   }
2477 
2478   if (RegNum % AlignSize != 0) {
2479     Error(Loc, "invalid register alignment");
2480     return AMDGPU::NoRegister;
2481   }
2482 
2483   unsigned RegIdx = RegNum / AlignSize;
2484   int RCID = getRegClass(RegKind, RegWidth);
2485   if (RCID == -1) {
2486     Error(Loc, "invalid or unsupported register size");
2487     return AMDGPU::NoRegister;
2488   }
2489 
2490   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
2491   const MCRegisterClass RC = TRI->getRegClass(RCID);
2492   if (RegIdx >= RC.getNumRegs()) {
2493     Error(Loc, "register index is out of range");
2494     return AMDGPU::NoRegister;
2495   }
2496 
2497   return RC.getRegister(RegIdx);
2498 }
2499 
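// Parse a bracketed register index or range such as [0] or [0:3], returning
// the first index and the number of registers it covers.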
2500 bool
2501 AMDGPUAsmParser::ParseRegRange(unsigned& Num, unsigned& Width) {
2502   int64_t RegLo, RegHi;
2503   if (!skipToken(AsmToken::LBrac, "missing register index"))
2504     return false;
2505 
2506   SMLoc FirstIdxLoc = getLoc();
2507   SMLoc SecondIdxLoc;
2508 
2509   if (!parseExpr(RegLo))
2510     return false;
2511 
2512   if (trySkipToken(AsmToken::Colon)) {
2513     SecondIdxLoc = getLoc();
2514     if (!parseExpr(RegHi))
2515       return false;
2516   } else {
2517     RegHi = RegLo;
2518   }
2519 
2520   if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
2521     return false;
2522 
2523   if (!isUInt<32>(RegLo)) {
2524     Error(FirstIdxLoc, "invalid register index");
2525     return false;
2526   }
2527 
2528   if (!isUInt<32>(RegHi)) {
2529     Error(SecondIdxLoc, "invalid register index");
2530     return false;
2531   }
2532 
2533   if (RegLo > RegHi) {
2534     Error(FirstIdxLoc, "first register index should not exceed second index");
2535     return false;
2536   }
2537 
2538   Num = static_cast<unsigned>(RegLo);
2539   Width = (RegHi - RegLo) + 1;
2540   return true;
2541 }
2542 
2543 unsigned AMDGPUAsmParser::ParseSpecialReg(RegisterKind &RegKind,
2544                                           unsigned &RegNum, unsigned &RegWidth,
2545                                           SmallVectorImpl<AsmToken> &Tokens) {
2546   assert(isToken(AsmToken::Identifier));
2547   unsigned Reg = getSpecialRegForName(getTokenStr());
2548   if (Reg) {
2549     RegNum = 0;
2550     RegWidth = 1;
2551     RegKind = IS_SPECIAL;
2552     Tokens.push_back(getToken());
2553     lex(); // skip register name
2554   }
2555   return Reg;
2556 }
2557 
2558 unsigned AMDGPUAsmParser::ParseRegularReg(RegisterKind &RegKind,
2559                                           unsigned &RegNum, unsigned &RegWidth,
2560                                           SmallVectorImpl<AsmToken> &Tokens) {
2561   assert(isToken(AsmToken::Identifier));
2562   StringRef RegName = getTokenStr();
2563   auto Loc = getLoc();
2564 
2565   const RegInfo *RI = getRegularRegInfo(RegName);
2566   if (!RI) {
2567     Error(Loc, "invalid register name");
2568     return AMDGPU::NoRegister;
2569   }
2570 
2571   Tokens.push_back(getToken());
2572   lex(); // skip register name
2573 
2574   RegKind = RI->Kind;
2575   StringRef RegSuffix = RegName.substr(RI->Name.size());
2576   if (!RegSuffix.empty()) {
2577     // Single 32-bit register: vXX.
2578     if (!getRegNum(RegSuffix, RegNum)) {
2579       Error(Loc, "invalid register index");
2580       return AMDGPU::NoRegister;
2581     }
2582     RegWidth = 1;
2583   } else {
2584     // Range of registers: v[XX:YY]. ":YY" is optional.
2585     if (!ParseRegRange(RegNum, RegWidth))
2586       return AMDGPU::NoRegister;
2587   }
2588 
2589   return getRegularReg(RegKind, RegNum, RegWidth, Loc);
2590 }
2591 
2592 unsigned AMDGPUAsmParser::ParseRegList(RegisterKind &RegKind, unsigned &RegNum,
2593                                        unsigned &RegWidth,
2594                                        SmallVectorImpl<AsmToken> &Tokens) {
2595   unsigned Reg = AMDGPU::NoRegister;
2596   auto ListLoc = getLoc();
2597 
2598   if (!skipToken(AsmToken::LBrac,
2599                  "expected a register or a list of registers")) {
2600     return AMDGPU::NoRegister;
2601   }
2602 
2603   // List of consecutive registers, e.g.: [s0,s1,s2,s3]
2604 
2605   auto Loc = getLoc();
2606   if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth))
2607     return AMDGPU::NoRegister;
2608   if (RegWidth != 1) {
2609     Error(Loc, "expected a single 32-bit register");
2610     return AMDGPU::NoRegister;
2611   }
2612 
2613   for (; trySkipToken(AsmToken::Comma); ) {
2614     RegisterKind NextRegKind;
2615     unsigned NextReg, NextRegNum, NextRegWidth;
2616     Loc = getLoc();
2617 
2618     if (!ParseAMDGPURegister(NextRegKind, NextReg,
2619                              NextRegNum, NextRegWidth,
2620                              Tokens)) {
2621       return AMDGPU::NoRegister;
2622     }
2623     if (NextRegWidth != 1) {
2624       Error(Loc, "expected a single 32-bit register");
2625       return AMDGPU::NoRegister;
2626     }
2627     if (NextRegKind != RegKind) {
2628       Error(Loc, "registers in a list must be of the same kind");
2629       return AMDGPU::NoRegister;
2630     }
2631     if (!AddNextRegisterToList(Reg, RegWidth, RegKind, NextReg, Loc))
2632       return AMDGPU::NoRegister;
2633   }
2634 
2635   if (!skipToken(AsmToken::RBrac,
2636                  "expected a comma or a closing square bracket")) {
2637     return AMDGPU::NoRegister;
2638   }
2639 
2640   if (isRegularReg(RegKind))
2641     Reg = getRegularReg(RegKind, RegNum, RegWidth, ListLoc);
2642 
2643   return Reg;
2644 }
2645 
2646 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
2647                                           unsigned &RegNum, unsigned &RegWidth,
2648                                           SmallVectorImpl<AsmToken> &Tokens) {
2649   auto Loc = getLoc();
2650   Reg = AMDGPU::NoRegister;
2651 
2652   if (isToken(AsmToken::Identifier)) {
2653     Reg = ParseSpecialReg(RegKind, RegNum, RegWidth, Tokens);
2654     if (Reg == AMDGPU::NoRegister)
2655       Reg = ParseRegularReg(RegKind, RegNum, RegWidth, Tokens);
2656   } else {
2657     Reg = ParseRegList(RegKind, RegNum, RegWidth, Tokens);
2658   }
2659 
2660   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
2661   if (Reg == AMDGPU::NoRegister) {
2662     assert(Parser.hasPendingError());
2663     return false;
2664   }
2665 
2666   if (!subtargetHasRegister(*TRI, Reg)) {
2667     if (Reg == AMDGPU::SGPR_NULL) {
2668       Error(Loc, "'null' operand is not supported on this GPU");
2669     } else {
2670       Error(Loc, "register not available on this GPU");
2671     }
2672     return false;
2673   }
2674 
2675   return true;
2676 }
2677 
2678 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
2679                                           unsigned &RegNum, unsigned &RegWidth,
2680                                           bool RestoreOnFailure /*=false*/) {
2681   Reg = AMDGPU::NoRegister;
2682 
2683   SmallVector<AsmToken, 1> Tokens;
2684   if (ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, Tokens)) {
2685     if (RestoreOnFailure) {
2686       while (!Tokens.empty()) {
2687         getLexer().UnLex(Tokens.pop_back_val());
2688       }
2689     }
2690     return true;
2691   }
2692   return false;
2693 }
2694 
2695 Optional<StringRef>
2696 AMDGPUAsmParser::getGprCountSymbolName(RegisterKind RegKind) {
2697   switch (RegKind) {
2698   case IS_VGPR:
2699     return StringRef(".amdgcn.next_free_vgpr");
2700   case IS_SGPR:
2701     return StringRef(".amdgcn.next_free_sgpr");
2702   default:
2703     return None;
2704   }
2705 }
2706 
2707 void AMDGPUAsmParser::initializeGprCountSymbol(RegisterKind RegKind) {
2708   auto SymbolName = getGprCountSymbolName(RegKind);
2709   assert(SymbolName && "initializing invalid register kind");
2710   MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);
2711   Sym->setVariableValue(MCConstantExpr::create(0, getContext()));
2712 }
2713 
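// Bump the .amdgcn.next_free_{v,s}gpr symbol so that it stays one past the
// highest register index referenced so far.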
2714 bool AMDGPUAsmParser::updateGprCountSymbols(RegisterKind RegKind,
2715                                             unsigned DwordRegIndex,
2716                                             unsigned RegWidth) {
2717   // Symbols are only defined for GCN targets
2718   if (AMDGPU::getIsaVersion(getSTI().getCPU()).Major < 6)
2719     return true;
2720 
2721   auto SymbolName = getGprCountSymbolName(RegKind);
2722   if (!SymbolName)
2723     return true;
2724   MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);
2725 
2726   int64_t NewMax = DwordRegIndex + RegWidth - 1;
2727   int64_t OldCount;
2728 
2729   if (!Sym->isVariable())
2730     return !Error(getLoc(),
2731                   ".amdgcn.next_free_{v,s}gpr symbols must be variable");
2732   if (!Sym->getVariableValue(false)->evaluateAsAbsolute(OldCount))
2733     return !Error(
2734         getLoc(),
2735         ".amdgcn.next_free_{v,s}gpr symbols must be absolute expressions");
2736 
2737   if (OldCount <= NewMax)
2738     Sym->setVariableValue(MCConstantExpr::create(NewMax + 1, getContext()));
2739 
2740   return true;
2741 }
2742 
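// Parse a register operand. As a side effect, either update the
// .amdgcn.next_free_{v,s}gpr symbols (HSA code object v3 and above) or
// record the usage in the kernel scope for older ABIs.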
2743 std::unique_ptr<AMDGPUOperand>
2744 AMDGPUAsmParser::parseRegister(bool RestoreOnFailure) {
2745   const auto &Tok = getToken();
2746   SMLoc StartLoc = Tok.getLoc();
2747   SMLoc EndLoc = Tok.getEndLoc();
2748   RegisterKind RegKind;
2749   unsigned Reg, RegNum, RegWidth;
2750 
2751   if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth)) {
2752     return nullptr;
2753   }
2754   if (isHsaAbiVersion3AndAbove(&getSTI())) {
2755     if (!updateGprCountSymbols(RegKind, RegNum, RegWidth))
2756       return nullptr;
2757   } else
2758     KernelScope.usesRegister(RegKind, RegNum, RegWidth);
2759   return AMDGPUOperand::CreateReg(this, Reg, StartLoc, EndLoc);
2760 }
2761 
2762 OperandMatchResultTy
2763 AMDGPUAsmParser::parseImm(OperandVector &Operands, bool HasSP3AbsModifier) {
2764   // TODO: add syntactic sugar for 1/(2*PI)
2765 
2766   assert(!isRegister());
2767   assert(!isModifier());
2768 
2769   const auto& Tok = getToken();
2770   const auto& NextTok = peekToken();
2771   bool IsReal = Tok.is(AsmToken::Real);
2772   SMLoc S = getLoc();
2773   bool Negate = false;
2774 
2775   if (!IsReal && Tok.is(AsmToken::Minus) && NextTok.is(AsmToken::Real)) {
2776     lex();
2777     IsReal = true;
2778     Negate = true;
2779   }
2780 
2781   if (IsReal) {
2782     // Floating-point expressions are not supported.
2783     // Only floating-point literals with an optional
2784     // sign can be accepted here.
2785 
2786     StringRef Num = getTokenStr();
2787     lex();
2788 
2789     APFloat RealVal(APFloat::IEEEdouble());
2790     auto roundMode = APFloat::rmNearestTiesToEven;
2791     if (errorToBool(RealVal.convertFromString(Num, roundMode).takeError())) {
2792       return MatchOperand_ParseFail;
2793     }
2794     if (Negate)
2795       RealVal.changeSign();
2796 
2797     Operands.push_back(
2798       AMDGPUOperand::CreateImm(this, RealVal.bitcastToAPInt().getZExtValue(), S,
2799                                AMDGPUOperand::ImmTyNone, true));
2800 
2801     return MatchOperand_Success;
2802 
2803   } else {
2804     int64_t IntVal;
2805     const MCExpr *Expr;
2806     SMLoc S = getLoc();
2807 
2808     if (HasSP3AbsModifier) {
2809       // This is a workaround for handling expressions
2810       // as arguments of SP3 'abs' modifier, for example:
2811       //     |1.0|
2812       //     |-1|
2813       //     |1+x|
2814       // This syntax is not compatible with the syntax of standard
2815       // MC expressions (due to the trailing '|').
2816       SMLoc EndLoc;
2817       if (getParser().parsePrimaryExpr(Expr, EndLoc, nullptr))
2818         return MatchOperand_ParseFail;
2819     } else {
2820       if (Parser.parseExpression(Expr))
2821         return MatchOperand_ParseFail;
2822     }
2823 
2824     if (Expr->evaluateAsAbsolute(IntVal)) {
2825       Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S));
2826     } else {
2827       Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S));
2828     }
2829 
2830     return MatchOperand_Success;
2831   }
2832 
2833   return MatchOperand_NoMatch;
2834 }
2835 
2836 OperandMatchResultTy
2837 AMDGPUAsmParser::parseReg(OperandVector &Operands) {
2838   if (!isRegister())
2839     return MatchOperand_NoMatch;
2840 
2841   if (auto R = parseRegister()) {
2842     assert(R->isReg());
2843     Operands.push_back(std::move(R));
2844     return MatchOperand_Success;
2845   }
2846   return MatchOperand_ParseFail;
2847 }
2848 
2849 OperandMatchResultTy
2850 AMDGPUAsmParser::parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod) {
2851   auto res = parseReg(Operands);
2852   if (res != MatchOperand_NoMatch) {
2853     return res;
2854   } else if (isModifier()) {
2855     return MatchOperand_NoMatch;
2856   } else {
2857     return parseImm(Operands, HasSP3AbsMod);
2858   }
2859 }
2860 
2861 bool
2862 AMDGPUAsmParser::isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
2863   if (Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::LParen)) {
2864     const auto &str = Token.getString();
2865     return str == "abs" || str == "neg" || str == "sext";
2866   }
2867   return false;
2868 }
2869 
2870 bool
2871 AMDGPUAsmParser::isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const {
2872   return Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::Colon);
2873 }
2874 
2875 bool
2876 AMDGPUAsmParser::isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
2877   return isNamedOperandModifier(Token, NextToken) || Token.is(AsmToken::Pipe);
2878 }
2879 
2880 bool
2881 AMDGPUAsmParser::isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
2882   return isRegister(Token, NextToken) || isOperandModifier(Token, NextToken);
2883 }
2884 
2885 // Check if this is an operand modifier or an opcode modifier
2886 // which may look like an expression but is not one. We should
2887 // avoid parsing these modifiers as expressions. Currently
2888 // recognized sequences are:
2889 //   |...|
2890 //   abs(...)
2891 //   neg(...)
2892 //   sext(...)
2893 //   -reg
2894 //   -|...|
2895 //   -abs(...)
2896 //   name:...
2897 // Note that simple opcode modifiers like 'gds' may be parsed as
2898 // expressions; this is a special case. See getExpressionAsToken.
2899 //
2900 bool
2901 AMDGPUAsmParser::isModifier() {
2902 
2903   AsmToken Tok = getToken();
2904   AsmToken NextToken[2];
2905   peekTokens(NextToken);
2906 
2907   return isOperandModifier(Tok, NextToken[0]) ||
2908          (Tok.is(AsmToken::Minus) && isRegOrOperandModifier(NextToken[0], NextToken[1])) ||
2909          isOpcodeModifierWithVal(Tok, NextToken[0]);
2910 }
2911 
2912 // Check if the current token is an SP3 'neg' modifier.
2913 // Currently this modifier is allowed in the following context:
2914 //
2915 // 1. Before a register, e.g. "-v0", "-v[...]" or "-[v0,v1]".
2916 // 2. Before an 'abs' modifier: -abs(...)
2917 // 3. Before an SP3 'abs' modifier: -|...|
2918 //
2919 // In all other cases "-" is handled as a part
2920 // of an expression that follows the sign.
2921 //
2922 // Note: When "-" is followed by an integer literal,
2923 // this is interpreted as integer negation rather
2924 // than a floating-point NEG modifier applied to the literal.
2925 // Besides being counter-intuitive, such use of the floating-point
2926 // NEG modifier would result in different meanings
2927 // of integer literals used with VOP1/2/C and VOP3,
2928 // for example:
2929 //    v_exp_f32_e32 v5, -1 // VOP1: src0 = 0xFFFFFFFF
2930 //    v_exp_f32_e64 v5, -1 // VOP3: src0 = 0x80000001
2931 // Negative fp literals with a preceding "-" are
2932 // handled likewise for uniformity.
2933 //
2934 bool
2935 AMDGPUAsmParser::parseSP3NegModifier() {
2936 
2937   AsmToken NextToken[2];
2938   peekTokens(NextToken);
2939 
2940   if (isToken(AsmToken::Minus) &&
2941       (isRegister(NextToken[0], NextToken[1]) ||
2942        NextToken[0].is(AsmToken::Pipe) ||
2943        isId(NextToken[0], "abs"))) {
2944     lex();
2945     return true;
2946   }
2947 
2948   return false;
2949 }
2950 
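// Parse a source operand together with optional fp input modifiers, in
// either the named form (neg(...), abs(...)) or the SP3 form (-..., |...|),
// e.g. "-|v0|" or "neg(abs(v0))".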
2951 OperandMatchResultTy
2952 AMDGPUAsmParser::parseRegOrImmWithFPInputMods(OperandVector &Operands,
2953                                               bool AllowImm) {
2954   bool Neg, SP3Neg;
2955   bool Abs, SP3Abs;
2956   SMLoc Loc;
2957 
2958   // Disable ambiguous constructs like '--1' etc. Should use neg(-1) instead.
2959   if (isToken(AsmToken::Minus) && peekToken().is(AsmToken::Minus)) {
2960     Error(getLoc(), "invalid syntax, expected 'neg' modifier");
2961     return MatchOperand_ParseFail;
2962   }
2963 
2964   SP3Neg = parseSP3NegModifier();
2965 
2966   Loc = getLoc();
2967   Neg = trySkipId("neg");
2968   if (Neg && SP3Neg) {
2969     Error(Loc, "expected register or immediate");
2970     return MatchOperand_ParseFail;
2971   }
2972   if (Neg && !skipToken(AsmToken::LParen, "expected left paren after neg"))
2973     return MatchOperand_ParseFail;
2974 
2975   Abs = trySkipId("abs");
2976   if (Abs && !skipToken(AsmToken::LParen, "expected left paren after abs"))
2977     return MatchOperand_ParseFail;
2978 
2979   Loc = getLoc();
2980   SP3Abs = trySkipToken(AsmToken::Pipe);
2981   if (Abs && SP3Abs) {
2982     Error(Loc, "expected register or immediate");
2983     return MatchOperand_ParseFail;
2984   }
2985 
2986   OperandMatchResultTy Res;
2987   if (AllowImm) {
2988     Res = parseRegOrImm(Operands, SP3Abs);
2989   } else {
2990     Res = parseReg(Operands);
2991   }
2992   if (Res != MatchOperand_Success) {
2993     return (SP3Neg || Neg || SP3Abs || Abs)? MatchOperand_ParseFail : Res;
2994   }
2995 
2996   if (SP3Abs && !skipToken(AsmToken::Pipe, "expected vertical bar"))
2997     return MatchOperand_ParseFail;
2998   if (Abs && !skipToken(AsmToken::RParen, "expected closing parentheses"))
2999     return MatchOperand_ParseFail;
3000   if (Neg && !skipToken(AsmToken::RParen, "expected closing parentheses"))
3001     return MatchOperand_ParseFail;
3002 
3003   AMDGPUOperand::Modifiers Mods;
3004   Mods.Abs = Abs || SP3Abs;
3005   Mods.Neg = Neg || SP3Neg;
3006 
3007   if (Mods.hasFPModifiers()) {
3008     AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
3009     if (Op.isExpr()) {
3010       Error(Op.getStartLoc(), "expected an absolute expression");
3011       return MatchOperand_ParseFail;
3012     }
3013     Op.setModifiers(Mods);
3014   }
3015   return MatchOperand_Success;
3016 }
3017 
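// Same as above, but for the integer 'sext(...)' input modifier.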
3018 OperandMatchResultTy
3019 AMDGPUAsmParser::parseRegOrImmWithIntInputMods(OperandVector &Operands,
3020                                                bool AllowImm) {
3021   bool Sext = trySkipId("sext");
3022   if (Sext && !skipToken(AsmToken::LParen, "expected left paren after sext"))
3023     return MatchOperand_ParseFail;
3024 
3025   OperandMatchResultTy Res;
3026   if (AllowImm) {
3027     Res = parseRegOrImm(Operands);
3028   } else {
3029     Res = parseReg(Operands);
3030   }
3031   if (Res != MatchOperand_Success) {
3032     return Sext? MatchOperand_ParseFail : Res;
3033   }
3034 
3035   if (Sext && !skipToken(AsmToken::RParen, "expected closing parentheses"))
3036     return MatchOperand_ParseFail;
3037 
3038   AMDGPUOperand::Modifiers Mods;
3039   Mods.Sext = Sext;
3040 
3041   if (Mods.hasIntModifiers()) {
3042     AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
3043     if (Op.isExpr()) {
3044       Error(Op.getStartLoc(), "expected an absolute expression");
3045       return MatchOperand_ParseFail;
3046     }
3047     Op.setModifiers(Mods);
3048   }
3049 
3050   return MatchOperand_Success;
3051 }
3052 
3053 OperandMatchResultTy
3054 AMDGPUAsmParser::parseRegWithFPInputMods(OperandVector &Operands) {
3055   return parseRegOrImmWithFPInputMods(Operands, false);
3056 }
3057 
3058 OperandMatchResultTy
3059 AMDGPUAsmParser::parseRegWithIntInputMods(OperandVector &Operands) {
3060   return parseRegOrImmWithIntInputMods(Operands, false);
3061 }
3062 
3063 OperandMatchResultTy AMDGPUAsmParser::parseVReg32OrOff(OperandVector &Operands) {
3064   auto Loc = getLoc();
3065   if (trySkipId("off")) {
3066     Operands.push_back(AMDGPUOperand::CreateImm(this, 0, Loc,
3067                                                 AMDGPUOperand::ImmTyOff, false));
3068     return MatchOperand_Success;
3069   }
3070 
3071   if (!isRegister())
3072     return MatchOperand_NoMatch;
3073 
3074   std::unique_ptr<AMDGPUOperand> Reg = parseRegister();
3075   if (Reg) {
3076     Operands.push_back(std::move(Reg));
3077     return MatchOperand_Success;
3078   }
3079 
3080   return MatchOperand_ParseFail;
3081 
3082 }
3083 
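// Reject matches that contradict a forced encoding suffix (e32/e64/dpp/sdwa)
// in the mnemonic, and steer VOPAsmPrefer32Bit opcodes back to their e32
// form unless the 64-bit encoding was explicitly requested.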
3084 unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) {
3085   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
3086 
3087   if ((getForcedEncodingSize() == 32 && (TSFlags & SIInstrFlags::VOP3)) ||
3088       (getForcedEncodingSize() == 64 && !(TSFlags & SIInstrFlags::VOP3)) ||
3089       (isForcedDPP() && !(TSFlags & SIInstrFlags::DPP)) ||
3090       (isForcedSDWA() && !(TSFlags & SIInstrFlags::SDWA)) )
3091     return Match_InvalidOperand;
3092 
3093   if ((TSFlags & SIInstrFlags::VOP3) &&
3094       (TSFlags & SIInstrFlags::VOPAsmPrefer32Bit) &&
3095       getForcedEncodingSize() != 64)
3096     return Match_PreferE32;
3097 
3098   if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
3099       Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) {
3100     // v_mac_f32/16 allow only dst_sel == DWORD;
3101     auto OpNum =
3102         AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::dst_sel);
3103     const auto &Op = Inst.getOperand(OpNum);
3104     if (!Op.isImm() || Op.getImm() != AMDGPU::SDWA::SdwaSel::DWORD) {
3105       return Match_InvalidOperand;
3106     }
3107   }
3108 
3109   return Match_Success;
3110 }
3111 
3112 static ArrayRef<unsigned> getAllVariants() {
3113   static const unsigned Variants[] = {
3114     AMDGPUAsmVariants::DEFAULT, AMDGPUAsmVariants::VOP3,
3115     AMDGPUAsmVariants::SDWA, AMDGPUAsmVariants::SDWA9, AMDGPUAsmVariants::DPP
3116   };
3117 
3118   return makeArrayRef(Variants);
3119 }
3120 
3121 // What asm variants we should check
3122 ArrayRef<unsigned> AMDGPUAsmParser::getMatchedVariants() const {
3123   if (getForcedEncodingSize() == 32) {
3124     static const unsigned Variants[] = {AMDGPUAsmVariants::DEFAULT};
3125     return makeArrayRef(Variants);
3126   }
3127 
3128   if (isForcedVOP3()) {
3129     static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3};
3130     return makeArrayRef(Variants);
3131   }
3132 
3133   if (isForcedSDWA()) {
3134     static const unsigned Variants[] = {AMDGPUAsmVariants::SDWA,
3135                                         AMDGPUAsmVariants::SDWA9};
3136     return makeArrayRef(Variants);
3137   }
3138 
3139   if (isForcedDPP()) {
3140     static const unsigned Variants[] = {AMDGPUAsmVariants::DPP};
3141     return makeArrayRef(Variants);
3142   }
3143 
3144   return getAllVariants();
3145 }
3146 
3147 StringRef AMDGPUAsmParser::getMatchedVariantName() const {
3148   if (getForcedEncodingSize() == 32)
3149     return "e32";
3150 
3151   if (isForcedVOP3())
3152     return "e64";
3153 
3154   if (isForcedSDWA())
3155     return "sdwa";
3156 
3157   if (isForcedDPP())
3158     return "dpp";
3159 
3160   return "";
3161 }
3162 
3163 unsigned AMDGPUAsmParser::findImplicitSGPRReadInVOP(const MCInst &Inst) const {
3164   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
3165   const unsigned Num = Desc.getNumImplicitUses();
3166   for (unsigned i = 0; i < Num; ++i) {
3167     unsigned Reg = Desc.ImplicitUses[i];
3168     switch (Reg) {
3169     case AMDGPU::FLAT_SCR:
3170     case AMDGPU::VCC:
3171     case AMDGPU::VCC_LO:
3172     case AMDGPU::VCC_HI:
3173     case AMDGPU::M0:
3174       return Reg;
3175     default:
3176       break;
3177     }
3178   }
3179   return AMDGPU::NoRegister;
3180 }
3181 
3182 // NB: This code is correct only when used to check constant
3183 // bus limitations because GFX7 has no f16 inline constants.
3184 // Note that there are no cases when a GFX7 opcode violates
3185 // constant bus limitations due to the use of an f16 constant.
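// For reference, "inline constants" are the values the hardware encodes
// directly in a source field -- integers in [-16, 64] and a small set of FP
// values (+/-0.5, +/-1.0, +/-2.0, +/-4.0, 0.0, plus 1/(2*pi) where
// hasInv2PiInlineImm() holds) -- so they do not consume the constant bus.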
3186 bool AMDGPUAsmParser::isInlineConstant(const MCInst &Inst,
3187                                        unsigned OpIdx) const {
3188   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
3189 
3190   if (!AMDGPU::isSISrcOperand(Desc, OpIdx)) {
3191     return false;
3192   }
3193 
3194   const MCOperand &MO = Inst.getOperand(OpIdx);
3195 
3196   int64_t Val = MO.getImm();
3197   auto OpSize = AMDGPU::getOperandSize(Desc, OpIdx);
3198 
3199   switch (OpSize) { // expected operand size
3200   case 8:
3201     return AMDGPU::isInlinableLiteral64(Val, hasInv2PiInlineImm());
3202   case 4:
3203     return AMDGPU::isInlinableLiteral32(Val, hasInv2PiInlineImm());
3204   case 2: {
3205     const unsigned OperandType = Desc.OpInfo[OpIdx].OperandType;
3206     if (OperandType == AMDGPU::OPERAND_REG_IMM_INT16 ||
3207         OperandType == AMDGPU::OPERAND_REG_INLINE_C_INT16 ||
3208         OperandType == AMDGPU::OPERAND_REG_INLINE_AC_INT16)
3209       return AMDGPU::isInlinableIntLiteral(Val);
3210 
3211     if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2INT16 ||
3212         OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2INT16 ||
3213         OperandType == AMDGPU::OPERAND_REG_IMM_V2INT16)
3214       return AMDGPU::isInlinableIntLiteralV216(Val);
3215 
3216     if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2FP16 ||
3217         OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2FP16 ||
3218         OperandType == AMDGPU::OPERAND_REG_IMM_V2FP16)
3219       return AMDGPU::isInlinableLiteralV216(Val, hasInv2PiInlineImm());
3220 
3221     return AMDGPU::isInlinableLiteral16(Val, hasInv2PiInlineImm());
3222   }
3223   default:
3224     llvm_unreachable("invalid operand size");
3225   }
3226 }
3227 
3228 unsigned AMDGPUAsmParser::getConstantBusLimit(unsigned Opcode) const {
3229   if (!isGFX10Plus())
3230     return 1;
3231 
3232   switch (Opcode) {
3233   // 64-bit shift instructions can use only one scalar value input
3234   case AMDGPU::V_LSHLREV_B64_e64:
3235   case AMDGPU::V_LSHLREV_B64_gfx10:
3236   case AMDGPU::V_LSHRREV_B64_e64:
3237   case AMDGPU::V_LSHRREV_B64_gfx10:
3238   case AMDGPU::V_ASHRREV_I64_e64:
3239   case AMDGPU::V_ASHRREV_I64_gfx10:
3240   case AMDGPU::V_LSHL_B64_e64:
3241   case AMDGPU::V_LSHR_B64_e64:
3242   case AMDGPU::V_ASHR_I64_e64:
3243     return 1;
3244   default:
3245     return 2;
3246   }
3247 }
3248 
3249 bool AMDGPUAsmParser::usesConstantBus(const MCInst &Inst, unsigned OpIdx) {
3250   const MCOperand &MO = Inst.getOperand(OpIdx);
3251   if (MO.isImm()) {
3252     return !isInlineConstant(Inst, OpIdx);
3253   } else if (MO.isReg()) {
3254     auto Reg = MO.getReg();
3255     const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3256     auto PReg = mc2PseudoReg(Reg);
3257     return isSGPR(PReg, TRI) && PReg != SGPR_NULL;
3258   } else {
3259     return true;
3260   }
3261 }
3262 
3263 bool
3264 AMDGPUAsmParser::validateConstantBusLimitations(const MCInst &Inst,
3265                                                 const OperandVector &Operands) {
3266   const unsigned Opcode = Inst.getOpcode();
3267   const MCInstrDesc &Desc = MII.get(Opcode);
3268   unsigned LastSGPR = AMDGPU::NoRegister;
3269   unsigned ConstantBusUseCount = 0;
3270   unsigned NumLiterals = 0;
3271   unsigned LiteralSize;
3272 
3273   if (Desc.TSFlags &
3274       (SIInstrFlags::VOPC |
3275        SIInstrFlags::VOP1 | SIInstrFlags::VOP2 |
3276        SIInstrFlags::VOP3 | SIInstrFlags::VOP3P |
3277        SIInstrFlags::SDWA)) {
3278     // Check special imm operands (used by madmk, etc)
3279     if (AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm) != -1) {
3280       ++NumLiterals;
3281       LiteralSize = 4;
3282     }
3283 
3284     SmallDenseSet<unsigned> SGPRsUsed;
3285     unsigned SGPRUsed = findImplicitSGPRReadInVOP(Inst);
3286     if (SGPRUsed != AMDGPU::NoRegister) {
3287       SGPRsUsed.insert(SGPRUsed);
3288       ++ConstantBusUseCount;
3289     }
3290 
3291     const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
3292     const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
3293     const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
3294 
3295     const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx };
3296 
3297     for (int OpIdx : OpIndices) {
3298       if (OpIdx == -1) break;
3299 
3300       const MCOperand &MO = Inst.getOperand(OpIdx);
3301       if (usesConstantBus(Inst, OpIdx)) {
3302         if (MO.isReg()) {
3303           LastSGPR = mc2PseudoReg(MO.getReg());
3304           // Pairs of registers with partial intersections like these:
3305           //   s0, s[0:1]
3306           //   flat_scratch_lo, flat_scratch
3307           //   flat_scratch_lo, flat_scratch_hi
3308           // are theoretically valid but they are disabled anyway.
3309           // Note that this code mimics SIInstrInfo::verifyInstruction
3310           if (!SGPRsUsed.count(LastSGPR)) {
3311             SGPRsUsed.insert(LastSGPR);
3312             ++ConstantBusUseCount;
3313           }
3314         } else { // Expression or a literal
3315 
3316           if (Desc.OpInfo[OpIdx].OperandType == MCOI::OPERAND_IMMEDIATE)
3317             continue; // special operand like VINTERP attr_chan
3318 
3319           // An instruction may use only one literal.
3320           // This has been validated on the previous step.
3321           // See validateVOPLiteral.
3322           // This literal may be used as more than one operand.
3323           // If all these operands are of the same size,
3324           // this literal counts as one scalar value.
3325           // Otherwise it counts as 2 scalar values.
3326           // See "GFX10 Shader Programming", section 3.6.2.3.
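          // Illustrative example (not from the original source): a gfx10
          // VOP3 instruction that encodes the same 32-bit literal in two
          // source slots counts it as one scalar value below, while reusing
          // the literal for operands of different sizes counts it as two.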
3327 
3328           unsigned Size = AMDGPU::getOperandSize(Desc, OpIdx);
3329           if (Size < 4) Size = 4;
3330 
3331           if (NumLiterals == 0) {
3332             NumLiterals = 1;
3333             LiteralSize = Size;
3334           } else if (LiteralSize != Size) {
3335             NumLiterals = 2;
3336           }
3337         }
3338       }
3339     }
3340   }
3341   ConstantBusUseCount += NumLiterals;
3342 
3343   if (ConstantBusUseCount <= getConstantBusLimit(Opcode))
3344     return true;
3345 
3346   SMLoc LitLoc = getLitLoc(Operands);
3347   SMLoc RegLoc = getRegLoc(LastSGPR, Operands);
3348   SMLoc Loc = (LitLoc.getPointer() < RegLoc.getPointer()) ? RegLoc : LitLoc;
3349   Error(Loc, "invalid operand (violates constant bus restrictions)");
3350   return false;
3351 }
3352 
3353 bool
3354 AMDGPUAsmParser::validateEarlyClobberLimitations(const MCInst &Inst,
3355                                                  const OperandVector &Operands) {
3356   const unsigned Opcode = Inst.getOpcode();
3357   const MCInstrDesc &Desc = MII.get(Opcode);
3358 
3359   const int DstIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vdst);
3360   if (DstIdx == -1 ||
3361       Desc.getOperandConstraint(DstIdx, MCOI::EARLY_CLOBBER) == -1) {
3362     return true;
3363   }
3364 
3365   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3366 
3367   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
3368   const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
3369   const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
3370 
3371   assert(DstIdx != -1);
3372   const MCOperand &Dst = Inst.getOperand(DstIdx);
3373   assert(Dst.isReg());
3374 
3375   const int SrcIndices[] = { Src0Idx, Src1Idx, Src2Idx };
3376 
3377   for (int SrcIdx : SrcIndices) {
3378     if (SrcIdx == -1) break;
3379     const MCOperand &Src = Inst.getOperand(SrcIdx);
3380     if (Src.isReg()) {
3381       if (TRI->regsOverlap(Dst.getReg(), Src.getReg())) {
3382         const unsigned SrcReg = mc2PseudoReg(Src.getReg());
3383         Error(getRegLoc(SrcReg, Operands),
3384           "destination must be different than all sources");
3385         return false;
3386       }
3387     }
3388   }
3389 
3390   return true;
3391 }
3392 
3393 bool AMDGPUAsmParser::validateIntClampSupported(const MCInst &Inst) {
3394 
3395   const unsigned Opc = Inst.getOpcode();
3396   const MCInstrDesc &Desc = MII.get(Opc);
3397 
3398   if ((Desc.TSFlags & SIInstrFlags::IntClamp) != 0 && !hasIntClamp()) {
3399     int ClampIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp);
3400     assert(ClampIdx != -1);
3401     return Inst.getOperand(ClampIdx).getImm() == 0;
3402   }
3403 
3404   return true;
3405 }
3406 
3407 bool AMDGPUAsmParser::validateMIMGDataSize(const MCInst &Inst) {
3408 
3409   const unsigned Opc = Inst.getOpcode();
3410   const MCInstrDesc &Desc = MII.get(Opc);
3411 
3412   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3413     return true;
3414 
3415   int VDataIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdata);
3416   int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
3417   int TFEIdx   = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::tfe);
3418 
3419   assert(VDataIdx != -1);
3420 
3421   if (DMaskIdx == -1 || TFEIdx == -1) // intersect_ray
3422     return true;
3423 
3424   unsigned VDataSize = AMDGPU::getRegOperandSize(getMRI(), Desc, VDataIdx);
3425   unsigned TFESize = (TFEIdx != -1 && Inst.getOperand(TFEIdx).getImm()) ? 1 : 0;
3426   unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
3427   if (DMask == 0)
3428     DMask = 1;
3429 
3430   unsigned DataSize =
3431     (Desc.TSFlags & SIInstrFlags::Gather4) ? 4 : countPopulation(DMask);
3432   if (hasPackedD16()) {
3433     int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
3434     if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm())
3435       DataSize = (DataSize + 1) / 2;
3436   }
3437 
3438   return (VDataSize / 4) == DataSize + TFESize;
3439 }
3440 
3441 bool AMDGPUAsmParser::validateMIMGAddrSize(const MCInst &Inst) {
3442   const unsigned Opc = Inst.getOpcode();
3443   const MCInstrDesc &Desc = MII.get(Opc);
3444 
3445   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0 || !isGFX10Plus())
3446     return true;
3447 
3448   const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc);
3449 
3450   const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
3451       AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode);
3452   int VAddr0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr0);
3453   int SrsrcIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::srsrc);
3454   int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
3455   int A16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::a16);
3456 
3457   assert(VAddr0Idx != -1);
3458   assert(SrsrcIdx != -1);
3459   assert(SrsrcIdx > VAddr0Idx);
3460 
3461   if (DimIdx == -1)
3462     return true; // intersect_ray
3463 
3464   unsigned Dim = Inst.getOperand(DimIdx).getImm();
3465   const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim);
3466   bool IsNSA = SrsrcIdx - VAddr0Idx > 1;
3467   unsigned ActualAddrSize =
3468       IsNSA ? SrsrcIdx - VAddr0Idx
3469             : AMDGPU::getRegOperandSize(getMRI(), Desc, VAddr0Idx) / 4;
3470   bool IsA16 = (A16Idx != -1 && Inst.getOperand(A16Idx).getImm());
3471 
3472   unsigned ExpectedAddrSize =
3473       AMDGPU::getAddrSizeMIMGOp(BaseOpcode, DimInfo, IsA16, hasG16());
3474 
3475   if (!IsNSA) {
3476     if (ExpectedAddrSize > 8)
3477       ExpectedAddrSize = 16;
3478 
3479     // Allow oversized 8 VGPR vaddr when only 5/6/7 VGPRs are required.
3480     // This provides backward compatibility for assembly created
3481     // before 160b/192b/224b types were directly supported.
3482     if (ActualAddrSize == 8 && (ExpectedAddrSize >= 5 && ExpectedAddrSize <= 7))
3483       return true;
3484   }
3485 
3486   return ActualAddrSize == ExpectedAddrSize;
3487 }
3488 
3489 bool AMDGPUAsmParser::validateMIMGAtomicDMask(const MCInst &Inst) {
3490 
3491   const unsigned Opc = Inst.getOpcode();
3492   const MCInstrDesc &Desc = MII.get(Opc);
3493 
3494   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3495     return true;
3496   if (!Desc.mayLoad() || !Desc.mayStore())
3497     return true; // Not atomic
3498 
3499   int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
3500   unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
3501 
3502   // This is an incomplete check because image_atomic_cmpswap
3503   // may only use 0x3 and 0xf while other atomic operations
3504   // may use 0x1 and 0x3. However these limitations are
3505   // verified when we check that dmask matches dst size.
3506   return DMask == 0x1 || DMask == 0x3 || DMask == 0xf;
3507 }
3508 
3509 bool AMDGPUAsmParser::validateMIMGGatherDMask(const MCInst &Inst) {
3510 
3511   const unsigned Opc = Inst.getOpcode();
3512   const MCInstrDesc &Desc = MII.get(Opc);
3513 
3514   if ((Desc.TSFlags & SIInstrFlags::Gather4) == 0)
3515     return true;
3516 
3517   int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
3518   unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
3519 
3520   // GATHER4 instructions use dmask in a different fashion compared to
3521   // other MIMG instructions. The only useful DMASK values are
3522   // 1=red, 2=green, 4=blue, 8=alpha. (e.g. 1 returns
3523   // (red,red,red,red) etc.) The ISA document doesn't mention
3524   // this.
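  // Illustrative example (assumption): dmask:0x2 gathers the green component
  // of the four sampled texels, whereas dmask:0x3 is rejected below because
  // more than one component bit is set.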
3525   return DMask == 0x1 || DMask == 0x2 || DMask == 0x4 || DMask == 0x8;
3526 }
3527 
3528 bool AMDGPUAsmParser::validateMIMGMSAA(const MCInst &Inst) {
3529   const unsigned Opc = Inst.getOpcode();
3530   const MCInstrDesc &Desc = MII.get(Opc);
3531 
3532   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3533     return true;
3534 
3535   const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc);
3536   const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
3537       AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode);
3538 
3539   if (!BaseOpcode->MSAA)
3540     return true;
3541 
3542   int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
3543   assert(DimIdx != -1);
3544 
3545   unsigned Dim = Inst.getOperand(DimIdx).getImm();
3546   const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim);
3547 
3548   return DimInfo->MSAA;
3549 }
3550 
3551 static bool IsMovrelsSDWAOpcode(const unsigned Opcode) {
3553   switch (Opcode) {
3554   case AMDGPU::V_MOVRELS_B32_sdwa_gfx10:
3555   case AMDGPU::V_MOVRELSD_B32_sdwa_gfx10:
3556   case AMDGPU::V_MOVRELSD_2_B32_sdwa_gfx10:
3557     return true;
3558   default:
3559     return false;
3560   }
3561 }
3562 
3563 // movrels* opcodes should only allow VGPRs as src0.
3564 // This is specified in .td description for vop1/vop3,
3565 // but sdwa is handled differently. See isSDWAOperand.
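// Illustrative example (assumption): "v_movrels_b32_sdwa v0, v1" is accepted,
// while the same instruction with an SGPR or a constant in src0 is rejected
// by the check below.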
3566 bool AMDGPUAsmParser::validateMovrels(const MCInst &Inst,
3567                                       const OperandVector &Operands) {
3568 
3569   const unsigned Opc = Inst.getOpcode();
3570   const MCInstrDesc &Desc = MII.get(Opc);
3571 
3572   if ((Desc.TSFlags & SIInstrFlags::SDWA) == 0 || !IsMovrelsSDWAOpcode(Opc))
3573     return true;
3574 
3575   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
3576   assert(Src0Idx != -1);
3577 
3578   SMLoc ErrLoc;
3579   const MCOperand &Src0 = Inst.getOperand(Src0Idx);
3580   if (Src0.isReg()) {
3581     auto Reg = mc2PseudoReg(Src0.getReg());
3582     const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3583     if (!isSGPR(Reg, TRI))
3584       return true;
3585     ErrLoc = getRegLoc(Reg, Operands);
3586   } else {
3587     ErrLoc = getConstLoc(Operands);
3588   }
3589 
3590   Error(ErrLoc, "source operand must be a VGPR");
3591   return false;
3592 }
3593 
3594 bool AMDGPUAsmParser::validateMAIAccWrite(const MCInst &Inst,
3595                                           const OperandVector &Operands) {
3596 
3597   const unsigned Opc = Inst.getOpcode();
3598 
3599   if (Opc != AMDGPU::V_ACCVGPR_WRITE_B32_vi)
3600     return true;
3601 
3602   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
3603   assert(Src0Idx != -1);
3604 
3605   const MCOperand &Src0 = Inst.getOperand(Src0Idx);
3606   if (!Src0.isReg())
3607     return true;
3608 
3609   auto Reg = mc2PseudoReg(Src0.getReg());
3610   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3611   if (isSGPR(Reg, TRI)) {
3612     Error(getRegLoc(Reg, Operands),
3613           "source operand must be either a VGPR or an inline constant");
3614     return false;
3615   }
3616 
3617   return true;
3618 }
3619 
3620 bool AMDGPUAsmParser::validateMFMA(const MCInst &Inst,
3621                                    const OperandVector &Operands) {
3622   const unsigned Opc = Inst.getOpcode();
3623   const MCInstrDesc &Desc = MII.get(Opc);
3624 
3625   if ((Desc.TSFlags & SIInstrFlags::IsMAI) == 0)
3626     return true;
3627 
3628   const int Src2Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2);
3629   if (Src2Idx == -1)
3630     return true;
3631 
3632   const MCOperand &Src2 = Inst.getOperand(Src2Idx);
3633   if (!Src2.isReg())
3634     return true;
3635 
3636   MCRegister Src2Reg = Src2.getReg();
3637   MCRegister DstReg = Inst.getOperand(0).getReg();
3638   if (Src2Reg == DstReg)
3639     return true;
3640 
3641   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3642   if (TRI->getRegClass(Desc.OpInfo[0].RegClass).getSizeInBits() <= 128)
3643     return true;
3644 
3645   if (TRI->regsOverlap(Src2Reg, DstReg)) {
3646     Error(getRegLoc(mc2PseudoReg(Src2Reg), Operands),
3647           "source 2 operand must not partially overlap with dst");
3648     return false;
3649   }
3650 
3651   return true;
3652 }
3653 
3654 bool AMDGPUAsmParser::validateDivScale(const MCInst &Inst) {
3655   switch (Inst.getOpcode()) {
3656   default:
3657     return true;
3658   case V_DIV_SCALE_F32_gfx6_gfx7:
3659   case V_DIV_SCALE_F32_vi:
3660   case V_DIV_SCALE_F32_gfx10:
3661   case V_DIV_SCALE_F64_gfx6_gfx7:
3662   case V_DIV_SCALE_F64_vi:
3663   case V_DIV_SCALE_F64_gfx10:
3664     break;
3665   }
3666 
3667   // TODO: Check that src0 = src1 or src2.
3668 
3669   for (auto Name : {AMDGPU::OpName::src0_modifiers,
3670                     AMDGPU::OpName::src1_modifiers,
3671                     AMDGPU::OpName::src2_modifiers}) {
3672     if (Inst.getOperand(AMDGPU::getNamedOperandIdx(Inst.getOpcode(), Name))
3673             .getImm() &
3674         SISrcMods::ABS) {
3675       return false;
3676     }
3677   }
3678 
3679   return true;
3680 }
3681 
3682 bool AMDGPUAsmParser::validateMIMGD16(const MCInst &Inst) {
3683 
3684   const unsigned Opc = Inst.getOpcode();
3685   const MCInstrDesc &Desc = MII.get(Opc);
3686 
3687   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3688     return true;
3689 
3690   int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
3691   if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm()) {
3692     if (isCI() || isSI())
3693       return false;
3694   }
3695 
3696   return true;
3697 }
3698 
3699 bool AMDGPUAsmParser::validateMIMGDim(const MCInst &Inst) {
3700   const unsigned Opc = Inst.getOpcode();
3701   const MCInstrDesc &Desc = MII.get(Opc);
3702 
3703   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3704     return true;
3705 
3706   int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
3707   if (DimIdx < 0)
3708     return true;
3709 
3710   long Imm = Inst.getOperand(DimIdx).getImm();
3711   if (Imm < 0 || Imm >= 8)
3712     return false;
3713 
3714   return true;
3715 }
3716 
3717 static bool IsRevOpcode(const unsigned Opcode) {
3719   switch (Opcode) {
3720   case AMDGPU::V_SUBREV_F32_e32:
3721   case AMDGPU::V_SUBREV_F32_e64:
3722   case AMDGPU::V_SUBREV_F32_e32_gfx10:
3723   case AMDGPU::V_SUBREV_F32_e32_gfx6_gfx7:
3724   case AMDGPU::V_SUBREV_F32_e32_vi:
3725   case AMDGPU::V_SUBREV_F32_e64_gfx10:
3726   case AMDGPU::V_SUBREV_F32_e64_gfx6_gfx7:
3727   case AMDGPU::V_SUBREV_F32_e64_vi:
3728 
3729   case AMDGPU::V_SUBREV_CO_U32_e32:
3730   case AMDGPU::V_SUBREV_CO_U32_e64:
3731   case AMDGPU::V_SUBREV_I32_e32_gfx6_gfx7:
3732   case AMDGPU::V_SUBREV_I32_e64_gfx6_gfx7:
3733 
3734   case AMDGPU::V_SUBBREV_U32_e32:
3735   case AMDGPU::V_SUBBREV_U32_e64:
3736   case AMDGPU::V_SUBBREV_U32_e32_gfx6_gfx7:
3737   case AMDGPU::V_SUBBREV_U32_e32_vi:
3738   case AMDGPU::V_SUBBREV_U32_e64_gfx6_gfx7:
3739   case AMDGPU::V_SUBBREV_U32_e64_vi:
3740 
3741   case AMDGPU::V_SUBREV_U32_e32:
3742   case AMDGPU::V_SUBREV_U32_e64:
3743   case AMDGPU::V_SUBREV_U32_e32_gfx9:
3744   case AMDGPU::V_SUBREV_U32_e32_vi:
3745   case AMDGPU::V_SUBREV_U32_e64_gfx9:
3746   case AMDGPU::V_SUBREV_U32_e64_vi:
3747 
3748   case AMDGPU::V_SUBREV_F16_e32:
3749   case AMDGPU::V_SUBREV_F16_e64:
3750   case AMDGPU::V_SUBREV_F16_e32_gfx10:
3751   case AMDGPU::V_SUBREV_F16_e32_vi:
3752   case AMDGPU::V_SUBREV_F16_e64_gfx10:
3753   case AMDGPU::V_SUBREV_F16_e64_vi:
3754 
3755   case AMDGPU::V_SUBREV_U16_e32:
3756   case AMDGPU::V_SUBREV_U16_e64:
3757   case AMDGPU::V_SUBREV_U16_e32_vi:
3758   case AMDGPU::V_SUBREV_U16_e64_vi:
3759 
3760   case AMDGPU::V_SUBREV_CO_U32_e32_gfx9:
3761   case AMDGPU::V_SUBREV_CO_U32_e64_gfx10:
3762   case AMDGPU::V_SUBREV_CO_U32_e64_gfx9:
3763 
3764   case AMDGPU::V_SUBBREV_CO_U32_e32_gfx9:
3765   case AMDGPU::V_SUBBREV_CO_U32_e64_gfx9:
3766 
3767   case AMDGPU::V_SUBREV_NC_U32_e32_gfx10:
3768   case AMDGPU::V_SUBREV_NC_U32_e64_gfx10:
3769 
3770   case AMDGPU::V_SUBREV_CO_CI_U32_e32_gfx10:
3771   case AMDGPU::V_SUBREV_CO_CI_U32_e64_gfx10:
3772 
3773   case AMDGPU::V_LSHRREV_B32_e32:
3774   case AMDGPU::V_LSHRREV_B32_e64:
3775   case AMDGPU::V_LSHRREV_B32_e32_gfx6_gfx7:
3776   case AMDGPU::V_LSHRREV_B32_e64_gfx6_gfx7:
3777   case AMDGPU::V_LSHRREV_B32_e32_vi:
3778   case AMDGPU::V_LSHRREV_B32_e64_vi:
3779   case AMDGPU::V_LSHRREV_B32_e32_gfx10:
3780   case AMDGPU::V_LSHRREV_B32_e64_gfx10:
3781 
3782   case AMDGPU::V_ASHRREV_I32_e32:
3783   case AMDGPU::V_ASHRREV_I32_e64:
3784   case AMDGPU::V_ASHRREV_I32_e32_gfx10:
3785   case AMDGPU::V_ASHRREV_I32_e32_gfx6_gfx7:
3786   case AMDGPU::V_ASHRREV_I32_e32_vi:
3787   case AMDGPU::V_ASHRREV_I32_e64_gfx10:
3788   case AMDGPU::V_ASHRREV_I32_e64_gfx6_gfx7:
3789   case AMDGPU::V_ASHRREV_I32_e64_vi:
3790 
3791   case AMDGPU::V_LSHLREV_B32_e32:
3792   case AMDGPU::V_LSHLREV_B32_e64:
3793   case AMDGPU::V_LSHLREV_B32_e32_gfx10:
3794   case AMDGPU::V_LSHLREV_B32_e32_gfx6_gfx7:
3795   case AMDGPU::V_LSHLREV_B32_e32_vi:
3796   case AMDGPU::V_LSHLREV_B32_e64_gfx10:
3797   case AMDGPU::V_LSHLREV_B32_e64_gfx6_gfx7:
3798   case AMDGPU::V_LSHLREV_B32_e64_vi:
3799 
3800   case AMDGPU::V_LSHLREV_B16_e32:
3801   case AMDGPU::V_LSHLREV_B16_e64:
3802   case AMDGPU::V_LSHLREV_B16_e32_vi:
3803   case AMDGPU::V_LSHLREV_B16_e64_vi:
3804   case AMDGPU::V_LSHLREV_B16_gfx10:
3805 
3806   case AMDGPU::V_LSHRREV_B16_e32:
3807   case AMDGPU::V_LSHRREV_B16_e64:
3808   case AMDGPU::V_LSHRREV_B16_e32_vi:
3809   case AMDGPU::V_LSHRREV_B16_e64_vi:
3810   case AMDGPU::V_LSHRREV_B16_gfx10:
3811 
3812   case AMDGPU::V_ASHRREV_I16_e32:
3813   case AMDGPU::V_ASHRREV_I16_e64:
3814   case AMDGPU::V_ASHRREV_I16_e32_vi:
3815   case AMDGPU::V_ASHRREV_I16_e64_vi:
3816   case AMDGPU::V_ASHRREV_I16_gfx10:
3817 
3818   case AMDGPU::V_LSHLREV_B64_e64:
3819   case AMDGPU::V_LSHLREV_B64_gfx10:
3820   case AMDGPU::V_LSHLREV_B64_vi:
3821 
3822   case AMDGPU::V_LSHRREV_B64_e64:
3823   case AMDGPU::V_LSHRREV_B64_gfx10:
3824   case AMDGPU::V_LSHRREV_B64_vi:
3825 
3826   case AMDGPU::V_ASHRREV_I64_e64:
3827   case AMDGPU::V_ASHRREV_I64_gfx10:
3828   case AMDGPU::V_ASHRREV_I64_vi:
3829 
3830   case AMDGPU::V_PK_LSHLREV_B16:
3831   case AMDGPU::V_PK_LSHLREV_B16_gfx10:
3832   case AMDGPU::V_PK_LSHLREV_B16_vi:
3833 
3834   case AMDGPU::V_PK_LSHRREV_B16:
3835   case AMDGPU::V_PK_LSHRREV_B16_gfx10:
3836   case AMDGPU::V_PK_LSHRREV_B16_vi:
3837   case AMDGPU::V_PK_ASHRREV_I16:
3838   case AMDGPU::V_PK_ASHRREV_I16_gfx10:
3839   case AMDGPU::V_PK_ASHRREV_I16_vi:
3840     return true;
3841   default:
3842     return false;
3843   }
3844 }
3845 
3846 Optional<StringRef> AMDGPUAsmParser::validateLdsDirect(const MCInst &Inst) {
3847 
3848   using namespace SIInstrFlags;
3849   const unsigned Opcode = Inst.getOpcode();
3850   const MCInstrDesc &Desc = MII.get(Opcode);
3851 
3852   // lds_direct register is defined so that it can be used
3853   // with 9-bit operands only. Ignore encodings which do not accept these.
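  // Illustrative example (assumption): "v_add_f32 v0, lds_direct, v1" is
  // accepted as a src0 use on targets that support it, while using lds_direct
  // as src1/src2, with *rev opcodes, or with SDWA is rejected below.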
3854   const auto Enc = VOP1 | VOP2 | VOP3 | VOPC | VOP3P | SIInstrFlags::SDWA;
3855   if ((Desc.TSFlags & Enc) == 0)
3856     return None;
3857 
3858   for (auto SrcName : {OpName::src0, OpName::src1, OpName::src2}) {
3859     auto SrcIdx = getNamedOperandIdx(Opcode, SrcName);
3860     if (SrcIdx == -1)
3861       break;
3862     const auto &Src = Inst.getOperand(SrcIdx);
3863     if (Src.isReg() && Src.getReg() == LDS_DIRECT) {
3864 
3865       if (isGFX90A())
3866         return StringRef("lds_direct is not supported on this GPU");
3867 
3868       if (IsRevOpcode(Opcode) || (Desc.TSFlags & SIInstrFlags::SDWA))
3869         return StringRef("lds_direct cannot be used with this instruction");
3870 
3871       if (SrcName != OpName::src0)
3872         return StringRef("lds_direct may be used as src0 only");
3873     }
3874   }
3875 
3876   return None;
3877 }
3878 
3879 SMLoc AMDGPUAsmParser::getFlatOffsetLoc(const OperandVector &Operands) const {
3880   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
3881     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
3882     if (Op.isFlatOffset())
3883       return Op.getStartLoc();
3884   }
3885   return getLoc();
3886 }
3887 
3888 bool AMDGPUAsmParser::validateFlatOffset(const MCInst &Inst,
3889                                          const OperandVector &Operands) {
3890   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
3891   if ((TSFlags & SIInstrFlags::FLAT) == 0)
3892     return true;
3893 
3894   auto Opcode = Inst.getOpcode();
3895   auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
3896   assert(OpNum != -1);
3897 
3898   const auto &Op = Inst.getOperand(OpNum);
3899   if (!hasFlatOffsets() && Op.getImm() != 0) {
3900     Error(getFlatOffsetLoc(Operands),
3901           "flat offset modifier is not supported on this GPU");
3902     return false;
3903   }
3904 
3905   // For FLAT segment the offset must be positive;
3906   // MSB is ignored and forced to zero.
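  // Illustration (the actual widths come from getNumFlatOffsetBits): a
  // global/scratch access such as "global_load_dword v0, v[0:1], off offset:-8"
  // may use a small negative offset, while plain FLAT instructions accept
  // only an unsigned offset.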
3907   if (TSFlags & (SIInstrFlags::FlatGlobal | SIInstrFlags::FlatScratch)) {
3908     unsigned OffsetSize = AMDGPU::getNumFlatOffsetBits(getSTI(), true);
3909     if (!isIntN(OffsetSize, Op.getImm())) {
3910       Error(getFlatOffsetLoc(Operands),
3911             Twine("expected a ") + Twine(OffsetSize) + "-bit signed offset");
3912       return false;
3913     }
3914   } else {
3915     unsigned OffsetSize = AMDGPU::getNumFlatOffsetBits(getSTI(), false);
3916     if (!isUIntN(OffsetSize, Op.getImm())) {
3917       Error(getFlatOffsetLoc(Operands),
3918             Twine("expected a ") + Twine(OffsetSize) + "-bit unsigned offset");
3919       return false;
3920     }
3921   }
3922 
3923   return true;
3924 }
3925 
3926 SMLoc AMDGPUAsmParser::getSMEMOffsetLoc(const OperandVector &Operands) const {
3927   // Start with second operand because SMEM Offset cannot be dst or src0.
3928   for (unsigned i = 2, e = Operands.size(); i != e; ++i) {
3929     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
3930     if (Op.isSMEMOffset())
3931       return Op.getStartLoc();
3932   }
3933   return getLoc();
3934 }
3935 
3936 bool AMDGPUAsmParser::validateSMEMOffset(const MCInst &Inst,
3937                                          const OperandVector &Operands) {
3938   if (isCI() || isSI())
3939     return true;
3940 
3941   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
3942   if ((TSFlags & SIInstrFlags::SMRD) == 0)
3943     return true;
3944 
3945   auto Opcode = Inst.getOpcode();
3946   auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
3947   if (OpNum == -1)
3948     return true;
3949 
3950   const auto &Op = Inst.getOperand(OpNum);
3951   if (!Op.isImm())
3952     return true;
3953 
3954   uint64_t Offset = Op.getImm();
3955   bool IsBuffer = AMDGPU::getSMEMIsBuffer(Opcode);
3956   if (AMDGPU::isLegalSMRDEncodedUnsignedOffset(getSTI(), Offset) ||
3957       AMDGPU::isLegalSMRDEncodedSignedOffset(getSTI(), Offset, IsBuffer))
3958     return true;
3959 
3960   Error(getSMEMOffsetLoc(Operands),
3961         (isVI() || IsBuffer) ? "expected a 20-bit unsigned offset" :
3962                                "expected a 21-bit signed offset");
3963 
3964   return false;
3965 }
3966 
3967 bool AMDGPUAsmParser::validateSOPLiteral(const MCInst &Inst) const {
3968   unsigned Opcode = Inst.getOpcode();
3969   const MCInstrDesc &Desc = MII.get(Opcode);
3970   if (!(Desc.TSFlags & (SIInstrFlags::SOP2 | SIInstrFlags::SOPC)))
3971     return true;
3972 
3973   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
3974   const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
3975 
3976   const int OpIndices[] = { Src0Idx, Src1Idx };
3977 
3978   unsigned NumExprs = 0;
3979   unsigned NumLiterals = 0;
3980   uint32_t LiteralValue;
3981 
3982   for (int OpIdx : OpIndices) {
3983     if (OpIdx == -1) break;
3984 
3985     const MCOperand &MO = Inst.getOperand(OpIdx);
3986     // Exclude special imm operands (like the one used by s_set_gpr_idx_on)
3987     if (AMDGPU::isSISrcOperand(Desc, OpIdx)) {
3988       if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) {
3989         uint32_t Value = static_cast<uint32_t>(MO.getImm());
3990         if (NumLiterals == 0 || LiteralValue != Value) {
3991           LiteralValue = Value;
3992           ++NumLiterals;
3993         }
3994       } else if (MO.isExpr()) {
3995         ++NumExprs;
3996       }
3997     }
3998   }
3999 
4000   return NumLiterals + NumExprs <= 1;
4001 }
4002 
4003 bool AMDGPUAsmParser::validateOpSel(const MCInst &Inst) {
4004   const unsigned Opc = Inst.getOpcode();
4005   if (Opc == AMDGPU::V_PERMLANE16_B32_gfx10 ||
4006       Opc == AMDGPU::V_PERMLANEX16_B32_gfx10) {
4007     int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
4008     unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
4009 
4010     if (OpSel & ~3)
4011       return false;
4012   }
4013   return true;
4014 }
4015 
4016 bool AMDGPUAsmParser::validateDPP(const MCInst &Inst,
4017                                   const OperandVector &Operands) {
4018   const unsigned Opc = Inst.getOpcode();
4019   int DppCtrlIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dpp_ctrl);
4020   if (DppCtrlIdx < 0)
4021     return true;
4022   unsigned DppCtrl = Inst.getOperand(DppCtrlIdx).getImm();
4023 
4024   if (!AMDGPU::isLegal64BitDPPControl(DppCtrl)) {
4025     // DPP64 is supported for row_newbcast only.
4026     int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
4027     if (Src0Idx >= 0 &&
4028         getMRI()->getSubReg(Inst.getOperand(Src0Idx).getReg(), AMDGPU::sub1)) {
4029       SMLoc S = getImmLoc(AMDGPUOperand::ImmTyDppCtrl, Operands);
4030       Error(S, "64 bit dpp only supports row_newbcast");
4031       return false;
4032     }
4033   }
4034 
4035   return true;
4036 }
4037 
4038 // Check if VCC register matches wavefront size
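// Illustrative example (assumption): wave64 code names the full "vcc"
// register as the carry operand, while wave32 code must use "vcc_lo".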
4039 bool AMDGPUAsmParser::validateVccOperand(unsigned Reg) const {
4040   auto FB = getFeatureBits();
4041   return (FB[AMDGPU::FeatureWavefrontSize64] && Reg == AMDGPU::VCC) ||
4042     (FB[AMDGPU::FeatureWavefrontSize32] && Reg == AMDGPU::VCC_LO);
4043 }
4044 
4045 // One unique literal can be used. VOP3 literal is only allowed in GFX10+
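// Illustrative example (assumption): "v_add3_u32 v0, v1, v2, 0x1234" is only
// accepted on targets with FeatureVOP3Literal (gfx10+); naming two different
// literal values in one instruction is always rejected.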
4046 bool AMDGPUAsmParser::validateVOPLiteral(const MCInst &Inst,
4047                                          const OperandVector &Operands) {
4048   unsigned Opcode = Inst.getOpcode();
4049   const MCInstrDesc &Desc = MII.get(Opcode);
4050   const int ImmIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm);
4051   if (!(Desc.TSFlags & (SIInstrFlags::VOP3 | SIInstrFlags::VOP3P)) &&
4052       ImmIdx == -1)
4053     return true;
4054 
4055   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
4056   const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
4057   const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
4058 
4059   const int OpIndices[] = {Src0Idx, Src1Idx, Src2Idx, ImmIdx};
4060 
4061   unsigned NumExprs = 0;
4062   unsigned NumLiterals = 0;
4063   uint32_t LiteralValue;
4064 
4065   for (int OpIdx : OpIndices) {
4066     if (OpIdx == -1)
4067       continue;
4068 
4069     const MCOperand &MO = Inst.getOperand(OpIdx);
4070     if (!MO.isImm() && !MO.isExpr())
4071       continue;
4072     if (!AMDGPU::isSISrcOperand(Desc, OpIdx))
4073       continue;
4074 
4075     if (OpIdx == Src2Idx && (Desc.TSFlags & SIInstrFlags::IsMAI) &&
4076         getFeatureBits()[AMDGPU::FeatureMFMAInlineLiteralBug]) {
4077       Error(getConstLoc(Operands),
4078             "inline constants are not allowed for this operand");
4079       return false;
4080     }
4081 
4082     if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) {
4083       uint32_t Value = static_cast<uint32_t>(MO.getImm());
4084       if (NumLiterals == 0 || LiteralValue != Value) {
4085         LiteralValue = Value;
4086         ++NumLiterals;
4087       }
4088     } else if (MO.isExpr()) {
4089       ++NumExprs;
4090     }
4091   }
4092   NumLiterals += NumExprs;
4093 
4094   if (!NumLiterals)
4095     return true;
4096 
4097   if (ImmIdx == -1 && !getFeatureBits()[AMDGPU::FeatureVOP3Literal]) {
4098     Error(getLitLoc(Operands), "literal operands are not supported");
4099     return false;
4100   }
4101 
4102   if (NumLiterals > 1) {
4103     Error(getLitLoc(Operands), "only one literal operand is allowed");
4104     return false;
4105   }
4106 
4107   return true;
4108 }
4109 
4110 // Returns -1 if not a register, 0 if VGPR and 1 if AGPR.
4111 static int IsAGPROperand(const MCInst &Inst, uint16_t NameIdx,
4112                          const MCRegisterInfo *MRI) {
4113   int OpIdx = AMDGPU::getNamedOperandIdx(Inst.getOpcode(), NameIdx);
4114   if (OpIdx < 0)
4115     return -1;
4116 
4117   const MCOperand &Op = Inst.getOperand(OpIdx);
4118   if (!Op.isReg())
4119     return -1;
4120 
4121   unsigned Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0);
4122   auto Reg = Sub ? Sub : Op.getReg();
4123   const MCRegisterClass &AGPR32 = MRI->getRegClass(AMDGPU::AGPR_32RegClassID);
4124   return AGPR32.contains(Reg) ? 1 : 0;
4125 }
4126 
4127 bool AMDGPUAsmParser::validateAGPRLdSt(const MCInst &Inst) const {
4128   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4129   if ((TSFlags & (SIInstrFlags::FLAT | SIInstrFlags::MUBUF |
4130                   SIInstrFlags::MTBUF | SIInstrFlags::MIMG |
4131                   SIInstrFlags::DS)) == 0)
4132     return true;
4133 
4134   uint16_t DataNameIdx = (TSFlags & SIInstrFlags::DS) ? AMDGPU::OpName::data0
4135                                                       : AMDGPU::OpName::vdata;
4136 
4137   const MCRegisterInfo *MRI = getMRI();
4138   int DstAreg = IsAGPROperand(Inst, AMDGPU::OpName::vdst, MRI);
4139   int DataAreg = IsAGPROperand(Inst, DataNameIdx, MRI);
4140 
4141   if ((TSFlags & SIInstrFlags::DS) && DataAreg >= 0) {
4142     int Data2Areg = IsAGPROperand(Inst, AMDGPU::OpName::data1, MRI);
4143     if (Data2Areg >= 0 && Data2Areg != DataAreg)
4144       return false;
4145   }
4146 
4147   auto FB = getFeatureBits();
4148   if (FB[AMDGPU::FeatureGFX90AInsts]) {
4149     if (DataAreg < 0 || DstAreg < 0)
4150       return true;
4151     return DstAreg == DataAreg;
4152   }
4153 
4154   return DstAreg < 1 && DataAreg < 1;
4155 }
4156 
4157 bool AMDGPUAsmParser::validateVGPRAlign(const MCInst &Inst) const {
4158   auto FB = getFeatureBits();
4159   if (!FB[AMDGPU::FeatureGFX90AInsts])
4160     return true;
4161 
4162   const MCRegisterInfo *MRI = getMRI();
4163   const MCRegisterClass &VGPR32 = MRI->getRegClass(AMDGPU::VGPR_32RegClassID);
4164   const MCRegisterClass &AGPR32 = MRI->getRegClass(AMDGPU::AGPR_32RegClassID);
4165   for (unsigned I = 0, E = Inst.getNumOperands(); I != E; ++I) {
4166     const MCOperand &Op = Inst.getOperand(I);
4167     if (!Op.isReg())
4168       continue;
4169 
4170     unsigned Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0);
4171     if (!Sub)
4172       continue;
4173 
4174     if (VGPR32.contains(Sub) && ((Sub - AMDGPU::VGPR0) & 1))
4175       return false;
4176     if (AGPR32.contains(Sub) && ((Sub - AMDGPU::AGPR0) & 1))
4177       return false;
4178   }
4179 
4180   return true;
4181 }
4182 
4183 // gfx90a has an undocumented limitation:
4184 // DS_GWS opcodes must use even aligned registers.
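// Illustrative example (assumption): a ds_gws_init whose data operand lives
// in v1 is rejected on gfx90a, while v0 or v2 is accepted.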
4185 bool AMDGPUAsmParser::validateGWS(const MCInst &Inst,
4186                                   const OperandVector &Operands) {
4187   if (!getFeatureBits()[AMDGPU::FeatureGFX90AInsts])
4188     return true;
4189 
4190   int Opc = Inst.getOpcode();
4191   if (Opc != AMDGPU::DS_GWS_INIT_vi && Opc != AMDGPU::DS_GWS_BARRIER_vi &&
4192       Opc != AMDGPU::DS_GWS_SEMA_BR_vi)
4193     return true;
4194 
4195   const MCRegisterInfo *MRI = getMRI();
4196   const MCRegisterClass &VGPR32 = MRI->getRegClass(AMDGPU::VGPR_32RegClassID);
4197   int Data0Pos =
4198       AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::data0);
4199   assert(Data0Pos != -1);
4200   auto Reg = Inst.getOperand(Data0Pos).getReg();
4201   auto RegIdx = Reg - (VGPR32.contains(Reg) ? AMDGPU::VGPR0 : AMDGPU::AGPR0);
4202   if (RegIdx & 1) {
4203     SMLoc RegLoc = getRegLoc(Reg, Operands);
4204     Error(RegLoc, "vgpr must be even aligned");
4205     return false;
4206   }
4207 
4208   return true;
4209 }
4210 
4211 bool AMDGPUAsmParser::validateCoherencyBits(const MCInst &Inst,
4212                                             const OperandVector &Operands,
4213                                             const SMLoc &IDLoc) {
4214   int CPolPos = AMDGPU::getNamedOperandIdx(Inst.getOpcode(),
4215                                            AMDGPU::OpName::cpol);
4216   if (CPolPos == -1)
4217     return true;
4218 
4219   unsigned CPol = Inst.getOperand(CPolPos).getImm();
4220 
4221   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4222   if ((TSFlags & (SIInstrFlags::SMRD)) &&
4223       (CPol & ~(AMDGPU::CPol::GLC | AMDGPU::CPol::DLC))) {
4224     Error(IDLoc, "invalid cache policy for SMRD instruction");
4225     return false;
4226   }
4227 
4228   if (isGFX90A() && (CPol & CPol::SCC)) {
4229     SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
4230     StringRef CStr(S.getPointer());
4231     S = SMLoc::getFromPointer(&CStr.data()[CStr.find("scc")]);
4232     Error(S, "scc is not supported on this GPU");
4233     return false;
4234   }
4235 
4236   if (!(TSFlags & (SIInstrFlags::IsAtomicNoRet | SIInstrFlags::IsAtomicRet)))
4237     return true;
4238 
4239   if (TSFlags & SIInstrFlags::IsAtomicRet) {
4240     if (!(TSFlags & SIInstrFlags::MIMG) && !(CPol & CPol::GLC)) {
4241       Error(IDLoc, "instruction must use glc");
4242       return false;
4243     }
4244   } else {
4245     if (CPol & CPol::GLC) {
4246       SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
4247       StringRef CStr(S.getPointer());
4248       S = SMLoc::getFromPointer(&CStr.data()[CStr.find("glc")]);
4249       Error(S, "instruction must not use glc");
4250       return false;
4251     }
4252   }
4253 
4254   return true;
4255 }
4256 
4257 bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst,
4258                                           const SMLoc &IDLoc,
4259                                           const OperandVector &Operands) {
4260   if (auto ErrMsg = validateLdsDirect(Inst)) {
4261     Error(getRegLoc(LDS_DIRECT, Operands), *ErrMsg);
4262     return false;
4263   }
4264   if (!validateSOPLiteral(Inst)) {
4265     Error(getLitLoc(Operands),
4266       "only one literal operand is allowed");
4267     return false;
4268   }
4269   if (!validateVOPLiteral(Inst, Operands)) {
4270     return false;
4271   }
4272   if (!validateConstantBusLimitations(Inst, Operands)) {
4273     return false;
4274   }
4275   if (!validateEarlyClobberLimitations(Inst, Operands)) {
4276     return false;
4277   }
4278   if (!validateIntClampSupported(Inst)) {
4279     Error(getImmLoc(AMDGPUOperand::ImmTyClampSI, Operands),
4280       "integer clamping is not supported on this GPU");
4281     return false;
4282   }
4283   if (!validateOpSel(Inst)) {
4284     Error(getImmLoc(AMDGPUOperand::ImmTyOpSel, Operands),
4285       "invalid op_sel operand");
4286     return false;
4287   }
4288   if (!validateDPP(Inst, Operands)) {
4289     return false;
4290   }
4291   // For MUBUF/MTBUF d16 is a part of opcode, so there is nothing to validate.
4292   if (!validateMIMGD16(Inst)) {
4293     Error(getImmLoc(AMDGPUOperand::ImmTyD16, Operands),
4294       "d16 modifier is not supported on this GPU");
4295     return false;
4296   }
4297   if (!validateMIMGDim(Inst)) {
4298     Error(IDLoc, "dim modifier is required on this GPU");
4299     return false;
4300   }
4301   if (!validateMIMGMSAA(Inst)) {
4302     Error(getImmLoc(AMDGPUOperand::ImmTyDim, Operands),
4303           "invalid dim; must be MSAA type");
4304     return false;
4305   }
4306   if (!validateMIMGDataSize(Inst)) {
4307     Error(IDLoc,
4308       "image data size does not match dmask and tfe");
4309     return false;
4310   }
4311   if (!validateMIMGAddrSize(Inst)) {
4312     Error(IDLoc,
4313       "image address size does not match dim and a16");
4314     return false;
4315   }
4316   if (!validateMIMGAtomicDMask(Inst)) {
4317     Error(getImmLoc(AMDGPUOperand::ImmTyDMask, Operands),
4318       "invalid atomic image dmask");
4319     return false;
4320   }
4321   if (!validateMIMGGatherDMask(Inst)) {
4322     Error(getImmLoc(AMDGPUOperand::ImmTyDMask, Operands),
4323       "invalid image_gather dmask: only one bit must be set");
4324     return false;
4325   }
4326   if (!validateMovrels(Inst, Operands)) {
4327     return false;
4328   }
4329   if (!validateFlatOffset(Inst, Operands)) {
4330     return false;
4331   }
4332   if (!validateSMEMOffset(Inst, Operands)) {
4333     return false;
4334   }
4335   if (!validateMAIAccWrite(Inst, Operands)) {
4336     return false;
4337   }
4338   if (!validateMFMA(Inst, Operands)) {
4339     return false;
4340   }
4341   if (!validateCoherencyBits(Inst, Operands, IDLoc)) {
4342     return false;
4343   }
4344 
4345   if (!validateAGPRLdSt(Inst)) {
4346     Error(IDLoc, getFeatureBits()[AMDGPU::FeatureGFX90AInsts]
4347     ? "invalid register class: data and dst should be all VGPR or AGPR"
4348     : "invalid register class: agpr loads and stores not supported on this GPU"
4349     );
4350     return false;
4351   }
4352   if (!validateVGPRAlign(Inst)) {
4353     Error(IDLoc,
4354       "invalid register class: vgpr tuples must be 64 bit aligned");
4355     return false;
4356   }
4357   if (!validateGWS(Inst, Operands)) {
4358     return false;
4359   }
4360 
4361   if (!validateDivScale(Inst)) {
4362     Error(IDLoc, "ABS not allowed in VOP3B instructions");
4363     return false;
4364   }
4368 
4369   return true;
4370 }
4371 
4372 static std::string AMDGPUMnemonicSpellCheck(StringRef S,
4373                                             const FeatureBitset &FBS,
4374                                             unsigned VariantID = 0);
4375 
4376 static bool AMDGPUCheckMnemonic(StringRef Mnemonic,
4377                                 const FeatureBitset &AvailableFeatures,
4378                                 unsigned VariantID);
4379 
4380 bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo,
4381                                        const FeatureBitset &FBS) {
4382   return isSupportedMnemo(Mnemo, FBS, getAllVariants());
4383 }
4384 
4385 bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo,
4386                                        const FeatureBitset &FBS,
4387                                        ArrayRef<unsigned> Variants) {
4388   for (auto Variant : Variants) {
4389     if (AMDGPUCheckMnemonic(Mnemo, FBS, Variant))
4390       return true;
4391   }
4392 
4393   return false;
4394 }
4395 
4396 bool AMDGPUAsmParser::checkUnsupportedInstruction(StringRef Mnemo,
4397                                                   const SMLoc &IDLoc) {
4398   FeatureBitset FBS = ComputeAvailableFeatures(getSTI().getFeatureBits());
4399 
4400   // Check if requested instruction variant is supported.
4401   if (isSupportedMnemo(Mnemo, FBS, getMatchedVariants()))
4402     return false;
4403 
4404   // This instruction is not supported.
4405   // Clear any other pending errors because they are no longer relevant.
4406   getParser().clearPendingErrors();
4407 
4408   // Requested instruction variant is not supported.
4409   // Check if any other variants are supported.
4410   StringRef VariantName = getMatchedVariantName();
4411   if (!VariantName.empty() && isSupportedMnemo(Mnemo, FBS)) {
4412     return Error(IDLoc,
4413                  Twine(VariantName,
4414                        " variant of this instruction is not supported"));
4415   }
4416 
4417   // Finally check if this instruction is supported on any other GPU.
4418   if (isSupportedMnemo(Mnemo, FeatureBitset().set())) {
4419     return Error(IDLoc, "instruction not supported on this GPU");
4420   }
4421 
4422   // Instruction not supported on any GPU. Probably a typo.
4423   std::string Suggestion = AMDGPUMnemonicSpellCheck(Mnemo, FBS);
4424   return Error(IDLoc, "invalid instruction" + Suggestion);
4425 }
4426 
4427 bool AMDGPUAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
4428                                               OperandVector &Operands,
4429                                               MCStreamer &Out,
4430                                               uint64_t &ErrorInfo,
4431                                               bool MatchingInlineAsm) {
4432   MCInst Inst;
4433   unsigned Result = Match_Success;
4434   for (auto Variant : getMatchedVariants()) {
4435     uint64_t EI;
4436     auto R = MatchInstructionImpl(Operands, Inst, EI, MatchingInlineAsm,
4437                                   Variant);
4438     // We order match statuses from least to most specific. We use the most
4439     // specific status as the result:
4440     // Match_MnemonicFail < Match_InvalidOperand < Match_MissingFeature < Match_PreferE32
4441     if ((R == Match_Success) ||
4442         (R == Match_PreferE32) ||
4443         (R == Match_MissingFeature && Result != Match_PreferE32) ||
4444         (R == Match_InvalidOperand && Result != Match_MissingFeature
4445                                    && Result != Match_PreferE32) ||
4446         (R == Match_MnemonicFail   && Result != Match_InvalidOperand
4447                                    && Result != Match_MissingFeature
4448                                    && Result != Match_PreferE32)) {
4449       Result = R;
4450       ErrorInfo = EI;
4451     }
4452     if (R == Match_Success)
4453       break;
4454   }
4455 
4456   if (Result == Match_Success) {
4457     if (!validateInstruction(Inst, IDLoc, Operands)) {
4458       return true;
4459     }
4460     Inst.setLoc(IDLoc);
4461     Out.emitInstruction(Inst, getSTI());
4462     return false;
4463   }
4464 
4465   StringRef Mnemo = ((AMDGPUOperand &)*Operands[0]).getToken();
4466   if (checkUnsupportedInstruction(Mnemo, IDLoc)) {
4467     return true;
4468   }
4469 
4470   switch (Result) {
4471   default: break;
4472   case Match_MissingFeature:
4473     // It has been verified that the specified instruction
4474     // mnemonic is valid. A match was found but it requires
4475     // features which are not supported on this GPU.
4476     return Error(IDLoc, "operands are not valid for this GPU or mode");
4477 
4478   case Match_InvalidOperand: {
4479     SMLoc ErrorLoc = IDLoc;
4480     if (ErrorInfo != ~0ULL) {
4481       if (ErrorInfo >= Operands.size()) {
4482         return Error(IDLoc, "too few operands for instruction");
4483       }
4484       ErrorLoc = ((AMDGPUOperand &)*Operands[ErrorInfo]).getStartLoc();
4485       if (ErrorLoc == SMLoc())
4486         ErrorLoc = IDLoc;
4487     }
4488     return Error(ErrorLoc, "invalid operand for instruction");
4489   }
4490 
4491   case Match_PreferE32:
4492     return Error(IDLoc, "internal error: instruction without _e64 suffix "
4493                         "should be encoded as e32");
4494   case Match_MnemonicFail:
4495     llvm_unreachable("Invalid instructions should have been handled already");
4496   }
4497   llvm_unreachable("Implement any new match types added!");
4498 }
4499 
4500 bool AMDGPUAsmParser::ParseAsAbsoluteExpression(uint32_t &Ret) {
4501   int64_t Tmp = -1;
4502   if (!isToken(AsmToken::Integer) && !isToken(AsmToken::Identifier)) {
4503     return true;
4504   }
4505   if (getParser().parseAbsoluteExpression(Tmp)) {
4506     return true;
4507   }
4508   Ret = static_cast<uint32_t>(Tmp);
4509   return false;
4510 }
4511 
4512 bool AMDGPUAsmParser::ParseDirectiveMajorMinor(uint32_t &Major,
4513                                                uint32_t &Minor) {
4514   if (ParseAsAbsoluteExpression(Major))
4515     return TokError("invalid major version");
4516 
4517   if (!trySkipToken(AsmToken::Comma))
4518     return TokError("minor version number required, comma expected");
4519 
4520   if (ParseAsAbsoluteExpression(Minor))
4521     return TokError("invalid minor version");
4522 
4523   return false;
4524 }
4525 
4526 bool AMDGPUAsmParser::ParseDirectiveAMDGCNTarget() {
4527   if (getSTI().getTargetTriple().getArch() != Triple::amdgcn)
4528     return TokError("directive only supported for amdgcn architecture");
4529 
4530   std::string TargetIDDirective;
4531   SMLoc TargetStart = getTok().getLoc();
4532   if (getParser().parseEscapedString(TargetIDDirective))
4533     return true;
4534 
4535   SMRange TargetRange = SMRange(TargetStart, getTok().getLoc());
4536   if (getTargetStreamer().getTargetID()->toString() != TargetIDDirective)
4537     return getParser().Error(TargetRange.Start,
4538         (Twine(".amdgcn_target directive's target id ") +
4539          Twine(TargetIDDirective) +
4540          Twine(" does not match the specified target id ") +
4541          Twine(getTargetStreamer().getTargetID()->toString())).str());
4542 
4543   return false;
4544 }
4545 
4546 bool AMDGPUAsmParser::OutOfRangeError(SMRange Range) {
4547   return Error(Range.Start, "value out of range", Range);
4548 }
4549 
4550 bool AMDGPUAsmParser::calculateGPRBlocks(
4551     const FeatureBitset &Features, bool VCCUsed, bool FlatScrUsed,
4552     bool XNACKUsed, Optional<bool> EnableWavefrontSize32, unsigned NextFreeVGPR,
4553     SMRange VGPRRange, unsigned NextFreeSGPR, SMRange SGPRRange,
4554     unsigned &VGPRBlocks, unsigned &SGPRBlocks) {
4555   // TODO(scott.linder): These calculations are duplicated from
4556   // AMDGPUAsmPrinter::getSIProgramInfo and could be unified.
4557   IsaVersion Version = getIsaVersion(getSTI().getCPU());
4558 
4559   unsigned NumVGPRs = NextFreeVGPR;
4560   unsigned NumSGPRs = NextFreeSGPR;
4561 
4562   if (Version.Major >= 10)
4563     NumSGPRs = 0;
4564   else {
4565     unsigned MaxAddressableNumSGPRs =
4566         IsaInfo::getAddressableNumSGPRs(&getSTI());
4567 
4568     if (Version.Major >= 8 && !Features.test(FeatureSGPRInitBug) &&
4569         NumSGPRs > MaxAddressableNumSGPRs)
4570       return OutOfRangeError(SGPRRange);
4571 
4572     NumSGPRs +=
4573         IsaInfo::getNumExtraSGPRs(&getSTI(), VCCUsed, FlatScrUsed, XNACKUsed);
4574 
4575     if ((Version.Major <= 7 || Features.test(FeatureSGPRInitBug)) &&
4576         NumSGPRs > MaxAddressableNumSGPRs)
4577       return OutOfRangeError(SGPRRange);
4578 
4579     if (Features.test(FeatureSGPRInitBug))
4580       NumSGPRs = IsaInfo::FIXED_NUM_SGPRS_FOR_INIT_BUG;
4581   }
4582 
4583   VGPRBlocks =
4584       IsaInfo::getNumVGPRBlocks(&getSTI(), NumVGPRs, EnableWavefrontSize32);
4585   SGPRBlocks = IsaInfo::getNumSGPRBlocks(&getSTI(), NumSGPRs);
4586 
4587   return false;
4588 }
4589 
4590 bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
4591   if (getSTI().getTargetTriple().getArch() != Triple::amdgcn)
4592     return TokError("directive only supported for amdgcn architecture");
4593 
4594   if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA)
4595     return TokError("directive only supported for amdhsa OS");
4596 
4597   StringRef KernelName;
4598   if (getParser().parseIdentifier(KernelName))
4599     return true;
4600 
4601   kernel_descriptor_t KD = getDefaultAmdhsaKernelDescriptor(&getSTI());
4602 
4603   StringSet<> Seen;
4604 
4605   IsaVersion IVersion = getIsaVersion(getSTI().getCPU());
4606 
4607   SMRange VGPRRange;
4608   uint64_t NextFreeVGPR = 0;
4609   uint64_t AccumOffset = 0;
4610   SMRange SGPRRange;
4611   uint64_t NextFreeSGPR = 0;
4612 
4613   // Count the number of user SGPRs implied from the enabled feature bits.
4614   unsigned ImpliedUserSGPRCount = 0;
4615 
4616   // Track if the asm explicitly contains the directive for the user SGPR
4617   // count.
4618   Optional<unsigned> ExplicitUserSGPRCount;
4619   bool ReserveVCC = true;
4620   bool ReserveFlatScr = true;
4621   Optional<bool> EnableWavefrontSize32;
4622 
4623   while (true) {
4624     while (trySkipToken(AsmToken::EndOfStatement));
4625 
4626     StringRef ID;
4627     SMRange IDRange = getTok().getLocRange();
4628     if (!parseId(ID, "expected .amdhsa_ directive or .end_amdhsa_kernel"))
4629       return true;
4630 
4631     if (ID == ".end_amdhsa_kernel")
4632       break;
4633 
4634     if (Seen.find(ID) != Seen.end())
4635       return TokError(".amdhsa_ directives cannot be repeated");
4636     Seen.insert(ID);
4637 
4638     SMLoc ValStart = getLoc();
4639     int64_t IVal;
4640     if (getParser().parseAbsoluteExpression(IVal))
4641       return true;
4642     SMLoc ValEnd = getLoc();
4643     SMRange ValRange = SMRange(ValStart, ValEnd);
4644 
4645     if (IVal < 0)
4646       return OutOfRangeError(ValRange);
4647 
4648     uint64_t Val = IVal;
4649 
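// The helper below range-checks VALUE against the width of the named
// descriptor field and, if it fits, packs it into FIELD; every .amdhsa_*
// directive handled below funnels its parsed value through it.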
4650 #define PARSE_BITS_ENTRY(FIELD, ENTRY, VALUE, RANGE)                           \
4651   if (!isUInt<ENTRY##_WIDTH>(VALUE))                                           \
4652     return OutOfRangeError(RANGE);                                             \
4653   AMDHSA_BITS_SET(FIELD, ENTRY, VALUE);
4654 
4655     if (ID == ".amdhsa_group_segment_fixed_size") {
4656       if (!isUInt<sizeof(KD.group_segment_fixed_size) * CHAR_BIT>(Val))
4657         return OutOfRangeError(ValRange);
4658       KD.group_segment_fixed_size = Val;
4659     } else if (ID == ".amdhsa_private_segment_fixed_size") {
4660       if (!isUInt<sizeof(KD.private_segment_fixed_size) * CHAR_BIT>(Val))
4661         return OutOfRangeError(ValRange);
4662       KD.private_segment_fixed_size = Val;
4663     } else if (ID == ".amdhsa_kernarg_size") {
4664       if (!isUInt<sizeof(KD.kernarg_size) * CHAR_BIT>(Val))
4665         return OutOfRangeError(ValRange);
4666       KD.kernarg_size = Val;
4667     } else if (ID == ".amdhsa_user_sgpr_count") {
4668       ExplicitUserSGPRCount = Val;
4669     } else if (ID == ".amdhsa_user_sgpr_private_segment_buffer") {
4670       if (hasArchitectedFlatScratch())
4671         return Error(IDRange.Start,
4672                      "directive is not supported with architected flat scratch",
4673                      IDRange);
4674       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4675                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER,
4676                        Val, ValRange);
4677       if (Val)
4678         ImpliedUserSGPRCount += 4;
4679     } else if (ID == ".amdhsa_user_sgpr_dispatch_ptr") {
4680       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4681                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR, Val,
4682                        ValRange);
4683       if (Val)
4684         ImpliedUserSGPRCount += 2;
4685     } else if (ID == ".amdhsa_user_sgpr_queue_ptr") {
4686       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4687                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR, Val,
4688                        ValRange);
4689       if (Val)
4690         ImpliedUserSGPRCount += 2;
4691     } else if (ID == ".amdhsa_user_sgpr_kernarg_segment_ptr") {
4692       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4693                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR,
4694                        Val, ValRange);
4695       if (Val)
4696         ImpliedUserSGPRCount += 2;
4697     } else if (ID == ".amdhsa_user_sgpr_dispatch_id") {
4698       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4699                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID, Val,
4700                        ValRange);
4701       if (Val)
4702         ImpliedUserSGPRCount += 2;
4703     } else if (ID == ".amdhsa_user_sgpr_flat_scratch_init") {
4704       if (hasArchitectedFlatScratch())
4705         return Error(IDRange.Start,
4706                      "directive is not supported with architected flat scratch",
4707                      IDRange);
4708       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4709                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT, Val,
4710                        ValRange);
4711       if (Val)
4712         ImpliedUserSGPRCount += 2;
4713     } else if (ID == ".amdhsa_user_sgpr_private_segment_size") {
4714       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4715                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE,
4716                        Val, ValRange);
4717       if (Val)
4718         ImpliedUserSGPRCount += 1;
4719     } else if (ID == ".amdhsa_wavefront_size32") {
4720       if (IVersion.Major < 10)
4721         return Error(IDRange.Start, "directive requires gfx10+", IDRange);
4722       EnableWavefrontSize32 = Val;
4723       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4724                        KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32,
4725                        Val, ValRange);
4726     } else if (ID == ".amdhsa_system_sgpr_private_segment_wavefront_offset") {
4727       if (hasArchitectedFlatScratch())
4728         return Error(IDRange.Start,
4729                      "directive is not supported with architected flat scratch",
4730                      IDRange);
4731       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4732                        COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, Val, ValRange);
4733     } else if (ID == ".amdhsa_enable_private_segment") {
4734       if (!hasArchitectedFlatScratch())
4735         return Error(
4736             IDRange.Start,
4737             "directive is not supported without architected flat scratch",
4738             IDRange);
4739       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4740                        COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, Val, ValRange);
4741     } else if (ID == ".amdhsa_system_sgpr_workgroup_id_x") {
4742       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4743                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, Val,
4744                        ValRange);
4745     } else if (ID == ".amdhsa_system_sgpr_workgroup_id_y") {
4746       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4747                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y, Val,
4748                        ValRange);
4749     } else if (ID == ".amdhsa_system_sgpr_workgroup_id_z") {
4750       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4751                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z, Val,
4752                        ValRange);
4753     } else if (ID == ".amdhsa_system_sgpr_workgroup_info") {
4754       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4755                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO, Val,
4756                        ValRange);
4757     } else if (ID == ".amdhsa_system_vgpr_workitem_id") {
4758       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4759                        COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID, Val,
4760                        ValRange);
4761     } else if (ID == ".amdhsa_next_free_vgpr") {
4762       VGPRRange = ValRange;
4763       NextFreeVGPR = Val;
4764     } else if (ID == ".amdhsa_next_free_sgpr") {
4765       SGPRRange = ValRange;
4766       NextFreeSGPR = Val;
4767     } else if (ID == ".amdhsa_accum_offset") {
4768       if (!isGFX90A())
4769         return Error(IDRange.Start, "directive requires gfx90a+", IDRange);
4770       AccumOffset = Val;
4771     } else if (ID == ".amdhsa_reserve_vcc") {
4772       if (!isUInt<1>(Val))
4773         return OutOfRangeError(ValRange);
4774       ReserveVCC = Val;
4775     } else if (ID == ".amdhsa_reserve_flat_scratch") {
4776       if (IVersion.Major < 7)
4777         return Error(IDRange.Start, "directive requires gfx7+", IDRange);
4778       if (hasArchitectedFlatScratch())
4779         return Error(IDRange.Start,
4780                      "directive is not supported with architected flat scratch",
4781                      IDRange);
4782       if (!isUInt<1>(Val))
4783         return OutOfRangeError(ValRange);
4784       ReserveFlatScr = Val;
4785     } else if (ID == ".amdhsa_reserve_xnack_mask") {
4786       if (IVersion.Major < 8)
4787         return Error(IDRange.Start, "directive requires gfx8+", IDRange);
4788       if (!isUInt<1>(Val))
4789         return OutOfRangeError(ValRange);
4790       if (Val != getTargetStreamer().getTargetID()->isXnackOnOrAny())
4791         return getParser().Error(IDRange.Start, ".amdhsa_reserve_xnack_mask does not match target id",
4792                                  IDRange);
4793     } else if (ID == ".amdhsa_float_round_mode_32") {
4794       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
4795                        COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32, Val, ValRange);
4796     } else if (ID == ".amdhsa_float_round_mode_16_64") {
4797       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
4798                        COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64, Val, ValRange);
4799     } else if (ID == ".amdhsa_float_denorm_mode_32") {
4800       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
4801                        COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32, Val, ValRange);
4802     } else if (ID == ".amdhsa_float_denorm_mode_16_64") {
4803       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
4804                        COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64, Val,
4805                        ValRange);
4806     } else if (ID == ".amdhsa_dx10_clamp") {
4807       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
4808                        COMPUTE_PGM_RSRC1_ENABLE_DX10_CLAMP, Val, ValRange);
4809     } else if (ID == ".amdhsa_ieee_mode") {
4810       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_ENABLE_IEEE_MODE,
4811                        Val, ValRange);
4812     } else if (ID == ".amdhsa_fp16_overflow") {
4813       if (IVersion.Major < 9)
4814         return Error(IDRange.Start, "directive requires gfx9+", IDRange);
4815       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FP16_OVFL, Val,
4816                        ValRange);
4817     } else if (ID == ".amdhsa_tg_split") {
4818       if (!isGFX90A())
4819         return Error(IDRange.Start, "directive requires gfx90a+", IDRange);
4820       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc3, COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT, Val,
4821                        ValRange);
4822     } else if (ID == ".amdhsa_workgroup_processor_mode") {
4823       if (IVersion.Major < 10)
4824         return Error(IDRange.Start, "directive requires gfx10+", IDRange);
4825       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_WGP_MODE, Val,
4826                        ValRange);
4827     } else if (ID == ".amdhsa_memory_ordered") {
4828       if (IVersion.Major < 10)
4829         return Error(IDRange.Start, "directive requires gfx10+", IDRange);
4830       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_MEM_ORDERED, Val,
4831                        ValRange);
4832     } else if (ID == ".amdhsa_forward_progress") {
4833       if (IVersion.Major < 10)
4834         return Error(IDRange.Start, "directive requires gfx10+", IDRange);
4835       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FWD_PROGRESS, Val,
4836                        ValRange);
4837     } else if (ID == ".amdhsa_exception_fp_ieee_invalid_op") {
4838       PARSE_BITS_ENTRY(
4839           KD.compute_pgm_rsrc2,
4840           COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION, Val,
4841           ValRange);
4842     } else if (ID == ".amdhsa_exception_fp_denorm_src") {
4843       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4844                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE,
4845                        Val, ValRange);
4846     } else if (ID == ".amdhsa_exception_fp_ieee_div_zero") {
4847       PARSE_BITS_ENTRY(
4848           KD.compute_pgm_rsrc2,
4849           COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO, Val,
4850           ValRange);
4851     } else if (ID == ".amdhsa_exception_fp_ieee_overflow") {
4852       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4853                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW,
4854                        Val, ValRange);
4855     } else if (ID == ".amdhsa_exception_fp_ieee_underflow") {
4856       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4857                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW,
4858                        Val, ValRange);
4859     } else if (ID == ".amdhsa_exception_fp_ieee_inexact") {
4860       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4861                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT,
4862                        Val, ValRange);
4863     } else if (ID == ".amdhsa_exception_int_div_zero") {
4864       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4865                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO,
4866                        Val, ValRange);
4867     } else {
4868       return Error(IDRange.Start, "unknown .amdhsa_kernel directive", IDRange);
4869     }
4870 
4871 #undef PARSE_BITS_ENTRY
4872   }
4873 
4874   if (Seen.find(".amdhsa_next_free_vgpr") == Seen.end())
4875     return TokError(".amdhsa_next_free_vgpr directive is required");
4876 
4877   if (Seen.find(".amdhsa_next_free_sgpr") == Seen.end())
4878     return TokError(".amdhsa_next_free_sgpr directive is required");
4879 
4880   unsigned VGPRBlocks;
4881   unsigned SGPRBlocks;
4882   if (calculateGPRBlocks(getFeatureBits(), ReserveVCC, ReserveFlatScr,
4883                          getTargetStreamer().getTargetID()->isXnackOnOrAny(),
4884                          EnableWavefrontSize32, NextFreeVGPR,
4885                          VGPRRange, NextFreeSGPR, SGPRRange, VGPRBlocks,
4886                          SGPRBlocks))
4887     return true;
4888 
4889   if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_WIDTH>(
4890           VGPRBlocks))
4891     return OutOfRangeError(VGPRRange);
4892   AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
4893                   COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT, VGPRBlocks);
4894 
4895   if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_WIDTH>(
4896           SGPRBlocks))
4897     return OutOfRangeError(SGPRRange);
4898   AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
4899                   COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT,
4900                   SGPRBlocks);
4901 
4902   if (ExplicitUserSGPRCount && ImpliedUserSGPRCount > *ExplicitUserSGPRCount)
4903     return TokError("amdhsa_user_sgpr_count smaller than implied by "
4904                     "enabled user SGPRs");
4905 
4906   unsigned UserSGPRCount =
4907       ExplicitUserSGPRCount ? *ExplicitUserSGPRCount : ImpliedUserSGPRCount;
4908 
4909   if (!isUInt<COMPUTE_PGM_RSRC2_USER_SGPR_COUNT_WIDTH>(UserSGPRCount))
4910     return TokError("too many user SGPRs enabled");
4911   AMDHSA_BITS_SET(KD.compute_pgm_rsrc2, COMPUTE_PGM_RSRC2_USER_SGPR_COUNT,
4912                   UserSGPRCount);
4913 
4914   if (isGFX90A()) {
4915     if (Seen.find(".amdhsa_accum_offset") == Seen.end())
4916       return TokError(".amdhsa_accum_offset directive is required");
4917     if (AccumOffset < 4 || AccumOffset > 256 || (AccumOffset & 3))
4918       return TokError("accum_offset should be in range [4..256] in "
4919                       "increments of 4");
4920     if (AccumOffset > alignTo(std::max((uint64_t)1, NextFreeVGPR), 4))
4921       return TokError("accum_offset exceeds total VGPR allocation");
4922     AMDHSA_BITS_SET(KD.compute_pgm_rsrc3, COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET,
4923                     (AccumOffset / 4 - 1));
4924   }
4925 
4926   getTargetStreamer().EmitAmdhsaKernelDescriptor(
4927       getSTI(), KernelName, KD, NextFreeVGPR, NextFreeSGPR, ReserveVCC,
4928       ReserveFlatScr);
4929   return false;
4930 }
4931 
4932 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectVersion() {
4933   uint32_t Major;
4934   uint32_t Minor;
4935 
4936   if (ParseDirectiveMajorMinor(Major, Minor))
4937     return true;
4938 
4939   getTargetStreamer().EmitDirectiveHSACodeObjectVersion(Major, Minor);
4940   return false;
4941 }
4942 
4943 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectISA() {
4944   uint32_t Major;
4945   uint32_t Minor;
4946   uint32_t Stepping;
4947   StringRef VendorName;
4948   StringRef ArchName;
4949 
4950   // If this directive has no arguments, then use the ISA version for the
4951   // targeted GPU.
4952   if (isToken(AsmToken::EndOfStatement)) {
4953     AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
4954     getTargetStreamer().EmitDirectiveHSACodeObjectISAV2(ISA.Major, ISA.Minor,
4955                                                         ISA.Stepping,
4956                                                         "AMD", "AMDGPU");
4957     return false;
4958   }
4959 
4960   if (ParseDirectiveMajorMinor(Major, Minor))
4961     return true;
4962 
4963   if (!trySkipToken(AsmToken::Comma))
4964     return TokError("stepping version number required, comma expected");
4965 
4966   if (ParseAsAbsoluteExpression(Stepping))
4967     return TokError("invalid stepping version");
4968 
4969   if (!trySkipToken(AsmToken::Comma))
4970     return TokError("vendor name required, comma expected");
4971 
4972   if (!parseString(VendorName, "invalid vendor name"))
4973     return true;
4974 
4975   if (!trySkipToken(AsmToken::Comma))
4976     return TokError("arch name required, comma expected");
4977 
4978   if (!parseString(ArchName, "invalid arch name"))
4979     return true;
4980 
4981   getTargetStreamer().EmitDirectiveHSACodeObjectISAV2(Major, Minor, Stepping,
4982                                                       VendorName, ArchName);
4983   return false;
4984 }
4985 
4986 bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID,
4987                                                amd_kernel_code_t &Header) {
4988   // max_scratch_backing_memory_byte_size is deprecated. Ignore it while parsing
4989   // assembly for backwards compatibility.
4990   if (ID == "max_scratch_backing_memory_byte_size") {
4991     Parser.eatToEndOfStatement();
4992     return false;
4993   }
4994 
4995   SmallString<40> ErrStr;
4996   raw_svector_ostream Err(ErrStr);
4997   if (!parseAmdKernelCodeField(ID, getParser(), Header, Err)) {
4998     return TokError(Err.str());
4999   }
5000   Lex();
5001 
5002   if (ID == "enable_wavefront_size32") {
5003     if (Header.code_properties & AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32) {
5004       if (!isGFX10Plus())
5005         return TokError("enable_wavefront_size32=1 is only allowed on GFX10+");
5006       if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32])
5007         return TokError("enable_wavefront_size32=1 requires +WavefrontSize32");
5008     } else {
5009       if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64])
5010         return TokError("enable_wavefront_size32=0 requires +WavefrontSize64");
5011     }
5012   }
5013 
5014   if (ID == "wavefront_size") {
5015     if (Header.wavefront_size == 5) {
5016       if (!isGFX10Plus())
5017         return TokError("wavefront_size=5 is only allowed on GFX10+");
5018       if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32])
5019         return TokError("wavefront_size=5 requires +WavefrontSize32");
5020     } else if (Header.wavefront_size == 6) {
5021       if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64])
5022         return TokError("wavefront_size=6 requires +WavefrontSize64");
5023     }
5024   }
5025 
5026   if (ID == "enable_wgp_mode") {
5027     if (G_00B848_WGP_MODE(Header.compute_pgm_resource_registers) &&
5028         !isGFX10Plus())
5029       return TokError("enable_wgp_mode=1 is only allowed on GFX10+");
5030   }
5031 
5032   if (ID == "enable_mem_ordered") {
5033     if (G_00B848_MEM_ORDERED(Header.compute_pgm_resource_registers) &&
5034         !isGFX10Plus())
5035       return TokError("enable_mem_ordered=1 is only allowed on GFX10+");
5036   }
5037 
5038   if (ID == "enable_fwd_progress") {
5039     if (G_00B848_FWD_PROGRESS(Header.compute_pgm_resource_registers) &&
5040         !isGFX10Plus())
5041       return TokError("enable_fwd_progress=1 is only allowed on GFX10+");
5042   }
5043 
5044   return false;
5045 }
5046 
5047 bool AMDGPUAsmParser::ParseDirectiveAMDKernelCodeT() {
5048   amd_kernel_code_t Header;
5049   AMDGPU::initDefaultAMDKernelCodeT(Header, &getSTI());
5050 
5051   while (true) {
5052     // Lex EndOfStatement.  This is in a while loop, because lexing a comment
5053     // will set the current token to EndOfStatement.
5054     while(trySkipToken(AsmToken::EndOfStatement));
5055 
5056     StringRef ID;
5057     if (!parseId(ID, "expected value identifier or .end_amd_kernel_code_t"))
5058       return true;
5059 
5060     if (ID == ".end_amd_kernel_code_t")
5061       break;
5062 
5063     if (ParseAMDKernelCodeTValue(ID, Header))
5064       return true;
5065   }
5066 
5067   getTargetStreamer().EmitAMDKernelCodeT(Header);
5068 
5069   return false;
5070 }
5071 
5072 bool AMDGPUAsmParser::ParseDirectiveAMDGPUHsaKernel() {
5073   StringRef KernelName;
5074   if (!parseId(KernelName, "expected symbol name"))
5075     return true;
5076 
5077   getTargetStreamer().EmitAMDGPUSymbolType(KernelName,
5078                                            ELF::STT_AMDGPU_HSA_KERNEL);
5079 
5080   KernelScope.initialize(getContext());
5081   return false;
5082 }
5083 
5084 bool AMDGPUAsmParser::ParseDirectiveISAVersion() {
5085   if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) {
5086     return Error(getLoc(),
5087                  ".amd_amdgpu_isa directive is not available on non-amdgcn "
5088                  "architectures");
5089   }
5090 
5091   auto TargetIDDirective = getLexer().getTok().getStringContents();
5092   if (getTargetStreamer().getTargetID()->toString() != TargetIDDirective)
5093     return Error(getParser().getTok().getLoc(), "target id must match options");
5094 
5095   getTargetStreamer().EmitISAVersion();
5096   Lex();
5097 
5098   return false;
5099 }
5100 
5101 bool AMDGPUAsmParser::ParseDirectiveHSAMetadata() {
5102   const char *AssemblerDirectiveBegin;
5103   const char *AssemblerDirectiveEnd;
5104   std::tie(AssemblerDirectiveBegin, AssemblerDirectiveEnd) =
5105       isHsaAbiVersion3AndAbove(&getSTI())
5106           ? std::make_tuple(HSAMD::V3::AssemblerDirectiveBegin,
5107                             HSAMD::V3::AssemblerDirectiveEnd)
5108           : std::make_tuple(HSAMD::AssemblerDirectiveBegin,
5109                             HSAMD::AssemblerDirectiveEnd);
5110 
5111   if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA) {
5112     return Error(getLoc(),
5113                  (Twine(AssemblerDirectiveBegin) + Twine(" directive is "
5114                  "not available on non-amdhsa OSes")).str());
5115   }
5116 
5117   std::string HSAMetadataString;
5118   if (ParseToEndDirective(AssemblerDirectiveBegin, AssemblerDirectiveEnd,
5119                           HSAMetadataString))
5120     return true;
5121 
5122   if (isHsaAbiVersion3AndAbove(&getSTI())) {
5123     if (!getTargetStreamer().EmitHSAMetadataV3(HSAMetadataString))
5124       return Error(getLoc(), "invalid HSA metadata");
5125   } else {
5126     if (!getTargetStreamer().EmitHSAMetadataV2(HSAMetadataString))
5127       return Error(getLoc(), "invalid HSA metadata");
5128   }
5129 
5130   return false;
5131 }
5132 
5133 /// Common code to parse out a block of text (typically YAML) between start and
5134 /// end directives.
5135 bool AMDGPUAsmParser::ParseToEndDirective(const char *AssemblerDirectiveBegin,
5136                                           const char *AssemblerDirectiveEnd,
5137                                           std::string &CollectString) {
5138 
5139   raw_string_ostream CollectStream(CollectString);
5140 
5141   getLexer().setSkipSpace(false);
5142 
5143   bool FoundEnd = false;
5144   while (!isToken(AsmToken::Eof)) {
5145     while (isToken(AsmToken::Space)) {
5146       CollectStream << getTokenStr();
5147       Lex();
5148     }
5149 
5150     if (trySkipId(AssemblerDirectiveEnd)) {
5151       FoundEnd = true;
5152       break;
5153     }
5154 
5155     CollectStream << Parser.parseStringToEndOfStatement()
5156                   << getContext().getAsmInfo()->getSeparatorString();
5157 
5158     Parser.eatToEndOfStatement();
5159   }
5160 
5161   getLexer().setSkipSpace(true);
5162 
5163   if (isToken(AsmToken::Eof) && !FoundEnd) {
5164     return TokError(Twine("expected directive ") +
5165                     Twine(AssemblerDirectiveEnd) + Twine(" not found"));
5166   }
5167 
5168   CollectStream.flush();
5169   return false;
5170 }
5171 
5172 /// Parse the assembler directive for new MsgPack-format PAL metadata.
5173 bool AMDGPUAsmParser::ParseDirectivePALMetadataBegin() {
5174   std::string String;
5175   if (ParseToEndDirective(AMDGPU::PALMD::AssemblerDirectiveBegin,
5176                           AMDGPU::PALMD::AssemblerDirectiveEnd, String))
5177     return true;
5178 
5179   auto PALMetadata = getTargetStreamer().getPALMetadata();
5180   if (!PALMetadata->setFromString(String))
5181     return Error(getLoc(), "invalid PAL metadata");
5182   return false;
5183 }
5184 
5185 /// Parse the assembler directive for old linear-format PAL metadata.
5186 bool AMDGPUAsmParser::ParseDirectivePALMetadata() {
5187   if (getSTI().getTargetTriple().getOS() != Triple::AMDPAL) {
5188     return Error(getLoc(),
5189                  (Twine(PALMD::AssemblerDirective) + Twine(" directive is "
5190                  "not available on non-amdpal OSes")).str());
5191   }
5192 
5193   auto PALMetadata = getTargetStreamer().getPALMetadata();
5194   PALMetadata->setLegacy();
5195   for (;;) {
5196     uint32_t Key, Value;
5197     if (ParseAsAbsoluteExpression(Key)) {
5198       return TokError(Twine("invalid value in ") +
5199                       Twine(PALMD::AssemblerDirective));
5200     }
5201     if (!trySkipToken(AsmToken::Comma)) {
5202       return TokError(Twine("expected an even number of values in ") +
5203                       Twine(PALMD::AssemblerDirective));
5204     }
5205     if (ParseAsAbsoluteExpression(Value)) {
5206       return TokError(Twine("invalid value in ") +
5207                       Twine(PALMD::AssemblerDirective));
5208     }
5209     PALMetadata->setRegister(Key, Value);
5210     if (!trySkipToken(AsmToken::Comma))
5211       break;
5212   }
5213   return false;
5214 }
5215 
5216 /// ParseDirectiveAMDGPULDS
5217 ///  ::= .amdgpu_lds identifier ',' size_expression [',' align_expression]
5218 bool AMDGPUAsmParser::ParseDirectiveAMDGPULDS() {
5219   if (getParser().checkForValidSection())
5220     return true;
5221 
5222   StringRef Name;
5223   SMLoc NameLoc = getLoc();
5224   if (getParser().parseIdentifier(Name))
5225     return TokError("expected identifier in directive");
5226 
5227   MCSymbol *Symbol = getContext().getOrCreateSymbol(Name);
5228   if (parseToken(AsmToken::Comma, "expected ','"))
5229     return true;
5230 
5231   unsigned LocalMemorySize = AMDGPU::IsaInfo::getLocalMemorySize(&getSTI());
5232 
5233   int64_t Size;
5234   SMLoc SizeLoc = getLoc();
5235   if (getParser().parseAbsoluteExpression(Size))
5236     return true;
5237   if (Size < 0)
5238     return Error(SizeLoc, "size must be non-negative");
5239   if (Size > LocalMemorySize)
5240     return Error(SizeLoc, "size is too large");
5241 
5242   int64_t Alignment = 4;
5243   if (trySkipToken(AsmToken::Comma)) {
5244     SMLoc AlignLoc = getLoc();
5245     if (getParser().parseAbsoluteExpression(Alignment))
5246       return true;
5247     if (Alignment < 0 || !isPowerOf2_64(Alignment))
5248       return Error(AlignLoc, "alignment must be a power of two");
5249 
5250     // Alignment larger than the size of LDS is possible in theory, as long
5251     // as the linker manages to place the symbol at address 0, but we do want
5252     // to make sure the alignment fits nicely into a 32-bit integer.
5253     if (Alignment >= 1u << 31)
5254       return Error(AlignLoc, "alignment is too large");
5255   }
5256 
5257   if (parseToken(AsmToken::EndOfStatement,
5258                  "unexpected token in '.amdgpu_lds' directive"))
5259     return true;
5260 
5261   Symbol->redefineIfPossible();
5262   if (!Symbol->isUndefined())
5263     return Error(NameLoc, "invalid symbol redefinition");
5264 
5265   getTargetStreamer().emitAMDGPULDS(Symbol, Size, Align(Alignment));
5266   return false;
5267 }
5268 
5269 bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) {
5270   StringRef IDVal = DirectiveID.getString();
5271 
5272   if (isHsaAbiVersion3AndAbove(&getSTI())) {
5273     if (IDVal == ".amdhsa_kernel")
5274      return ParseDirectiveAMDHSAKernel();
5275 
5276     // TODO: Restructure/combine with PAL metadata directive.
5277     if (IDVal == AMDGPU::HSAMD::V3::AssemblerDirectiveBegin)
5278       return ParseDirectiveHSAMetadata();
5279   } else {
5280     if (IDVal == ".hsa_code_object_version")
5281       return ParseDirectiveHSACodeObjectVersion();
5282 
5283     if (IDVal == ".hsa_code_object_isa")
5284       return ParseDirectiveHSACodeObjectISA();
5285 
5286     if (IDVal == ".amd_kernel_code_t")
5287       return ParseDirectiveAMDKernelCodeT();
5288 
5289     if (IDVal == ".amdgpu_hsa_kernel")
5290       return ParseDirectiveAMDGPUHsaKernel();
5291 
5292     if (IDVal == ".amd_amdgpu_isa")
5293       return ParseDirectiveISAVersion();
5294 
5295     if (IDVal == AMDGPU::HSAMD::AssemblerDirectiveBegin)
5296       return ParseDirectiveHSAMetadata();
5297   }
5298 
5299   if (IDVal == ".amdgcn_target")
5300     return ParseDirectiveAMDGCNTarget();
5301 
5302   if (IDVal == ".amdgpu_lds")
5303     return ParseDirectiveAMDGPULDS();
5304 
5305   if (IDVal == PALMD::AssemblerDirectiveBegin)
5306     return ParseDirectivePALMetadataBegin();
5307 
5308   if (IDVal == PALMD::AssemblerDirective)
5309     return ParseDirectivePALMetadata();
5310 
5311   return true;
5312 }
5313 
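// Return true if the given register is available on the current subtarget.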
5314 bool AMDGPUAsmParser::subtargetHasRegister(const MCRegisterInfo &MRI,
5315                                            unsigned RegNo) {
5316 
5317   for (MCRegAliasIterator R(AMDGPU::TTMP12_TTMP13_TTMP14_TTMP15, &MRI, true);
5318        R.isValid(); ++R) {
5319     if (*R == RegNo)
5320       return isGFX9Plus();
5321   }
5322 
5323   // GFX10 has 2 more SGPRs: 104 and 105.
5324   for (MCRegAliasIterator R(AMDGPU::SGPR104_SGPR105, &MRI, true);
5325        R.isValid(); ++R) {
5326     if (*R == RegNo)
5327       return hasSGPR104_SGPR105();
5328   }
5329 
5330   switch (RegNo) {
5331   case AMDGPU::SRC_SHARED_BASE:
5332   case AMDGPU::SRC_SHARED_LIMIT:
5333   case AMDGPU::SRC_PRIVATE_BASE:
5334   case AMDGPU::SRC_PRIVATE_LIMIT:
5335   case AMDGPU::SRC_POPS_EXITING_WAVE_ID:
5336     return isGFX9Plus();
5337   case AMDGPU::TBA:
5338   case AMDGPU::TBA_LO:
5339   case AMDGPU::TBA_HI:
5340   case AMDGPU::TMA:
5341   case AMDGPU::TMA_LO:
5342   case AMDGPU::TMA_HI:
5343     return !isGFX9Plus();
5344   case AMDGPU::XNACK_MASK:
5345   case AMDGPU::XNACK_MASK_LO:
5346   case AMDGPU::XNACK_MASK_HI:
5347     return (isVI() || isGFX9()) && getTargetStreamer().getTargetID()->isXnackSupported();
5348   case AMDGPU::SGPR_NULL:
5349     return isGFX10Plus();
5350   default:
5351     break;
5352   }
5353 
5354   if (isCI())
5355     return true;
5356 
5357   if (isSI() || isGFX10Plus()) {
5358     // No flat_scr on SI.
5359     // On GFX10 flat scratch is not a valid register operand and can only be
5360     // accessed with s_setreg/s_getreg.
5361     switch (RegNo) {
5362     case AMDGPU::FLAT_SCR:
5363     case AMDGPU::FLAT_SCR_LO:
5364     case AMDGPU::FLAT_SCR_HI:
5365       return false;
5366     default:
5367       return true;
5368     }
5369   }
5370 
5371   // VI only has 102 SGPRs, so make sure we aren't trying to use the 2 more that
5372   // SI/CI have.
5373   for (MCRegAliasIterator R(AMDGPU::SGPR102_SGPR103, &MRI, true);
5374        R.isValid(); ++R) {
5375     if (*R == RegNo)
5376       return hasSGPR102_SGPR103();
5377   }
5378 
5379   return true;
5380 }
5381 
5382 OperandMatchResultTy
5383 AMDGPUAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic,
5384                               OperandMode Mode) {
5385   // Try to parse with a custom parser
5386   OperandMatchResultTy ResTy = MatchOperandParserImpl(Operands, Mnemonic);
5387 
5388   // If we successfully parsed the operand or if there was an error parsing,
5389   // we are done.
5390   //
5391   // If we are parsing after we reach EndOfStatement then this means we
5392   // are appending default values to the Operands list.  This is only done
5393   // by custom parser, so we shouldn't continue on to the generic parsing.
5394   if (ResTy == MatchOperand_Success || ResTy == MatchOperand_ParseFail ||
5395       isToken(AsmToken::EndOfStatement))
5396     return ResTy;
5397 
5398   SMLoc RBraceLoc;
5399   SMLoc LBraceLoc = getLoc();
5400   if (Mode == OperandMode_NSA && trySkipToken(AsmToken::LBrac)) {
5401     unsigned Prefix = Operands.size();
5402 
5403     for (;;) {
5404       auto Loc = getLoc();
5405       ResTy = parseReg(Operands);
5406       if (ResTy == MatchOperand_NoMatch)
5407         Error(Loc, "expected a register");
5408       if (ResTy != MatchOperand_Success)
5409         return MatchOperand_ParseFail;
5410 
5411       RBraceLoc = getLoc();
5412       if (trySkipToken(AsmToken::RBrac))
5413         break;
5414 
5415       if (!skipToken(AsmToken::Comma,
5416                      "expected a comma or a closing square bracket")) {
5417         return MatchOperand_ParseFail;
5418       }
5419     }
5420 
5421     if (Operands.size() - Prefix > 1) {
5422       Operands.insert(Operands.begin() + Prefix,
5423                       AMDGPUOperand::CreateToken(this, "[", LBraceLoc));
5424       Operands.push_back(AMDGPUOperand::CreateToken(this, "]", RBraceLoc));
5425     }
5426 
5427     return MatchOperand_Success;
5428   }
5429 
5430   return parseRegOrImm(Operands);
5431 }
5432 
5433 StringRef AMDGPUAsmParser::parseMnemonicSuffix(StringRef Name) {
5434   // Clear any forced encodings from the previous instruction.
5435   setForcedEncodingSize(0);
5436   setForcedDPP(false);
5437   setForcedSDWA(false);
5438 
5439   if (Name.endswith("_e64")) {
5440     setForcedEncodingSize(64);
5441     return Name.substr(0, Name.size() - 4);
5442   } else if (Name.endswith("_e32")) {
5443     setForcedEncodingSize(32);
5444     return Name.substr(0, Name.size() - 4);
5445   } else if (Name.endswith("_dpp")) {
5446     setForcedDPP(true);
5447     return Name.substr(0, Name.size() - 4);
5448   } else if (Name.endswith("_sdwa")) {
5449     setForcedSDWA(true);
5450     return Name.substr(0, Name.size() - 5);
5451   }
5452   return Name;
5453 }
5454 
5455 bool AMDGPUAsmParser::ParseInstruction(ParseInstructionInfo &Info,
5456                                        StringRef Name,
5457                                        SMLoc NameLoc, OperandVector &Operands) {
5458   // Add the instruction mnemonic
5459   Name = parseMnemonicSuffix(Name);
5460   Operands.push_back(AMDGPUOperand::CreateToken(this, Name, NameLoc));
5461 
5462   bool IsMIMG = Name.startswith("image_");
5463 
5464   while (!trySkipToken(AsmToken::EndOfStatement)) {
5465     OperandMode Mode = OperandMode_Default;
5466     if (IsMIMG && isGFX10Plus() && Operands.size() == 2)
5467       Mode = OperandMode_NSA;
5468     CPolSeen = 0;
5469     OperandMatchResultTy Res = parseOperand(Operands, Name, Mode);
5470 
5471     if (Res != MatchOperand_Success) {
5472       checkUnsupportedInstruction(Name, NameLoc);
5473       if (!Parser.hasPendingError()) {
5474         // FIXME: use real operand location rather than the current location.
5475         StringRef Msg =
5476           (Res == MatchOperand_ParseFail) ? "failed parsing operand." :
5477                                             "not a valid operand.";
5478         Error(getLoc(), Msg);
5479       }
5480       while (!trySkipToken(AsmToken::EndOfStatement)) {
5481         lex();
5482       }
5483       return true;
5484     }
5485 
5486     // Eat the comma or space if there is one.
5487     trySkipToken(AsmToken::Comma);
5488   }
5489 
5490   return false;
5491 }
5492 
5493 //===----------------------------------------------------------------------===//
5494 // Utility functions
5495 //===----------------------------------------------------------------------===//
5496 
5497 OperandMatchResultTy
5498 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, int64_t &IntVal) {
5499 
5500   if (!trySkipId(Prefix, AsmToken::Colon))
5501     return MatchOperand_NoMatch;
5502 
5503   return parseExpr(IntVal) ? MatchOperand_Success : MatchOperand_ParseFail;
5504 }
5505 
5506 OperandMatchResultTy
5507 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
5508                                     AMDGPUOperand::ImmTy ImmTy,
5509                                     bool (*ConvertResult)(int64_t&)) {
5510   SMLoc S = getLoc();
5511   int64_t Value = 0;
5512 
5513   OperandMatchResultTy Res = parseIntWithPrefix(Prefix, Value);
5514   if (Res != MatchOperand_Success)
5515     return Res;
5516 
5517   if (ConvertResult && !ConvertResult(Value)) {
5518     Error(S, "invalid " + StringRef(Prefix) + " value.");
5519   }
5520 
5521   Operands.push_back(AMDGPUOperand::CreateImm(this, Value, S, ImmTy));
5522   return MatchOperand_Success;
5523 }
5524 
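// Parse 'Prefix:[v0,v1,...]' where each element must be 0 or 1; the bits are
// packed into a single immediate operand (at most 4 elements).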
5525 OperandMatchResultTy
5526 AMDGPUAsmParser::parseOperandArrayWithPrefix(const char *Prefix,
5527                                              OperandVector &Operands,
5528                                              AMDGPUOperand::ImmTy ImmTy,
5529                                              bool (*ConvertResult)(int64_t&)) {
5530   SMLoc S = getLoc();
5531   if (!trySkipId(Prefix, AsmToken::Colon))
5532     return MatchOperand_NoMatch;
5533 
5534   if (!skipToken(AsmToken::LBrac, "expected a left square bracket"))
5535     return MatchOperand_ParseFail;
5536 
5537   unsigned Val = 0;
5538   const unsigned MaxSize = 4;
5539 
5540   // FIXME: How to verify the number of elements matches the number of src
5541   // operands?
5542   for (int I = 0; ; ++I) {
5543     int64_t Op;
5544     SMLoc Loc = getLoc();
5545     if (!parseExpr(Op))
5546       return MatchOperand_ParseFail;
5547 
5548     if (Op != 0 && Op != 1) {
5549       Error(Loc, "invalid " + StringRef(Prefix) + " value.");
5550       return MatchOperand_ParseFail;
5551     }
5552 
5553     Val |= (Op << I);
5554 
5555     if (trySkipToken(AsmToken::RBrac))
5556       break;
5557 
5558     if (I + 1 == MaxSize) {
5559       Error(getLoc(), "expected a closing square bracket");
5560       return MatchOperand_ParseFail;
5561     }
5562 
5563     if (!skipToken(AsmToken::Comma, "expected a comma"))
5564       return MatchOperand_ParseFail;
5565   }
5566 
5567   Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S, ImmTy));
5568   return MatchOperand_Success;
5569 }
5570 
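// Parse a named bit such as 'r128', or its negated form with a 'no' prefix.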
5571 OperandMatchResultTy
5572 AMDGPUAsmParser::parseNamedBit(StringRef Name, OperandVector &Operands,
5573                                AMDGPUOperand::ImmTy ImmTy) {
5574   int64_t Bit;
5575   SMLoc S = getLoc();
5576 
5577   if (trySkipId(Name)) {
5578     Bit = 1;
5579   } else if (trySkipId("no", Name)) {
5580     Bit = 0;
5581   } else {
5582     return MatchOperand_NoMatch;
5583   }
5584 
5585   if (Name == "r128" && !hasMIMG_R128()) {
5586     Error(S, "r128 modifier is not supported on this GPU");
5587     return MatchOperand_ParseFail;
5588   }
5589   if (Name == "a16" && !isGFX9() && !hasGFX10A16()) {
5590     Error(S, "a16 modifier is not supported on this GPU");
5591     return MatchOperand_ParseFail;
5592   }
5593 
5594   if (isGFX9() && ImmTy == AMDGPUOperand::ImmTyA16)
5595     ImmTy = AMDGPUOperand::ImmTyR128A16;
5596 
5597   Operands.push_back(AMDGPUOperand::CreateImm(this, Bit, S, ImmTy));
5598   return MatchOperand_Success;
5599 }
5600 
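// Parse a cache policy modifier (glc/slc/dlc/scc or a negated 'no' form) and
// merge it into the CPol immediate operand.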
5601 OperandMatchResultTy
5602 AMDGPUAsmParser::parseCPol(OperandVector &Operands) {
5603   unsigned CPolOn = 0;
5604   unsigned CPolOff = 0;
5605   SMLoc S = getLoc();
5606 
5607   if (trySkipId("glc"))
5608     CPolOn = AMDGPU::CPol::GLC;
5609   else if (trySkipId("noglc"))
5610     CPolOff = AMDGPU::CPol::GLC;
5611   else if (trySkipId("slc"))
5612     CPolOn = AMDGPU::CPol::SLC;
5613   else if (trySkipId("noslc"))
5614     CPolOff = AMDGPU::CPol::SLC;
5615   else if (trySkipId("dlc"))
5616     CPolOn = AMDGPU::CPol::DLC;
5617   else if (trySkipId("nodlc"))
5618     CPolOff = AMDGPU::CPol::DLC;
5619   else if (trySkipId("scc"))
5620     CPolOn = AMDGPU::CPol::SCC;
5621   else if (trySkipId("noscc"))
5622     CPolOff = AMDGPU::CPol::SCC;
5623   else
5624     return MatchOperand_NoMatch;
5625 
5626   if (!isGFX10Plus() && ((CPolOn | CPolOff) & AMDGPU::CPol::DLC)) {
5627     Error(S, "dlc modifier is not supported on this GPU");
5628     return MatchOperand_ParseFail;
5629   }
5630 
5631   if (!isGFX90A() && ((CPolOn | CPolOff) & AMDGPU::CPol::SCC)) {
5632     Error(S, "scc modifier is not supported on this GPU");
5633     return MatchOperand_ParseFail;
5634   }
5635 
5636   if (CPolSeen & (CPolOn | CPolOff)) {
5637     Error(S, "duplicate cache policy modifier");
5638     return MatchOperand_ParseFail;
5639   }
5640 
5641   CPolSeen |= (CPolOn | CPolOff);
5642 
5643   for (unsigned I = 1; I != Operands.size(); ++I) {
5644     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
5645     if (Op.isCPol()) {
5646       Op.setImm((Op.getImm() | CPolOn) & ~CPolOff);
5647       return MatchOperand_Success;
5648     }
5649   }
5650 
5651   Operands.push_back(AMDGPUOperand::CreateImm(this, CPolOn, S,
5652                                               AMDGPUOperand::ImmTyCPol));
5653 
5654   return MatchOperand_Success;
5655 }
5656 
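// Append an optional immediate operand to Inst, using Default if it was not
// present in the parsed Operands.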
5657 static void addOptionalImmOperand(
5658   MCInst& Inst, const OperandVector& Operands,
5659   AMDGPUAsmParser::OptionalImmIndexMap& OptionalIdx,
5660   AMDGPUOperand::ImmTy ImmT,
5661   int64_t Default = 0) {
5662   auto i = OptionalIdx.find(ImmT);
5663   if (i != OptionalIdx.end()) {
5664     unsigned Idx = i->second;
5665     ((AMDGPUOperand &)*Operands[Idx]).addImmOperands(Inst, 1);
5666   } else {
5667     Inst.addOperand(MCOperand::createImm(Default));
5668   }
5669 }
5670 
5671 OperandMatchResultTy
5672 AMDGPUAsmParser::parseStringWithPrefix(StringRef Prefix,
5673                                        StringRef &Value,
5674                                        SMLoc &StringLoc) {
5675   if (!trySkipId(Prefix, AsmToken::Colon))
5676     return MatchOperand_NoMatch;
5677 
5678   StringLoc = getLoc();
5679   return parseId(Value, "expected an identifier") ? MatchOperand_Success
5680                                                   : MatchOperand_ParseFail;
5681 }
5682 
5683 //===----------------------------------------------------------------------===//
5684 // MTBUF format
5685 //===----------------------------------------------------------------------===//
5686 
5687 bool AMDGPUAsmParser::tryParseFmt(const char *Pref,
5688                                   int64_t MaxVal,
5689                                   int64_t &Fmt) {
5690   int64_t Val;
5691   SMLoc Loc = getLoc();
5692 
5693   auto Res = parseIntWithPrefix(Pref, Val);
5694   if (Res == MatchOperand_ParseFail)
5695     return false;
5696   if (Res == MatchOperand_NoMatch)
5697     return true;
5698 
5699   if (Val < 0 || Val > MaxVal) {
5700     Error(Loc, Twine("out of range ", StringRef(Pref)));
5701     return false;
5702   }
5703 
5704   Fmt = Val;
5705   return true;
5706 }
5707 
5708 // dfmt and nfmt (in a tbuffer instruction) are parsed as one to allow their
5709 // values to live in a joint format operand in the MCInst encoding.
5710 OperandMatchResultTy
5711 AMDGPUAsmParser::parseDfmtNfmt(int64_t &Format) {
5712   using namespace llvm::AMDGPU::MTBUFFormat;
5713 
5714   int64_t Dfmt = DFMT_UNDEF;
5715   int64_t Nfmt = NFMT_UNDEF;
5716 
5717   // dfmt and nfmt can appear in either order, and each is optional.
5718   for (int I = 0; I < 2; ++I) {
5719     if (Dfmt == DFMT_UNDEF && !tryParseFmt("dfmt", DFMT_MAX, Dfmt))
5720       return MatchOperand_ParseFail;
5721 
5722     if (Nfmt == NFMT_UNDEF && !tryParseFmt("nfmt", NFMT_MAX, Nfmt)) {
5723       return MatchOperand_ParseFail;
5724     }
5725     // Skip optional comma between dfmt/nfmt
5726     // but guard against 2 commas following each other.
5727     if ((Dfmt == DFMT_UNDEF) != (Nfmt == NFMT_UNDEF) &&
5728         !peekToken().is(AsmToken::Comma)) {
5729       trySkipToken(AsmToken::Comma);
5730     }
5731   }
5732 
5733   if (Dfmt == DFMT_UNDEF && Nfmt == NFMT_UNDEF)
5734     return MatchOperand_NoMatch;
5735 
5736   Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt;
5737   Nfmt = (Nfmt == NFMT_UNDEF) ? NFMT_DEFAULT : Nfmt;
5738 
5739   Format = encodeDfmtNfmt(Dfmt, Nfmt);
5740   return MatchOperand_Success;
5741 }
5742 
5743 OperandMatchResultTy
5744 AMDGPUAsmParser::parseUfmt(int64_t &Format) {
5745   using namespace llvm::AMDGPU::MTBUFFormat;
5746 
5747   int64_t Fmt = UFMT_UNDEF;
5748 
5749   if (!tryParseFmt("format", UFMT_MAX, Fmt))
5750     return MatchOperand_ParseFail;
5751 
5752   if (Fmt == UFMT_UNDEF)
5753     return MatchOperand_NoMatch;
5754 
5755   Format = Fmt;
5756   return MatchOperand_Success;
5757 }
5758 
5759 bool AMDGPUAsmParser::matchDfmtNfmt(int64_t &Dfmt,
5760                                     int64_t &Nfmt,
5761                                     StringRef FormatStr,
5762                                     SMLoc Loc) {
5763   using namespace llvm::AMDGPU::MTBUFFormat;
5764   int64_t Format;
5765 
5766   Format = getDfmt(FormatStr);
5767   if (Format != DFMT_UNDEF) {
5768     Dfmt = Format;
5769     return true;
5770   }
5771 
5772   Format = getNfmt(FormatStr, getSTI());
5773   if (Format != NFMT_UNDEF) {
5774     Nfmt = Format;
5775     return true;
5776   }
5777 
5778   Error(Loc, "unsupported format");
5779   return false;
5780 }
5781 
5782 OperandMatchResultTy
5783 AMDGPUAsmParser::parseSymbolicSplitFormat(StringRef FormatStr,
5784                                           SMLoc FormatLoc,
5785                                           int64_t &Format) {
5786   using namespace llvm::AMDGPU::MTBUFFormat;
5787 
5788   int64_t Dfmt = DFMT_UNDEF;
5789   int64_t Nfmt = NFMT_UNDEF;
5790   if (!matchDfmtNfmt(Dfmt, Nfmt, FormatStr, FormatLoc))
5791     return MatchOperand_ParseFail;
5792 
5793   if (trySkipToken(AsmToken::Comma)) {
5794     StringRef Str;
5795     SMLoc Loc = getLoc();
5796     if (!parseId(Str, "expected a format string") ||
5797         !matchDfmtNfmt(Dfmt, Nfmt, Str, Loc)) {
5798       return MatchOperand_ParseFail;
5799     }
5800     if (Dfmt == DFMT_UNDEF) {
5801       Error(Loc, "duplicate numeric format");
5802       return MatchOperand_ParseFail;
5803     } else if (Nfmt == NFMT_UNDEF) {
5804       Error(Loc, "duplicate data format");
5805       return MatchOperand_ParseFail;
5806     }
5807   }
5808 
5809   Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt;
5810   Nfmt = (Nfmt == NFMT_UNDEF) ? NFMT_DEFAULT : Nfmt;
5811 
5812   if (isGFX10Plus()) {
5813     auto Ufmt = convertDfmtNfmt2Ufmt(Dfmt, Nfmt);
5814     if (Ufmt == UFMT_UNDEF) {
5815       Error(FormatLoc, "unsupported format");
5816       return MatchOperand_ParseFail;
5817     }
5818     Format = Ufmt;
5819   } else {
5820     Format = encodeDfmtNfmt(Dfmt, Nfmt);
5821   }
5822 
5823   return MatchOperand_Success;
5824 }
5825 
5826 OperandMatchResultTy
5827 AMDGPUAsmParser::parseSymbolicUnifiedFormat(StringRef FormatStr,
5828                                             SMLoc Loc,
5829                                             int64_t &Format) {
5830   using namespace llvm::AMDGPU::MTBUFFormat;
5831 
5832   auto Id = getUnifiedFormat(FormatStr);
5833   if (Id == UFMT_UNDEF)
5834     return MatchOperand_NoMatch;
5835 
5836   if (!isGFX10Plus()) {
5837     Error(Loc, "unified format is not supported on this GPU");
5838     return MatchOperand_ParseFail;
5839   }
5840 
5841   Format = Id;
5842   return MatchOperand_Success;
5843 }
5844 
5845 OperandMatchResultTy
5846 AMDGPUAsmParser::parseNumericFormat(int64_t &Format) {
5847   using namespace llvm::AMDGPU::MTBUFFormat;
5848   SMLoc Loc = getLoc();
5849 
5850   if (!parseExpr(Format))
5851     return MatchOperand_ParseFail;
5852   if (!isValidFormatEncoding(Format, getSTI())) {
5853     Error(Loc, "out of range format");
5854     return MatchOperand_ParseFail;
5855   }
5856 
5857   return MatchOperand_Success;
5858 }
5859 
5860 OperandMatchResultTy
5861 AMDGPUAsmParser::parseSymbolicOrNumericFormat(int64_t &Format) {
5862   using namespace llvm::AMDGPU::MTBUFFormat;
5863 
5864   if (!trySkipId("format", AsmToken::Colon))
5865     return MatchOperand_NoMatch;
5866 
5867   if (trySkipToken(AsmToken::LBrac)) {
5868     StringRef FormatStr;
5869     SMLoc Loc = getLoc();
5870     if (!parseId(FormatStr, "expected a format string"))
5871       return MatchOperand_ParseFail;
5872 
5873     auto Res = parseSymbolicUnifiedFormat(FormatStr, Loc, Format);
5874     if (Res == MatchOperand_NoMatch)
5875       Res = parseSymbolicSplitFormat(FormatStr, Loc, Format);
5876     if (Res != MatchOperand_Success)
5877       return Res;
5878 
5879     if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
5880       return MatchOperand_ParseFail;
5881 
5882     return MatchOperand_Success;
5883   }
5884 
5885   return parseNumericFormat(Format);
5886 }
5887 
5888 OperandMatchResultTy
5889 AMDGPUAsmParser::parseFORMAT(OperandVector &Operands) {
5890   using namespace llvm::AMDGPU::MTBUFFormat;
5891 
5892   int64_t Format = getDefaultFormatEncoding(getSTI());
5893   OperandMatchResultTy Res;
5894   SMLoc Loc = getLoc();
5895 
5896   // Parse legacy format syntax.
5897   Res = isGFX10Plus() ? parseUfmt(Format) : parseDfmtNfmt(Format);
5898   if (Res == MatchOperand_ParseFail)
5899     return Res;
5900 
5901   bool FormatFound = (Res == MatchOperand_Success);
5902 
5903   Operands.push_back(
5904     AMDGPUOperand::CreateImm(this, Format, Loc, AMDGPUOperand::ImmTyFORMAT));
5905 
5906   if (FormatFound)
5907     trySkipToken(AsmToken::Comma);
5908 
5909   if (isToken(AsmToken::EndOfStatement)) {
5910     // We are expecting an soffset operand,
5911     // but let the matcher handle the error.
5912     return MatchOperand_Success;
5913   }
5914 
5915   // Parse soffset.
5916   Res = parseRegOrImm(Operands);
5917   if (Res != MatchOperand_Success)
5918     return Res;
5919 
5920   trySkipToken(AsmToken::Comma);
5921 
5922   if (!FormatFound) {
5923     Res = parseSymbolicOrNumericFormat(Format);
5924     if (Res == MatchOperand_ParseFail)
5925       return Res;
5926     if (Res == MatchOperand_Success) {
5927       auto Size = Operands.size();
5928       AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands[Size - 2]);
5929       assert(Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyFORMAT);
5930       Op.setImm(Format);
5931     }
5932     return MatchOperand_Success;
5933   }
5934 
5935   if (isId("format") && peekToken().is(AsmToken::Colon)) {
5936     Error(getLoc(), "duplicate format");
5937     return MatchOperand_ParseFail;
5938   }
5939   return MatchOperand_Success;
5940 }
5941 
5942 //===----------------------------------------------------------------------===//
5943 // ds
5944 //===----------------------------------------------------------------------===//
5945 
5946 void AMDGPUAsmParser::cvtDSOffset01(MCInst &Inst,
5947                                     const OperandVector &Operands) {
5948   OptionalImmIndexMap OptionalIdx;
5949 
5950   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
5951     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
5952 
5953     // Add the register arguments
5954     if (Op.isReg()) {
5955       Op.addRegOperands(Inst, 1);
5956       continue;
5957     }
5958 
5959     // Handle optional arguments
5960     OptionalIdx[Op.getImmTy()] = i;
5961   }
5962 
5963   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset0);
5964   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset1);
5965   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS);
5966 
5967   Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0
5968 }
5969 
5970 void AMDGPUAsmParser::cvtDSImpl(MCInst &Inst, const OperandVector &Operands,
5971                                 bool IsGdsHardcoded) {
5972   OptionalImmIndexMap OptionalIdx;
5973 
5974   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
5975     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
5976 
5977     // Add the register arguments
5978     if (Op.isReg()) {
5979       Op.addRegOperands(Inst, 1);
5980       continue;
5981     }
5982 
5983     if (Op.isToken() && Op.getToken() == "gds") {
5984       IsGdsHardcoded = true;
5985       continue;
5986     }
5987 
5988     // Handle optional arguments
5989     OptionalIdx[Op.getImmTy()] = i;
5990   }
5991 
5992   AMDGPUOperand::ImmTy OffsetType =
5993     (Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_gfx10 ||
5994      Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_gfx6_gfx7 ||
5995      Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_vi) ? AMDGPUOperand::ImmTySwizzle :
5996                                                       AMDGPUOperand::ImmTyOffset;
5997 
5998   addOptionalImmOperand(Inst, Operands, OptionalIdx, OffsetType);
5999 
6000   if (!IsGdsHardcoded) {
6001     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS);
6002   }
6003   Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0
6004 }
6005 
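// Convert parsed export operands: record up to four sources, derive the
// enable mask from the non-off sources, and handle the compressed form.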
6006 void AMDGPUAsmParser::cvtExp(MCInst &Inst, const OperandVector &Operands) {
6007   OptionalImmIndexMap OptionalIdx;
6008 
6009   unsigned OperandIdx[4];
6010   unsigned EnMask = 0;
6011   int SrcIdx = 0;
6012 
6013   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
6014     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
6015 
6016     // Add the register arguments
6017     if (Op.isReg()) {
6018       assert(SrcIdx < 4);
6019       OperandIdx[SrcIdx] = Inst.size();
6020       Op.addRegOperands(Inst, 1);
6021       ++SrcIdx;
6022       continue;
6023     }
6024 
6025     if (Op.isOff()) {
6026       assert(SrcIdx < 4);
6027       OperandIdx[SrcIdx] = Inst.size();
6028       Inst.addOperand(MCOperand::createReg(AMDGPU::NoRegister));
6029       ++SrcIdx;
6030       continue;
6031     }
6032 
6033     if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyExpTgt) {
6034       Op.addImmOperands(Inst, 1);
6035       continue;
6036     }
6037 
6038     if (Op.isToken() && Op.getToken() == "done")
6039       continue;
6040 
6041     // Handle optional arguments
6042     OptionalIdx[Op.getImmTy()] = i;
6043   }
6044 
6045   assert(SrcIdx == 4);
6046 
6047   bool Compr = false;
6048   if (OptionalIdx.find(AMDGPUOperand::ImmTyExpCompr) != OptionalIdx.end()) {
6049     Compr = true;
6050     Inst.getOperand(OperandIdx[1]) = Inst.getOperand(OperandIdx[2]);
6051     Inst.getOperand(OperandIdx[2]).setReg(AMDGPU::NoRegister);
6052     Inst.getOperand(OperandIdx[3]).setReg(AMDGPU::NoRegister);
6053   }
6054 
6055   for (auto i = 0; i < SrcIdx; ++i) {
6056     if (Inst.getOperand(OperandIdx[i]).getReg() != AMDGPU::NoRegister) {
6057       EnMask |= Compr? (0x3 << i * 2) : (0x1 << i);
6058     }
6059   }
6060 
6061   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpVM);
6062   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpCompr);
6063 
6064   Inst.addOperand(MCOperand::createImm(EnMask));
6065 }
6066 
6067 //===----------------------------------------------------------------------===//
6068 // s_waitcnt
6069 //===----------------------------------------------------------------------===//
6070 
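// Encode a single counter value into the combined waitcnt immediate. Returns
// true on failure, i.e. when the value does not survive an encode/decode
// round trip and saturation was not requested.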
6071 static bool
6072 encodeCnt(
6073   const AMDGPU::IsaVersion ISA,
6074   int64_t &IntVal,
6075   int64_t CntVal,
6076   bool Saturate,
6077   unsigned (*encode)(const IsaVersion &Version, unsigned, unsigned),
6078   unsigned (*decode)(const IsaVersion &Version, unsigned))
6079 {
6080   bool Failed = false;
6081 
6082   IntVal = encode(ISA, IntVal, CntVal);
6083   if (CntVal != decode(ISA, IntVal)) {
6084     if (Saturate) {
6085       IntVal = encode(ISA, IntVal, -1);
6086     } else {
6087       Failed = true;
6088     }
6089   }
6090   return Failed;
6091 }
6092 
6093 bool AMDGPUAsmParser::parseCnt(int64_t &IntVal) {
6094 
6095   SMLoc CntLoc = getLoc();
6096   StringRef CntName = getTokenStr();
6097 
6098   if (!skipToken(AsmToken::Identifier, "expected a counter name") ||
6099       !skipToken(AsmToken::LParen, "expected a left parenthesis"))
6100     return false;
6101 
6102   int64_t CntVal;
6103   SMLoc ValLoc = getLoc();
6104   if (!parseExpr(CntVal))
6105     return false;
6106 
6107   AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
6108 
6109   bool Failed = true;
6110   bool Sat = CntName.endswith("_sat");
6111 
6112   if (CntName == "vmcnt" || CntName == "vmcnt_sat") {
6113     Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeVmcnt, decodeVmcnt);
6114   } else if (CntName == "expcnt" || CntName == "expcnt_sat") {
6115     Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeExpcnt, decodeExpcnt);
6116   } else if (CntName == "lgkmcnt" || CntName == "lgkmcnt_sat") {
6117     Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeLgkmcnt, decodeLgkmcnt);
6118   } else {
6119     Error(CntLoc, "invalid counter name " + CntName);
6120     return false;
6121   }
6122 
6123   if (Failed) {
6124     Error(ValLoc, "too large value for " + CntName);
6125     return false;
6126   }
6127 
6128   if (!skipToken(AsmToken::RParen, "expected a closing parenthesis"))
6129     return false;
6130 
6131   if (trySkipToken(AsmToken::Amp) || trySkipToken(AsmToken::Comma)) {
6132     if (isToken(AsmToken::EndOfStatement)) {
6133       Error(getLoc(), "expected a counter name");
6134       return false;
6135     }
6136   }
6137 
6138   return true;
6139 }
6140 
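// s_waitcnt accepts either a list of named counters such as vmcnt(0) or an
// absolute expression.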
6141 OperandMatchResultTy
6142 AMDGPUAsmParser::parseSWaitCntOps(OperandVector &Operands) {
6143   AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
6144   int64_t Waitcnt = getWaitcntBitMask(ISA);
6145   SMLoc S = getLoc();
6146 
6147   if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) {
6148     while (!isToken(AsmToken::EndOfStatement)) {
6149       if (!parseCnt(Waitcnt))
6150         return MatchOperand_ParseFail;
6151     }
6152   } else {
6153     if (!parseExpr(Waitcnt))
6154       return MatchOperand_ParseFail;
6155   }
6156 
6157   Operands.push_back(AMDGPUOperand::CreateImm(this, Waitcnt, S));
6158   return MatchOperand_Success;
6159 }
6160 
6161 bool
6162 AMDGPUOperand::isSWaitCnt() const {
6163   return isImm();
6164 }
6165 
6166 //===----------------------------------------------------------------------===//
6167 // hwreg
6168 //===----------------------------------------------------------------------===//
6169 
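// Accepted forms (as handled below): hwreg(<id>[, <offset>, <width>]), where
// <id> is a register name or a numeric code, or a plain 16-bit immediate.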
6170 bool
6171 AMDGPUAsmParser::parseHwregBody(OperandInfoTy &HwReg,
6172                                 OperandInfoTy &Offset,
6173                                 OperandInfoTy &Width) {
6174   using namespace llvm::AMDGPU::Hwreg;
6175 
6176   // The register may be specified by name or using a numeric code
6177   HwReg.Loc = getLoc();
6178   if (isToken(AsmToken::Identifier) &&
6179       (HwReg.Id = getHwregId(getTokenStr())) >= 0) {
6180     HwReg.IsSymbolic = true;
6181     lex(); // skip register name
6182   } else if (!parseExpr(HwReg.Id, "a register name")) {
6183     return false;
6184   }
6185 
6186   if (trySkipToken(AsmToken::RParen))
6187     return true;
6188 
6189   // Parse optional parameters.
6190   if (!skipToken(AsmToken::Comma, "expected a comma or a closing parenthesis"))
6191     return false;
6192 
6193   Offset.Loc = getLoc();
6194   if (!parseExpr(Offset.Id))
6195     return false;
6196 
6197   if (!skipToken(AsmToken::Comma, "expected a comma"))
6198     return false;
6199 
6200   Width.Loc = getLoc();
6201   return parseExpr(Width.Id) &&
6202          skipToken(AsmToken::RParen, "expected a closing parenthesis");
6203 }
6204 
6205 bool
6206 AMDGPUAsmParser::validateHwreg(const OperandInfoTy &HwReg,
6207                                const OperandInfoTy &Offset,
6208                                const OperandInfoTy &Width) {
6209 
6210   using namespace llvm::AMDGPU::Hwreg;
6211 
6212   if (HwReg.IsSymbolic && !isValidHwreg(HwReg.Id, getSTI())) {
6213     Error(HwReg.Loc,
6214           "specified hardware register is not supported on this GPU");
6215     return false;
6216   }
6217   if (!isValidHwreg(HwReg.Id)) {
6218     Error(HwReg.Loc,
6219           "invalid code of hardware register: only 6-bit values are legal");
6220     return false;
6221   }
6222   if (!isValidHwregOffset(Offset.Id)) {
6223     Error(Offset.Loc, "invalid bit offset: only 5-bit values are legal");
6224     return false;
6225   }
6226   if (!isValidHwregWidth(Width.Id)) {
6227     Error(Width.Loc,
6228           "invalid bitfield width: only values from 1 to 32 are legal");
6229     return false;
6230   }
6231   return true;
6232 }
6233 
6234 OperandMatchResultTy
6235 AMDGPUAsmParser::parseHwreg(OperandVector &Operands) {
6236   using namespace llvm::AMDGPU::Hwreg;
6237 
6238   int64_t ImmVal = 0;
6239   SMLoc Loc = getLoc();
6240 
6241   if (trySkipId("hwreg", AsmToken::LParen)) {
6242     OperandInfoTy HwReg(ID_UNKNOWN_);
6243     OperandInfoTy Offset(OFFSET_DEFAULT_);
6244     OperandInfoTy Width(WIDTH_DEFAULT_);
6245     if (parseHwregBody(HwReg, Offset, Width) &&
6246         validateHwreg(HwReg, Offset, Width)) {
6247       ImmVal = encodeHwreg(HwReg.Id, Offset.Id, Width.Id);
6248     } else {
6249       return MatchOperand_ParseFail;
6250     }
6251   } else if (parseExpr(ImmVal, "a hwreg macro")) {
6252     if (ImmVal < 0 || !isUInt<16>(ImmVal)) {
6253       Error(Loc, "invalid immediate: only 16-bit values are legal");
6254       return MatchOperand_ParseFail;
6255     }
6256   } else {
6257     return MatchOperand_ParseFail;
6258   }
6259 
6260   Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTyHwreg));
6261   return MatchOperand_Success;
6262 }
6263 
6264 bool AMDGPUOperand::isHwreg() const {
6265   return isImmTy(ImmTyHwreg);
6266 }
6267 
6268 //===----------------------------------------------------------------------===//
6269 // sendmsg
6270 //===----------------------------------------------------------------------===//
6271 
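// Accepted forms (as handled below): sendmsg(<msg>[, <op>[, <stream>]]), where
// <msg> and <op> may be symbolic names or expressions and <stream> is an
// expression, or a plain 16-bit immediate.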
6272 bool
6273 AMDGPUAsmParser::parseSendMsgBody(OperandInfoTy &Msg,
6274                                   OperandInfoTy &Op,
6275                                   OperandInfoTy &Stream) {
6276   using namespace llvm::AMDGPU::SendMsg;
6277 
6278   Msg.Loc = getLoc();
6279   if (isToken(AsmToken::Identifier) && (Msg.Id = getMsgId(getTokenStr())) >= 0) {
6280     Msg.IsSymbolic = true;
6281     lex(); // skip message name
6282   } else if (!parseExpr(Msg.Id, "a message name")) {
6283     return false;
6284   }
6285 
6286   if (trySkipToken(AsmToken::Comma)) {
6287     Op.IsDefined = true;
6288     Op.Loc = getLoc();
6289     if (isToken(AsmToken::Identifier) &&
6290         (Op.Id = getMsgOpId(Msg.Id, getTokenStr())) >= 0) {
6291       lex(); // skip operation name
6292     } else if (!parseExpr(Op.Id, "an operation name")) {
6293       return false;
6294     }
6295 
6296     if (trySkipToken(AsmToken::Comma)) {
6297       Stream.IsDefined = true;
6298       Stream.Loc = getLoc();
6299       if (!parseExpr(Stream.Id))
6300         return false;
6301     }
6302   }
6303 
6304   return skipToken(AsmToken::RParen, "expected a closing parenthesis");
6305 }
6306 
6307 bool
6308 AMDGPUAsmParser::validateSendMsg(const OperandInfoTy &Msg,
6309                                  const OperandInfoTy &Op,
6310                                  const OperandInfoTy &Stream) {
6311   using namespace llvm::AMDGPU::SendMsg;
6312 
6313   // Validation strictness depends on whether the message is specified
6314   // in a symbolic or in a numeric form. In the latter case
6315   // only the possibility of encoding is checked.
6316   bool Strict = Msg.IsSymbolic;
6317 
6318   if (!isValidMsgId(Msg.Id, getSTI(), Strict)) {
6319     Error(Msg.Loc, "invalid message id");
6320     return false;
6321   }
6322   if (Strict && (msgRequiresOp(Msg.Id) != Op.IsDefined)) {
6323     if (Op.IsDefined) {
6324       Error(Op.Loc, "message does not support operations");
6325     } else {
6326       Error(Msg.Loc, "missing message operation");
6327     }
6328     return false;
6329   }
6330   if (!isValidMsgOp(Msg.Id, Op.Id, getSTI(), Strict)) {
6331     Error(Op.Loc, "invalid operation id");
6332     return false;
6333   }
6334   if (Strict && !msgSupportsStream(Msg.Id, Op.Id) && Stream.IsDefined) {
6335     Error(Stream.Loc, "message operation does not support streams");
6336     return false;
6337   }
6338   if (!isValidMsgStream(Msg.Id, Op.Id, Stream.Id, getSTI(), Strict)) {
6339     Error(Stream.Loc, "invalid message stream id");
6340     return false;
6341   }
6342   return true;
6343 }
6344 
6345 OperandMatchResultTy
6346 AMDGPUAsmParser::parseSendMsgOp(OperandVector &Operands) {
6347   using namespace llvm::AMDGPU::SendMsg;
6348 
6349   int64_t ImmVal = 0;
6350   SMLoc Loc = getLoc();
6351 
6352   if (trySkipId("sendmsg", AsmToken::LParen)) {
6353     OperandInfoTy Msg(ID_UNKNOWN_);
6354     OperandInfoTy Op(OP_NONE_);
6355     OperandInfoTy Stream(STREAM_ID_NONE_);
6356     if (parseSendMsgBody(Msg, Op, Stream) &&
6357         validateSendMsg(Msg, Op, Stream)) {
6358       ImmVal = encodeMsg(Msg.Id, Op.Id, Stream.Id);
6359     } else {
6360       return MatchOperand_ParseFail;
6361     }
6362   } else if (parseExpr(ImmVal, "a sendmsg macro")) {
6363     if (ImmVal < 0 || !isUInt<16>(ImmVal)) {
6364       Error(Loc, "invalid immediate: only 16-bit values are legal");
6365       return MatchOperand_ParseFail;
6366     }
6367   } else {
6368     return MatchOperand_ParseFail;
6369   }
6370 
6371   Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTySendMsg));
6372   return MatchOperand_Success;
6373 }
6374 
6375 bool AMDGPUOperand::isSendMsg() const {
6376   return isImmTy(ImmTySendMsg);
6377 }
6378 
6379 //===----------------------------------------------------------------------===//
6380 // v_interp
6381 //===----------------------------------------------------------------------===//
6382 
6383 OperandMatchResultTy AMDGPUAsmParser::parseInterpSlot(OperandVector &Operands) {
6384   StringRef Str;
6385   SMLoc S = getLoc();
6386 
6387   if (!parseId(Str))
6388     return MatchOperand_NoMatch;
6389 
6390   int Slot = StringSwitch<int>(Str)
6391     .Case("p10", 0)
6392     .Case("p20", 1)
6393     .Case("p0", 2)
6394     .Default(-1);
6395 
6396   if (Slot == -1) {
6397     Error(S, "invalid interpolation slot");
6398     return MatchOperand_ParseFail;
6399   }
6400 
6401   Operands.push_back(AMDGPUOperand::CreateImm(this, Slot, S,
6402                                               AMDGPUOperand::ImmTyInterpSlot));
6403   return MatchOperand_Success;
6404 }
6405 
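// An interpolation attribute operand has the form attr<N>.<chan>, e.g. attr0.x,
// where <N> is a decimal number in [0, 63] and <chan> is one of x, y, z or w.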
6406 OperandMatchResultTy AMDGPUAsmParser::parseInterpAttr(OperandVector &Operands) {
6407   StringRef Str;
6408   SMLoc S = getLoc();
6409 
6410   if (!parseId(Str))
6411     return MatchOperand_NoMatch;
6412 
6413   if (!Str.startswith("attr")) {
6414     Error(S, "invalid interpolation attribute");
6415     return MatchOperand_ParseFail;
6416   }
6417 
6418   StringRef Chan = Str.take_back(2);
6419   int AttrChan = StringSwitch<int>(Chan)
6420     .Case(".x", 0)
6421     .Case(".y", 1)
6422     .Case(".z", 2)
6423     .Case(".w", 3)
6424     .Default(-1);
6425   if (AttrChan == -1) {
6426     Error(S, "invalid or missing interpolation attribute channel");
6427     return MatchOperand_ParseFail;
6428   }
6429 
6430   Str = Str.drop_back(2).drop_front(4);
6431 
6432   uint8_t Attr;
6433   if (Str.getAsInteger(10, Attr)) {
6434     Error(S, "invalid or missing interpolation attribute number");
6435     return MatchOperand_ParseFail;
6436   }
6437 
6438   if (Attr > 63) {
6439     Error(S, "out of bounds interpolation attribute number");
6440     return MatchOperand_ParseFail;
6441   }
6442 
6443   SMLoc SChan = SMLoc::getFromPointer(Chan.data());
6444 
6445   Operands.push_back(AMDGPUOperand::CreateImm(this, Attr, S,
6446                                               AMDGPUOperand::ImmTyInterpAttr));
6447   Operands.push_back(AMDGPUOperand::CreateImm(this, AttrChan, SChan,
6448                                               AMDGPUOperand::ImmTyAttrChan));
6449   return MatchOperand_Success;
6450 }
6451 
6452 //===----------------------------------------------------------------------===//
6453 // exp
6454 //===----------------------------------------------------------------------===//
6455 
6456 OperandMatchResultTy AMDGPUAsmParser::parseExpTgt(OperandVector &Operands) {
6457   using namespace llvm::AMDGPU::Exp;
6458 
6459   StringRef Str;
6460   SMLoc S = getLoc();
6461 
6462   if (!parseId(Str))
6463     return MatchOperand_NoMatch;
6464 
6465   unsigned Id = getTgtId(Str);
6466   if (Id == ET_INVALID || !isSupportedTgtId(Id, getSTI())) {
6467     Error(S, (Id == ET_INVALID) ?
6468                 "invalid exp target" :
6469                 "exp target is not supported on this GPU");
6470     return MatchOperand_ParseFail;
6471   }
6472 
6473   Operands.push_back(AMDGPUOperand::CreateImm(this, Id, S,
6474                                               AMDGPUOperand::ImmTyExpTgt));
6475   return MatchOperand_Success;
6476 }
6477 
6478 //===----------------------------------------------------------------------===//
6479 // parser helpers
6480 //===----------------------------------------------------------------------===//
6481 
6482 bool
6483 AMDGPUAsmParser::isId(const AsmToken &Token, const StringRef Id) const {
6484   return Token.is(AsmToken::Identifier) && Token.getString() == Id;
6485 }
6486 
6487 bool
6488 AMDGPUAsmParser::isId(const StringRef Id) const {
6489   return isId(getToken(), Id);
6490 }
6491 
6492 bool
6493 AMDGPUAsmParser::isToken(const AsmToken::TokenKind Kind) const {
6494   return getTokenKind() == Kind;
6495 }
6496 
6497 bool
6498 AMDGPUAsmParser::trySkipId(const StringRef Id) {
6499   if (isId(Id)) {
6500     lex();
6501     return true;
6502   }
6503   return false;
6504 }
6505 
6506 bool
6507 AMDGPUAsmParser::trySkipId(const StringRef Pref, const StringRef Id) {
6508   if (isToken(AsmToken::Identifier)) {
6509     StringRef Tok = getTokenStr();
6510     if (Tok.startswith(Pref) && Tok.drop_front(Pref.size()) == Id) {
6511       lex();
6512       return true;
6513     }
6514   }
6515   return false;
6516 }
6517 
6518 bool
6519 AMDGPUAsmParser::trySkipId(const StringRef Id, const AsmToken::TokenKind Kind) {
6520   if (isId(Id) && peekToken().is(Kind)) {
6521     lex();
6522     lex();
6523     return true;
6524   }
6525   return false;
6526 }
6527 
6528 bool
6529 AMDGPUAsmParser::trySkipToken(const AsmToken::TokenKind Kind) {
6530   if (isToken(Kind)) {
6531     lex();
6532     return true;
6533   }
6534   return false;
6535 }
6536 
6537 bool
6538 AMDGPUAsmParser::skipToken(const AsmToken::TokenKind Kind,
6539                            const StringRef ErrMsg) {
6540   if (!trySkipToken(Kind)) {
6541     Error(getLoc(), ErrMsg);
6542     return false;
6543   }
6544   return true;
6545 }
6546 
6547 bool
6548 AMDGPUAsmParser::parseExpr(int64_t &Imm, StringRef Expected) {
6549   SMLoc S = getLoc();
6550 
6551   const MCExpr *Expr;
6552   if (Parser.parseExpression(Expr))
6553     return false;
6554 
6555   if (Expr->evaluateAsAbsolute(Imm))
6556     return true;
6557 
6558   if (Expected.empty()) {
6559     Error(S, "expected absolute expression");
6560   } else {
6561     Error(S, Twine("expected ", Expected) +
6562              Twine(" or an absolute expression"));
6563   }
6564   return false;
6565 }
6566 
6567 bool
6568 AMDGPUAsmParser::parseExpr(OperandVector &Operands) {
6569   SMLoc S = getLoc();
6570 
6571   const MCExpr *Expr;
6572   if (Parser.parseExpression(Expr))
6573     return false;
6574 
6575   int64_t IntVal;
6576   if (Expr->evaluateAsAbsolute(IntVal)) {
6577     Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S));
6578   } else {
6579     Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S));
6580   }
6581   return true;
6582 }
6583 
6584 bool
6585 AMDGPUAsmParser::parseString(StringRef &Val, const StringRef ErrMsg) {
6586   if (isToken(AsmToken::String)) {
6587     Val = getToken().getStringContents();
6588     lex();
6589     return true;
6590   } else {
6591     Error(getLoc(), ErrMsg);
6592     return false;
6593   }
6594 }
6595 
6596 bool
6597 AMDGPUAsmParser::parseId(StringRef &Val, const StringRef ErrMsg) {
6598   if (isToken(AsmToken::Identifier)) {
6599     Val = getTokenStr();
6600     lex();
6601     return true;
6602   } else {
6603     if (!ErrMsg.empty())
6604       Error(getLoc(), ErrMsg);
6605     return false;
6606   }
6607 }
6608 
6609 AsmToken
6610 AMDGPUAsmParser::getToken() const {
6611   return Parser.getTok();
6612 }
6613 
6614 AsmToken
6615 AMDGPUAsmParser::peekToken() {
6616   return isToken(AsmToken::EndOfStatement) ? getToken() : getLexer().peekTok();
6617 }
6618 
6619 void
6620 AMDGPUAsmParser::peekTokens(MutableArrayRef<AsmToken> Tokens) {
6621   auto TokCount = getLexer().peekTokens(Tokens);
6622 
6623   for (auto Idx = TokCount; Idx < Tokens.size(); ++Idx)
6624     Tokens[Idx] = AsmToken(AsmToken::Error, "");
6625 }
6626 
6627 AsmToken::TokenKind
6628 AMDGPUAsmParser::getTokenKind() const {
6629   return getLexer().getKind();
6630 }
6631 
6632 SMLoc
6633 AMDGPUAsmParser::getLoc() const {
6634   return getToken().getLoc();
6635 }
6636 
6637 StringRef
6638 AMDGPUAsmParser::getTokenStr() const {
6639   return getToken().getString();
6640 }
6641 
6642 void
6643 AMDGPUAsmParser::lex() {
6644   Parser.Lex();
6645 }
6646 
6647 SMLoc
6648 AMDGPUAsmParser::getOperandLoc(std::function<bool(const AMDGPUOperand&)> Test,
6649                                const OperandVector &Operands) const {
6650   for (unsigned i = Operands.size() - 1; i > 0; --i) {
6651     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
6652     if (Test(Op))
6653       return Op.getStartLoc();
6654   }
6655   return ((AMDGPUOperand &)*Operands[0]).getStartLoc();
6656 }
6657 
6658 SMLoc
6659 AMDGPUAsmParser::getImmLoc(AMDGPUOperand::ImmTy Type,
6660                            const OperandVector &Operands) const {
6661   auto Test = [=](const AMDGPUOperand& Op) { return Op.isImmTy(Type); };
6662   return getOperandLoc(Test, Operands);
6663 }
6664 
6665 SMLoc
6666 AMDGPUAsmParser::getRegLoc(unsigned Reg,
6667                            const OperandVector &Operands) const {
6668   auto Test = [=](const AMDGPUOperand& Op) {
6669     return Op.isRegKind() && Op.getReg() == Reg;
6670   };
6671   return getOperandLoc(Test, Operands);
6672 }
6673 
6674 SMLoc
6675 AMDGPUAsmParser::getLitLoc(const OperandVector &Operands) const {
6676   auto Test = [](const AMDGPUOperand& Op) {
6677     return Op.IsImmKindLiteral() || Op.isExpr();
6678   };
6679   return getOperandLoc(Test, Operands);
6680 }
6681 
6682 SMLoc
6683 AMDGPUAsmParser::getConstLoc(const OperandVector &Operands) const {
6684   auto Test = [](const AMDGPUOperand& Op) {
6685     return Op.isImmKindConst();
6686   };
6687   return getOperandLoc(Test, Operands);
6688 }
6689 
6690 //===----------------------------------------------------------------------===//
6691 // swizzle
6692 //===----------------------------------------------------------------------===//
6693 
6694 LLVM_READNONE
6695 static unsigned
6696 encodeBitmaskPerm(const unsigned AndMask,
6697                   const unsigned OrMask,
6698                   const unsigned XorMask) {
6699   using namespace llvm::AMDGPU::Swizzle;
6700 
6701   return BITMASK_PERM_ENC |
6702          (AndMask << BITMASK_AND_SHIFT) |
6703          (OrMask  << BITMASK_OR_SHIFT)  |
6704          (XorMask << BITMASK_XOR_SHIFT);
6705 }
6706 
6707 bool
6708 AMDGPUAsmParser::parseSwizzleOperand(int64_t &Op,
6709                                      const unsigned MinVal,
6710                                      const unsigned MaxVal,
6711                                      const StringRef ErrMsg,
6712                                      SMLoc &Loc) {
6713   if (!skipToken(AsmToken::Comma, "expected a comma")) {
6714     return false;
6715   }
6716   Loc = getLoc();
6717   if (!parseExpr(Op)) {
6718     return false;
6719   }
6720   if (Op < MinVal || Op > MaxVal) {
6721     Error(Loc, ErrMsg);
6722     return false;
6723   }
6724 
6725   return true;
6726 }
6727 
6728 bool
6729 AMDGPUAsmParser::parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
6730                                       const unsigned MinVal,
6731                                       const unsigned MaxVal,
6732                                       const StringRef ErrMsg) {
6733   SMLoc Loc;
6734   for (unsigned i = 0; i < OpNum; ++i) {
6735     if (!parseSwizzleOperand(Op[i], MinVal, MaxVal, ErrMsg, Loc))
6736       return false;
6737   }
6738 
6739   return true;
6740 }
6741 
6742 bool
6743 AMDGPUAsmParser::parseSwizzleQuadPerm(int64_t &Imm) {
6744   using namespace llvm::AMDGPU::Swizzle;
6745 
6746   int64_t Lane[LANE_NUM];
6747   if (parseSwizzleOperands(LANE_NUM, Lane, 0, LANE_MAX,
6748                            "expected a 2-bit lane id")) {
6749     Imm = QUAD_PERM_ENC;
6750     for (unsigned I = 0; I < LANE_NUM; ++I) {
6751       Imm |= Lane[I] << (LANE_SHIFT * I);
6752     }
6753     return true;
6754   }
6755   return false;
6756 }
6757 
6758 bool
6759 AMDGPUAsmParser::parseSwizzleBroadcast(int64_t &Imm) {
6760   using namespace llvm::AMDGPU::Swizzle;
6761 
6762   SMLoc Loc;
6763   int64_t GroupSize;
6764   int64_t LaneIdx;
6765 
6766   if (!parseSwizzleOperand(GroupSize,
6767                            2, 32,
6768                            "group size must be in the interval [2,32]",
6769                            Loc)) {
6770     return false;
6771   }
6772   if (!isPowerOf2_64(GroupSize)) {
6773     Error(Loc, "group size must be a power of two");
6774     return false;
6775   }
6776   if (parseSwizzleOperand(LaneIdx,
6777                           0, GroupSize - 1,
6778                           "lane id must be in the interval [0,group size - 1]",
6779                           Loc)) {
6780     Imm = encodeBitmaskPerm(BITMASK_MAX - GroupSize + 1, LaneIdx, 0);
6781     return true;
6782   }
6783   return false;
6784 }
6785 
6786 bool
6787 AMDGPUAsmParser::parseSwizzleReverse(int64_t &Imm) {
6788   using namespace llvm::AMDGPU::Swizzle;
6789 
6790   SMLoc Loc;
6791   int64_t GroupSize;
6792 
6793   if (!parseSwizzleOperand(GroupSize,
6794                            2, 32,
6795                            "group size must be in the interval [2,32]",
6796                            Loc)) {
6797     return false;
6798   }
6799   if (!isPowerOf2_64(GroupSize)) {
6800     Error(Loc, "group size must be a power of two");
6801     return false;
6802   }
6803 
6804   Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize - 1);
6805   return true;
6806 }
6807 
6808 bool
6809 AMDGPUAsmParser::parseSwizzleSwap(int64_t &Imm) {
6810   using namespace llvm::AMDGPU::Swizzle;
6811 
6812   SMLoc Loc;
6813   int64_t GroupSize;
6814 
6815   if (!parseSwizzleOperand(GroupSize,
6816                            1, 16,
6817                            "group size must be in the interval [1,16]",
6818                            Loc)) {
6819     return false;
6820   }
6821   if (!isPowerOf2_64(GroupSize)) {
6822     Error(Loc, "group size must be a power of two");
6823     return false;
6824   }
6825 
6826   Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize);
6827   return true;
6828 }
6829 
6830 bool
6831 AMDGPUAsmParser::parseSwizzleBitmaskPerm(int64_t &Imm) {
6832   using namespace llvm::AMDGPU::Swizzle;
6833 
6834   if (!skipToken(AsmToken::Comma, "expected a comma")) {
6835     return false;
6836   }
6837 
6838   StringRef Ctl;
6839   SMLoc StrLoc = getLoc();
6840   if (!parseString(Ctl)) {
6841     return false;
6842   }
6843   if (Ctl.size() != BITMASK_WIDTH) {
6844     Error(StrLoc, "expected a 5-character mask");
6845     return false;
6846   }
6847 
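  // Each control character selects how one bit of the lane id is produced:
  // '0' forces the bit to 0, '1' forces it to 1, 'p' preserves it, and 'i'
  // inverts it (see encodeBitmaskPerm for the resulting AND/OR/XOR masks).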
6848   unsigned AndMask = 0;
6849   unsigned OrMask = 0;
6850   unsigned XorMask = 0;
6851 
6852   for (size_t i = 0; i < Ctl.size(); ++i) {
6853     unsigned Mask = 1 << (BITMASK_WIDTH - 1 - i);
6854     switch(Ctl[i]) {
6855     default:
6856       Error(StrLoc, "invalid mask");
6857       return false;
6858     case '0':
6859       break;
6860     case '1':
6861       OrMask |= Mask;
6862       break;
6863     case 'p':
6864       AndMask |= Mask;
6865       break;
6866     case 'i':
6867       AndMask |= Mask;
6868       XorMask |= Mask;
6869       break;
6870     }
6871   }
6872 
6873   Imm = encodeBitmaskPerm(AndMask, OrMask, XorMask);
6874   return true;
6875 }
6876 
6877 bool
6878 AMDGPUAsmParser::parseSwizzleOffset(int64_t &Imm) {
6879 
6880   SMLoc OffsetLoc = getLoc();
6881 
6882   if (!parseExpr(Imm, "a swizzle macro")) {
6883     return false;
6884   }
6885   if (!isUInt<16>(Imm)) {
6886     Error(OffsetLoc, "expected a 16-bit offset");
6887     return false;
6888   }
6889   return true;
6890 }
6891 
6892 bool
6893 AMDGPUAsmParser::parseSwizzleMacro(int64_t &Imm) {
6894   using namespace llvm::AMDGPU::Swizzle;
6895 
6896   if (skipToken(AsmToken::LParen, "expected a left parenthesis")) {
6897 
6898     SMLoc ModeLoc = getLoc();
6899     bool Ok = false;
6900 
6901     if (trySkipId(IdSymbolic[ID_QUAD_PERM])) {
6902       Ok = parseSwizzleQuadPerm(Imm);
6903     } else if (trySkipId(IdSymbolic[ID_BITMASK_PERM])) {
6904       Ok = parseSwizzleBitmaskPerm(Imm);
6905     } else if (trySkipId(IdSymbolic[ID_BROADCAST])) {
6906       Ok = parseSwizzleBroadcast(Imm);
6907     } else if (trySkipId(IdSymbolic[ID_SWAP])) {
6908       Ok = parseSwizzleSwap(Imm);
6909     } else if (trySkipId(IdSymbolic[ID_REVERSE])) {
6910       Ok = parseSwizzleReverse(Imm);
6911     } else {
6912       Error(ModeLoc, "expected a swizzle mode");
6913     }
6914 
6915     return Ok && skipToken(AsmToken::RParen, "expected a closing parenthesis");
6916   }
6917 
6918   return false;
6919 }
6920 
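// A swizzle operand is written either as a raw 16-bit value (offset:<imm>) or
// as a macro (offset:swizzle(<mode>, ...)); both forms produce the same
// immediate encoding.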
6921 OperandMatchResultTy
6922 AMDGPUAsmParser::parseSwizzleOp(OperandVector &Operands) {
6923   SMLoc S = getLoc();
6924   int64_t Imm = 0;
6925 
6926   if (trySkipId("offset")) {
6927 
6928     bool Ok = false;
6929     if (skipToken(AsmToken::Colon, "expected a colon")) {
6930       if (trySkipId("swizzle")) {
6931         Ok = parseSwizzleMacro(Imm);
6932       } else {
6933         Ok = parseSwizzleOffset(Imm);
6934       }
6935     }
6936 
6937     Operands.push_back(AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTySwizzle));
6938 
6939     return Ok ? MatchOperand_Success : MatchOperand_ParseFail;
6940   } else {
6941     // Swizzle "offset" operand is optional.
6942     // If it is omitted, try parsing other optional operands.
6943     return parseOptionalOpr(Operands);
6944   }
6945 }
6946 
6947 bool
6948 AMDGPUOperand::isSwizzle() const {
6949   return isImmTy(ImmTySwizzle);
6950 }
6951 
6952 //===----------------------------------------------------------------------===//
6953 // VGPR Index Mode
6954 //===----------------------------------------------------------------------===//
6955 
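// A gpr_idx operand is either gpr_idx(<mode>[, <mode>...]), a parenthesized
// list of distinct VGPR index modes, or a raw 4-bit immediate.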
6956 int64_t AMDGPUAsmParser::parseGPRIdxMacro() {
6957 
6958   using namespace llvm::AMDGPU::VGPRIndexMode;
6959 
6960   if (trySkipToken(AsmToken::RParen)) {
6961     return OFF;
6962   }
6963 
6964   int64_t Imm = 0;
6965 
6966   while (true) {
6967     unsigned Mode = 0;
6968     SMLoc S = getLoc();
6969 
6970     for (unsigned ModeId = ID_MIN; ModeId <= ID_MAX; ++ModeId) {
6971       if (trySkipId(IdSymbolic[ModeId])) {
6972         Mode = 1 << ModeId;
6973         break;
6974       }
6975     }
6976 
6977     if (Mode == 0) {
6978       Error(S, (Imm == 0)?
6979                "expected a VGPR index mode or a closing parenthesis" :
6980                "expected a VGPR index mode");
6981       return UNDEF;
6982     }
6983 
6984     if (Imm & Mode) {
6985       Error(S, "duplicate VGPR index mode");
6986       return UNDEF;
6987     }
6988     Imm |= Mode;
6989 
6990     if (trySkipToken(AsmToken::RParen))
6991       break;
6992     if (!skipToken(AsmToken::Comma,
6993                    "expected a comma or a closing parenthesis"))
6994       return UNDEF;
6995   }
6996 
6997   return Imm;
6998 }
6999 
7000 OperandMatchResultTy
7001 AMDGPUAsmParser::parseGPRIdxMode(OperandVector &Operands) {
7002 
7003   using namespace llvm::AMDGPU::VGPRIndexMode;
7004 
7005   int64_t Imm = 0;
7006   SMLoc S = getLoc();
7007 
7008   if (trySkipId("gpr_idx", AsmToken::LParen)) {
7009     Imm = parseGPRIdxMacro();
7010     if (Imm == UNDEF)
7011       return MatchOperand_ParseFail;
7012   } else {
7013     if (getParser().parseAbsoluteExpression(Imm))
7014       return MatchOperand_ParseFail;
7015     if (Imm < 0 || !isUInt<4>(Imm)) {
7016       Error(S, "invalid immediate: only 4-bit values are legal");
7017       return MatchOperand_ParseFail;
7018     }
7019   }
7020 
7021   Operands.push_back(
7022       AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyGprIdxMode));
7023   return MatchOperand_Success;
7024 }
7025 
7026 bool AMDGPUOperand::isGPRIdxMode() const {
7027   return isImmTy(ImmTyGprIdxMode);
7028 }
7029 
7030 //===----------------------------------------------------------------------===//
7031 // sopp branch targets
7032 //===----------------------------------------------------------------------===//
7033 
7034 OperandMatchResultTy
7035 AMDGPUAsmParser::parseSOppBrTarget(OperandVector &Operands) {
7036 
7037   // Make sure we are not parsing something
7038   // that looks like a label or an expression but is not.
7039   // This will improve error messages.
7040   if (isRegister() || isModifier())
7041     return MatchOperand_NoMatch;
7042 
7043   if (!parseExpr(Operands))
7044     return MatchOperand_ParseFail;
7045 
7046   AMDGPUOperand &Opr = ((AMDGPUOperand &)*Operands[Operands.size() - 1]);
7047   assert(Opr.isImm() || Opr.isExpr());
7048   SMLoc Loc = Opr.getStartLoc();
7049 
7050   // Currently we do not support arbitrary expressions as branch targets.
7051   // Only labels and absolute expressions are accepted.
7052   if (Opr.isExpr() && !Opr.isSymbolRefExpr()) {
7053     Error(Loc, "expected an absolute expression or a label");
7054   } else if (Opr.isImm() && !Opr.isS16Imm()) {
7055     Error(Loc, "expected a 16-bit signed jump offset");
7056   }
7057 
7058   return MatchOperand_Success;
7059 }
7060 
7061 //===----------------------------------------------------------------------===//
7062 // Boolean holding registers
7063 //===----------------------------------------------------------------------===//
7064 
7065 OperandMatchResultTy
7066 AMDGPUAsmParser::parseBoolReg(OperandVector &Operands) {
7067   return parseReg(Operands);
7068 }
7069 
7070 //===----------------------------------------------------------------------===//
7071 // mubuf
7072 //===----------------------------------------------------------------------===//
7073 
7074 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultCPol() const {
7075   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyCPol);
7076 }
7077 
7078 void AMDGPUAsmParser::cvtMubufImpl(MCInst &Inst,
7079                                    const OperandVector &Operands,
7080                                    bool IsAtomic,
7081                                    bool IsLds) {
7082   bool IsLdsOpcode = IsLds;
7083   bool HasLdsModifier = false;
7084   OptionalImmIndexMap OptionalIdx;
7085   unsigned FirstOperandIdx = 1;
7086   bool IsAtomicReturn = false;
7087 
7088   if (IsAtomic) {
7089     for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) {
7090       AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
7091       if (!Op.isCPol())
7092         continue;
7093       IsAtomicReturn = Op.getImm() & AMDGPU::CPol::GLC;
7094       break;
7095     }
7096 
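    // If the cache policy does not request a returned value, switch to the
    // no-return variant of the atomic opcode (when one exists).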
7097     if (!IsAtomicReturn) {
7098       int NewOpc = AMDGPU::getAtomicNoRetOp(Inst.getOpcode());
7099       if (NewOpc != -1)
7100         Inst.setOpcode(NewOpc);
7101     }
7102 
7103     IsAtomicReturn =  MII.get(Inst.getOpcode()).TSFlags &
7104                       SIInstrFlags::IsAtomicRet;
7105   }
7106 
7107   for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) {
7108     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
7109 
7110     // Add the register arguments
7111     if (Op.isReg()) {
7112       Op.addRegOperands(Inst, 1);
7113       // Insert a tied src for atomic return dst.
7114       // This cannot be postponed as subsequent calls to
7115       // addImmOperands rely on the correct number of MC operands.
7116       if (IsAtomicReturn && i == FirstOperandIdx)
7117         Op.addRegOperands(Inst, 1);
7118       continue;
7119     }
7120 
7121     // Handle the case where soffset is an immediate
7122     if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
7123       Op.addImmOperands(Inst, 1);
7124       continue;
7125     }
7126 
7127     HasLdsModifier |= Op.isLDS();
7128 
7129     // Handle tokens like 'offen' which are sometimes hard-coded into the
7130     // asm string.  There are no MCInst operands for these.
7131     if (Op.isToken()) {
7132       continue;
7133     }
7134     assert(Op.isImm());
7135 
7136     // Handle optional arguments
7137     OptionalIdx[Op.getImmTy()] = i;
7138   }
7139 
7140   // This is a workaround for an llvm quirk which may result in an
7141   // incorrect instruction selection. Lds and non-lds versions of
7142   // MUBUF instructions are identical except that lds versions
7143   // have a mandatory 'lds' modifier. However, this modifier follows
7144   // the optional modifiers, and the llvm asm matcher regards it
7145   // as an optional one. As a result, an lds version of an opcode
7146   // may be selected even if it has no 'lds' modifier.
7147   if (IsLdsOpcode && !HasLdsModifier) {
7148     int NoLdsOpcode = AMDGPU::getMUBUFNoLdsInst(Inst.getOpcode());
7149     if (NoLdsOpcode != -1) { // Got lds version - correct it.
7150       Inst.setOpcode(NoLdsOpcode);
7151       IsLdsOpcode = false;
7152     }
7153   }
7154 
7155   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset);
7156   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol, 0);
7157 
7158   if (!IsLdsOpcode) { // tfe is not legal with lds opcodes
7159     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
7160   }
7161   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySWZ);
7162 }
7163 
7164 void AMDGPUAsmParser::cvtMtbuf(MCInst &Inst, const OperandVector &Operands) {
7165   OptionalImmIndexMap OptionalIdx;
7166 
7167   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
7168     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
7169 
7170     // Add the register arguments
7171     if (Op.isReg()) {
7172       Op.addRegOperands(Inst, 1);
7173       continue;
7174     }
7175 
7176     // Handle the case where soffset is an immediate
7177     if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
7178       Op.addImmOperands(Inst, 1);
7179       continue;
7180     }
7181 
7182     // Handle tokens like 'offen' which are sometimes hard-coded into the
7183     // asm string.  There are no MCInst operands for these.
7184     if (Op.isToken()) {
7185       continue;
7186     }
7187     assert(Op.isImm());
7188 
7189     // Handle optional arguments
7190     OptionalIdx[Op.getImmTy()] = i;
7191   }
7192 
7193   addOptionalImmOperand(Inst, Operands, OptionalIdx,
7194                         AMDGPUOperand::ImmTyOffset);
7195   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyFORMAT);
7196   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol, 0);
7197   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
7198   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySWZ);
7199 }
7200 
7201 //===----------------------------------------------------------------------===//
7202 // mimg
7203 //===----------------------------------------------------------------------===//
7204 
7205 void AMDGPUAsmParser::cvtMIMG(MCInst &Inst, const OperandVector &Operands,
7206                               bool IsAtomic) {
7207   unsigned I = 1;
7208   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
7209   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
7210     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
7211   }
7212 
7213   if (IsAtomic) {
7214     // Add src, same as dst
7215     assert(Desc.getNumDefs() == 1);
7216     ((AMDGPUOperand &)*Operands[I - 1]).addRegOperands(Inst, 1);
7217   }
7218 
7219   OptionalImmIndexMap OptionalIdx;
7220 
7221   for (unsigned E = Operands.size(); I != E; ++I) {
7222     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
7223 
7224     // Add the register arguments
7225     if (Op.isReg()) {
7226       Op.addRegOperands(Inst, 1);
7227     } else if (Op.isImmModifier()) {
7228       OptionalIdx[Op.getImmTy()] = I;
7229     } else if (!Op.isToken()) {
7230       llvm_unreachable("unexpected operand type");
7231     }
7232   }
7233 
7234   bool IsGFX10Plus = isGFX10Plus();
7235 
7236   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDMask);
7237   if (IsGFX10Plus)
7238     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDim, -1);
7239   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyUNorm);
7240   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol);
7241   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyR128A16);
7242   if (IsGFX10Plus)
7243     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyA16);
7244   if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::tfe) != -1)
7245     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
7246   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyLWE);
7247   if (!IsGFX10Plus)
7248     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDA);
7249   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyD16);
7250 }
7251 
7252 void AMDGPUAsmParser::cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands) {
7253   cvtMIMG(Inst, Operands, true);
7254 }
7255 
7256 void AMDGPUAsmParser::cvtSMEMAtomic(MCInst &Inst, const OperandVector &Operands) {
7257   OptionalImmIndexMap OptionalIdx;
7258   bool IsAtomicReturn = false;
7259 
7260   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
7261     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
7262     if (!Op.isCPol())
7263       continue;
7264     IsAtomicReturn = Op.getImm() & AMDGPU::CPol::GLC;
7265     break;
7266   }
7267 
7268   if (!IsAtomicReturn) {
7269     int NewOpc = AMDGPU::getAtomicNoRetOp(Inst.getOpcode());
7270     if (NewOpc != -1)
7271       Inst.setOpcode(NewOpc);
7272   }
7273 
7274   IsAtomicReturn =  MII.get(Inst.getOpcode()).TSFlags &
7275                     SIInstrFlags::IsAtomicRet;
7276 
7277   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
7278     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
7279 
7280     // Add the register arguments
7281     if (Op.isReg()) {
7282       Op.addRegOperands(Inst, 1);
7283       if (IsAtomicReturn && i == 1)
7284         Op.addRegOperands(Inst, 1);
7285       continue;
7286     }
7287 
7288     // Handle the case where soffset is an immediate
7289     if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
7290       Op.addImmOperands(Inst, 1);
7291       continue;
7292     }
7293 
7294     // Handle tokens like 'offen' which are sometimes hard-coded into the
7295     // asm string.  There are no MCInst operands for these.
7296     if (Op.isToken()) {
7297       continue;
7298     }
7299     assert(Op.isImm());
7300 
7301     // Handle optional arguments
7302     OptionalIdx[Op.getImmTy()] = i;
7303   }
7304 
7305   if ((int)Inst.getNumOperands() <=
7306       AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::offset))
7307     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset);
7308   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol, 0);
7309 }
7310 
7311 void AMDGPUAsmParser::cvtIntersectRay(MCInst &Inst,
7312                                       const OperandVector &Operands) {
7313   for (unsigned I = 1; I < Operands.size(); ++I) {
7314     auto &Operand = (AMDGPUOperand &)*Operands[I];
7315     if (Operand.isReg())
7316       Operand.addRegOperands(Inst, 1);
7317   }
7318 
7319   Inst.addOperand(MCOperand::createImm(1)); // a16
7320 }
7321 
7322 //===----------------------------------------------------------------------===//
7323 // smrd
7324 //===----------------------------------------------------------------------===//
7325 
7326 bool AMDGPUOperand::isSMRDOffset8() const {
7327   return isImm() && isUInt<8>(getImm());
7328 }
7329 
7330 bool AMDGPUOperand::isSMEMOffset() const {
7331   return isImm(); // Offset range is checked later by validator.
7332 }
7333 
7334 bool AMDGPUOperand::isSMRDLiteralOffset() const {
7335   // 32-bit literals are only supported on CI, and we only want to use them
7336   // when the offset is wider than 8 bits.
7337   return isImm() && !isUInt<8>(getImm()) && isUInt<32>(getImm());
7338 }
7339 
7340 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDOffset8() const {
7341   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
7342 }
7343 
7344 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMEMOffset() const {
7345   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
7346 }
7347 
7348 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDLiteralOffset() const {
7349   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
7350 }
7351 
7352 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultFlatOffset() const {
7353   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
7354 }
7355 
7356 //===----------------------------------------------------------------------===//
7357 // vop3
7358 //===----------------------------------------------------------------------===//
7359 
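// The omod modifier is encoded as a 2-bit field: mul:1/div:1 -> 0, mul:2 -> 1,
// mul:4 -> 2 and div:2 -> 3. The converters below perform this mapping.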
7360 static bool ConvertOmodMul(int64_t &Mul) {
7361   if (Mul != 1 && Mul != 2 && Mul != 4)
7362     return false;
7363 
7364   Mul >>= 1;
7365   return true;
7366 }
7367 
7368 static bool ConvertOmodDiv(int64_t &Div) {
7369   if (Div == 1) {
7370     Div = 0;
7371     return true;
7372   }
7373 
7374   if (Div == 2) {
7375     Div = 3;
7376     return true;
7377   }
7378 
7379   return false;
7380 }
7381 
7382 // Both bound_ctrl:0 and bound_ctrl:1 are encoded as 1.
7383 // This is intentional and ensures compatibility with sp3.
7384 // See bug 35397 for details.
7385 static bool ConvertBoundCtrl(int64_t &BoundCtrl) {
7386   if (BoundCtrl == 0 || BoundCtrl == 1) {
7387     BoundCtrl = 1;
7388     return true;
7389   }
7390   return false;
7391 }
7392 
7393 // Note: the order in this table matches the order of operands in AsmString.
7394 static const OptionalOperand AMDGPUOptionalOperandTable[] = {
7395   {"offen",   AMDGPUOperand::ImmTyOffen, true, nullptr},
7396   {"idxen",   AMDGPUOperand::ImmTyIdxen, true, nullptr},
7397   {"addr64",  AMDGPUOperand::ImmTyAddr64, true, nullptr},
7398   {"offset0", AMDGPUOperand::ImmTyOffset0, false, nullptr},
7399   {"offset1", AMDGPUOperand::ImmTyOffset1, false, nullptr},
7400   {"gds",     AMDGPUOperand::ImmTyGDS, true, nullptr},
7401   {"lds",     AMDGPUOperand::ImmTyLDS, true, nullptr},
7402   {"offset",  AMDGPUOperand::ImmTyOffset, false, nullptr},
7403   {"inst_offset", AMDGPUOperand::ImmTyInstOffset, false, nullptr},
7404   {"",        AMDGPUOperand::ImmTyCPol, false, nullptr},
7405   {"swz",     AMDGPUOperand::ImmTySWZ, true, nullptr},
7406   {"tfe",     AMDGPUOperand::ImmTyTFE, true, nullptr},
7407   {"d16",     AMDGPUOperand::ImmTyD16, true, nullptr},
7408   {"high",    AMDGPUOperand::ImmTyHigh, true, nullptr},
7409   {"clamp",   AMDGPUOperand::ImmTyClampSI, true, nullptr},
7410   {"omod",    AMDGPUOperand::ImmTyOModSI, false, ConvertOmodMul},
7411   {"unorm",   AMDGPUOperand::ImmTyUNorm, true, nullptr},
7412   {"da",      AMDGPUOperand::ImmTyDA,    true, nullptr},
7413   {"r128",    AMDGPUOperand::ImmTyR128A16,  true, nullptr},
7414   {"a16",     AMDGPUOperand::ImmTyA16,  true, nullptr},
7415   {"lwe",     AMDGPUOperand::ImmTyLWE,   true, nullptr},
7416   {"d16",     AMDGPUOperand::ImmTyD16,   true, nullptr},
7417   {"dmask",   AMDGPUOperand::ImmTyDMask, false, nullptr},
7418   {"dim",     AMDGPUOperand::ImmTyDim,   false, nullptr},
7419   {"row_mask",   AMDGPUOperand::ImmTyDppRowMask, false, nullptr},
7420   {"bank_mask",  AMDGPUOperand::ImmTyDppBankMask, false, nullptr},
7421   {"bound_ctrl", AMDGPUOperand::ImmTyDppBoundCtrl, false, ConvertBoundCtrl},
7422   {"fi",         AMDGPUOperand::ImmTyDppFi, false, nullptr},
7423   {"dst_sel",    AMDGPUOperand::ImmTySdwaDstSel, false, nullptr},
7424   {"src0_sel",   AMDGPUOperand::ImmTySdwaSrc0Sel, false, nullptr},
7425   {"src1_sel",   AMDGPUOperand::ImmTySdwaSrc1Sel, false, nullptr},
7426   {"dst_unused", AMDGPUOperand::ImmTySdwaDstUnused, false, nullptr},
7427   {"compr", AMDGPUOperand::ImmTyExpCompr, true, nullptr },
7428   {"vm", AMDGPUOperand::ImmTyExpVM, true, nullptr},
7429   {"op_sel", AMDGPUOperand::ImmTyOpSel, false, nullptr},
7430   {"op_sel_hi", AMDGPUOperand::ImmTyOpSelHi, false, nullptr},
7431   {"neg_lo", AMDGPUOperand::ImmTyNegLo, false, nullptr},
7432   {"neg_hi", AMDGPUOperand::ImmTyNegHi, false, nullptr},
7433   {"blgp", AMDGPUOperand::ImmTyBLGP, false, nullptr},
7434   {"cbsz", AMDGPUOperand::ImmTyCBSZ, false, nullptr},
7435   {"abid", AMDGPUOperand::ImmTyABID, false, nullptr}
7436 };
7437 
7438 void AMDGPUAsmParser::onBeginOfFile() {
7439   if (!getParser().getStreamer().getTargetStreamer() ||
7440       getSTI().getTargetTriple().getArch() == Triple::r600)
7441     return;
7442 
7443   if (!getTargetStreamer().getTargetID())
7444     getTargetStreamer().initializeTargetID(getSTI(), getSTI().getFeatureString());
7445 
7446   if (isHsaAbiVersion3AndAbove(&getSTI()))
7447     getTargetStreamer().EmitDirectiveAMDGCNTarget();
7448 }
7449 
7450 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOperand(OperandVector &Operands) {
7451 
7452   OperandMatchResultTy res = parseOptionalOpr(Operands);
7453 
7454   // This is a hack to enable hardcoded mandatory operands which follow
7455   // optional operands.
7456   //
7457   // The current design assumes that all operands after the first optional
7458   // operand are also optional. However, the implementation of some instructions
7459   // violates this rule (e.g. flat/global atomics, which have hardcoded 'glc' operands).
7460   //
7461   // To alleviate this problem, we have to (implicitly) parse extra operands to
7462   // make sure the autogenerated parser of custom operands never hits hardcoded
7463   // mandatory operands.
7464 
7465   for (unsigned i = 0; i < MAX_OPR_LOOKAHEAD; ++i) {
7466     if (res != MatchOperand_Success ||
7467         isToken(AsmToken::EndOfStatement))
7468       break;
7469 
7470     trySkipToken(AsmToken::Comma);
7471     res = parseOptionalOpr(Operands);
7472   }
7473 
7474   return res;
7475 }
7476 
7477 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOpr(OperandVector &Operands) {
7478   OperandMatchResultTy res;
7479   for (const OptionalOperand &Op : AMDGPUOptionalOperandTable) {
7480     // try to parse any optional operand here
7481     if (Op.IsBit) {
7482       res = parseNamedBit(Op.Name, Operands, Op.Type);
7483     } else if (Op.Type == AMDGPUOperand::ImmTyOModSI) {
7484       res = parseOModOperand(Operands);
7485     } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstSel ||
7486                Op.Type == AMDGPUOperand::ImmTySdwaSrc0Sel ||
7487                Op.Type == AMDGPUOperand::ImmTySdwaSrc1Sel) {
7488       res = parseSDWASel(Operands, Op.Name, Op.Type);
7489     } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstUnused) {
7490       res = parseSDWADstUnused(Operands);
7491     } else if (Op.Type == AMDGPUOperand::ImmTyOpSel ||
7492                Op.Type == AMDGPUOperand::ImmTyOpSelHi ||
7493                Op.Type == AMDGPUOperand::ImmTyNegLo ||
7494                Op.Type == AMDGPUOperand::ImmTyNegHi) {
7495       res = parseOperandArrayWithPrefix(Op.Name, Operands, Op.Type,
7496                                         Op.ConvertResult);
7497     } else if (Op.Type == AMDGPUOperand::ImmTyDim) {
7498       res = parseDim(Operands);
7499     } else if (Op.Type == AMDGPUOperand::ImmTyCPol) {
7500       res = parseCPol(Operands);
7501     } else {
7502       res = parseIntWithPrefix(Op.Name, Operands, Op.Type, Op.ConvertResult);
7503     }
7504     if (res != MatchOperand_NoMatch) {
7505       return res;
7506     }
7507   }
7508   return MatchOperand_NoMatch;
7509 }
7510 
7511 OperandMatchResultTy AMDGPUAsmParser::parseOModOperand(OperandVector &Operands) {
7512   StringRef Name = getTokenStr();
7513   if (Name == "mul") {
7514     return parseIntWithPrefix("mul", Operands,
7515                               AMDGPUOperand::ImmTyOModSI, ConvertOmodMul);
7516   }
7517 
7518   if (Name == "div") {
7519     return parseIntWithPrefix("div", Operands,
7520                               AMDGPUOperand::ImmTyOModSI, ConvertOmodDiv);
7521   }
7522 
7523   return MatchOperand_NoMatch;
7524 }
7525 
7526 void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands) {
7527   cvtVOP3P(Inst, Operands);
7528 
7529   int Opc = Inst.getOpcode();
7530 
7531   int SrcNum;
7532   const int Ops[] = { AMDGPU::OpName::src0,
7533                       AMDGPU::OpName::src1,
7534                       AMDGPU::OpName::src2 };
7535   for (SrcNum = 0;
7536        SrcNum < 3 && AMDGPU::getNamedOperandIdx(Opc, Ops[SrcNum]) != -1;
7537        ++SrcNum);
7538   assert(SrcNum > 0);
7539 
7540   int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
7541   unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
7542 
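  // If the op_sel bit that corresponds to the destination (bit SrcNum) is set,
  // record it in src0_modifiers as DST_OP_SEL.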
7543   if ((OpSel & (1 << SrcNum)) != 0) {
7544     int ModIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers);
7545     uint32_t ModVal = Inst.getOperand(ModIdx).getImm();
7546     Inst.getOperand(ModIdx).setImm(ModVal | SISrcMods::DST_OP_SEL);
7547   }
7548 }
7549 
7550 static bool isRegOrImmWithInputMods(const MCInstrDesc &Desc, unsigned OpNum) {
7551       // 1. This operand is an input modifier
7552   return Desc.OpInfo[OpNum].OperandType == AMDGPU::OPERAND_INPUT_MODS
7553       // 2. This is not the last operand
7554       && Desc.NumOperands > (OpNum + 1)
7555       // 3. The next operand is a register class
7556       && Desc.OpInfo[OpNum + 1].RegClass != -1
7557       // 4. The next operand is not tied to any other operand
7558       && Desc.getOperandConstraint(OpNum + 1, MCOI::OperandConstraint::TIED_TO) == -1;
7559 }
7560 
7561 void AMDGPUAsmParser::cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands)
7562 {
7563   OptionalImmIndexMap OptionalIdx;
7564   unsigned Opc = Inst.getOpcode();
7565 
7566   unsigned I = 1;
7567   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
7568   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
7569     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
7570   }
7571 
7572   for (unsigned E = Operands.size(); I != E; ++I) {
7573     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
7574     if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
7575       Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
7576     } else if (Op.isInterpSlot() ||
7577                Op.isInterpAttr() ||
7578                Op.isAttrChan()) {
7579       Inst.addOperand(MCOperand::createImm(Op.getImm()));
7580     } else if (Op.isImmModifier()) {
7581       OptionalIdx[Op.getImmTy()] = I;
7582     } else {
7583       llvm_unreachable("unhandled operand type");
7584     }
7585   }
7586 
7587   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::high) != -1) {
7588     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyHigh);
7589   }
7590 
7591   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) {
7592     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI);
7593   }
7594 
7595   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) {
7596     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI);
7597   }
7598 }
7599 
7600 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands,
7601                               OptionalImmIndexMap &OptionalIdx) {
7602   unsigned Opc = Inst.getOpcode();
7603 
7604   unsigned I = 1;
7605   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
7606   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
7607     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
7608   }
7609 
7610   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers) != -1) {
7611     // This instruction has src modifiers
7612     for (unsigned E = Operands.size(); I != E; ++I) {
7613       AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
7614       if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
7615         Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
7616       } else if (Op.isImmModifier()) {
7617         OptionalIdx[Op.getImmTy()] = I;
7618       } else if (Op.isRegOrImm()) {
7619         Op.addRegOrImmOperands(Inst, 1);
7620       } else {
7621         llvm_unreachable("unhandled operand type");
7622       }
7623     }
7624   } else {
7625     // No src modifiers
7626     for (unsigned E = Operands.size(); I != E; ++I) {
7627       AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
7628       if (Op.isMod()) {
7629         OptionalIdx[Op.getImmTy()] = I;
7630       } else {
7631         Op.addRegOrImmOperands(Inst, 1);
7632       }
7633     }
7634   }
7635 
7636   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) {
7637     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI);
7638   }
7639 
7640   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) {
7641     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI);
7642   }
7643 
7644   // Special case v_mac_{f16, f32} and v_fmac_{f16, f32} (gfx906/gfx10+):
7645   // they have a src2 register operand that is tied to the dst operand.
7646   // We don't allow modifiers for this operand in the assembler, so
7647   // src2_modifiers should be 0.
7648   if (Opc == AMDGPU::V_MAC_F32_e64_gfx6_gfx7 ||
7649       Opc == AMDGPU::V_MAC_F32_e64_gfx10 ||
7650       Opc == AMDGPU::V_MAC_F32_e64_vi ||
7651       Opc == AMDGPU::V_MAC_LEGACY_F32_e64_gfx6_gfx7 ||
7652       Opc == AMDGPU::V_MAC_LEGACY_F32_e64_gfx10 ||
7653       Opc == AMDGPU::V_MAC_F16_e64_vi ||
7654       Opc == AMDGPU::V_FMAC_F64_e64_gfx90a ||
7655       Opc == AMDGPU::V_FMAC_F32_e64_gfx10 ||
7656       Opc == AMDGPU::V_FMAC_F32_e64_vi ||
7657       Opc == AMDGPU::V_FMAC_LEGACY_F32_e64_gfx10 ||
7658       Opc == AMDGPU::V_FMAC_F16_e64_gfx10) {
7659     auto it = Inst.begin();
7660     std::advance(it, AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2_modifiers));
7661     it = Inst.insert(it, MCOperand::createImm(0)); // no modifiers for src2
7662     ++it;
7663     // Copy the operand to ensure it's not invalidated when Inst grows.
7664     Inst.insert(it, MCOperand(Inst.getOperand(0))); // src2 = dst
7665   }
7666 }
7667 
7668 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands) {
7669   OptionalImmIndexMap OptionalIdx;
7670   cvtVOP3(Inst, Operands, OptionalIdx);
7671 }
7672 
7673 void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands,
7674                                OptionalImmIndexMap &OptIdx) {
7675   const int Opc = Inst.getOpcode();
7676   const MCInstrDesc &Desc = MII.get(Opc);
7677 
7678   const bool IsPacked = (Desc.TSFlags & SIInstrFlags::IsPacked) != 0;
7679 
7680   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst_in) != -1) {
7681     assert(!IsPacked);
7682     Inst.addOperand(Inst.getOperand(0));
7683   }
7684 
7685   // FIXME: This is messy. Parse the modifiers as if this were a normal VOP3
7686   // instruction, and then figure out where to actually put the modifiers.
7687 
7688   int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
7689   if (OpSelIdx != -1) {
7690     addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSel);
7691   }
7692 
7693   int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi);
7694   if (OpSelHiIdx != -1) {
7695     int DefaultVal = IsPacked ? -1 : 0;
7696     addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSelHi,
7697                           DefaultVal);
7698   }
7699 
7700   int NegLoIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_lo);
7701   if (NegLoIdx != -1) {
7702     addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegLo);
7703     addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegHi);
7704   }
7705 
7706   const int Ops[] = { AMDGPU::OpName::src0,
7707                       AMDGPU::OpName::src1,
7708                       AMDGPU::OpName::src2 };
7709   const int ModOps[] = { AMDGPU::OpName::src0_modifiers,
7710                          AMDGPU::OpName::src1_modifiers,
7711                          AMDGPU::OpName::src2_modifiers };
7712 
7713   unsigned OpSel = 0;
7714   unsigned OpSelHi = 0;
7715   unsigned NegLo = 0;
7716   unsigned NegHi = 0;
7717 
7718   if (OpSelIdx != -1)
7719     OpSel = Inst.getOperand(OpSelIdx).getImm();
7720 
7721   if (OpSelHiIdx != -1)
7722     OpSelHi = Inst.getOperand(OpSelHiIdx).getImm();
7723 
7724   if (NegLoIdx != -1) {
7725     int NegHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_hi);
7726     NegLo = Inst.getOperand(NegLoIdx).getImm();
7727     NegHi = Inst.getOperand(NegHiIdx).getImm();
7728   }
7729 
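  // Distribute the op_sel/op_sel_hi/neg_lo/neg_hi bits into the per-source
  // modifier operands (OP_SEL_0, OP_SEL_1, NEG and NEG_HI respectively).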
7730   for (int J = 0; J < 3; ++J) {
7731     int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]);
7732     if (OpIdx == -1)
7733       break;
7734 
7735     uint32_t ModVal = 0;
7736 
7737     if ((OpSel & (1 << J)) != 0)
7738       ModVal |= SISrcMods::OP_SEL_0;
7739 
7740     if ((OpSelHi & (1 << J)) != 0)
7741       ModVal |= SISrcMods::OP_SEL_1;
7742 
7743     if ((NegLo & (1 << J)) != 0)
7744       ModVal |= SISrcMods::NEG;
7745 
7746     if ((NegHi & (1 << J)) != 0)
7747       ModVal |= SISrcMods::NEG_HI;
7748 
7749     int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]);
7750 
7751     Inst.getOperand(ModIdx).setImm(Inst.getOperand(ModIdx).getImm() | ModVal);
7752   }
7753 }
7754 
7755 void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands) {
7756   OptionalImmIndexMap OptIdx;
7757   cvtVOP3(Inst, Operands, OptIdx);
7758   cvtVOP3P(Inst, Operands, OptIdx);
7759 }
7760 
7761 //===----------------------------------------------------------------------===//
7762 // dpp
7763 //===----------------------------------------------------------------------===//
7764 
7765 bool AMDGPUOperand::isDPP8() const {
7766   return isImmTy(ImmTyDPP8);
7767 }
7768 
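// A dpp_ctrl immediate is valid if it is one of the fixed controls or falls
// within one of the quad_perm/row_shl/row_shr/row_ror/row_share/row_xmask
// encoding ranges checked below.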
7769 bool AMDGPUOperand::isDPPCtrl() const {
7770   using namespace AMDGPU::DPP;
7771 
7772   bool result = isImm() && getImmTy() == ImmTyDppCtrl && isUInt<9>(getImm());
7773   if (result) {
7774     int64_t Imm = getImm();
7775     return (Imm >= DppCtrl::QUAD_PERM_FIRST && Imm <= DppCtrl::QUAD_PERM_LAST) ||
7776            (Imm >= DppCtrl::ROW_SHL_FIRST && Imm <= DppCtrl::ROW_SHL_LAST) ||
7777            (Imm >= DppCtrl::ROW_SHR_FIRST && Imm <= DppCtrl::ROW_SHR_LAST) ||
7778            (Imm >= DppCtrl::ROW_ROR_FIRST && Imm <= DppCtrl::ROW_ROR_LAST) ||
7779            (Imm == DppCtrl::WAVE_SHL1) ||
7780            (Imm == DppCtrl::WAVE_ROL1) ||
7781            (Imm == DppCtrl::WAVE_SHR1) ||
7782            (Imm == DppCtrl::WAVE_ROR1) ||
7783            (Imm == DppCtrl::ROW_MIRROR) ||
7784            (Imm == DppCtrl::ROW_HALF_MIRROR) ||
7785            (Imm == DppCtrl::BCAST15) ||
7786            (Imm == DppCtrl::BCAST31) ||
7787            (Imm >= DppCtrl::ROW_SHARE_FIRST && Imm <= DppCtrl::ROW_SHARE_LAST) ||
7788            (Imm >= DppCtrl::ROW_XMASK_FIRST && Imm <= DppCtrl::ROW_XMASK_LAST);
7789   }
7790   return false;
7791 }
7792 
7793 //===----------------------------------------------------------------------===//
7794 // mAI
7795 //===----------------------------------------------------------------------===//
7796 
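// Predicates for the MFMA blgp/cbsz/abid modifiers; the immediate must fit
// the width of the corresponding encoding field (3, 3 and 4 bits).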
7797 bool AMDGPUOperand::isBLGP() const {
7798   return isImm() && getImmTy() == ImmTyBLGP && isUInt<3>(getImm());
7799 }
7800 
7801 bool AMDGPUOperand::isCBSZ() const {
7802   return isImm() && getImmTy() == ImmTyCBSZ && isUInt<3>(getImm());
7803 }
7804 
7805 bool AMDGPUOperand::isABID() const {
7806   return isImm() && getImmTy() == ImmTyABID && isUInt<4>(getImm());
7807 }
7808 
7809 bool AMDGPUOperand::isS16Imm() const {
7810   return isImm() && (isInt<16>(getImm()) || isUInt<16>(getImm()));
7811 }
7812 
7813 bool AMDGPUOperand::isU16Imm() const {
7814   return isImm() && isUInt<16>(getImm());
7815 }
7816 
7817 //===----------------------------------------------------------------------===//
7818 // dim
7819 //===----------------------------------------------------------------------===//
7820 
7821 bool AMDGPUAsmParser::parseDimId(unsigned &Encoding) {
7822   // We want to allow "dim:1D" etc.,
7823   // but the initial 1 is tokenized as an integer.
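  // Accepted values include "1D", "2D_ARRAY", etc., as well as the full
  // "SQ_RSRC_IMG_*" names.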
7824   std::string Token;
7825   if (isToken(AsmToken::Integer)) {
7826     SMLoc Loc = getToken().getEndLoc();
7827     Token = std::string(getTokenStr());
7828     lex();
7829     if (getLoc() != Loc)
7830       return false;
7831   }
7832 
7833   StringRef Suffix;
7834   if (!parseId(Suffix))
7835     return false;
7836   Token += Suffix;
7837 
7838   StringRef DimId = Token;
7839   if (DimId.startswith("SQ_RSRC_IMG_"))
7840     DimId = DimId.drop_front(12);
7841 
7842   const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByAsmSuffix(DimId);
7843   if (!DimInfo)
7844     return false;
7845 
7846   Encoding = DimInfo->Encoding;
7847   return true;
7848 }
7849 
7850 OperandMatchResultTy AMDGPUAsmParser::parseDim(OperandVector &Operands) {
7851   if (!isGFX10Plus())
7852     return MatchOperand_NoMatch;
7853 
7854   SMLoc S = getLoc();
7855 
7856   if (!trySkipId("dim", AsmToken::Colon))
7857     return MatchOperand_NoMatch;
7858 
7859   unsigned Encoding;
7860   SMLoc Loc = getLoc();
7861   if (!parseDimId(Encoding)) {
7862     Error(Loc, "invalid dim value");
7863     return MatchOperand_ParseFail;
7864   }
7865 
7866   Operands.push_back(AMDGPUOperand::CreateImm(this, Encoding, S,
7867                                               AMDGPUOperand::ImmTyDim));
7868   return MatchOperand_Success;
7869 }
7870 
7871 //===----------------------------------------------------------------------===//
7872 // dpp
7873 //===----------------------------------------------------------------------===//
7874 
7875 OperandMatchResultTy AMDGPUAsmParser::parseDPP8(OperandVector &Operands) {
7876   SMLoc S = getLoc();
7877 
7878   if (!isGFX10Plus() || !trySkipId("dpp8", AsmToken::Colon))
7879     return MatchOperand_NoMatch;
7880 
7881   // dpp8:[%d,%d,%d,%d,%d,%d,%d,%d]
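  // E.g. "dpp8:[7,6,5,4,3,2,1,0]"; each of the eight lane selectors must fit
  // in 3 bits.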
7882 
7883   int64_t Sels[8];
7884 
7885   if (!skipToken(AsmToken::LBrac, "expected an opening square bracket"))
7886     return MatchOperand_ParseFail;
7887 
7888   for (size_t i = 0; i < 8; ++i) {
7889     if (i > 0 && !skipToken(AsmToken::Comma, "expected a comma"))
7890       return MatchOperand_ParseFail;
7891 
7892     SMLoc Loc = getLoc();
7893     if (getParser().parseAbsoluteExpression(Sels[i]))
7894       return MatchOperand_ParseFail;
    if (Sels[i] < 0 || Sels[i] > 7) {
7896       Error(Loc, "expected a 3-bit value");
7897       return MatchOperand_ParseFail;
7898     }
7899   }
7900 
7901   if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
7902     return MatchOperand_ParseFail;
7903 
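  // Pack the eight 3-bit selectors into a single immediate; lane i occupies
  // bits [3*i+2 : 3*i].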
7904   unsigned DPP8 = 0;
7905   for (size_t i = 0; i < 8; ++i)
7906     DPP8 |= (Sels[i] << (i * 3));
7907 
7908   Operands.push_back(AMDGPUOperand::CreateImm(this, DPP8, S, AMDGPUOperand::ImmTyDPP8));
7909   return MatchOperand_Success;
7910 }
7911 
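// Check whether the named dpp_ctrl variant is available on the current
// subtarget, e.g. row_share/row_xmask require GFX10+ and row_newbcast
// requires GFX90A.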
7912 bool
7913 AMDGPUAsmParser::isSupportedDPPCtrl(StringRef Ctrl,
7914                                     const OperandVector &Operands) {
7915   if (Ctrl == "row_newbcast")
7916     return isGFX90A();
7917 
7918   if (Ctrl == "row_share" ||
7919       Ctrl == "row_xmask")
7920     return isGFX10Plus();
7921 
7922   if (Ctrl == "wave_shl" ||
7923       Ctrl == "wave_shr" ||
7924       Ctrl == "wave_rol" ||
7925       Ctrl == "wave_ror" ||
7926       Ctrl == "row_bcast")
7927     return isVI() || isGFX9();
7928 
7929   return Ctrl == "row_mirror" ||
7930          Ctrl == "row_half_mirror" ||
7931          Ctrl == "quad_perm" ||
7932          Ctrl == "row_shl" ||
7933          Ctrl == "row_shr" ||
7934          Ctrl == "row_ror";
7935 }
7936 
7937 int64_t
7938 AMDGPUAsmParser::parseDPPCtrlPerm() {
7939   // quad_perm:[%d,%d,%d,%d]
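  // E.g. "quad_perm:[1,0,3,2]"; each selector is a 2-bit lane index within
  // a quad.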
7940 
7941   if (!skipToken(AsmToken::LBrac, "expected an opening square bracket"))
7942     return -1;
7943 
7944   int64_t Val = 0;
7945   for (int i = 0; i < 4; ++i) {
7946     if (i > 0 && !skipToken(AsmToken::Comma, "expected a comma"))
7947       return -1;
7948 
7949     int64_t Temp;
7950     SMLoc Loc = getLoc();
7951     if (getParser().parseAbsoluteExpression(Temp))
7952       return -1;
7953     if (Temp < 0 || Temp > 3) {
7954       Error(Loc, "expected a 2-bit value");
7955       return -1;
7956     }
7957 
    Val += (Temp << (i * 2));
7959   }
7960 
7961   if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
7962     return -1;
7963 
7964   return Val;
7965 }
7966 
7967 int64_t
7968 AMDGPUAsmParser::parseDPPCtrlSel(StringRef Ctrl) {
7969   using namespace AMDGPU::DPP;
7970 
7971   // sel:%d
7972 
7973   int64_t Val;
7974   SMLoc Loc = getLoc();
7975 
7976   if (getParser().parseAbsoluteExpression(Val))
7977     return -1;
7978 
7979   struct DppCtrlCheck {
7980     int64_t Ctrl;
7981     int Lo;
7982     int Hi;
7983   };
7984 
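  // Each entry supplies the base encoding for the control together with the
  // inclusive range of values accepted after the colon.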
7985   DppCtrlCheck Check = StringSwitch<DppCtrlCheck>(Ctrl)
7986     .Case("wave_shl",  {DppCtrl::WAVE_SHL1,       1,  1})
7987     .Case("wave_rol",  {DppCtrl::WAVE_ROL1,       1,  1})
7988     .Case("wave_shr",  {DppCtrl::WAVE_SHR1,       1,  1})
7989     .Case("wave_ror",  {DppCtrl::WAVE_ROR1,       1,  1})
7990     .Case("row_shl",   {DppCtrl::ROW_SHL0,        1, 15})
7991     .Case("row_shr",   {DppCtrl::ROW_SHR0,        1, 15})
7992     .Case("row_ror",   {DppCtrl::ROW_ROR0,        1, 15})
7993     .Case("row_share", {DppCtrl::ROW_SHARE_FIRST, 0, 15})
7994     .Case("row_xmask", {DppCtrl::ROW_XMASK_FIRST, 0, 15})
7995     .Case("row_newbcast", {DppCtrl::ROW_NEWBCAST_FIRST, 0, 15})
7996     .Default({-1, 0, 0});
7997 
7998   bool Valid;
7999   if (Check.Ctrl == -1) {
8000     Valid = (Ctrl == "row_bcast" && (Val == 15 || Val == 31));
    Val = (Val == 15) ? DppCtrl::BCAST15 : DppCtrl::BCAST31;
8002   } else {
8003     Valid = Check.Lo <= Val && Val <= Check.Hi;
8004     Val = (Check.Lo == Check.Hi) ? Check.Ctrl : (Check.Ctrl | Val);
8005   }
8006 
8007   if (!Valid) {
8008     Error(Loc, Twine("invalid ", Ctrl) + Twine(" value"));
8009     return -1;
8010   }
8011 
8012   return Val;
8013 }
8014 
8015 OperandMatchResultTy
8016 AMDGPUAsmParser::parseDPPCtrl(OperandVector &Operands) {
8017   using namespace AMDGPU::DPP;
8018 
8019   if (!isToken(AsmToken::Identifier) ||
8020       !isSupportedDPPCtrl(getTokenStr(), Operands))
8021     return MatchOperand_NoMatch;
8022 
8023   SMLoc S = getLoc();
8024   int64_t Val = -1;
8025   StringRef Ctrl;
8026 
8027   parseId(Ctrl);
8028 
8029   if (Ctrl == "row_mirror") {
8030     Val = DppCtrl::ROW_MIRROR;
8031   } else if (Ctrl == "row_half_mirror") {
8032     Val = DppCtrl::ROW_HALF_MIRROR;
8033   } else {
8034     if (skipToken(AsmToken::Colon, "expected a colon")) {
8035       if (Ctrl == "quad_perm") {
8036         Val = parseDPPCtrlPerm();
8037       } else {
8038         Val = parseDPPCtrlSel(Ctrl);
8039       }
8040     }
8041   }
8042 
8043   if (Val == -1)
8044     return MatchOperand_ParseFail;
8045 
8046   Operands.push_back(
8047     AMDGPUOperand::CreateImm(this, Val, S, AMDGPUOperand::ImmTyDppCtrl));
8048   return MatchOperand_Success;
8049 }
8050 
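// Default operand values used by the generated matcher when these optional
// operands are omitted from the assembly source.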
8051 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultRowMask() const {
8052   return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppRowMask);
8053 }
8054 
8055 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultEndpgmImmOperands() const {
8056   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyEndpgm);
8057 }
8058 
8059 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBankMask() const {
8060   return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppBankMask);
8061 }
8062 
8063 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBoundCtrl() const {
8064   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppBoundCtrl);
8065 }
8066 
8067 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultFI() const {
8068   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppFi);
8069 }
8070 
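// Convert parsed DPP/DPP8 operands to an MCInst. For DPP8 the packed lane
// selectors and the fi flag are emitted directly; for regular DPP any omitted
// row_mask, bank_mask, bound_ctrl and fi operands get their default values.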
8071 void AMDGPUAsmParser::cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8) {
8072   OptionalImmIndexMap OptionalIdx;
8073 
8074   unsigned Opc = Inst.getOpcode();
8075   bool HasModifiers =
8076       AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers) != -1;
8077   unsigned I = 1;
8078   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
8079   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
8080     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
8081   }
8082 
8083   int Fi = 0;
8084   for (unsigned E = Operands.size(); I != E; ++I) {
8085     auto TiedTo = Desc.getOperandConstraint(Inst.getNumOperands(),
8086                                             MCOI::TIED_TO);
8087     if (TiedTo != -1) {
8088       assert((unsigned)TiedTo < Inst.getNumOperands());
      // Handle the tied old or src2 operand for MAC instructions.
8090       Inst.addOperand(Inst.getOperand(TiedTo));
8091     }
8092     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
8093     // Add the register arguments
8094     if (Op.isReg() && validateVccOperand(Op.getReg())) {
      // VOP2b (v_add_u32, v_sub_u32, ...) DPP uses the "vcc" token.
8096       // Skip it.
8097       continue;
8098     }
8099 
8100     if (IsDPP8) {
8101       if (Op.isDPP8()) {
8102         Op.addImmOperands(Inst, 1);
8103       } else if (HasModifiers &&
8104                  isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
8105         Op.addRegWithFPInputModsOperands(Inst, 2);
8106       } else if (Op.isFI()) {
8107         Fi = Op.getImm();
8108       } else if (Op.isReg()) {
8109         Op.addRegOperands(Inst, 1);
8110       } else {
8111         llvm_unreachable("Invalid operand type");
8112       }
8113     } else {
8114       if (HasModifiers &&
8115           isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
8116         Op.addRegWithFPInputModsOperands(Inst, 2);
8117       } else if (Op.isReg()) {
8118         Op.addRegOperands(Inst, 1);
8119       } else if (Op.isDPPCtrl()) {
8120         Op.addImmOperands(Inst, 1);
8121       } else if (Op.isImm()) {
8122         // Handle optional arguments
8123         OptionalIdx[Op.getImmTy()] = I;
8124       } else {
8125         llvm_unreachable("Invalid operand type");
8126       }
8127     }
8128   }
8129 
8130   if (IsDPP8) {
8131     using namespace llvm::AMDGPU::DPP;
    Inst.addOperand(MCOperand::createImm(Fi ? DPP8_FI_1 : DPP8_FI_0));
8133   } else {
8134     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppRowMask, 0xf);
8135     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBankMask, 0xf);
8136     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBoundCtrl);
8137     if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::fi) != -1) {
8138       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppFi);
8139     }
8140   }
8141 }
8142 
8143 //===----------------------------------------------------------------------===//
8144 // sdwa
8145 //===----------------------------------------------------------------------===//
8146 
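// Parse an SDWA select operand of the form "<Prefix>:<SEL>", e.g.
// "dst_sel:DWORD" or "src0_sel:BYTE_1"; the selected value is recorded as an
// immediate of the given Type.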
8147 OperandMatchResultTy
8148 AMDGPUAsmParser::parseSDWASel(OperandVector &Operands, StringRef Prefix,
8149                               AMDGPUOperand::ImmTy Type) {
8150   using namespace llvm::AMDGPU::SDWA;
8151 
8152   SMLoc S = getLoc();
8153   StringRef Value;
8154   OperandMatchResultTy res;
8155 
8156   SMLoc StringLoc;
8157   res = parseStringWithPrefix(Prefix, Value, StringLoc);
8158   if (res != MatchOperand_Success) {
8159     return res;
8160   }
8161 
8162   int64_t Int;
8163   Int = StringSwitch<int64_t>(Value)
8164         .Case("BYTE_0", SdwaSel::BYTE_0)
8165         .Case("BYTE_1", SdwaSel::BYTE_1)
8166         .Case("BYTE_2", SdwaSel::BYTE_2)
8167         .Case("BYTE_3", SdwaSel::BYTE_3)
8168         .Case("WORD_0", SdwaSel::WORD_0)
8169         .Case("WORD_1", SdwaSel::WORD_1)
8170         .Case("DWORD", SdwaSel::DWORD)
8171         .Default(0xffffffff);
8172 
8173   if (Int == 0xffffffff) {
8174     Error(StringLoc, "invalid " + Twine(Prefix) + " value");
8175     return MatchOperand_ParseFail;
8176   }
8177 
8178   Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, Type));
8179   return MatchOperand_Success;
8180 }
8181 
8182 OperandMatchResultTy
8183 AMDGPUAsmParser::parseSDWADstUnused(OperandVector &Operands) {
8184   using namespace llvm::AMDGPU::SDWA;
8185 
8186   SMLoc S = getLoc();
8187   StringRef Value;
8188   OperandMatchResultTy res;
8189 
8190   SMLoc StringLoc;
8191   res = parseStringWithPrefix("dst_unused", Value, StringLoc);
8192   if (res != MatchOperand_Success) {
8193     return res;
8194   }
8195 
8196   int64_t Int;
8197   Int = StringSwitch<int64_t>(Value)
8198         .Case("UNUSED_PAD", DstUnused::UNUSED_PAD)
8199         .Case("UNUSED_SEXT", DstUnused::UNUSED_SEXT)
8200         .Case("UNUSED_PRESERVE", DstUnused::UNUSED_PRESERVE)
8201         .Default(0xffffffff);
8202 
8203   if (Int == 0xffffffff) {
8204     Error(StringLoc, "invalid dst_unused value");
8205     return MatchOperand_ParseFail;
8206   }
8207 
8208   Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, AMDGPUOperand::ImmTySdwaDstUnused));
8209   return MatchOperand_Success;
8210 }
8211 
8212 void AMDGPUAsmParser::cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands) {
8213   cvtSDWA(Inst, Operands, SIInstrFlags::VOP1);
8214 }
8215 
8216 void AMDGPUAsmParser::cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands) {
8217   cvtSDWA(Inst, Operands, SIInstrFlags::VOP2);
8218 }
8219 
8220 void AMDGPUAsmParser::cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands) {
8221   cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, true, true);
8222 }
8223 
8224 void AMDGPUAsmParser::cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands) {
8225   cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, false, true);
8226 }
8227 
8228 void AMDGPUAsmParser::cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands) {
8229   cvtSDWA(Inst, Operands, SIInstrFlags::VOPC, isVI());
8230 }
8231 
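// Common SDWA conversion: the destination and (possibly modified) source
// operands are added first; the optional clamp/omod and
// dst_sel/dst_unused/src*_sel immediates are then appended in the order
// required by the encoding (VOP1, VOP2 or VOPC).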
8232 void AMDGPUAsmParser::cvtSDWA(MCInst &Inst, const OperandVector &Operands,
8233                               uint64_t BasicInstType,
8234                               bool SkipDstVcc,
8235                               bool SkipSrcVcc) {
8236   using namespace llvm::AMDGPU::SDWA;
8237 
8238   OptionalImmIndexMap OptionalIdx;
8239   bool SkipVcc = SkipDstVcc || SkipSrcVcc;
8240   bool SkippedVcc = false;
8241 
8242   unsigned I = 1;
8243   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
8244   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
8245     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
8246   }
8247 
8248   for (unsigned E = Operands.size(); I != E; ++I) {
8249     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
8250     if (SkipVcc && !SkippedVcc && Op.isReg() &&
8251         (Op.getReg() == AMDGPU::VCC || Op.getReg() == AMDGPU::VCC_LO)) {
      // VOP2b (v_add_u32, v_sub_u32, ...) SDWA uses the "vcc" token as dst.
      // Skip it if it is the 2nd operand (e.g. v_add_i32_sdwa v1, vcc, v2, v3)
      // or the 4th operand (v_addc_u32_sdwa v1, vcc, v2, v3, vcc).
      // Only skip VCC if it was not skipped on the previous iteration.
      // Note that src0 and src1 occupy two slots each because of modifiers.
8257       if (BasicInstType == SIInstrFlags::VOP2 &&
8258           ((SkipDstVcc && Inst.getNumOperands() == 1) ||
8259            (SkipSrcVcc && Inst.getNumOperands() == 5))) {
8260         SkippedVcc = true;
8261         continue;
8262       } else if (BasicInstType == SIInstrFlags::VOPC &&
8263                  Inst.getNumOperands() == 0) {
8264         SkippedVcc = true;
8265         continue;
8266       }
8267     }
8268     if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
8269       Op.addRegOrImmWithInputModsOperands(Inst, 2);
8270     } else if (Op.isImm()) {
8271       // Handle optional arguments
8272       OptionalIdx[Op.getImmTy()] = I;
8273     } else {
8274       llvm_unreachable("Invalid operand type");
8275     }
8276     SkippedVcc = false;
8277   }
8278 
8279   if (Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx10 &&
8280       Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx9 &&
8281       Inst.getOpcode() != AMDGPU::V_NOP_sdwa_vi) {
    // V_NOP_sdwa_vi/gfx9/gfx10 have no optional SDWA arguments.
8283     switch (BasicInstType) {
8284     case SIInstrFlags::VOP1:
8285       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
8286       if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) {
8287         addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
8288       }
8289       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD);
8290       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE);
8291       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
8292       break;
8293 
8294     case SIInstrFlags::VOP2:
8295       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
8296       if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) {
8297         addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
8298       }
8299       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD);
8300       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE);
8301       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
8302       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD);
8303       break;
8304 
8305     case SIInstrFlags::VOPC:
8306       if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::clamp) != -1)
8307         addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
8308       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
8309       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD);
8310       break;
8311 
8312     default:
8313       llvm_unreachable("Invalid instruction type. Only VOP1, VOP2 and VOPC allowed");
8314     }
8315   }
8316 
8317   // special case v_mac_{f16, f32}:
8318   // it has src2 register operand that is tied to dst operand
8319   if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
8320       Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi)  {
8321     auto it = Inst.begin();
8322     std::advance(
8323       it, AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::src2));
    // Copy the operand to ensure it's not invalidated when Inst grows.
    Inst.insert(it, MCOperand(Inst.getOperand(0))); // src2 = dst
8325   }
8326 }
8327 
8328 //===----------------------------------------------------------------------===//
8329 // mAI
8330 //===----------------------------------------------------------------------===//
8331 
8332 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBLGP() const {
8333   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyBLGP);
8334 }
8335 
8336 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultCBSZ() const {
8337   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyCBSZ);
8338 }
8339 
8340 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultABID() const {
8341   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyABID);
8342 }
8343 
8344 /// Force static initialization.
8345 extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUAsmParser() {
8346   RegisterMCAsmParser<AMDGPUAsmParser> A(getTheAMDGPUTarget());
8347   RegisterMCAsmParser<AMDGPUAsmParser> B(getTheGCNTarget());
8348 }
8349 
8350 #define GET_REGISTER_MATCHER
8351 #define GET_MATCHER_IMPLEMENTATION
8352 #define GET_MNEMONIC_SPELL_CHECKER
8353 #define GET_MNEMONIC_CHECKER
8354 #include "AMDGPUGenAsmMatcher.inc"
8355 
// This function must be defined after the auto-generated include so that the
// MatchClassKind enum is available.
8358 unsigned AMDGPUAsmParser::validateTargetOperandClass(MCParsedAsmOperand &Op,
8359                                                      unsigned Kind) {
8360   // Tokens like "glc" would be parsed as immediate operands in ParseOperand().
  // But MatchInstructionImpl() expects a token and fails to validate the
  // operand. This method checks whether we were given an immediate operand
  // when the matcher expects the corresponding token.
8364   AMDGPUOperand &Operand = (AMDGPUOperand&)Op;
8365   switch (Kind) {
8366   case MCK_addr64:
8367     return Operand.isAddr64() ? Match_Success : Match_InvalidOperand;
8368   case MCK_gds:
8369     return Operand.isGDS() ? Match_Success : Match_InvalidOperand;
8370   case MCK_lds:
8371     return Operand.isLDS() ? Match_Success : Match_InvalidOperand;
8372   case MCK_idxen:
8373     return Operand.isIdxen() ? Match_Success : Match_InvalidOperand;
8374   case MCK_offen:
8375     return Operand.isOffen() ? Match_Success : Match_InvalidOperand;
8376   case MCK_SSrcB32:
8377     // When operands have expression values, they will return true for isToken,
8378     // because it is not possible to distinguish between a token and an
    // expression at parse time. MatchInstructionImpl() always tries to match
    // an operand as a token when isToken returns true, and if the name of the
    // expression is not a valid token the match fails, so we need to handle
    // it here.
8383     return Operand.isSSrcB32() ? Match_Success : Match_InvalidOperand;
8384   case MCK_SSrcF32:
8385     return Operand.isSSrcF32() ? Match_Success : Match_InvalidOperand;
8386   case MCK_SoppBrTarget:
8387     return Operand.isSoppBrTarget() ? Match_Success : Match_InvalidOperand;
8388   case MCK_VReg32OrOff:
8389     return Operand.isVReg32OrOff() ? Match_Success : Match_InvalidOperand;
8390   case MCK_InterpSlot:
8391     return Operand.isInterpSlot() ? Match_Success : Match_InvalidOperand;
8392   case MCK_Attr:
8393     return Operand.isInterpAttr() ? Match_Success : Match_InvalidOperand;
8394   case MCK_AttrChan:
8395     return Operand.isAttrChan() ? Match_Success : Match_InvalidOperand;
8396   case MCK_ImmSMEMOffset:
8397     return Operand.isSMEMOffset() ? Match_Success : Match_InvalidOperand;
8398   case MCK_SReg_64:
8399   case MCK_SReg_64_XEXEC:
    // 'null' is defined as a 32-bit register, but it should also be accepted
    // where 64-bit operands are expected. The code below enables it for
    // SReg_64 operands used as source and destination; the remaining source
    // operands are handled in isInlinableImm().
8405     return Operand.isNull() ? Match_Success : Match_InvalidOperand;
8406   default:
8407     return Match_InvalidOperand;
8408   }
8409 }
8410 
8411 //===----------------------------------------------------------------------===//
8412 // endpgm
8413 //===----------------------------------------------------------------------===//
8414 
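// Parse the optional immediate operand of s_endpgm; when omitted it defaults
// to 0.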
8415 OperandMatchResultTy AMDGPUAsmParser::parseEndpgmOp(OperandVector &Operands) {
8416   SMLoc S = getLoc();
8417   int64_t Imm = 0;
8418 
8419   if (!parseExpr(Imm)) {
8420     // The operand is optional, if not present default to 0
8421     Imm = 0;
8422   }
8423 
8424   if (!isUInt<16>(Imm)) {
8425     Error(S, "expected a 16-bit value");
8426     return MatchOperand_ParseFail;
8427   }
8428 
8429   Operands.push_back(
8430       AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyEndpgm));
8431   return MatchOperand_Success;
8432 }
8433 
8434 bool AMDGPUOperand::isEndpgm() const { return isImmTy(ImmTyEndpgm); }
8435