1 //===- AMDGPUAsmParser.cpp - Parse SI asm to MCInst instructions ----------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "AMDKernelCodeT.h"
10 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
11 #include "MCTargetDesc/AMDGPUTargetStreamer.h"
12 #include "SIDefines.h"
13 #include "SIInstrInfo.h"
14 #include "SIRegisterInfo.h"
15 #include "TargetInfo/AMDGPUTargetInfo.h"
16 #include "Utils/AMDGPUAsmUtils.h"
17 #include "Utils/AMDGPUBaseInfo.h"
18 #include "Utils/AMDKernelCodeTUtils.h"
19 #include "llvm/ADT/APFloat.h"
20 #include "llvm/ADT/SmallBitVector.h"
21 #include "llvm/ADT/StringSet.h"
22 #include "llvm/ADT/Twine.h"
23 #include "llvm/BinaryFormat/ELF.h"
24 #include "llvm/MC/MCAsmInfo.h"
25 #include "llvm/MC/MCContext.h"
26 #include "llvm/MC/MCExpr.h"
27 #include "llvm/MC/MCInst.h"
28 #include "llvm/MC/MCParser/MCAsmLexer.h"
29 #include "llvm/MC/MCParser/MCAsmParser.h"
30 #include "llvm/MC/MCParser/MCParsedAsmOperand.h"
31 #include "llvm/MC/MCParser/MCTargetAsmParser.h"
32 #include "llvm/MC/MCSymbol.h"
33 #include "llvm/MC/TargetRegistry.h"
34 #include "llvm/Support/AMDGPUMetadata.h"
35 #include "llvm/Support/AMDHSAKernelDescriptor.h"
36 #include "llvm/Support/Casting.h"
37 #include "llvm/Support/MachineValueType.h"
38 #include "llvm/Support/TargetParser.h"
39 
40 using namespace llvm;
41 using namespace llvm::AMDGPU;
42 using namespace llvm::amdhsa;
43 
44 namespace {
45 
46 class AMDGPUAsmParser;
47 
48 enum RegisterKind { IS_UNKNOWN, IS_VGPR, IS_SGPR, IS_AGPR, IS_TTMP, IS_SPECIAL };
49 
50 //===----------------------------------------------------------------------===//
51 // Operand
52 //===----------------------------------------------------------------------===//
53 
54 class AMDGPUOperand : public MCParsedAsmOperand {
55   enum KindTy {
56     Token,
57     Immediate,
58     Register,
59     Expression
60   } Kind;
61 
62   SMLoc StartLoc, EndLoc;
63   const AMDGPUAsmParser *AsmParser;
64 
65 public:
66   AMDGPUOperand(KindTy Kind_, const AMDGPUAsmParser *AsmParser_)
67       : Kind(Kind_), AsmParser(AsmParser_) {}
68 
69   using Ptr = std::unique_ptr<AMDGPUOperand>;
70 
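  // Operand modifiers parsed from the source, e.g. |x|, -x and sext(x).
  // Abs/Neg are the floating-point modifiers and Sext is the integer (SDWA)
  // modifier; they are encoded into an operand as SISrcMods bits.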
71   struct Modifiers {
72     bool Abs = false;
73     bool Neg = false;
74     bool Sext = false;
75 
76     bool hasFPModifiers() const { return Abs || Neg; }
77     bool hasIntModifiers() const { return Sext; }
78     bool hasModifiers() const { return hasFPModifiers() || hasIntModifiers(); }
79 
80     int64_t getFPModifiersOperand() const {
81       int64_t Operand = 0;
82       Operand |= Abs ? SISrcMods::ABS : 0u;
83       Operand |= Neg ? SISrcMods::NEG : 0u;
84       return Operand;
85     }
86 
87     int64_t getIntModifiersOperand() const {
88       int64_t Operand = 0;
89       Operand |= Sext ? SISrcMods::SEXT : 0u;
90       return Operand;
91     }
92 
93     int64_t getModifiersOperand() const {
94       assert(!(hasFPModifiers() && hasIntModifiers())
95            && "fp and int modifiers should not be used simultaneously");
96       if (hasFPModifiers()) {
97         return getFPModifiersOperand();
98       } else if (hasIntModifiers()) {
99         return getIntModifiersOperand();
100       } else {
101         return 0;
102       }
103     }
104 
105     friend raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods);
106   };
107 
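  // Identifies which named assembler operand an immediate came from
  // (e.g. offset:, dmask:, clamp, DPP/SDWA controls), so a generic
  // Immediate can be matched against the correct instruction operand.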
108   enum ImmTy {
109     ImmTyNone,
110     ImmTyGDS,
111     ImmTyLDS,
112     ImmTyOffen,
113     ImmTyIdxen,
114     ImmTyAddr64,
115     ImmTyOffset,
116     ImmTyInstOffset,
117     ImmTyOffset0,
118     ImmTyOffset1,
119     ImmTyCPol,
120     ImmTySWZ,
121     ImmTyTFE,
122     ImmTyD16,
123     ImmTyClampSI,
124     ImmTyOModSI,
125     ImmTyDPP8,
126     ImmTyDppCtrl,
127     ImmTyDppRowMask,
128     ImmTyDppBankMask,
129     ImmTyDppBoundCtrl,
130     ImmTyDppFi,
131     ImmTySdwaDstSel,
132     ImmTySdwaSrc0Sel,
133     ImmTySdwaSrc1Sel,
134     ImmTySdwaDstUnused,
135     ImmTyDMask,
136     ImmTyDim,
137     ImmTyUNorm,
138     ImmTyDA,
139     ImmTyR128A16,
140     ImmTyA16,
141     ImmTyLWE,
142     ImmTyExpTgt,
143     ImmTyExpCompr,
144     ImmTyExpVM,
145     ImmTyFORMAT,
146     ImmTyHwreg,
147     ImmTyOff,
148     ImmTySendMsg,
149     ImmTyInterpSlot,
150     ImmTyInterpAttr,
151     ImmTyAttrChan,
152     ImmTyOpSel,
153     ImmTyOpSelHi,
154     ImmTyNegLo,
155     ImmTyNegHi,
156     ImmTySwizzle,
157     ImmTyGprIdxMode,
158     ImmTyHigh,
159     ImmTyBLGP,
160     ImmTyCBSZ,
161     ImmTyABID,
162     ImmTyEndpgm,
163   };
164 
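  // Describes how an immediate is encoded: as a 32-bit literal or as an
  // inline constant (see the setImmKind* helpers below).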
165   enum ImmKindTy {
166     ImmKindTyNone,
167     ImmKindTyLiteral,
168     ImmKindTyConst,
169   };
170 
171 private:
172   struct TokOp {
173     const char *Data;
174     unsigned Length;
175   };
176 
177   struct ImmOp {
178     int64_t Val;
179     ImmTy Type;
180     bool IsFPImm;
181     mutable ImmKindTy Kind;
182     Modifiers Mods;
183   };
184 
185   struct RegOp {
186     unsigned RegNo;
187     Modifiers Mods;
188   };
189 
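  // Operand payload; only the member that corresponds to Kind is valid.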
190   union {
191     TokOp Tok;
192     ImmOp Imm;
193     RegOp Reg;
194     const MCExpr *Expr;
195   };
196 
197 public:
198   bool isToken() const override {
199     if (Kind == Token)
200       return true;
201 
202     // When parsing operands, we can't always tell if something was meant to be
203     // a token, like 'gds', or an expression that references a global variable.
204     // In this case, we assume the string is an expression, and if we need to
205     // interpret it as a token, then we treat the symbol name as the token.
206     return isSymbolRefExpr();
207   }
208 
209   bool isSymbolRefExpr() const {
210     return isExpr() && Expr && isa<MCSymbolRefExpr>(Expr);
211   }
212 
213   bool isImm() const override {
214     return Kind == Immediate;
215   }
216 
217   void setImmKindNone() const {
218     assert(isImm());
219     Imm.Kind = ImmKindTyNone;
220   }
221 
222   void setImmKindLiteral() const {
223     assert(isImm());
224     Imm.Kind = ImmKindTyLiteral;
225   }
226 
227   void setImmKindConst() const {
228     assert(isImm());
229     Imm.Kind = ImmKindTyConst;
230   }
231 
232   bool IsImmKindLiteral() const {
233     return isImm() && Imm.Kind == ImmKindTyLiteral;
234   }
235 
236   bool isImmKindConst() const {
237     return isImm() && Imm.Kind == ImmKindTyConst;
238   }
239 
240   bool isInlinableImm(MVT type) const;
241   bool isLiteralImm(MVT type) const;
242 
243   bool isRegKind() const {
244     return Kind == Register;
245   }
246 
247   bool isReg() const override {
248     return isRegKind() && !hasModifiers();
249   }
250 
251   bool isRegOrInline(unsigned RCID, MVT type) const {
252     return isRegClass(RCID) || isInlinableImm(type);
253   }
254 
255   bool isRegOrImmWithInputMods(unsigned RCID, MVT type) const {
256     return isRegOrInline(RCID, type) || isLiteralImm(type);
257   }
258 
259   bool isRegOrImmWithInt16InputMods() const {
260     return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i16);
261   }
262 
263   bool isRegOrImmWithInt32InputMods() const {
264     return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i32);
265   }
266 
267   bool isRegOrImmWithInt64InputMods() const {
268     return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::i64);
269   }
270 
271   bool isRegOrImmWithFP16InputMods() const {
272     return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f16);
273   }
274 
275   bool isRegOrImmWithFP32InputMods() const {
276     return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f32);
277   }
278 
279   bool isRegOrImmWithFP64InputMods() const {
280     return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::f64);
281   }
282 
283   bool isVReg() const {
284     return isRegClass(AMDGPU::VGPR_32RegClassID) ||
285            isRegClass(AMDGPU::VReg_64RegClassID) ||
286            isRegClass(AMDGPU::VReg_96RegClassID) ||
287            isRegClass(AMDGPU::VReg_128RegClassID) ||
288            isRegClass(AMDGPU::VReg_160RegClassID) ||
289            isRegClass(AMDGPU::VReg_192RegClassID) ||
290            isRegClass(AMDGPU::VReg_256RegClassID) ||
291            isRegClass(AMDGPU::VReg_512RegClassID) ||
292            isRegClass(AMDGPU::VReg_1024RegClassID);
293   }
294 
295   bool isVReg32() const {
296     return isRegClass(AMDGPU::VGPR_32RegClassID);
297   }
298 
299   bool isVReg32OrOff() const {
300     return isOff() || isVReg32();
301   }
302 
303   bool isNull() const {
304     return isRegKind() && getReg() == AMDGPU::SGPR_NULL;
305   }
306 
307   bool isVRegWithInputMods() const;
308 
309   bool isSDWAOperand(MVT type) const;
310   bool isSDWAFP16Operand() const;
311   bool isSDWAFP32Operand() const;
312   bool isSDWAInt16Operand() const;
313   bool isSDWAInt32Operand() const;
314 
315   bool isImmTy(ImmTy ImmT) const {
316     return isImm() && Imm.Type == ImmT;
317   }
318 
319   bool isImmModifier() const {
320     return isImm() && Imm.Type != ImmTyNone;
321   }
322 
323   bool isClampSI() const { return isImmTy(ImmTyClampSI); }
324   bool isOModSI() const { return isImmTy(ImmTyOModSI); }
325   bool isDMask() const { return isImmTy(ImmTyDMask); }
326   bool isDim() const { return isImmTy(ImmTyDim); }
327   bool isUNorm() const { return isImmTy(ImmTyUNorm); }
328   bool isDA() const { return isImmTy(ImmTyDA); }
329   bool isR128A16() const { return isImmTy(ImmTyR128A16); }
330   bool isGFX10A16() const { return isImmTy(ImmTyA16); }
331   bool isLWE() const { return isImmTy(ImmTyLWE); }
332   bool isOff() const { return isImmTy(ImmTyOff); }
333   bool isExpTgt() const { return isImmTy(ImmTyExpTgt); }
334   bool isExpVM() const { return isImmTy(ImmTyExpVM); }
335   bool isExpCompr() const { return isImmTy(ImmTyExpCompr); }
336   bool isOffen() const { return isImmTy(ImmTyOffen); }
337   bool isIdxen() const { return isImmTy(ImmTyIdxen); }
338   bool isAddr64() const { return isImmTy(ImmTyAddr64); }
339   bool isOffset() const { return isImmTy(ImmTyOffset) && isUInt<16>(getImm()); }
340   bool isOffset0() const { return isImmTy(ImmTyOffset0) && isUInt<8>(getImm()); }
341   bool isOffset1() const { return isImmTy(ImmTyOffset1) && isUInt<8>(getImm()); }
342 
343   bool isFlatOffset() const { return isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset); }
344   bool isGDS() const { return isImmTy(ImmTyGDS); }
345   bool isLDS() const { return isImmTy(ImmTyLDS); }
346   bool isCPol() const { return isImmTy(ImmTyCPol); }
347   bool isSWZ() const { return isImmTy(ImmTySWZ); }
348   bool isTFE() const { return isImmTy(ImmTyTFE); }
349   bool isD16() const { return isImmTy(ImmTyD16); }
350   bool isFORMAT() const { return isImmTy(ImmTyFORMAT) && isUInt<7>(getImm()); }
351   bool isBankMask() const { return isImmTy(ImmTyDppBankMask); }
352   bool isRowMask() const { return isImmTy(ImmTyDppRowMask); }
353   bool isBoundCtrl() const { return isImmTy(ImmTyDppBoundCtrl); }
354   bool isFI() const { return isImmTy(ImmTyDppFi); }
355   bool isSDWADstSel() const { return isImmTy(ImmTySdwaDstSel); }
356   bool isSDWASrc0Sel() const { return isImmTy(ImmTySdwaSrc0Sel); }
357   bool isSDWASrc1Sel() const { return isImmTy(ImmTySdwaSrc1Sel); }
358   bool isSDWADstUnused() const { return isImmTy(ImmTySdwaDstUnused); }
359   bool isInterpSlot() const { return isImmTy(ImmTyInterpSlot); }
360   bool isInterpAttr() const { return isImmTy(ImmTyInterpAttr); }
361   bool isAttrChan() const { return isImmTy(ImmTyAttrChan); }
362   bool isOpSel() const { return isImmTy(ImmTyOpSel); }
363   bool isOpSelHi() const { return isImmTy(ImmTyOpSelHi); }
364   bool isNegLo() const { return isImmTy(ImmTyNegLo); }
365   bool isNegHi() const { return isImmTy(ImmTyNegHi); }
366   bool isHigh() const { return isImmTy(ImmTyHigh); }
367 
368   bool isMod() const {
369     return isClampSI() || isOModSI();
370   }
371 
372   bool isRegOrImm() const {
373     return isReg() || isImm();
374   }
375 
376   bool isRegClass(unsigned RCID) const;
377 
378   bool isInlineValue() const;
379 
380   bool isRegOrInlineNoMods(unsigned RCID, MVT type) const {
381     return isRegOrInline(RCID, type) && !hasModifiers();
382   }
383 
384   bool isSCSrcB16() const {
385     return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i16);
386   }
387 
388   bool isSCSrcV2B16() const {
389     return isSCSrcB16();
390   }
391 
392   bool isSCSrcB32() const {
393     return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i32);
394   }
395 
396   bool isSCSrcB64() const {
397     return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::i64);
398   }
399 
400   bool isBoolReg() const;
401 
402   bool isSCSrcF16() const {
403     return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f16);
404   }
405 
406   bool isSCSrcV2F16() const {
407     return isSCSrcF16();
408   }
409 
410   bool isSCSrcF32() const {
411     return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f32);
412   }
413 
414   bool isSCSrcF64() const {
415     return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::f64);
416   }
417 
418   bool isSSrcB32() const {
419     return isSCSrcB32() || isLiteralImm(MVT::i32) || isExpr();
420   }
421 
422   bool isSSrcB16() const {
423     return isSCSrcB16() || isLiteralImm(MVT::i16);
424   }
425 
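  // Some of the packed scalar-source predicates below are never expected to
  // be queried (presumably they exist only to satisfy the auto-generated
  // matcher), hence the llvm_unreachable.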
426   bool isSSrcV2B16() const {
427     llvm_unreachable("cannot happen");
428     return isSSrcB16();
429   }
430 
431   bool isSSrcB64() const {
432     // TODO: Find out how SALU supports extension of 32-bit literals to 64 bits.
433     // See isVSrc64().
434     return isSCSrcB64() || isLiteralImm(MVT::i64);
435   }
436 
437   bool isSSrcF32() const {
438     return isSCSrcB32() || isLiteralImm(MVT::f32) || isExpr();
439   }
440 
441   bool isSSrcF64() const {
442     return isSCSrcB64() || isLiteralImm(MVT::f64);
443   }
444 
445   bool isSSrcF16() const {
446     return isSCSrcB16() || isLiteralImm(MVT::f16);
447   }
448 
449   bool isSSrcV2F16() const {
450     llvm_unreachable("cannot happen");
451     return isSSrcF16();
452   }
453 
454   bool isSSrcV2FP32() const {
455     llvm_unreachable("cannot happen");
456     return isSSrcF32();
457   }
458 
459   bool isSCSrcV2FP32() const {
460     llvm_unreachable("cannot happen");
461     return isSCSrcF32();
462   }
463 
464   bool isSSrcV2INT32() const {
465     llvm_unreachable("cannot happen");
466     return isSSrcB32();
467   }
468 
469   bool isSCSrcV2INT32() const {
470     llvm_unreachable("cannot happen");
471     return isSCSrcB32();
472   }
473 
474   bool isSSrcOrLdsB32() const {
475     return isRegOrInlineNoMods(AMDGPU::SRegOrLds_32RegClassID, MVT::i32) ||
476            isLiteralImm(MVT::i32) || isExpr();
477   }
478 
479   bool isVCSrcB32() const {
480     return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i32);
481   }
482 
483   bool isVCSrcB64() const {
484     return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::i64);
485   }
486 
487   bool isVCSrcB16() const {
488     return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i16);
489   }
490 
491   bool isVCSrcV2B16() const {
492     return isVCSrcB16();
493   }
494 
495   bool isVCSrcF32() const {
496     return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f32);
497   }
498 
499   bool isVCSrcF64() const {
500     return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::f64);
501   }
502 
503   bool isVCSrcF16() const {
504     return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f16);
505   }
506 
507   bool isVCSrcV2F16() const {
508     return isVCSrcF16();
509   }
510 
511   bool isVSrcB32() const {
512     return isVCSrcF32() || isLiteralImm(MVT::i32) || isExpr();
513   }
514 
515   bool isVSrcB64() const {
516     return isVCSrcF64() || isLiteralImm(MVT::i64);
517   }
518 
519   bool isVSrcB16() const {
520     return isVCSrcB16() || isLiteralImm(MVT::i16);
521   }
522 
523   bool isVSrcV2B16() const {
524     return isVSrcB16() || isLiteralImm(MVT::v2i16);
525   }
526 
527   bool isVCSrcV2FP32() const {
528     return isVCSrcF64();
529   }
530 
531   bool isVSrcV2FP32() const {
532     return isVSrcF64() || isLiteralImm(MVT::v2f32);
533   }
534 
535   bool isVCSrcV2INT32() const {
536     return isVCSrcB64();
537   }
538 
539   bool isVSrcV2INT32() const {
540     return isVSrcB64() || isLiteralImm(MVT::v2i32);
541   }
542 
543   bool isVSrcF32() const {
544     return isVCSrcF32() || isLiteralImm(MVT::f32) || isExpr();
545   }
546 
547   bool isVSrcF64() const {
548     return isVCSrcF64() || isLiteralImm(MVT::f64);
549   }
550 
551   bool isVSrcF16() const {
552     return isVCSrcF16() || isLiteralImm(MVT::f16);
553   }
554 
555   bool isVSrcV2F16() const {
556     return isVSrcF16() || isLiteralImm(MVT::v2f16);
557   }
558 
559   bool isVISrcB32() const {
560     return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i32);
561   }
562 
563   bool isVISrcB16() const {
564     return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i16);
565   }
566 
567   bool isVISrcV2B16() const {
568     return isVISrcB16();
569   }
570 
571   bool isVISrcF32() const {
572     return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f32);
573   }
574 
575   bool isVISrcF16() const {
576     return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f16);
577   }
578 
579   bool isVISrcV2F16() const {
580     return isVISrcF16() || isVISrcB32();
581   }
582 
583   bool isVISrc_64B64() const {
584     return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i64);
585   }
586 
587   bool isVISrc_64F64() const {
588     return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f64);
589   }
590 
591   bool isVISrc_64V2FP32() const {
592     return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f32);
593   }
594 
595   bool isVISrc_64V2INT32() const {
596     return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i32);
597   }
598 
599   bool isVISrc_256B64() const {
600     return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i64);
601   }
602 
603   bool isVISrc_256F64() const {
604     return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f64);
605   }
606 
607   bool isVISrc_128B16() const {
608     return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i16);
609   }
610 
611   bool isVISrc_128V2B16() const {
612     return isVISrc_128B16();
613   }
614 
615   bool isVISrc_128B32() const {
616     return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i32);
617   }
618 
619   bool isVISrc_128F32() const {
620     return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f32);
621   }
622 
623   bool isVISrc_256V2FP32() const {
624     return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f32);
625   }
626 
627   bool isVISrc_256V2INT32() const {
628     return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i32);
629   }
630 
631   bool isVISrc_512B32() const {
632     return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i32);
633   }
634 
635   bool isVISrc_512B16() const {
636     return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i16);
637   }
638 
639   bool isVISrc_512V2B16() const {
640     return isVISrc_512B16();
641   }
642 
643   bool isVISrc_512F32() const {
644     return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f32);
645   }
646 
647   bool isVISrc_512F16() const {
648     return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f16);
649   }
650 
651   bool isVISrc_512V2F16() const {
652     return isVISrc_512F16() || isVISrc_512B32();
653   }
654 
655   bool isVISrc_1024B32() const {
656     return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i32);
657   }
658 
659   bool isVISrc_1024B16() const {
660     return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i16);
661   }
662 
663   bool isVISrc_1024V2B16() const {
664     return isVISrc_1024B16();
665   }
666 
667   bool isVISrc_1024F32() const {
668     return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f32);
669   }
670 
671   bool isVISrc_1024F16() const {
672     return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f16);
673   }
674 
675   bool isVISrc_1024V2F16() const {
676     return isVISrc_1024F16() || isVISrc_1024B32();
677   }
678 
679   bool isAISrcB32() const {
680     return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i32);
681   }
682 
683   bool isAISrcB16() const {
684     return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i16);
685   }
686 
687   bool isAISrcV2B16() const {
688     return isAISrcB16();
689   }
690 
691   bool isAISrcF32() const {
692     return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f32);
693   }
694 
695   bool isAISrcF16() const {
696     return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f16);
697   }
698 
699   bool isAISrcV2F16() const {
700     return isAISrcF16() || isAISrcB32();
701   }
702 
703   bool isAISrc_64B64() const {
704     return isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::i64);
705   }
706 
707   bool isAISrc_64F64() const {
708     return isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::f64);
709   }
710 
711   bool isAISrc_128B32() const {
712     return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i32);
713   }
714 
715   bool isAISrc_128B16() const {
716     return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i16);
717   }
718 
719   bool isAISrc_128V2B16() const {
720     return isAISrc_128B16();
721   }
722 
723   bool isAISrc_128F32() const {
724     return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f32);
725   }
726 
727   bool isAISrc_128F16() const {
728     return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f16);
729   }
730 
731   bool isAISrc_128V2F16() const {
732     return isAISrc_128F16() || isAISrc_128B32();
733   }
734 
735   bool isVISrc_128F16() const {
736     return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f16);
737   }
738 
739   bool isVISrc_128V2F16() const {
740     return isVISrc_128F16() || isVISrc_128B32();
741   }
742 
743   bool isAISrc_256B64() const {
744     return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::i64);
745   }
746 
747   bool isAISrc_256F64() const {
748     return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::f64);
749   }
750 
751   bool isAISrc_512B32() const {
752     return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i32);
753   }
754 
755   bool isAISrc_512B16() const {
756     return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i16);
757   }
758 
759   bool isAISrc_512V2B16() const {
760     return isAISrc_512B16();
761   }
762 
763   bool isAISrc_512F32() const {
764     return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f32);
765   }
766 
767   bool isAISrc_512F16() const {
768     return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f16);
769   }
770 
771   bool isAISrc_512V2F16() const {
772     return isAISrc_512F16() || isAISrc_512B32();
773   }
774 
775   bool isAISrc_1024B32() const {
776     return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i32);
777   }
778 
779   bool isAISrc_1024B16() const {
780     return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i16);
781   }
782 
783   bool isAISrc_1024V2B16() const {
784     return isAISrc_1024B16();
785   }
786 
787   bool isAISrc_1024F32() const {
788     return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f32);
789   }
790 
791   bool isAISrc_1024F16() const {
792     return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f16);
793   }
794 
795   bool isAISrc_1024V2F16() const {
796     return isAISrc_1024F16() || isAISrc_1024B32();
797   }
798 
799   bool isKImmFP32() const {
800     return isLiteralImm(MVT::f32);
801   }
802 
803   bool isKImmFP16() const {
804     return isLiteralImm(MVT::f16);
805   }
806 
807   bool isMem() const override {
808     return false;
809   }
810 
811   bool isExpr() const {
812     return Kind == Expression;
813   }
814 
815   bool isSoppBrTarget() const {
816     return isExpr() || isImm();
817   }
818 
819   bool isSWaitCnt() const;
820   bool isHwreg() const;
821   bool isSendMsg() const;
822   bool isSwizzle() const;
823   bool isSMRDOffset8() const;
824   bool isSMEMOffset() const;
825   bool isSMRDLiteralOffset() const;
826   bool isDPP8() const;
827   bool isDPPCtrl() const;
828   bool isBLGP() const;
829   bool isCBSZ() const;
830   bool isABID() const;
831   bool isGPRIdxMode() const;
832   bool isS16Imm() const;
833   bool isU16Imm() const;
834   bool isEndpgm() const;
835 
836   StringRef getExpressionAsToken() const {
837     assert(isExpr());
838     const MCSymbolRefExpr *S = cast<MCSymbolRefExpr>(Expr);
839     return S->getSymbol().getName();
840   }
841 
842   StringRef getToken() const {
843     assert(isToken());
844 
845     if (Kind == Expression)
846       return getExpressionAsToken();
847 
848     return StringRef(Tok.Data, Tok.Length);
849   }
850 
851   int64_t getImm() const {
852     assert(isImm());
853     return Imm.Val;
854   }
855 
856   void setImm(int64_t Val) {
857     assert(isImm());
858     Imm.Val = Val;
859   }
860 
861   ImmTy getImmTy() const {
862     assert(isImm());
863     return Imm.Type;
864   }
865 
866   unsigned getReg() const override {
867     assert(isRegKind());
868     return Reg.RegNo;
869   }
870 
871   SMLoc getStartLoc() const override {
872     return StartLoc;
873   }
874 
875   SMLoc getEndLoc() const override {
876     return EndLoc;
877   }
878 
879   SMRange getLocRange() const {
880     return SMRange(StartLoc, EndLoc);
881   }
882 
883   Modifiers getModifiers() const {
884     assert(isRegKind() || isImmTy(ImmTyNone));
885     return isRegKind() ? Reg.Mods : Imm.Mods;
886   }
887 
888   void setModifiers(Modifiers Mods) {
889     assert(isRegKind() || isImmTy(ImmTyNone));
890     if (isRegKind())
891       Reg.Mods = Mods;
892     else
893       Imm.Mods = Mods;
894   }
895 
896   bool hasModifiers() const {
897     return getModifiers().hasModifiers();
898   }
899 
900   bool hasFPModifiers() const {
901     return getModifiers().hasFPModifiers();
902   }
903 
904   bool hasIntModifiers() const {
905     return getModifiers().hasIntModifiers();
906   }
907 
908   uint64_t applyInputFPModifiers(uint64_t Val, unsigned Size) const;
909 
910   void addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers = true) const;
911 
912   void addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const;
913 
914   template <unsigned Bitwidth>
915   void addKImmFPOperands(MCInst &Inst, unsigned N) const;
916 
917   void addKImmFP16Operands(MCInst &Inst, unsigned N) const {
918     addKImmFPOperands<16>(Inst, N);
919   }
920 
921   void addKImmFP32Operands(MCInst &Inst, unsigned N) const {
922     addKImmFPOperands<32>(Inst, N);
923   }
924 
925   void addRegOperands(MCInst &Inst, unsigned N) const;
926 
927   void addBoolRegOperands(MCInst &Inst, unsigned N) const {
928     addRegOperands(Inst, N);
929   }
930 
931   void addRegOrImmOperands(MCInst &Inst, unsigned N) const {
932     if (isRegKind())
933       addRegOperands(Inst, N);
934     else if (isExpr())
935       Inst.addOperand(MCOperand::createExpr(Expr));
936     else
937       addImmOperands(Inst, N);
938   }
939 
940   void addRegOrImmWithInputModsOperands(MCInst &Inst, unsigned N) const {
941     Modifiers Mods = getModifiers();
942     Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
943     if (isRegKind()) {
944       addRegOperands(Inst, N);
945     } else {
946       addImmOperands(Inst, N, false);
947     }
948   }
949 
950   void addRegOrImmWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
951     assert(!hasIntModifiers());
952     addRegOrImmWithInputModsOperands(Inst, N);
953   }
954 
955   void addRegOrImmWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
956     assert(!hasFPModifiers());
957     addRegOrImmWithInputModsOperands(Inst, N);
958   }
959 
960   void addRegWithInputModsOperands(MCInst &Inst, unsigned N) const {
961     Modifiers Mods = getModifiers();
962     Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
963     assert(isRegKind());
964     addRegOperands(Inst, N);
965   }
966 
967   void addRegWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
968     assert(!hasIntModifiers());
969     addRegWithInputModsOperands(Inst, N);
970   }
971 
972   void addRegWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
973     assert(!hasFPModifiers());
974     addRegWithInputModsOperands(Inst, N);
975   }
976 
977   void addSoppBrTargetOperands(MCInst &Inst, unsigned N) const {
978     if (isImm())
979       addImmOperands(Inst, N);
980     else {
981       assert(isExpr());
982       Inst.addOperand(MCOperand::createExpr(Expr));
983     }
984   }
985 
986   static void printImmTy(raw_ostream& OS, ImmTy Type) {
987     switch (Type) {
988     case ImmTyNone: OS << "None"; break;
989     case ImmTyGDS: OS << "GDS"; break;
990     case ImmTyLDS: OS << "LDS"; break;
991     case ImmTyOffen: OS << "Offen"; break;
992     case ImmTyIdxen: OS << "Idxen"; break;
993     case ImmTyAddr64: OS << "Addr64"; break;
994     case ImmTyOffset: OS << "Offset"; break;
995     case ImmTyInstOffset: OS << "InstOffset"; break;
996     case ImmTyOffset0: OS << "Offset0"; break;
997     case ImmTyOffset1: OS << "Offset1"; break;
998     case ImmTyCPol: OS << "CPol"; break;
999     case ImmTySWZ: OS << "SWZ"; break;
1000     case ImmTyTFE: OS << "TFE"; break;
1001     case ImmTyD16: OS << "D16"; break;
1002     case ImmTyFORMAT: OS << "FORMAT"; break;
1003     case ImmTyClampSI: OS << "ClampSI"; break;
1004     case ImmTyOModSI: OS << "OModSI"; break;
1005     case ImmTyDPP8: OS << "DPP8"; break;
1006     case ImmTyDppCtrl: OS << "DppCtrl"; break;
1007     case ImmTyDppRowMask: OS << "DppRowMask"; break;
1008     case ImmTyDppBankMask: OS << "DppBankMask"; break;
1009     case ImmTyDppBoundCtrl: OS << "DppBoundCtrl"; break;
1010     case ImmTyDppFi: OS << "FI"; break;
1011     case ImmTySdwaDstSel: OS << "SdwaDstSel"; break;
1012     case ImmTySdwaSrc0Sel: OS << "SdwaSrc0Sel"; break;
1013     case ImmTySdwaSrc1Sel: OS << "SdwaSrc1Sel"; break;
1014     case ImmTySdwaDstUnused: OS << "SdwaDstUnused"; break;
1015     case ImmTyDMask: OS << "DMask"; break;
1016     case ImmTyDim: OS << "Dim"; break;
1017     case ImmTyUNorm: OS << "UNorm"; break;
1018     case ImmTyDA: OS << "DA"; break;
1019     case ImmTyR128A16: OS << "R128A16"; break;
1020     case ImmTyA16: OS << "A16"; break;
1021     case ImmTyLWE: OS << "LWE"; break;
1022     case ImmTyOff: OS << "Off"; break;
1023     case ImmTyExpTgt: OS << "ExpTgt"; break;
1024     case ImmTyExpCompr: OS << "ExpCompr"; break;
1025     case ImmTyExpVM: OS << "ExpVM"; break;
1026     case ImmTyHwreg: OS << "Hwreg"; break;
1027     case ImmTySendMsg: OS << "SendMsg"; break;
1028     case ImmTyInterpSlot: OS << "InterpSlot"; break;
1029     case ImmTyInterpAttr: OS << "InterpAttr"; break;
1030     case ImmTyAttrChan: OS << "AttrChan"; break;
1031     case ImmTyOpSel: OS << "OpSel"; break;
1032     case ImmTyOpSelHi: OS << "OpSelHi"; break;
1033     case ImmTyNegLo: OS << "NegLo"; break;
1034     case ImmTyNegHi: OS << "NegHi"; break;
1035     case ImmTySwizzle: OS << "Swizzle"; break;
1036     case ImmTyGprIdxMode: OS << "GprIdxMode"; break;
1037     case ImmTyHigh: OS << "High"; break;
1038     case ImmTyBLGP: OS << "BLGP"; break;
1039     case ImmTyCBSZ: OS << "CBSZ"; break;
1040     case ImmTyABID: OS << "ABID"; break;
1041     case ImmTyEndpgm: OS << "Endpgm"; break;
1042     }
1043   }
1044 
1045   void print(raw_ostream &OS) const override {
1046     switch (Kind) {
1047     case Register:
1048       OS << "<register " << getReg() << " mods: " << Reg.Mods << '>';
1049       break;
1050     case Immediate:
1051       OS << '<' << getImm();
1052       if (getImmTy() != ImmTyNone) {
1053         OS << " type: "; printImmTy(OS, getImmTy());
1054       }
1055       OS << " mods: " << Imm.Mods << '>';
1056       break;
1057     case Token:
1058       OS << '\'' << getToken() << '\'';
1059       break;
1060     case Expression:
1061       OS << "<expr " << *Expr << '>';
1062       break;
1063     }
1064   }
1065 
1066   static AMDGPUOperand::Ptr CreateImm(const AMDGPUAsmParser *AsmParser,
1067                                       int64_t Val, SMLoc Loc,
1068                                       ImmTy Type = ImmTyNone,
1069                                       bool IsFPImm = false) {
1070     auto Op = std::make_unique<AMDGPUOperand>(Immediate, AsmParser);
1071     Op->Imm.Val = Val;
1072     Op->Imm.IsFPImm = IsFPImm;
1073     Op->Imm.Kind = ImmKindTyNone;
1074     Op->Imm.Type = Type;
1075     Op->Imm.Mods = Modifiers();
1076     Op->StartLoc = Loc;
1077     Op->EndLoc = Loc;
1078     return Op;
1079   }
1080 
1081   static AMDGPUOperand::Ptr CreateToken(const AMDGPUAsmParser *AsmParser,
1082                                         StringRef Str, SMLoc Loc,
1083                                         bool HasExplicitEncodingSize = true) {
1084     auto Res = std::make_unique<AMDGPUOperand>(Token, AsmParser);
1085     Res->Tok.Data = Str.data();
1086     Res->Tok.Length = Str.size();
1087     Res->StartLoc = Loc;
1088     Res->EndLoc = Loc;
1089     return Res;
1090   }
1091 
1092   static AMDGPUOperand::Ptr CreateReg(const AMDGPUAsmParser *AsmParser,
1093                                       unsigned RegNo, SMLoc S,
1094                                       SMLoc E) {
1095     auto Op = std::make_unique<AMDGPUOperand>(Register, AsmParser);
1096     Op->Reg.RegNo = RegNo;
1097     Op->Reg.Mods = Modifiers();
1098     Op->StartLoc = S;
1099     Op->EndLoc = E;
1100     return Op;
1101   }
1102 
1103   static AMDGPUOperand::Ptr CreateExpr(const AMDGPUAsmParser *AsmParser,
1104                                        const class MCExpr *Expr, SMLoc S) {
1105     auto Op = std::make_unique<AMDGPUOperand>(Expression, AsmParser);
1106     Op->Expr = Expr;
1107     Op->StartLoc = S;
1108     Op->EndLoc = S;
1109     return Op;
1110   }
1111 };
1112 
1113 raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods) {
1114   OS << "abs:" << Mods.Abs << " neg:" << Mods.Neg << " sext:" << Mods.Sext;
1115   return OS;
1116 }
1117 
1118 //===----------------------------------------------------------------------===//
1119 // AsmParser
1120 //===----------------------------------------------------------------------===//
1121 
1122 // Holds info related to the current kernel, e.g. the count of SGPRs used.
1123 // A kernel scope begins at the .amdgpu_hsa_kernel directive and ends at the
1124 // next .amdgpu_hsa_kernel directive or at EOF.
1125 class KernelScopeInfo {
1126   int SgprIndexUnusedMin = -1;
1127   int VgprIndexUnusedMin = -1;
1128   int AgprIndexUnusedMin = -1;
1129   MCContext *Ctx = nullptr;
1130   MCSubtargetInfo const *MSTI = nullptr;
1131 
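  // Record that SGPR index \p i is used and update the .kernel.sgpr_count
  // symbol, which holds a count (highest used index + 1).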
1132   void usesSgprAt(int i) {
1133     if (i >= SgprIndexUnusedMin) {
1134       SgprIndexUnusedMin = ++i;
1135       if (Ctx) {
1136         MCSymbol* const Sym =
1137           Ctx->getOrCreateSymbol(Twine(".kernel.sgpr_count"));
1138         Sym->setVariableValue(MCConstantExpr::create(SgprIndexUnusedMin, *Ctx));
1139       }
1140     }
1141   }
1142 
1143   void usesVgprAt(int i) {
1144     if (i >= VgprIndexUnusedMin) {
1145       VgprIndexUnusedMin = ++i;
1146       if (Ctx) {
1147         MCSymbol* const Sym =
1148           Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count"));
1149         int totalVGPR = getTotalNumVGPRs(isGFX90A(*MSTI), AgprIndexUnusedMin,
1150                                          VgprIndexUnusedMin);
1151         Sym->setVariableValue(MCConstantExpr::create(totalVGPR, *Ctx));
1152       }
1153     }
1154   }
1155 
1156   void usesAgprAt(int i) {
1157     // Instruction will error in AMDGPUAsmParser::MatchAndEmitInstruction
1158     if (!hasMAIInsts(*MSTI))
1159       return;
1160 
1161     if (i >= AgprIndexUnusedMin) {
1162       AgprIndexUnusedMin = ++i;
1163       if (Ctx) {
1164         MCSymbol* const Sym =
1165           Ctx->getOrCreateSymbol(Twine(".kernel.agpr_count"));
1166         Sym->setVariableValue(MCConstantExpr::create(AgprIndexUnusedMin, *Ctx));
1167 
1168         // Also update vgpr_count (dependent on agpr_count for gfx908/gfx90a)
1169         MCSymbol* const vSym =
1170           Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count"));
1171         int totalVGPR = getTotalNumVGPRs(isGFX90A(*MSTI), AgprIndexUnusedMin,
1172                                          VgprIndexUnusedMin);
1173         vSym->setVariableValue(MCConstantExpr::create(totalVGPR, *Ctx));
1174       }
1175     }
1176   }
1177 
1178 public:
1179   KernelScopeInfo() = default;
1180 
1181   void initialize(MCContext &Context) {
1182     Ctx = &Context;
1183     MSTI = Ctx->getSubtargetInfo();
1184 
1185     usesSgprAt(SgprIndexUnusedMin = -1);
1186     usesVgprAt(VgprIndexUnusedMin = -1);
1187     if (hasMAIInsts(*MSTI)) {
1188       usesAgprAt(AgprIndexUnusedMin = -1);
1189     }
1190   }
1191 
1192   void usesRegister(RegisterKind RegKind, unsigned DwordRegIndex, unsigned RegWidth) {
1193     switch (RegKind) {
1194       case IS_SGPR: usesSgprAt(DwordRegIndex + RegWidth - 1); break;
1195       case IS_AGPR: usesAgprAt(DwordRegIndex + RegWidth - 1); break;
1196       case IS_VGPR: usesVgprAt(DwordRegIndex + RegWidth - 1); break;
1197       default: break;
1198     }
1199   }
1200 };
1201 
1202 class AMDGPUAsmParser : public MCTargetAsmParser {
1203   MCAsmParser &Parser;
1204 
1205   // Maximum number of extra operands parsed after the first optional operand.
1206   // This may be necessary to skip hardcoded mandatory operands.
1207   static const unsigned MAX_OPR_LOOKAHEAD = 8;
1208 
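  // Encoding forced by an explicit mnemonic suffix such as _e32/_e64, _dpp
  // or _sdwa.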
1209   unsigned ForcedEncodingSize = 0;
1210   bool ForcedDPP = false;
1211   bool ForcedSDWA = false;
1212   KernelScopeInfo KernelScope;
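  // Cache-policy (cpol) modifier bits already parsed for the current
  // instruction; used to diagnose duplicated modifiers.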
1213   unsigned CPolSeen;
1214 
1215   /// @name Auto-generated Match Functions
1216   /// {
1217 
1218 #define GET_ASSEMBLER_HEADER
1219 #include "AMDGPUGenAsmMatcher.inc"
1220 
1221   /// }
1222 
1223 private:
1224   bool ParseAsAbsoluteExpression(uint32_t &Ret);
1225   bool OutOfRangeError(SMRange Range);
1226   /// Calculate VGPR/SGPR blocks required for given target, reserved
1227   /// registers, and user-specified NextFreeXGPR values.
1228   ///
1229   /// \param Features [in] Target features, used for bug corrections.
1230   /// \param VCCUsed [in] Whether VCC special SGPR is reserved.
1231   /// \param FlatScrUsed [in] Whether FLAT_SCRATCH special SGPR is reserved.
1232   /// \param XNACKUsed [in] Whether XNACK_MASK special SGPR is reserved.
1233   /// \param EnableWavefrontSize32 [in] Value of ENABLE_WAVEFRONT_SIZE32 kernel
1234   /// descriptor field, if valid.
1235   /// \param NextFreeVGPR [in] Max VGPR number referenced, plus one.
1236   /// \param VGPRRange [in] Token range, used for VGPR diagnostics.
1237   /// \param NextFreeSGPR [in] Max SGPR number referenced, plus one.
1238   /// \param SGPRRange [in] Token range, used for SGPR diagnostics.
1239   /// \param VGPRBlocks [out] Result VGPR block count.
1240   /// \param SGPRBlocks [out] Result SGPR block count.
1241   bool calculateGPRBlocks(const FeatureBitset &Features, bool VCCUsed,
1242                           bool FlatScrUsed, bool XNACKUsed,
1243                           Optional<bool> EnableWavefrontSize32, unsigned NextFreeVGPR,
1244                           SMRange VGPRRange, unsigned NextFreeSGPR,
1245                           SMRange SGPRRange, unsigned &VGPRBlocks,
1246                           unsigned &SGPRBlocks);
1247   bool ParseDirectiveAMDGCNTarget();
1248   bool ParseDirectiveAMDHSAKernel();
1249   bool ParseDirectiveMajorMinor(uint32_t &Major, uint32_t &Minor);
1250   bool ParseDirectiveHSACodeObjectVersion();
1251   bool ParseDirectiveHSACodeObjectISA();
1252   bool ParseAMDKernelCodeTValue(StringRef ID, amd_kernel_code_t &Header);
1253   bool ParseDirectiveAMDKernelCodeT();
1254   // TODO: Possibly make subtargetHasRegister const.
1255   bool subtargetHasRegister(const MCRegisterInfo &MRI, unsigned RegNo);
1256   bool ParseDirectiveAMDGPUHsaKernel();
1257 
1258   bool ParseDirectiveISAVersion();
1259   bool ParseDirectiveHSAMetadata();
1260   bool ParseDirectivePALMetadataBegin();
1261   bool ParseDirectivePALMetadata();
1262   bool ParseDirectiveAMDGPULDS();
1263 
1264   /// Common code to parse out a block of text (typically YAML) between start and
1265   /// end directives.
1266   bool ParseToEndDirective(const char *AssemblerDirectiveBegin,
1267                            const char *AssemblerDirectiveEnd,
1268                            std::string &CollectString);
1269 
1270   bool AddNextRegisterToList(unsigned& Reg, unsigned& RegWidth,
1271                              RegisterKind RegKind, unsigned Reg1, SMLoc Loc);
1272   bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
1273                            unsigned &RegNum, unsigned &RegWidth,
1274                            bool RestoreOnFailure = false);
1275   bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
1276                            unsigned &RegNum, unsigned &RegWidth,
1277                            SmallVectorImpl<AsmToken> &Tokens);
1278   unsigned ParseRegularReg(RegisterKind &RegKind, unsigned &RegNum,
1279                            unsigned &RegWidth,
1280                            SmallVectorImpl<AsmToken> &Tokens);
1281   unsigned ParseSpecialReg(RegisterKind &RegKind, unsigned &RegNum,
1282                            unsigned &RegWidth,
1283                            SmallVectorImpl<AsmToken> &Tokens);
1284   unsigned ParseRegList(RegisterKind &RegKind, unsigned &RegNum,
1285                         unsigned &RegWidth, SmallVectorImpl<AsmToken> &Tokens);
1286   bool ParseRegRange(unsigned& Num, unsigned& Width);
1287   unsigned getRegularReg(RegisterKind RegKind,
1288                          unsigned RegNum,
1289                          unsigned RegWidth,
1290                          SMLoc Loc);
1291 
1292   bool isRegister();
1293   bool isRegister(const AsmToken &Token, const AsmToken &NextToken) const;
1294   Optional<StringRef> getGprCountSymbolName(RegisterKind RegKind);
1295   void initializeGprCountSymbol(RegisterKind RegKind);
1296   bool updateGprCountSymbols(RegisterKind RegKind, unsigned DwordRegIndex,
1297                              unsigned RegWidth);
1298   void cvtMubufImpl(MCInst &Inst, const OperandVector &Operands,
1299                     bool IsAtomic, bool IsLds = false);
1300   void cvtDSImpl(MCInst &Inst, const OperandVector &Operands,
1301                  bool IsGdsHardcoded);
1302 
1303 public:
1304   enum AMDGPUMatchResultTy {
1305     Match_PreferE32 = FIRST_TARGET_MATCH_RESULT_TY
1306   };
1307   enum OperandMode {
1308     OperandMode_Default,
1309     OperandMode_NSA,
1310   };
1311 
1312   using OptionalImmIndexMap = std::map<AMDGPUOperand::ImmTy, unsigned>;
1313 
1314   AMDGPUAsmParser(const MCSubtargetInfo &STI, MCAsmParser &_Parser,
1315                const MCInstrInfo &MII,
1316                const MCTargetOptions &Options)
1317       : MCTargetAsmParser(Options, STI, MII), Parser(_Parser) {
1318     MCAsmParserExtension::Initialize(Parser);
1319 
1320     if (getFeatureBits().none()) {
1321       // Set default features.
1322       copySTI().ToggleFeature("southern-islands");
1323     }
1324 
1325     setAvailableFeatures(ComputeAvailableFeatures(getFeatureBits()));
1326 
1327     {
1328       // TODO: make these pre-defined variables read-only.
1329       // Currently there is no suitable machinery in core llvm-mc for this.
1330       // MCSymbol::isRedefinable is intended for another purpose, and
1331       // AsmParser::parseDirectiveSet() cannot be specialized for a specific target.
1332       AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
1333       MCContext &Ctx = getContext();
1334       if (ISA.Major >= 6 && isHsaAbiVersion3AndAbove(&getSTI())) {
1335         MCSymbol *Sym =
1336             Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_number"));
1337         Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
1338         Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_minor"));
1339         Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
1340         Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_stepping"));
1341         Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
1342       } else {
1343         MCSymbol *Sym =
1344             Ctx.getOrCreateSymbol(Twine(".option.machine_version_major"));
1345         Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
1346         Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_minor"));
1347         Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
1348         Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_stepping"));
1349         Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
1350       }
1351       if (ISA.Major >= 6 && isHsaAbiVersion3AndAbove(&getSTI())) {
1352         initializeGprCountSymbol(IS_VGPR);
1353         initializeGprCountSymbol(IS_SGPR);
1354       } else
1355         KernelScope.initialize(getContext());
1356     }
1357   }
1358 
1359   bool hasMIMG_R128() const {
1360     return AMDGPU::hasMIMG_R128(getSTI());
1361   }
1362 
1363   bool hasPackedD16() const {
1364     return AMDGPU::hasPackedD16(getSTI());
1365   }
1366 
1367   bool hasGFX10A16() const {
1368     return AMDGPU::hasGFX10A16(getSTI());
1369   }
1370 
1371   bool hasG16() const { return AMDGPU::hasG16(getSTI()); }
1372 
1373   bool isSI() const {
1374     return AMDGPU::isSI(getSTI());
1375   }
1376 
1377   bool isCI() const {
1378     return AMDGPU::isCI(getSTI());
1379   }
1380 
1381   bool isVI() const {
1382     return AMDGPU::isVI(getSTI());
1383   }
1384 
1385   bool isGFX9() const {
1386     return AMDGPU::isGFX9(getSTI());
1387   }
1388 
1389   // TODO: isGFX90A is also true for GFX940. We need to clean this up.
1390   bool isGFX90A() const {
1391     return AMDGPU::isGFX90A(getSTI());
1392   }
1393 
1394   bool isGFX940() const {
1395     return AMDGPU::isGFX940(getSTI());
1396   }
1397 
1398   bool isGFX9Plus() const {
1399     return AMDGPU::isGFX9Plus(getSTI());
1400   }
1401 
1402   bool isGFX10() const {
1403     return AMDGPU::isGFX10(getSTI());
1404   }
1405 
1406   bool isGFX10Plus() const { return AMDGPU::isGFX10Plus(getSTI()); }
1407 
1408   bool isGFX10_BEncoding() const {
1409     return AMDGPU::isGFX10_BEncoding(getSTI());
1410   }
1411 
1412   bool hasInv2PiInlineImm() const {
1413     return getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm];
1414   }
1415 
1416   bool hasFlatOffsets() const {
1417     return getFeatureBits()[AMDGPU::FeatureFlatInstOffsets];
1418   }
1419 
1420   bool hasArchitectedFlatScratch() const {
1421     return getFeatureBits()[AMDGPU::FeatureArchitectedFlatScratch];
1422   }
1423 
1424   bool hasSGPR102_SGPR103() const {
1425     return !isVI() && !isGFX9();
1426   }
1427 
1428   bool hasSGPR104_SGPR105() const { return isGFX10Plus(); }
1429 
1430   bool hasIntClamp() const {
1431     return getFeatureBits()[AMDGPU::FeatureIntClamp];
1432   }
1433 
1434   AMDGPUTargetStreamer &getTargetStreamer() {
1435     MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer();
1436     return static_cast<AMDGPUTargetStreamer &>(TS);
1437   }
1438 
1439   const MCRegisterInfo *getMRI() const {
1440     // We need this const_cast because for some reason getContext() is not const
1441     // in MCAsmParser.
1442     return const_cast<AMDGPUAsmParser*>(this)->getContext().getRegisterInfo();
1443   }
1444 
1445   const MCInstrInfo *getMII() const {
1446     return &MII;
1447   }
1448 
1449   const FeatureBitset &getFeatureBits() const {
1450     return getSTI().getFeatureBits();
1451   }
1452 
1453   void setForcedEncodingSize(unsigned Size) { ForcedEncodingSize = Size; }
1454   void setForcedDPP(bool ForceDPP_) { ForcedDPP = ForceDPP_; }
1455   void setForcedSDWA(bool ForceSDWA_) { ForcedSDWA = ForceSDWA_; }
1456 
1457   unsigned getForcedEncodingSize() const { return ForcedEncodingSize; }
1458   bool isForcedVOP3() const { return ForcedEncodingSize == 64; }
1459   bool isForcedDPP() const { return ForcedDPP; }
1460   bool isForcedSDWA() const { return ForcedSDWA; }
1461   ArrayRef<unsigned> getMatchedVariants() const;
1462   StringRef getMatchedVariantName() const;
1463 
1464   std::unique_ptr<AMDGPUOperand> parseRegister(bool RestoreOnFailure = false);
1465   bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc,
1466                      bool RestoreOnFailure);
1467   bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override;
1468   OperandMatchResultTy tryParseRegister(unsigned &RegNo, SMLoc &StartLoc,
1469                                         SMLoc &EndLoc) override;
1470   unsigned checkTargetMatchPredicate(MCInst &Inst) override;
1471   unsigned validateTargetOperandClass(MCParsedAsmOperand &Op,
1472                                       unsigned Kind) override;
1473   bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
1474                                OperandVector &Operands, MCStreamer &Out,
1475                                uint64_t &ErrorInfo,
1476                                bool MatchingInlineAsm) override;
1477   bool ParseDirective(AsmToken DirectiveID) override;
1478   OperandMatchResultTy parseOperand(OperandVector &Operands, StringRef Mnemonic,
1479                                     OperandMode Mode = OperandMode_Default);
1480   StringRef parseMnemonicSuffix(StringRef Name);
1481   bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
1482                         SMLoc NameLoc, OperandVector &Operands) override;
1483   //bool ProcessInstruction(MCInst &Inst);
1484 
1485   OperandMatchResultTy parseIntWithPrefix(const char *Prefix, int64_t &Int);
1486 
1487   OperandMatchResultTy
1488   parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
1489                      AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
1490                      bool (*ConvertResult)(int64_t &) = nullptr);
1491 
1492   OperandMatchResultTy
1493   parseOperandArrayWithPrefix(const char *Prefix,
1494                               OperandVector &Operands,
1495                               AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
1496                               bool (*ConvertResult)(int64_t&) = nullptr);
1497 
1498   OperandMatchResultTy
1499   parseNamedBit(StringRef Name, OperandVector &Operands,
1500                 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone);
1501   OperandMatchResultTy parseCPol(OperandVector &Operands);
1502   OperandMatchResultTy parseStringWithPrefix(StringRef Prefix,
1503                                              StringRef &Value,
1504                                              SMLoc &StringLoc);
1505 
1506   bool isModifier();
1507   bool isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1508   bool isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1509   bool isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1510   bool isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const;
1511   bool parseSP3NegModifier();
1512   OperandMatchResultTy parseImm(OperandVector &Operands, bool HasSP3AbsModifier = false);
1513   OperandMatchResultTy parseReg(OperandVector &Operands);
1514   OperandMatchResultTy parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod = false);
1515   OperandMatchResultTy parseRegOrImmWithFPInputMods(OperandVector &Operands, bool AllowImm = true);
1516   OperandMatchResultTy parseRegOrImmWithIntInputMods(OperandVector &Operands, bool AllowImm = true);
1517   OperandMatchResultTy parseRegWithFPInputMods(OperandVector &Operands);
1518   OperandMatchResultTy parseRegWithIntInputMods(OperandVector &Operands);
1519   OperandMatchResultTy parseVReg32OrOff(OperandVector &Operands);
1520   OperandMatchResultTy parseDfmtNfmt(int64_t &Format);
1521   OperandMatchResultTy parseUfmt(int64_t &Format);
1522   OperandMatchResultTy parseSymbolicSplitFormat(StringRef FormatStr, SMLoc Loc, int64_t &Format);
1523   OperandMatchResultTy parseSymbolicUnifiedFormat(StringRef FormatStr, SMLoc Loc, int64_t &Format);
1524   OperandMatchResultTy parseFORMAT(OperandVector &Operands);
1525   OperandMatchResultTy parseSymbolicOrNumericFormat(int64_t &Format);
1526   OperandMatchResultTy parseNumericFormat(int64_t &Format);
1527   bool tryParseFmt(const char *Pref, int64_t MaxVal, int64_t &Val);
1528   bool matchDfmtNfmt(int64_t &Dfmt, int64_t &Nfmt, StringRef FormatStr, SMLoc Loc);
1529 
1530   void cvtDSOffset01(MCInst &Inst, const OperandVector &Operands);
1531   void cvtDS(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, false); }
1532   void cvtDSGds(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, true); }
1533   void cvtExp(MCInst &Inst, const OperandVector &Operands);
1534 
1535   bool parseCnt(int64_t &IntVal);
1536   OperandMatchResultTy parseSWaitCntOps(OperandVector &Operands);
1537   OperandMatchResultTy parseHwreg(OperandVector &Operands);
1538 
1539 private:
1540   struct OperandInfoTy {
1541     SMLoc Loc;
1542     int64_t Id;
1543     bool IsSymbolic = false;
1544     bool IsDefined = false;
1545     StringRef Name;
1546 
1547     OperandInfoTy(int64_t Id_) : Id(Id_) {}
1548   };
1549 
1550   bool parseSendMsgBody(OperandInfoTy &Msg, OperandInfoTy &Op, OperandInfoTy &Stream);
1551   bool validateSendMsg(const OperandInfoTy &Msg,
1552                        const OperandInfoTy &Op,
1553                        const OperandInfoTy &Stream);
1554 
1555   bool parseHwregBody(OperandInfoTy &HwReg,
1556                       OperandInfoTy &Offset,
1557                       OperandInfoTy &Width);
1558   bool validateHwreg(const OperandInfoTy &HwReg,
1559                      const OperandInfoTy &Offset,
1560                      const OperandInfoTy &Width);
1561 
1562   SMLoc getFlatOffsetLoc(const OperandVector &Operands) const;
1563   SMLoc getSMEMOffsetLoc(const OperandVector &Operands) const;
1564 
1565   SMLoc getOperandLoc(std::function<bool(const AMDGPUOperand&)> Test,
1566                       const OperandVector &Operands) const;
1567   SMLoc getImmLoc(AMDGPUOperand::ImmTy Type, const OperandVector &Operands) const;
1568   SMLoc getRegLoc(unsigned Reg, const OperandVector &Operands) const;
1569   SMLoc getLitLoc(const OperandVector &Operands) const;
1570   SMLoc getConstLoc(const OperandVector &Operands) const;
1571 
1572   bool validateInstruction(const MCInst &Inst, const SMLoc &IDLoc, const OperandVector &Operands);
1573   bool validateFlatOffset(const MCInst &Inst, const OperandVector &Operands);
1574   bool validateSMEMOffset(const MCInst &Inst, const OperandVector &Operands);
1575   bool validateSOPLiteral(const MCInst &Inst) const;
1576   bool validateConstantBusLimitations(const MCInst &Inst, const OperandVector &Operands);
1577   bool validateEarlyClobberLimitations(const MCInst &Inst, const OperandVector &Operands);
1578   bool validateIntClampSupported(const MCInst &Inst);
1579   bool validateMIMGAtomicDMask(const MCInst &Inst);
1580   bool validateMIMGGatherDMask(const MCInst &Inst);
1581   bool validateMovrels(const MCInst &Inst, const OperandVector &Operands);
1582   bool validateMIMGDataSize(const MCInst &Inst);
1583   bool validateMIMGAddrSize(const MCInst &Inst);
1584   bool validateMIMGD16(const MCInst &Inst);
1585   bool validateMIMGDim(const MCInst &Inst);
1586   bool validateMIMGMSAA(const MCInst &Inst);
1587   bool validateOpSel(const MCInst &Inst);
1588   bool validateDPP(const MCInst &Inst, const OperandVector &Operands);
1589   bool validateVccOperand(unsigned Reg) const;
1590   bool validateVOPLiteral(const MCInst &Inst, const OperandVector &Operands);
1591   bool validateMAIAccWrite(const MCInst &Inst, const OperandVector &Operands);
1592   bool validateMFMA(const MCInst &Inst, const OperandVector &Operands);
1593   bool validateAGPRLdSt(const MCInst &Inst) const;
1594   bool validateVGPRAlign(const MCInst &Inst) const;
1595   bool validateGWS(const MCInst &Inst, const OperandVector &Operands);
1596   bool validateDivScale(const MCInst &Inst);
1597   bool validateCoherencyBits(const MCInst &Inst, const OperandVector &Operands,
1598                              const SMLoc &IDLoc);
1599   Optional<StringRef> validateLdsDirect(const MCInst &Inst);
1600   unsigned getConstantBusLimit(unsigned Opcode) const;
1601   bool usesConstantBus(const MCInst &Inst, unsigned OpIdx);
1602   bool isInlineConstant(const MCInst &Inst, unsigned OpIdx) const;
1603   unsigned findImplicitSGPRReadInVOP(const MCInst &Inst) const;
1604 
1605   bool isSupportedMnemo(StringRef Mnemo,
1606                         const FeatureBitset &FBS);
1607   bool isSupportedMnemo(StringRef Mnemo,
1608                         const FeatureBitset &FBS,
1609                         ArrayRef<unsigned> Variants);
1610   bool checkUnsupportedInstruction(StringRef Name, const SMLoc &IDLoc);
1611 
1612   bool isId(const StringRef Id) const;
1613   bool isId(const AsmToken &Token, const StringRef Id) const;
1614   bool isToken(const AsmToken::TokenKind Kind) const;
1615   bool trySkipId(const StringRef Id);
1616   bool trySkipId(const StringRef Pref, const StringRef Id);
1617   bool trySkipId(const StringRef Id, const AsmToken::TokenKind Kind);
1618   bool trySkipToken(const AsmToken::TokenKind Kind);
1619   bool skipToken(const AsmToken::TokenKind Kind, const StringRef ErrMsg);
1620   bool parseString(StringRef &Val, const StringRef ErrMsg = "expected a string");
1621   bool parseId(StringRef &Val, const StringRef ErrMsg = "");
1622 
1623   void peekTokens(MutableArrayRef<AsmToken> Tokens);
1624   AsmToken::TokenKind getTokenKind() const;
1625   bool parseExpr(int64_t &Imm, StringRef Expected = "");
1626   bool parseExpr(OperandVector &Operands);
1627   StringRef getTokenStr() const;
1628   AsmToken peekToken();
1629   AsmToken getToken() const;
1630   SMLoc getLoc() const;
1631   void lex();
1632 
1633 public:
1634   void onBeginOfFile() override;
1635 
1636   OperandMatchResultTy parseOptionalOperand(OperandVector &Operands);
1637   OperandMatchResultTy parseOptionalOpr(OperandVector &Operands);
1638 
1639   OperandMatchResultTy parseExpTgt(OperandVector &Operands);
1640   OperandMatchResultTy parseSendMsgOp(OperandVector &Operands);
1641   OperandMatchResultTy parseInterpSlot(OperandVector &Operands);
1642   OperandMatchResultTy parseInterpAttr(OperandVector &Operands);
1643   OperandMatchResultTy parseSOppBrTarget(OperandVector &Operands);
1644   OperandMatchResultTy parseBoolReg(OperandVector &Operands);
1645 
1646   bool parseSwizzleOperand(int64_t &Op,
1647                            const unsigned MinVal,
1648                            const unsigned MaxVal,
1649                            const StringRef ErrMsg,
1650                            SMLoc &Loc);
1651   bool parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
1652                             const unsigned MinVal,
1653                             const unsigned MaxVal,
1654                             const StringRef ErrMsg);
1655   OperandMatchResultTy parseSwizzleOp(OperandVector &Operands);
1656   bool parseSwizzleOffset(int64_t &Imm);
1657   bool parseSwizzleMacro(int64_t &Imm);
1658   bool parseSwizzleQuadPerm(int64_t &Imm);
1659   bool parseSwizzleBitmaskPerm(int64_t &Imm);
1660   bool parseSwizzleBroadcast(int64_t &Imm);
1661   bool parseSwizzleSwap(int64_t &Imm);
1662   bool parseSwizzleReverse(int64_t &Imm);
1663 
1664   OperandMatchResultTy parseGPRIdxMode(OperandVector &Operands);
1665   int64_t parseGPRIdxMacro();
1666 
1667   void cvtMubuf(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false); }
1668   void cvtMubufAtomic(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true); }
1669   void cvtMubufLds(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, true); }
1670   void cvtMtbuf(MCInst &Inst, const OperandVector &Operands);
1671 
1672   AMDGPUOperand::Ptr defaultCPol() const;
1673 
1674   AMDGPUOperand::Ptr defaultSMRDOffset8() const;
1675   AMDGPUOperand::Ptr defaultSMEMOffset() const;
1676   AMDGPUOperand::Ptr defaultSMRDLiteralOffset() const;
1677   AMDGPUOperand::Ptr defaultFlatOffset() const;
1678 
1679   OperandMatchResultTy parseOModOperand(OperandVector &Operands);
1680 
1681   void cvtVOP3(MCInst &Inst, const OperandVector &Operands,
1682                OptionalImmIndexMap &OptionalIdx);
1683   void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands);
1684   void cvtVOP3(MCInst &Inst, const OperandVector &Operands);
1685   void cvtVOP3P(MCInst &Inst, const OperandVector &Operands);
1686   void cvtVOP3P(MCInst &Inst, const OperandVector &Operands,
1687                 OptionalImmIndexMap &OptionalIdx);
1688 
1689   void cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands);
1690 
1691   void cvtMIMG(MCInst &Inst, const OperandVector &Operands,
1692                bool IsAtomic = false);
1693   void cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands);
1694   void cvtIntersectRay(MCInst &Inst, const OperandVector &Operands);
1695 
1696   void cvtSMEMAtomic(MCInst &Inst, const OperandVector &Operands);
1697 
1698   bool parseDimId(unsigned &Encoding);
1699   OperandMatchResultTy parseDim(OperandVector &Operands);
1700   OperandMatchResultTy parseDPP8(OperandVector &Operands);
1701   OperandMatchResultTy parseDPPCtrl(OperandVector &Operands);
1702   bool isSupportedDPPCtrl(StringRef Ctrl, const OperandVector &Operands);
1703   int64_t parseDPPCtrlSel(StringRef Ctrl);
1704   int64_t parseDPPCtrlPerm();
1705   AMDGPUOperand::Ptr defaultRowMask() const;
1706   AMDGPUOperand::Ptr defaultBankMask() const;
1707   AMDGPUOperand::Ptr defaultBoundCtrl() const;
1708   AMDGPUOperand::Ptr defaultFI() const;
1709   void cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8 = false);
1710   void cvtDPP8(MCInst &Inst, const OperandVector &Operands) { cvtDPP(Inst, Operands, true); }
1711 
1712   OperandMatchResultTy parseSDWASel(OperandVector &Operands, StringRef Prefix,
1713                                     AMDGPUOperand::ImmTy Type);
1714   OperandMatchResultTy parseSDWADstUnused(OperandVector &Operands);
1715   void cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands);
1716   void cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands);
1717   void cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands);
1718   void cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands);
1719   void cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands);
1720   void cvtSDWA(MCInst &Inst, const OperandVector &Operands,
1721                uint64_t BasicInstType,
1722                bool SkipDstVcc = false,
1723                bool SkipSrcVcc = false);
1724 
1725   AMDGPUOperand::Ptr defaultBLGP() const;
1726   AMDGPUOperand::Ptr defaultCBSZ() const;
1727   AMDGPUOperand::Ptr defaultABID() const;
1728 
1729   OperandMatchResultTy parseEndpgmOp(OperandVector &Operands);
1730   AMDGPUOperand::Ptr defaultEndpgmImmOperands() const;
1731 };
1732 
1733 struct OptionalOperand {
1734   const char *Name;
1735   AMDGPUOperand::ImmTy Type;
1736   bool IsBit;
1737   bool (*ConvertResult)(int64_t&);
1738 };
1739 
1740 } // end anonymous namespace
1741 
1742 // May be called with an integer type of equivalent bitwidth.
1743 static const fltSemantics *getFltSemantics(unsigned Size) {
1744   switch (Size) {
1745   case 4:
1746     return &APFloat::IEEEsingle();
1747   case 8:
1748     return &APFloat::IEEEdouble();
1749   case 2:
1750     return &APFloat::IEEEhalf();
1751   default:
1752     llvm_unreachable("unsupported fp type");
1753   }
1754 }
1755 
1756 static const fltSemantics *getFltSemantics(MVT VT) {
1757   return getFltSemantics(VT.getSizeInBits() / 8);
1758 }
1759 
1760 static const fltSemantics *getOpFltSemantics(uint8_t OperandType) {
1761   switch (OperandType) {
1762   case AMDGPU::OPERAND_REG_IMM_INT32:
1763   case AMDGPU::OPERAND_REG_IMM_FP32:
1764   case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED:
1765   case AMDGPU::OPERAND_REG_INLINE_C_INT32:
1766   case AMDGPU::OPERAND_REG_INLINE_C_FP32:
1767   case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
1768   case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
1769   case AMDGPU::OPERAND_REG_INLINE_C_V2FP32:
1770   case AMDGPU::OPERAND_REG_IMM_V2FP32:
1771   case AMDGPU::OPERAND_REG_INLINE_C_V2INT32:
1772   case AMDGPU::OPERAND_REG_IMM_V2INT32:
1773   case AMDGPU::OPERAND_KIMM32:
1774     return &APFloat::IEEEsingle();
1775   case AMDGPU::OPERAND_REG_IMM_INT64:
1776   case AMDGPU::OPERAND_REG_IMM_FP64:
1777   case AMDGPU::OPERAND_REG_INLINE_C_INT64:
1778   case AMDGPU::OPERAND_REG_INLINE_C_FP64:
1779   case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
1780     return &APFloat::IEEEdouble();
1781   case AMDGPU::OPERAND_REG_IMM_INT16:
1782   case AMDGPU::OPERAND_REG_IMM_FP16:
1783   case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED:
1784   case AMDGPU::OPERAND_REG_INLINE_C_INT16:
1785   case AMDGPU::OPERAND_REG_INLINE_C_FP16:
1786   case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
1787   case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
1788   case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
1789   case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
1790   case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
1791   case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
1792   case AMDGPU::OPERAND_REG_IMM_V2INT16:
1793   case AMDGPU::OPERAND_REG_IMM_V2FP16:
1794   case AMDGPU::OPERAND_KIMM16:
1795     return &APFloat::IEEEhalf();
1796   default:
1797     llvm_unreachable("unsupported fp type");
1798   }
1799 }
1800 
1801 //===----------------------------------------------------------------------===//
1802 // Operand
1803 //===----------------------------------------------------------------------===//
1804 
1805 static bool canLosslesslyConvertToFPType(APFloat &FPLiteral, MVT VT) {
1806   bool Lost;
1807 
1808   // Convert the literal to the floating-point semantics of VT
1809   APFloat::opStatus Status = FPLiteral.convert(*getFltSemantics(VT),
1810                                                APFloat::rmNearestTiesToEven,
1811                                                &Lost);
1812   // We allow precision loss but not overflow or underflow
1813   if (Status != APFloat::opOK &&
1814       Lost &&
1815       ((Status & APFloat::opOverflow)  != 0 ||
1816        (Status & APFloat::opUnderflow) != 0)) {
1817     return false;
1818   }
1819 
1820   return true;
1821 }
1822 
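// Checks whether Val fits in Size bits as either a signed or an unsigned
// integer, i.e. whether truncation to Size bits would preserve its value.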
1823 static bool isSafeTruncation(int64_t Val, unsigned Size) {
1824   return isUIntN(Size, Val) || isIntN(Size, Val);
1825 }
1826 
1827 static bool isInlineableLiteralOp16(int64_t Val, MVT VT, bool HasInv2Pi) {
1828   if (VT.getScalarType() == MVT::i16) {
1829     // FP immediate values are broken.
1830     return isInlinableIntLiteral(Val);
1831   }
1832 
1833   // f16/v2f16 operands work correctly for all values.
1834   return AMDGPU::isInlinableLiteral16(Val, HasInv2Pi);
1835 }
1836 
1837 bool AMDGPUOperand::isInlinableImm(MVT type) const {
1838 
1839   // This is a hack to enable named inline values like
1840   // shared_base with both 32-bit and 64-bit operands.
1841   // Note that these values are defined as
1842   // 32-bit operands only.
1843   if (isInlineValue()) {
1844     return true;
1845   }
1846 
1847   if (!isImmTy(ImmTyNone)) {
1848     // Only plain immediates are inlinable (e.g. "clamp" attribute is not)
1849     return false;
1850   }
1851   // TODO: We should avoid using host float here. It would be better to
1852   // check the float bit values, which is what a few other places do.
1853   // We've had bot failures before due to weird NaN support on MIPS hosts.
1854 
1855   APInt Literal(64, Imm.Val);
1856 
1857   if (Imm.IsFPImm) { // We got fp literal token
1858     if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
1859       return AMDGPU::isInlinableLiteral64(Imm.Val,
1860                                           AsmParser->hasInv2PiInlineImm());
1861     }
1862 
1863     APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
1864     if (!canLosslesslyConvertToFPType(FPLiteral, type))
1865       return false;
1866 
1867     if (type.getScalarSizeInBits() == 16) {
1868       return isInlineableLiteralOp16(
1869         static_cast<int16_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
1870         type, AsmParser->hasInv2PiInlineImm());
1871     }
1872 
1873     // Check if single precision literal is inlinable
1874     return AMDGPU::isInlinableLiteral32(
1875       static_cast<int32_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
1876       AsmParser->hasInv2PiInlineImm());
1877   }
1878 
1879   // We got int literal token.
1880   if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
1881     return AMDGPU::isInlinableLiteral64(Imm.Val,
1882                                         AsmParser->hasInv2PiInlineImm());
1883   }
1884 
1885   if (!isSafeTruncation(Imm.Val, type.getScalarSizeInBits())) {
1886     return false;
1887   }
1888 
1889   if (type.getScalarSizeInBits() == 16) {
1890     return isInlineableLiteralOp16(
1891       static_cast<int16_t>(Literal.getLoBits(16).getSExtValue()),
1892       type, AsmParser->hasInv2PiInlineImm());
1893   }
1894 
1895   return AMDGPU::isInlinableLiteral32(
1896     static_cast<int32_t>(Literal.getLoBits(32).getZExtValue()),
1897     AsmParser->hasInv2PiInlineImm());
1898 }
1899 
1900 bool AMDGPUOperand::isLiteralImm(MVT type) const {
1901   // Check that this immediate can be added as literal
1902   if (!isImmTy(ImmTyNone)) {
1903     return false;
1904   }
1905 
1906   if (!Imm.IsFPImm) {
1907     // We got int literal token.
1908 
1909     if (type == MVT::f64 && hasFPModifiers()) {
1910       // FP modifiers cannot be applied to int literals while preserving the same
1911       // semantics for VOP1/2/C and VOP3, because of integer truncation. To avoid
1912       // ambiguity, disable these cases.
1913       return false;
1914     }
1915 
1916     unsigned Size = type.getSizeInBits();
1917     if (Size == 64)
1918       Size = 32;
1919 
1920     // FIXME: 64-bit operands can zero extend, sign extend, or pad zeroes for FP
1921     // types.
1922     return isSafeTruncation(Imm.Val, Size);
1923   }
1924 
1925   // We got an fp literal token.
1926   if (type == MVT::f64) { // Expected 64-bit fp operand
1927     // We would set the low 32 bits of the literal to zeroes, but we accept such literals
1928     return true;
1929   }
1930 
1931   if (type == MVT::i64) { // Expected 64-bit int operand
1932     // We don't allow fp literals in 64-bit integer instructions. It is
1933     // unclear how we should encode them.
1934     return false;
1935   }
1936 
1937   // We allow fp literals with f16x2 operands assuming that the specified
1938   // literal goes into the lower half and the upper half is zero. We also
1939   // require that the literal may be losslessly converted to f16.
1940   MVT ExpectedType = (type == MVT::v2f16)? MVT::f16 :
1941                      (type == MVT::v2i16)? MVT::i16 :
1942                      (type == MVT::v2f32)? MVT::f32 : type;
1943 
1944   APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
1945   return canLosslesslyConvertToFPType(FPLiteral, ExpectedType);
1946 }
1947 
1948 bool AMDGPUOperand::isRegClass(unsigned RCID) const {
1949   return isRegKind() && AsmParser->getMRI()->getRegClass(RCID).contains(getReg());
1950 }
1951 
1952 bool AMDGPUOperand::isVRegWithInputMods() const {
1953   return isRegClass(AMDGPU::VGPR_32RegClassID) ||
1954          // GFX90A allows DPP on 64-bit operands.
1955          (isRegClass(AMDGPU::VReg_64RegClassID) &&
1956           AsmParser->getFeatureBits()[AMDGPU::Feature64BitDPP]);
1957 }
1958 
1959 bool AMDGPUOperand::isSDWAOperand(MVT type) const {
1960   if (AsmParser->isVI())
1961     return isVReg32();
1962   else if (AsmParser->isGFX9Plus())
1963     return isRegClass(AMDGPU::VS_32RegClassID) || isInlinableImm(type);
1964   else
1965     return false;
1966 }
1967 
1968 bool AMDGPUOperand::isSDWAFP16Operand() const {
1969   return isSDWAOperand(MVT::f16);
1970 }
1971 
1972 bool AMDGPUOperand::isSDWAFP32Operand() const {
1973   return isSDWAOperand(MVT::f32);
1974 }
1975 
1976 bool AMDGPUOperand::isSDWAInt16Operand() const {
1977   return isSDWAOperand(MVT::i16);
1978 }
1979 
1980 bool AMDGPUOperand::isSDWAInt32Operand() const {
1981   return isSDWAOperand(MVT::i32);
1982 }
1983 
1984 bool AMDGPUOperand::isBoolReg() const {
1985   auto FB = AsmParser->getFeatureBits();
1986   return isReg() && ((FB[AMDGPU::FeatureWavefrontSize64] && isSCSrcB64()) ||
1987                      (FB[AMDGPU::FeatureWavefrontSize32] && isSCSrcB32()));
1988 }
1989 
1990 uint64_t AMDGPUOperand::applyInputFPModifiers(uint64_t Val, unsigned Size) const
1991 {
1992   assert(isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
1993   assert(Size == 2 || Size == 4 || Size == 8);
1994 
1995   const uint64_t FpSignMask = (1ULL << (Size * 8 - 1));
1996 
1997   if (Imm.Mods.Abs) {
1998     Val &= ~FpSignMask;
1999   }
2000   if (Imm.Mods.Neg) {
2001     Val ^= FpSignMask;
2002   }
2003 
2004   return Val;
2005 }
2006 
2007 void AMDGPUOperand::addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers) const {
2008   if (AMDGPU::isSISrcOperand(AsmParser->getMII()->get(Inst.getOpcode()),
2009                              Inst.getNumOperands())) {
2010     addLiteralImmOperand(Inst, Imm.Val,
2011                          ApplyModifiers &
2012                          isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
2013   } else {
2014     assert(!isImmTy(ImmTyNone) || !hasModifiers());
2015     Inst.addOperand(MCOperand::createImm(Imm.Val));
2016     setImmKindNone();
2017   }
2018 }
2019 
2020 void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const {
2021   const auto& InstDesc = AsmParser->getMII()->get(Inst.getOpcode());
2022   auto OpNum = Inst.getNumOperands();
2023   // Check that this operand accepts literals
2024   assert(AMDGPU::isSISrcOperand(InstDesc, OpNum));
2025 
2026   if (ApplyModifiers) {
2027     assert(AMDGPU::isSISrcFPOperand(InstDesc, OpNum));
2028     const unsigned Size = Imm.IsFPImm ? sizeof(double) : getOperandSize(InstDesc, OpNum);
2029     Val = applyInputFPModifiers(Val, Size);
2030   }
2031 
2032   APInt Literal(64, Val);
2033   uint8_t OpTy = InstDesc.OpInfo[OpNum].OperandType;
2034 
2035   if (Imm.IsFPImm) { // We got fp literal token
2036     switch (OpTy) {
2037     case AMDGPU::OPERAND_REG_IMM_INT64:
2038     case AMDGPU::OPERAND_REG_IMM_FP64:
2039     case AMDGPU::OPERAND_REG_INLINE_C_INT64:
2040     case AMDGPU::OPERAND_REG_INLINE_C_FP64:
2041     case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
2042       if (AMDGPU::isInlinableLiteral64(Literal.getZExtValue(),
2043                                        AsmParser->hasInv2PiInlineImm())) {
2044         Inst.addOperand(MCOperand::createImm(Literal.getZExtValue()));
2045         setImmKindConst();
2046         return;
2047       }
2048 
2049       // Non-inlineable
2050       if (AMDGPU::isSISrcFPOperand(InstDesc, OpNum)) { // Expected 64-bit fp operand
2051         // For fp operands we check if low 32 bits are zeros
2052         if (Literal.getLoBits(32) != 0) {
2053           const_cast<AMDGPUAsmParser *>(AsmParser)->Warning(Inst.getLoc(),
2054           "Can't encode literal as exact 64-bit floating-point operand. "
2055           "Low 32-bits will be set to zero");
2056         }
2057 
2058         Inst.addOperand(MCOperand::createImm(Literal.lshr(32).getZExtValue()));
2059         setImmKindLiteral();
2060         return;
2061       }
2062 
2063       // We don't allow fp literals in 64-bit integer instructions. It is
2064       // unclear how we should encode them. This case should be checked earlier
2065       // in predicate methods (isLiteralImm())
2066       llvm_unreachable("fp literal in 64-bit integer instruction.");
2067 
2068     case AMDGPU::OPERAND_REG_IMM_INT32:
2069     case AMDGPU::OPERAND_REG_IMM_FP32:
2070     case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED:
2071     case AMDGPU::OPERAND_REG_INLINE_C_INT32:
2072     case AMDGPU::OPERAND_REG_INLINE_C_FP32:
2073     case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
2074     case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
2075     case AMDGPU::OPERAND_REG_IMM_INT16:
2076     case AMDGPU::OPERAND_REG_IMM_FP16:
2077     case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED:
2078     case AMDGPU::OPERAND_REG_INLINE_C_INT16:
2079     case AMDGPU::OPERAND_REG_INLINE_C_FP16:
2080     case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
2081     case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
2082     case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
2083     case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
2084     case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
2085     case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
2086     case AMDGPU::OPERAND_REG_IMM_V2INT16:
2087     case AMDGPU::OPERAND_REG_IMM_V2FP16:
2088     case AMDGPU::OPERAND_REG_INLINE_C_V2FP32:
2089     case AMDGPU::OPERAND_REG_IMM_V2FP32:
2090     case AMDGPU::OPERAND_REG_INLINE_C_V2INT32:
2091     case AMDGPU::OPERAND_REG_IMM_V2INT32:
2092     case AMDGPU::OPERAND_KIMM32:
2093     case AMDGPU::OPERAND_KIMM16: {
2094       bool lost;
2095       APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
2096       // Convert the literal to the operand's floating-point semantics
2097       FPLiteral.convert(*getOpFltSemantics(OpTy),
2098                         APFloat::rmNearestTiesToEven, &lost);
2099       // We allow precision loss but not overflow or underflow. This should be
2100       // checked earlier in isLiteralImm().
2101 
2102       uint64_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue();
2103       Inst.addOperand(MCOperand::createImm(ImmVal));
2104       setImmKindLiteral();
2105       return;
2106     }
2107     default:
2108       llvm_unreachable("invalid operand size");
2109     }
2110 
2111     return;
2112   }
2113 
2114   // We got int literal token.
2115   // Only sign extend inline immediates.
2116   switch (OpTy) {
2117   case AMDGPU::OPERAND_REG_IMM_INT32:
2118   case AMDGPU::OPERAND_REG_IMM_FP32:
2119   case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED:
2120   case AMDGPU::OPERAND_REG_INLINE_C_INT32:
2121   case AMDGPU::OPERAND_REG_INLINE_C_FP32:
2122   case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
2123   case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
2124   case AMDGPU::OPERAND_REG_IMM_V2INT16:
2125   case AMDGPU::OPERAND_REG_IMM_V2FP16:
2126   case AMDGPU::OPERAND_REG_IMM_V2FP32:
2127   case AMDGPU::OPERAND_REG_INLINE_C_V2FP32:
2128   case AMDGPU::OPERAND_REG_IMM_V2INT32:
2129   case AMDGPU::OPERAND_REG_INLINE_C_V2INT32:
2130     if (isSafeTruncation(Val, 32) &&
2131         AMDGPU::isInlinableLiteral32(static_cast<int32_t>(Val),
2132                                      AsmParser->hasInv2PiInlineImm())) {
2133       Inst.addOperand(MCOperand::createImm(Val));
2134       setImmKindConst();
2135       return;
2136     }
2137 
2138     Inst.addOperand(MCOperand::createImm(Val & 0xffffffff));
2139     setImmKindLiteral();
2140     return;
2141 
2142   case AMDGPU::OPERAND_REG_IMM_INT64:
2143   case AMDGPU::OPERAND_REG_IMM_FP64:
2144   case AMDGPU::OPERAND_REG_INLINE_C_INT64:
2145   case AMDGPU::OPERAND_REG_INLINE_C_FP64:
2146   case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
2147     if (AMDGPU::isInlinableLiteral64(Val, AsmParser->hasInv2PiInlineImm())) {
2148       Inst.addOperand(MCOperand::createImm(Val));
2149       setImmKindConst();
2150       return;
2151     }
2152 
2153     Inst.addOperand(MCOperand::createImm(Lo_32(Val)));
2154     setImmKindLiteral();
2155     return;
2156 
2157   case AMDGPU::OPERAND_REG_IMM_INT16:
2158   case AMDGPU::OPERAND_REG_IMM_FP16:
2159   case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED:
2160   case AMDGPU::OPERAND_REG_INLINE_C_INT16:
2161   case AMDGPU::OPERAND_REG_INLINE_C_FP16:
2162   case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
2163   case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
2164     if (isSafeTruncation(Val, 16) &&
2165         AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val),
2166                                      AsmParser->hasInv2PiInlineImm())) {
2167       Inst.addOperand(MCOperand::createImm(Val));
2168       setImmKindConst();
2169       return;
2170     }
2171 
2172     Inst.addOperand(MCOperand::createImm(Val & 0xffff));
2173     setImmKindLiteral();
2174     return;
2175 
2176   case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
2177   case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
2178   case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
2179   case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: {
2180     assert(isSafeTruncation(Val, 16));
2181     assert(AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val),
2182                                         AsmParser->hasInv2PiInlineImm()));
2183 
2184     Inst.addOperand(MCOperand::createImm(Val));
2185     return;
2186   }
2187   case AMDGPU::OPERAND_KIMM32:
2188     Inst.addOperand(MCOperand::createImm(Literal.getLoBits(32).getZExtValue()));
2189     setImmKindNone();
2190     return;
2191   case AMDGPU::OPERAND_KIMM16:
2192     Inst.addOperand(MCOperand::createImm(Literal.getLoBits(16).getZExtValue()));
2193     setImmKindNone();
2194     return;
2195   default:
2196     llvm_unreachable("invalid operand size");
2197   }
2198 }
2199 
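// Adds a KIMM (fixed-width literal) operand of Bitwidth bits: an int literal
// token is truncated to Bitwidth bits, while an fp literal token is first
// converted to the corresponding floating-point format.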
2200 template <unsigned Bitwidth>
2201 void AMDGPUOperand::addKImmFPOperands(MCInst &Inst, unsigned N) const {
2202   APInt Literal(64, Imm.Val);
2203   setImmKindNone();
2204 
2205   if (!Imm.IsFPImm) {
2206     // We got int literal token.
2207     Inst.addOperand(MCOperand::createImm(Literal.getLoBits(Bitwidth).getZExtValue()));
2208     return;
2209   }
2210 
2211   bool Lost;
2212   APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
2213   FPLiteral.convert(*getFltSemantics(Bitwidth / 8),
2214                     APFloat::rmNearestTiesToEven, &Lost);
2215   Inst.addOperand(MCOperand::createImm(FPLiteral.bitcastToAPInt().getZExtValue()));
2216 }
2217 
2218 void AMDGPUOperand::addRegOperands(MCInst &Inst, unsigned N) const {
2219   Inst.addOperand(MCOperand::createReg(AMDGPU::getMCReg(getReg(), AsmParser->getSTI())));
2220 }
2221 
2222 static bool isInlineValue(unsigned Reg) {
2223   switch (Reg) {
2224   case AMDGPU::SRC_SHARED_BASE:
2225   case AMDGPU::SRC_SHARED_LIMIT:
2226   case AMDGPU::SRC_PRIVATE_BASE:
2227   case AMDGPU::SRC_PRIVATE_LIMIT:
2228   case AMDGPU::SRC_POPS_EXITING_WAVE_ID:
2229     return true;
2230   case AMDGPU::SRC_VCCZ:
2231   case AMDGPU::SRC_EXECZ:
2232   case AMDGPU::SRC_SCC:
2233     return true;
2234   case AMDGPU::SGPR_NULL:
2235     return true;
2236   default:
2237     return false;
2238   }
2239 }
2240 
2241 bool AMDGPUOperand::isInlineValue() const {
2242   return isRegKind() && ::isInlineValue(getReg());
2243 }
2244 
2245 //===----------------------------------------------------------------------===//
2246 // AsmParser
2247 //===----------------------------------------------------------------------===//
2248 
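// Maps a register kind and a width in 32-bit units to a register class ID,
// e.g. (IS_VGPR, 2) selects VReg_64. Returns -1 for unsupported widths.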
2249 static int getRegClass(RegisterKind Is, unsigned RegWidth) {
2250   if (Is == IS_VGPR) {
2251     switch (RegWidth) {
2252       default: return -1;
2253       case 1: return AMDGPU::VGPR_32RegClassID;
2254       case 2: return AMDGPU::VReg_64RegClassID;
2255       case 3: return AMDGPU::VReg_96RegClassID;
2256       case 4: return AMDGPU::VReg_128RegClassID;
2257       case 5: return AMDGPU::VReg_160RegClassID;
2258       case 6: return AMDGPU::VReg_192RegClassID;
2259       case 7: return AMDGPU::VReg_224RegClassID;
2260       case 8: return AMDGPU::VReg_256RegClassID;
2261       case 16: return AMDGPU::VReg_512RegClassID;
2262       case 32: return AMDGPU::VReg_1024RegClassID;
2263     }
2264   } else if (Is == IS_TTMP) {
2265     switch (RegWidth) {
2266       default: return -1;
2267       case 1: return AMDGPU::TTMP_32RegClassID;
2268       case 2: return AMDGPU::TTMP_64RegClassID;
2269       case 4: return AMDGPU::TTMP_128RegClassID;
2270       case 8: return AMDGPU::TTMP_256RegClassID;
2271       case 16: return AMDGPU::TTMP_512RegClassID;
2272     }
2273   } else if (Is == IS_SGPR) {
2274     switch (RegWidth) {
2275       default: return -1;
2276       case 1: return AMDGPU::SGPR_32RegClassID;
2277       case 2: return AMDGPU::SGPR_64RegClassID;
2278       case 3: return AMDGPU::SGPR_96RegClassID;
2279       case 4: return AMDGPU::SGPR_128RegClassID;
2280       case 5: return AMDGPU::SGPR_160RegClassID;
2281       case 6: return AMDGPU::SGPR_192RegClassID;
2282       case 7: return AMDGPU::SGPR_224RegClassID;
2283       case 8: return AMDGPU::SGPR_256RegClassID;
2284       case 16: return AMDGPU::SGPR_512RegClassID;
2285     }
2286   } else if (Is == IS_AGPR) {
2287     switch (RegWidth) {
2288       default: return -1;
2289       case 1: return AMDGPU::AGPR_32RegClassID;
2290       case 2: return AMDGPU::AReg_64RegClassID;
2291       case 3: return AMDGPU::AReg_96RegClassID;
2292       case 4: return AMDGPU::AReg_128RegClassID;
2293       case 5: return AMDGPU::AReg_160RegClassID;
2294       case 6: return AMDGPU::AReg_192RegClassID;
2295       case 7: return AMDGPU::AReg_224RegClassID;
2296       case 8: return AMDGPU::AReg_256RegClassID;
2297       case 16: return AMDGPU::AReg_512RegClassID;
2298       case 32: return AMDGPU::AReg_1024RegClassID;
2299     }
2300   }
2301   return -1;
2302 }
2303 
2304 static unsigned getSpecialRegForName(StringRef RegName) {
2305   return StringSwitch<unsigned>(RegName)
2306     .Case("exec", AMDGPU::EXEC)
2307     .Case("vcc", AMDGPU::VCC)
2308     .Case("flat_scratch", AMDGPU::FLAT_SCR)
2309     .Case("xnack_mask", AMDGPU::XNACK_MASK)
2310     .Case("shared_base", AMDGPU::SRC_SHARED_BASE)
2311     .Case("src_shared_base", AMDGPU::SRC_SHARED_BASE)
2312     .Case("shared_limit", AMDGPU::SRC_SHARED_LIMIT)
2313     .Case("src_shared_limit", AMDGPU::SRC_SHARED_LIMIT)
2314     .Case("private_base", AMDGPU::SRC_PRIVATE_BASE)
2315     .Case("src_private_base", AMDGPU::SRC_PRIVATE_BASE)
2316     .Case("private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
2317     .Case("src_private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
2318     .Case("pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
2319     .Case("src_pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
2320     .Case("lds_direct", AMDGPU::LDS_DIRECT)
2321     .Case("src_lds_direct", AMDGPU::LDS_DIRECT)
2322     .Case("m0", AMDGPU::M0)
2323     .Case("vccz", AMDGPU::SRC_VCCZ)
2324     .Case("src_vccz", AMDGPU::SRC_VCCZ)
2325     .Case("execz", AMDGPU::SRC_EXECZ)
2326     .Case("src_execz", AMDGPU::SRC_EXECZ)
2327     .Case("scc", AMDGPU::SRC_SCC)
2328     .Case("src_scc", AMDGPU::SRC_SCC)
2329     .Case("tba", AMDGPU::TBA)
2330     .Case("tma", AMDGPU::TMA)
2331     .Case("flat_scratch_lo", AMDGPU::FLAT_SCR_LO)
2332     .Case("flat_scratch_hi", AMDGPU::FLAT_SCR_HI)
2333     .Case("xnack_mask_lo", AMDGPU::XNACK_MASK_LO)
2334     .Case("xnack_mask_hi", AMDGPU::XNACK_MASK_HI)
2335     .Case("vcc_lo", AMDGPU::VCC_LO)
2336     .Case("vcc_hi", AMDGPU::VCC_HI)
2337     .Case("exec_lo", AMDGPU::EXEC_LO)
2338     .Case("exec_hi", AMDGPU::EXEC_HI)
2339     .Case("tma_lo", AMDGPU::TMA_LO)
2340     .Case("tma_hi", AMDGPU::TMA_HI)
2341     .Case("tba_lo", AMDGPU::TBA_LO)
2342     .Case("tba_hi", AMDGPU::TBA_HI)
2343     .Case("pc", AMDGPU::PC_REG)
2344     .Case("null", AMDGPU::SGPR_NULL)
2345     .Default(AMDGPU::NoRegister);
2346 }
2347 
2348 bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
2349                                     SMLoc &EndLoc, bool RestoreOnFailure) {
2350   auto R = parseRegister();
2351   if (!R) return true;
2352   assert(R->isReg());
2353   RegNo = R->getReg();
2354   StartLoc = R->getStartLoc();
2355   EndLoc = R->getEndLoc();
2356   return false;
2357 }
2358 
2359 bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
2360                                     SMLoc &EndLoc) {
2361   return ParseRegister(RegNo, StartLoc, EndLoc, /*RestoreOnFailure=*/false);
2362 }
2363 
2364 OperandMatchResultTy AMDGPUAsmParser::tryParseRegister(unsigned &RegNo,
2365                                                        SMLoc &StartLoc,
2366                                                        SMLoc &EndLoc) {
2367   bool Result =
2368       ParseRegister(RegNo, StartLoc, EndLoc, /*RestoreOnFailure=*/true);
2369   bool PendingErrors = getParser().hasPendingError();
2370   getParser().clearPendingErrors();
2371   if (PendingErrors)
2372     return MatchOperand_ParseFail;
2373   if (Result)
2374     return MatchOperand_NoMatch;
2375   return MatchOperand_Success;
2376 }
2377 
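// Appends the next parsed register Reg1 to the register list accumulated in
// Reg/RegWidth. Regular registers must have consecutive indices; recognized
// special-register halves (e.g. vcc_lo followed by vcc_hi) are merged into
// the corresponding 64-bit register.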
2378 bool AMDGPUAsmParser::AddNextRegisterToList(unsigned &Reg, unsigned &RegWidth,
2379                                             RegisterKind RegKind, unsigned Reg1,
2380                                             SMLoc Loc) {
2381   switch (RegKind) {
2382   case IS_SPECIAL:
2383     if (Reg == AMDGPU::EXEC_LO && Reg1 == AMDGPU::EXEC_HI) {
2384       Reg = AMDGPU::EXEC;
2385       RegWidth = 2;
2386       return true;
2387     }
2388     if (Reg == AMDGPU::FLAT_SCR_LO && Reg1 == AMDGPU::FLAT_SCR_HI) {
2389       Reg = AMDGPU::FLAT_SCR;
2390       RegWidth = 2;
2391       return true;
2392     }
2393     if (Reg == AMDGPU::XNACK_MASK_LO && Reg1 == AMDGPU::XNACK_MASK_HI) {
2394       Reg = AMDGPU::XNACK_MASK;
2395       RegWidth = 2;
2396       return true;
2397     }
2398     if (Reg == AMDGPU::VCC_LO && Reg1 == AMDGPU::VCC_HI) {
2399       Reg = AMDGPU::VCC;
2400       RegWidth = 2;
2401       return true;
2402     }
2403     if (Reg == AMDGPU::TBA_LO && Reg1 == AMDGPU::TBA_HI) {
2404       Reg = AMDGPU::TBA;
2405       RegWidth = 2;
2406       return true;
2407     }
2408     if (Reg == AMDGPU::TMA_LO && Reg1 == AMDGPU::TMA_HI) {
2409       Reg = AMDGPU::TMA;
2410       RegWidth = 2;
2411       return true;
2412     }
2413     Error(Loc, "register does not fit in the list");
2414     return false;
2415   case IS_VGPR:
2416   case IS_SGPR:
2417   case IS_AGPR:
2418   case IS_TTMP:
2419     if (Reg1 != Reg + RegWidth) {
2420       Error(Loc, "registers in a list must have consecutive indices");
2421       return false;
2422     }
2423     RegWidth++;
2424     return true;
2425   default:
2426     llvm_unreachable("unexpected register kind");
2427   }
2428 }
2429 
2430 struct RegInfo {
2431   StringLiteral Name;
2432   RegisterKind Kind;
2433 };
2434 
2435 static constexpr RegInfo RegularRegisters[] = {
2436   {{"v"},    IS_VGPR},
2437   {{"s"},    IS_SGPR},
2438   {{"ttmp"}, IS_TTMP},
2439   {{"acc"},  IS_AGPR},
2440   {{"a"},    IS_AGPR},
2441 };
2442 
2443 static bool isRegularReg(RegisterKind Kind) {
2444   return Kind == IS_VGPR ||
2445          Kind == IS_SGPR ||
2446          Kind == IS_TTMP ||
2447          Kind == IS_AGPR;
2448 }
2449 
2450 static const RegInfo* getRegularRegInfo(StringRef Str) {
2451   for (const RegInfo &Reg : RegularRegisters)
2452     if (Str.startswith(Reg.Name))
2453       return &Reg;
2454   return nullptr;
2455 }
2456 
2457 static bool getRegNum(StringRef Str, unsigned& Num) {
2458   return !Str.getAsInteger(10, Num);
2459 }
2460 
2461 bool
2462 AMDGPUAsmParser::isRegister(const AsmToken &Token,
2463                             const AsmToken &NextToken) const {
2464 
2465   // A list of consecutive registers: [s0,s1,s2,s3]
2466   if (Token.is(AsmToken::LBrac))
2467     return true;
2468 
2469   if (!Token.is(AsmToken::Identifier))
2470     return false;
2471 
2472   // A single register like s0 or a range of registers like s[0:1]
2473 
2474   StringRef Str = Token.getString();
2475   const RegInfo *Reg = getRegularRegInfo(Str);
2476   if (Reg) {
2477     StringRef RegName = Reg->Name;
2478     StringRef RegSuffix = Str.substr(RegName.size());
2479     if (!RegSuffix.empty()) {
2480       unsigned Num;
2481       // A single register with an index: rXX
2482       if (getRegNum(RegSuffix, Num))
2483         return true;
2484     } else {
2485       // A range of registers: r[XX:YY].
2486       if (NextToken.is(AsmToken::LBrac))
2487         return true;
2488     }
2489   }
2490 
2491   return getSpecialRegForName(Str) != AMDGPU::NoRegister;
2492 }
2493 
2494 bool
2495 AMDGPUAsmParser::isRegister()
2496 {
2497   return isRegister(getToken(), peekToken());
2498 }
2499 
2500 unsigned
2501 AMDGPUAsmParser::getRegularReg(RegisterKind RegKind,
2502                                unsigned RegNum,
2503                                unsigned RegWidth,
2504                                SMLoc Loc) {
2505 
2506   assert(isRegularReg(RegKind));
2507 
2508   unsigned AlignSize = 1;
2509   if (RegKind == IS_SGPR || RegKind == IS_TTMP) {
2510     // SGPR and TTMP registers must be aligned.
2511     // Max required alignment is 4 dwords.
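    // For example, s[4:7] (RegNum = 4, RegWidth = 4) is accepted, while
    // s[2:5] is rejected below with "invalid register alignment" because a
    // 4-dword SGPR range must start at an index that is a multiple of 4.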
2512     AlignSize = std::min(RegWidth, 4u);
2513   }
2514 
2515   if (RegNum % AlignSize != 0) {
2516     Error(Loc, "invalid register alignment");
2517     return AMDGPU::NoRegister;
2518   }
2519 
2520   unsigned RegIdx = RegNum / AlignSize;
2521   int RCID = getRegClass(RegKind, RegWidth);
2522   if (RCID == -1) {
2523     Error(Loc, "invalid or unsupported register size");
2524     return AMDGPU::NoRegister;
2525   }
2526 
2527   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
2528   const MCRegisterClass RC = TRI->getRegClass(RCID);
2529   if (RegIdx >= RC.getNumRegs()) {
2530     Error(Loc, "register index is out of range");
2531     return AMDGPU::NoRegister;
2532   }
2533 
2534   return RC.getRegister(RegIdx);
2535 }
2536 
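// Parses a bracketed register index range such as "[0:3]", which yields
// Num = 0 and Width = 4. The ":YY" part is optional, so "[5]" yields
// Num = 5 and Width = 1.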
2537 bool
2538 AMDGPUAsmParser::ParseRegRange(unsigned& Num, unsigned& Width) {
2539   int64_t RegLo, RegHi;
2540   if (!skipToken(AsmToken::LBrac, "missing register index"))
2541     return false;
2542 
2543   SMLoc FirstIdxLoc = getLoc();
2544   SMLoc SecondIdxLoc;
2545 
2546   if (!parseExpr(RegLo))
2547     return false;
2548 
2549   if (trySkipToken(AsmToken::Colon)) {
2550     SecondIdxLoc = getLoc();
2551     if (!parseExpr(RegHi))
2552       return false;
2553   } else {
2554     RegHi = RegLo;
2555   }
2556 
2557   if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
2558     return false;
2559 
2560   if (!isUInt<32>(RegLo)) {
2561     Error(FirstIdxLoc, "invalid register index");
2562     return false;
2563   }
2564 
2565   if (!isUInt<32>(RegHi)) {
2566     Error(SecondIdxLoc, "invalid register index");
2567     return false;
2568   }
2569 
2570   if (RegLo > RegHi) {
2571     Error(FirstIdxLoc, "first register index should not exceed second index");
2572     return false;
2573   }
2574 
2575   Num = static_cast<unsigned>(RegLo);
2576   Width = (RegHi - RegLo) + 1;
2577   return true;
2578 }
2579 
2580 unsigned AMDGPUAsmParser::ParseSpecialReg(RegisterKind &RegKind,
2581                                           unsigned &RegNum, unsigned &RegWidth,
2582                                           SmallVectorImpl<AsmToken> &Tokens) {
2583   assert(isToken(AsmToken::Identifier));
2584   unsigned Reg = getSpecialRegForName(getTokenStr());
2585   if (Reg) {
2586     RegNum = 0;
2587     RegWidth = 1;
2588     RegKind = IS_SPECIAL;
2589     Tokens.push_back(getToken());
2590     lex(); // skip register name
2591   }
2592   return Reg;
2593 }
2594 
2595 unsigned AMDGPUAsmParser::ParseRegularReg(RegisterKind &RegKind,
2596                                           unsigned &RegNum, unsigned &RegWidth,
2597                                           SmallVectorImpl<AsmToken> &Tokens) {
2598   assert(isToken(AsmToken::Identifier));
2599   StringRef RegName = getTokenStr();
2600   auto Loc = getLoc();
2601 
2602   const RegInfo *RI = getRegularRegInfo(RegName);
2603   if (!RI) {
2604     Error(Loc, "invalid register name");
2605     return AMDGPU::NoRegister;
2606   }
2607 
2608   Tokens.push_back(getToken());
2609   lex(); // skip register name
2610 
2611   RegKind = RI->Kind;
2612   StringRef RegSuffix = RegName.substr(RI->Name.size());
2613   if (!RegSuffix.empty()) {
2614     // Single 32-bit register: vXX.
2615     if (!getRegNum(RegSuffix, RegNum)) {
2616       Error(Loc, "invalid register index");
2617       return AMDGPU::NoRegister;
2618     }
2619     RegWidth = 1;
2620   } else {
2621     // Range of registers: v[XX:YY]. ":YY" is optional.
2622     if (!ParseRegRange(RegNum, RegWidth))
2623       return AMDGPU::NoRegister;
2624   }
2625 
2626   return getRegularReg(RegKind, RegNum, RegWidth, Loc);
2627 }
2628 
2629 unsigned AMDGPUAsmParser::ParseRegList(RegisterKind &RegKind, unsigned &RegNum,
2630                                        unsigned &RegWidth,
2631                                        SmallVectorImpl<AsmToken> &Tokens) {
2632   unsigned Reg = AMDGPU::NoRegister;
2633   auto ListLoc = getLoc();
2634 
2635   if (!skipToken(AsmToken::LBrac,
2636                  "expected a register or a list of registers")) {
2637     return AMDGPU::NoRegister;
2638   }
2639 
2640   // List of consecutive registers, e.g.: [s0,s1,s2,s3]
2641 
2642   auto Loc = getLoc();
2643   if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth))
2644     return AMDGPU::NoRegister;
2645   if (RegWidth != 1) {
2646     Error(Loc, "expected a single 32-bit register");
2647     return AMDGPU::NoRegister;
2648   }
2649 
2650   for (; trySkipToken(AsmToken::Comma); ) {
2651     RegisterKind NextRegKind;
2652     unsigned NextReg, NextRegNum, NextRegWidth;
2653     Loc = getLoc();
2654 
2655     if (!ParseAMDGPURegister(NextRegKind, NextReg,
2656                              NextRegNum, NextRegWidth,
2657                              Tokens)) {
2658       return AMDGPU::NoRegister;
2659     }
2660     if (NextRegWidth != 1) {
2661       Error(Loc, "expected a single 32-bit register");
2662       return AMDGPU::NoRegister;
2663     }
2664     if (NextRegKind != RegKind) {
2665       Error(Loc, "registers in a list must be of the same kind");
2666       return AMDGPU::NoRegister;
2667     }
2668     if (!AddNextRegisterToList(Reg, RegWidth, RegKind, NextReg, Loc))
2669       return AMDGPU::NoRegister;
2670   }
2671 
2672   if (!skipToken(AsmToken::RBrac,
2673                  "expected a comma or a closing square bracket")) {
2674     return AMDGPU::NoRegister;
2675   }
2676 
2677   if (isRegularReg(RegKind))
2678     Reg = getRegularReg(RegKind, RegNum, RegWidth, ListLoc);
2679 
2680   return Reg;
2681 }
2682 
2683 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
2684                                           unsigned &RegNum, unsigned &RegWidth,
2685                                           SmallVectorImpl<AsmToken> &Tokens) {
2686   auto Loc = getLoc();
2687   Reg = AMDGPU::NoRegister;
2688 
2689   if (isToken(AsmToken::Identifier)) {
2690     Reg = ParseSpecialReg(RegKind, RegNum, RegWidth, Tokens);
2691     if (Reg == AMDGPU::NoRegister)
2692       Reg = ParseRegularReg(RegKind, RegNum, RegWidth, Tokens);
2693   } else {
2694     Reg = ParseRegList(RegKind, RegNum, RegWidth, Tokens);
2695   }
2696 
2697   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
2698   if (Reg == AMDGPU::NoRegister) {
2699     assert(Parser.hasPendingError());
2700     return false;
2701   }
2702 
2703   if (!subtargetHasRegister(*TRI, Reg)) {
2704     if (Reg == AMDGPU::SGPR_NULL) {
2705       Error(Loc, "'null' operand is not supported on this GPU");
2706     } else {
2707       Error(Loc, "register not available on this GPU");
2708     }
2709     return false;
2710   }
2711 
2712   return true;
2713 }
2714 
2715 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
2716                                           unsigned &RegNum, unsigned &RegWidth,
2717                                           bool RestoreOnFailure /*=false*/) {
2718   Reg = AMDGPU::NoRegister;
2719 
2720   SmallVector<AsmToken, 1> Tokens;
2721   if (ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, Tokens)) {
2722     if (RestoreOnFailure) {
2723       while (!Tokens.empty()) {
2724         getLexer().UnLex(Tokens.pop_back_val());
2725       }
2726     }
2727     return true;
2728   }
2729   return false;
2730 }
2731 
2732 Optional<StringRef>
2733 AMDGPUAsmParser::getGprCountSymbolName(RegisterKind RegKind) {
2734   switch (RegKind) {
2735   case IS_VGPR:
2736     return StringRef(".amdgcn.next_free_vgpr");
2737   case IS_SGPR:
2738     return StringRef(".amdgcn.next_free_sgpr");
2739   default:
2740     return None;
2741   }
2742 }
2743 
2744 void AMDGPUAsmParser::initializeGprCountSymbol(RegisterKind RegKind) {
2745   auto SymbolName = getGprCountSymbolName(RegKind);
2746   assert(SymbolName && "initializing invalid register kind");
2747   MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);
2748   Sym->setVariableValue(MCConstantExpr::create(0, getContext()));
2749 }
2750 
2751 bool AMDGPUAsmParser::updateGprCountSymbols(RegisterKind RegKind,
2752                                             unsigned DwordRegIndex,
2753                                             unsigned RegWidth) {
2754   // Symbols are only defined for GCN targets
2755   if (AMDGPU::getIsaVersion(getSTI().getCPU()).Major < 6)
2756     return true;
2757 
2758   auto SymbolName = getGprCountSymbolName(RegKind);
2759   if (!SymbolName)
2760     return true;
2761   MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);
2762 
2763   int64_t NewMax = DwordRegIndex + RegWidth - 1;
2764   int64_t OldCount;
2765 
2766   if (!Sym->isVariable())
2767     return !Error(getLoc(),
2768                   ".amdgcn.next_free_{v,s}gpr symbols must be variable");
2769   if (!Sym->getVariableValue(false)->evaluateAsAbsolute(OldCount))
2770     return !Error(
2771         getLoc(),
2772         ".amdgcn.next_free_{v,s}gpr symbols must be absolute expressions");
2773 
2774   if (OldCount <= NewMax)
2775     Sym->setVariableValue(MCConstantExpr::create(NewMax + 1, getContext()));
2776 
2777   return true;
2778 }
2779 
2780 std::unique_ptr<AMDGPUOperand>
2781 AMDGPUAsmParser::parseRegister(bool RestoreOnFailure) {
2782   const auto &Tok = getToken();
2783   SMLoc StartLoc = Tok.getLoc();
2784   SMLoc EndLoc = Tok.getEndLoc();
2785   RegisterKind RegKind;
2786   unsigned Reg, RegNum, RegWidth;
2787 
2788   if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth)) {
2789     return nullptr;
2790   }
2791   if (isHsaAbiVersion3AndAbove(&getSTI())) {
2792     if (!updateGprCountSymbols(RegKind, RegNum, RegWidth))
2793       return nullptr;
2794   } else
2795     KernelScope.usesRegister(RegKind, RegNum, RegWidth);
2796   return AMDGPUOperand::CreateReg(this, Reg, StartLoc, EndLoc);
2797 }
2798 
2799 OperandMatchResultTy
2800 AMDGPUAsmParser::parseImm(OperandVector &Operands, bool HasSP3AbsModifier) {
2801   // TODO: add syntactic sugar for 1/(2*PI)
2802 
2803   assert(!isRegister());
2804   assert(!isModifier());
2805 
2806   const auto& Tok = getToken();
2807   const auto& NextTok = peekToken();
2808   bool IsReal = Tok.is(AsmToken::Real);
2809   SMLoc S = getLoc();
2810   bool Negate = false;
2811 
2812   if (!IsReal && Tok.is(AsmToken::Minus) && NextTok.is(AsmToken::Real)) {
2813     lex();
2814     IsReal = true;
2815     Negate = true;
2816   }
2817 
2818   if (IsReal) {
2819     // Floating-point expressions are not supported.
2820     // Only floating-point literals with an
2821     // optional sign are allowed here.
2822 
2823     StringRef Num = getTokenStr();
2824     lex();
2825 
2826     APFloat RealVal(APFloat::IEEEdouble());
2827     auto roundMode = APFloat::rmNearestTiesToEven;
2828     if (errorToBool(RealVal.convertFromString(Num, roundMode).takeError())) {
2829       return MatchOperand_ParseFail;
2830     }
2831     if (Negate)
2832       RealVal.changeSign();
2833 
2834     Operands.push_back(
2835       AMDGPUOperand::CreateImm(this, RealVal.bitcastToAPInt().getZExtValue(), S,
2836                                AMDGPUOperand::ImmTyNone, true));
2837 
2838     return MatchOperand_Success;
2839 
2840   } else {
2841     int64_t IntVal;
2842     const MCExpr *Expr;
2843     SMLoc S = getLoc();
2844 
2845     if (HasSP3AbsModifier) {
2846       // This is a workaround for handling expressions
2847       // given as arguments of the SP3 'abs' modifier, for example:
2848       //     |1.0|
2849       //     |-1|
2850       //     |1+x|
2851       // This syntax is not compatible with the syntax of standard
2852       // MC expressions (due to the trailing '|').
2853       SMLoc EndLoc;
2854       if (getParser().parsePrimaryExpr(Expr, EndLoc, nullptr))
2855         return MatchOperand_ParseFail;
2856     } else {
2857       if (Parser.parseExpression(Expr))
2858         return MatchOperand_ParseFail;
2859     }
2860 
2861     if (Expr->evaluateAsAbsolute(IntVal)) {
2862       Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S));
2863     } else {
2864       Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S));
2865     }
2866 
2867     return MatchOperand_Success;
2868   }
2869 
2870   return MatchOperand_NoMatch;
2871 }
2872 
2873 OperandMatchResultTy
2874 AMDGPUAsmParser::parseReg(OperandVector &Operands) {
2875   if (!isRegister())
2876     return MatchOperand_NoMatch;
2877 
2878   if (auto R = parseRegister()) {
2879     assert(R->isReg());
2880     Operands.push_back(std::move(R));
2881     return MatchOperand_Success;
2882   }
2883   return MatchOperand_ParseFail;
2884 }
2885 
2886 OperandMatchResultTy
2887 AMDGPUAsmParser::parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod) {
2888   auto res = parseReg(Operands);
2889   if (res != MatchOperand_NoMatch) {
2890     return res;
2891   } else if (isModifier()) {
2892     return MatchOperand_NoMatch;
2893   } else {
2894     return parseImm(Operands, HasSP3AbsMod);
2895   }
2896 }
2897 
2898 bool
2899 AMDGPUAsmParser::isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
2900   if (Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::LParen)) {
2901     const auto &str = Token.getString();
2902     return str == "abs" || str == "neg" || str == "sext";
2903   }
2904   return false;
2905 }
2906 
2907 bool
2908 AMDGPUAsmParser::isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const {
2909   return Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::Colon);
2910 }
2911 
2912 bool
2913 AMDGPUAsmParser::isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
2914   return isNamedOperandModifier(Token, NextToken) || Token.is(AsmToken::Pipe);
2915 }
2916 
2917 bool
2918 AMDGPUAsmParser::isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
2919   return isRegister(Token, NextToken) || isOperandModifier(Token, NextToken);
2920 }
2921 
2922 // Check if this is an operand modifier or an opcode modifier
2923 // that may look like an expression but is not. We should
2924 // avoid parsing these modifiers as expressions. Currently
2925 // recognized sequences are:
2926 //   |...|
2927 //   abs(...)
2928 //   neg(...)
2929 //   sext(...)
2930 //   -reg
2931 //   -|...|
2932 //   -abs(...)
2933 //   name:...
2934 // Note that simple opcode modifiers like 'gds' may be parsed as
2935 // expressions; this is a special case. See getExpressionAsToken.
2936 //
2937 bool
2938 AMDGPUAsmParser::isModifier() {
2939 
2940   AsmToken Tok = getToken();
2941   AsmToken NextToken[2];
2942   peekTokens(NextToken);
2943 
2944   return isOperandModifier(Tok, NextToken[0]) ||
2945          (Tok.is(AsmToken::Minus) && isRegOrOperandModifier(NextToken[0], NextToken[1])) ||
2946          isOpcodeModifierWithVal(Tok, NextToken[0]);
2947 }
2948 
2949 // Check if the current token is an SP3 'neg' modifier.
2950 // Currently this modifier is allowed in the following contexts:
2951 //
2952 // 1. Before a register, e.g. "-v0", "-v[...]" or "-[v0,v1]".
2953 // 2. Before an 'abs' modifier: -abs(...)
2954 // 3. Before an SP3 'abs' modifier: -|...|
2955 //
2956 // In all other cases "-" is handled as a part
2957 // of an expression that follows the sign.
2958 //
2959 // Note: When "-" is followed by an integer literal,
2960 // it is interpreted as integer negation rather than
2961 // a floating-point NEG modifier applied to the literal.
2962 // Besides being counter-intuitive, using the floating-point
2963 // NEG modifier here would give integer literals different
2964 // meanings in VOP1/2/C and VOP3 encodings,
2965 // for example:
2966 //    v_exp_f32_e32 v5, -1 // VOP1: src0 = 0xFFFFFFFF
2967 //    v_exp_f32_e64 v5, -1 // VOP3: src0 = 0x80000001
2968 // Negative fp literals with a preceding "-" are
2969 // handled the same way, for uniformity.
2970 //
2971 bool
2972 AMDGPUAsmParser::parseSP3NegModifier() {
2973 
2974   AsmToken NextToken[2];
2975   peekTokens(NextToken);
2976 
2977   if (isToken(AsmToken::Minus) &&
2978       (isRegister(NextToken[0], NextToken[1]) ||
2979        NextToken[0].is(AsmToken::Pipe) ||
2980        isId(NextToken[0], "abs"))) {
2981     lex();
2982     return true;
2983   }
2984 
2985   return false;
2986 }
2987 
2988 OperandMatchResultTy
2989 AMDGPUAsmParser::parseRegOrImmWithFPInputMods(OperandVector &Operands,
2990                                               bool AllowImm) {
2991   bool Neg, SP3Neg;
2992   bool Abs, SP3Abs;
2993   SMLoc Loc;
2994 
2995   // Disable ambiguous constructs like '--1' etc. Should use neg(-1) instead.
2996   if (isToken(AsmToken::Minus) && peekToken().is(AsmToken::Minus)) {
2997     Error(getLoc(), "invalid syntax, expected 'neg' modifier");
2998     return MatchOperand_ParseFail;
2999   }
3000 
3001   SP3Neg = parseSP3NegModifier();
3002 
3003   Loc = getLoc();
3004   Neg = trySkipId("neg");
3005   if (Neg && SP3Neg) {
3006     Error(Loc, "expected register or immediate");
3007     return MatchOperand_ParseFail;
3008   }
3009   if (Neg && !skipToken(AsmToken::LParen, "expected left paren after neg"))
3010     return MatchOperand_ParseFail;
3011 
3012   Abs = trySkipId("abs");
3013   if (Abs && !skipToken(AsmToken::LParen, "expected left paren after abs"))
3014     return MatchOperand_ParseFail;
3015 
3016   Loc = getLoc();
3017   SP3Abs = trySkipToken(AsmToken::Pipe);
3018   if (Abs && SP3Abs) {
3019     Error(Loc, "expected register or immediate");
3020     return MatchOperand_ParseFail;
3021   }
3022 
3023   OperandMatchResultTy Res;
3024   if (AllowImm) {
3025     Res = parseRegOrImm(Operands, SP3Abs);
3026   } else {
3027     Res = parseReg(Operands);
3028   }
3029   if (Res != MatchOperand_Success) {
3030     return (SP3Neg || Neg || SP3Abs || Abs)? MatchOperand_ParseFail : Res;
3031   }
3032 
3033   if (SP3Abs && !skipToken(AsmToken::Pipe, "expected vertical bar"))
3034     return MatchOperand_ParseFail;
3035   if (Abs && !skipToken(AsmToken::RParen, "expected closing parentheses"))
3036     return MatchOperand_ParseFail;
3037   if (Neg && !skipToken(AsmToken::RParen, "expected closing parentheses"))
3038     return MatchOperand_ParseFail;
3039 
3040   AMDGPUOperand::Modifiers Mods;
3041   Mods.Abs = Abs || SP3Abs;
3042   Mods.Neg = Neg || SP3Neg;
3043 
3044   if (Mods.hasFPModifiers()) {
3045     AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
3046     if (Op.isExpr()) {
3047       Error(Op.getStartLoc(), "expected an absolute expression");
3048       return MatchOperand_ParseFail;
3049     }
3050     Op.setModifiers(Mods);
3051   }
3052   return MatchOperand_Success;
3053 }
3054 
3055 OperandMatchResultTy
3056 AMDGPUAsmParser::parseRegOrImmWithIntInputMods(OperandVector &Operands,
3057                                                bool AllowImm) {
3058   bool Sext = trySkipId("sext");
3059   if (Sext && !skipToken(AsmToken::LParen, "expected left paren after sext"))
3060     return MatchOperand_ParseFail;
3061 
3062   OperandMatchResultTy Res;
3063   if (AllowImm) {
3064     Res = parseRegOrImm(Operands);
3065   } else {
3066     Res = parseReg(Operands);
3067   }
3068   if (Res != MatchOperand_Success) {
3069     return Sext? MatchOperand_ParseFail : Res;
3070   }
3071 
3072   if (Sext && !skipToken(AsmToken::RParen, "expected closing parentheses"))
3073     return MatchOperand_ParseFail;
3074 
3075   AMDGPUOperand::Modifiers Mods;
3076   Mods.Sext = Sext;
3077 
3078   if (Mods.hasIntModifiers()) {
3079     AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
3080     if (Op.isExpr()) {
3081       Error(Op.getStartLoc(), "expected an absolute expression");
3082       return MatchOperand_ParseFail;
3083     }
3084     Op.setModifiers(Mods);
3085   }
3086 
3087   return MatchOperand_Success;
3088 }
3089 
3090 OperandMatchResultTy
3091 AMDGPUAsmParser::parseRegWithFPInputMods(OperandVector &Operands) {
3092   return parseRegOrImmWithFPInputMods(Operands, false);
3093 }
3094 
3095 OperandMatchResultTy
3096 AMDGPUAsmParser::parseRegWithIntInputMods(OperandVector &Operands) {
3097   return parseRegOrImmWithIntInputMods(Operands, false);
3098 }
3099 
3100 OperandMatchResultTy AMDGPUAsmParser::parseVReg32OrOff(OperandVector &Operands) {
3101   auto Loc = getLoc();
3102   if (trySkipId("off")) {
3103     Operands.push_back(AMDGPUOperand::CreateImm(this, 0, Loc,
3104                                                 AMDGPUOperand::ImmTyOff, false));
3105     return MatchOperand_Success;
3106   }
3107 
3108   if (!isRegister())
3109     return MatchOperand_NoMatch;
3110 
3111   std::unique_ptr<AMDGPUOperand> Reg = parseRegister();
3112   if (Reg) {
3113     Operands.push_back(std::move(Reg));
3114     return MatchOperand_Success;
3115   }
3116 
3117   return MatchOperand_ParseFail;
3118 
3119 }
3120 
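// Reject a match that contradicts a forced encoding suffix (_e32, _e64,
// _dpp, _sdwa) and enforce the dst_sel == DWORD restriction of v_mac_*_sdwa.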
3121 unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) {
3122   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
3123 
3124   if ((getForcedEncodingSize() == 32 && (TSFlags & SIInstrFlags::VOP3)) ||
3125       (getForcedEncodingSize() == 64 && !(TSFlags & SIInstrFlags::VOP3)) ||
3126       (isForcedDPP() && !(TSFlags & SIInstrFlags::DPP)) ||
3127       (isForcedSDWA() && !(TSFlags & SIInstrFlags::SDWA)) )
3128     return Match_InvalidOperand;
3129 
3130   if ((TSFlags & SIInstrFlags::VOP3) &&
3131       (TSFlags & SIInstrFlags::VOPAsmPrefer32Bit) &&
3132       getForcedEncodingSize() != 64)
3133     return Match_PreferE32;
3134 
3135   if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
3136       Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) {
3137     // v_mac_f32/f16 allow only dst_sel == DWORD.
3138     auto OpNum =
3139         AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::dst_sel);
3140     const auto &Op = Inst.getOperand(OpNum);
3141     if (!Op.isImm() || Op.getImm() != AMDGPU::SDWA::SdwaSel::DWORD) {
3142       return Match_InvalidOperand;
3143     }
3144   }
3145 
3146   return Match_Success;
3147 }
3148 
3149 static ArrayRef<unsigned> getAllVariants() {
3150   static const unsigned Variants[] = {
3151     AMDGPUAsmVariants::DEFAULT, AMDGPUAsmVariants::VOP3,
3152     AMDGPUAsmVariants::SDWA, AMDGPUAsmVariants::SDWA9, AMDGPUAsmVariants::DPP
3153   };
3154 
3155   return makeArrayRef(Variants);
3156 }
3157 
3158 // Which asm variants we should check
3159 ArrayRef<unsigned> AMDGPUAsmParser::getMatchedVariants() const {
3160   if (getForcedEncodingSize() == 32) {
3161     static const unsigned Variants[] = {AMDGPUAsmVariants::DEFAULT};
3162     return makeArrayRef(Variants);
3163   }
3164 
3165   if (isForcedVOP3()) {
3166     static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3};
3167     return makeArrayRef(Variants);
3168   }
3169 
3170   if (isForcedSDWA()) {
3171     static const unsigned Variants[] = {AMDGPUAsmVariants::SDWA,
3172                                         AMDGPUAsmVariants::SDWA9};
3173     return makeArrayRef(Variants);
3174   }
3175 
3176   if (isForcedDPP()) {
3177     static const unsigned Variants[] = {AMDGPUAsmVariants::DPP};
3178     return makeArrayRef(Variants);
3179   }
3180 
3181   return getAllVariants();
3182 }
3183 
3184 StringRef AMDGPUAsmParser::getMatchedVariantName() const {
3185   if (getForcedEncodingSize() == 32)
3186     return "e32";
3187 
3188   if (isForcedVOP3())
3189     return "e64";
3190 
3191   if (isForcedSDWA())
3192     return "sdwa";
3193 
3194   if (isForcedDPP())
3195     return "dpp";
3196 
3197   return "";
3198 }
3199 
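// Return the first implicitly read SGPR (FLAT_SCR, VCC, VCC_LO/HI or M0)
// of a VOP instruction, or NoRegister if there is none.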
3200 unsigned AMDGPUAsmParser::findImplicitSGPRReadInVOP(const MCInst &Inst) const {
3201   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
3202   const unsigned Num = Desc.getNumImplicitUses();
3203   for (unsigned i = 0; i < Num; ++i) {
3204     unsigned Reg = Desc.ImplicitUses[i];
3205     switch (Reg) {
3206     case AMDGPU::FLAT_SCR:
3207     case AMDGPU::VCC:
3208     case AMDGPU::VCC_LO:
3209     case AMDGPU::VCC_HI:
3210     case AMDGPU::M0:
3211       return Reg;
3212     default:
3213       break;
3214     }
3215   }
3216   return AMDGPU::NoRegister;
3217 }
3218 
3219 // NB: This code is correct only when used to check constant
3220 // bus limitations because GFX7 supports no f16 inline constants.
3221 // Note that there are no cases when a GFX7 opcode violates
3222 // constant bus limitations due to the use of an f16 constant.
3223 bool AMDGPUAsmParser::isInlineConstant(const MCInst &Inst,
3224                                        unsigned OpIdx) const {
3225   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
3226 
3227   if (!AMDGPU::isSISrcOperand(Desc, OpIdx)) {
3228     return false;
3229   }
3230 
3231   const MCOperand &MO = Inst.getOperand(OpIdx);
3232 
3233   int64_t Val = MO.getImm();
3234   auto OpSize = AMDGPU::getOperandSize(Desc, OpIdx);
3235 
3236   switch (OpSize) { // expected operand size
3237   case 8:
3238     return AMDGPU::isInlinableLiteral64(Val, hasInv2PiInlineImm());
3239   case 4:
3240     return AMDGPU::isInlinableLiteral32(Val, hasInv2PiInlineImm());
3241   case 2: {
3242     const unsigned OperandType = Desc.OpInfo[OpIdx].OperandType;
3243     if (OperandType == AMDGPU::OPERAND_REG_IMM_INT16 ||
3244         OperandType == AMDGPU::OPERAND_REG_INLINE_C_INT16 ||
3245         OperandType == AMDGPU::OPERAND_REG_INLINE_AC_INT16)
3246       return AMDGPU::isInlinableIntLiteral(Val);
3247 
3248     if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2INT16 ||
3249         OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2INT16 ||
3250         OperandType == AMDGPU::OPERAND_REG_IMM_V2INT16)
3251       return AMDGPU::isInlinableIntLiteralV216(Val);
3252 
3253     if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2FP16 ||
3254         OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2FP16 ||
3255         OperandType == AMDGPU::OPERAND_REG_IMM_V2FP16)
3256       return AMDGPU::isInlinableLiteralV216(Val, hasInv2PiInlineImm());
3257 
3258     return AMDGPU::isInlinableLiteral16(Val, hasInv2PiInlineImm());
3259   }
3260   default:
3261     llvm_unreachable("invalid operand size");
3262   }
3263 }
3264 
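// Return the number of scalar values (SGPRs or literals) the given opcode
// may read via the constant bus: one before GFX10 and for 64-bit shifts,
// two otherwise.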
3265 unsigned AMDGPUAsmParser::getConstantBusLimit(unsigned Opcode) const {
3266   if (!isGFX10Plus())
3267     return 1;
3268 
3269   switch (Opcode) {
3270   // 64-bit shift instructions can use only one scalar value input
3271   case AMDGPU::V_LSHLREV_B64_e64:
3272   case AMDGPU::V_LSHLREV_B64_gfx10:
3273   case AMDGPU::V_LSHRREV_B64_e64:
3274   case AMDGPU::V_LSHRREV_B64_gfx10:
3275   case AMDGPU::V_ASHRREV_I64_e64:
3276   case AMDGPU::V_ASHRREV_I64_gfx10:
3277   case AMDGPU::V_LSHL_B64_e64:
3278   case AMDGPU::V_LSHR_B64_e64:
3279   case AMDGPU::V_ASHR_I64_e64:
3280     return 1;
3281   default:
3282     return 2;
3283   }
3284 }
3285 
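// Return true if operand OpIdx occupies a constant bus slot: a non-inline
// immediate, an expression, or an SGPR other than null.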
3286 bool AMDGPUAsmParser::usesConstantBus(const MCInst &Inst, unsigned OpIdx) {
3287   const MCOperand &MO = Inst.getOperand(OpIdx);
3288   if (MO.isImm()) {
3289     return !isInlineConstant(Inst, OpIdx);
3290   } else if (MO.isReg()) {
3291     auto Reg = MO.getReg();
3292     const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3293     auto PReg = mc2PseudoReg(Reg);
3294     return isSGPR(PReg, TRI) && PReg != SGPR_NULL;
3295   } else {
3296     return true;
3297   }
3298 }
3299 
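// Check that the instruction does not read more SGPRs and literals than the
// constant bus allows for this opcode.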
3300 bool
3301 AMDGPUAsmParser::validateConstantBusLimitations(const MCInst &Inst,
3302                                                 const OperandVector &Operands) {
3303   const unsigned Opcode = Inst.getOpcode();
3304   const MCInstrDesc &Desc = MII.get(Opcode);
3305   unsigned LastSGPR = AMDGPU::NoRegister;
3306   unsigned ConstantBusUseCount = 0;
3307   unsigned NumLiterals = 0;
3308   unsigned LiteralSize;
3309 
3310   if (Desc.TSFlags &
3311       (SIInstrFlags::VOPC |
3312        SIInstrFlags::VOP1 | SIInstrFlags::VOP2 |
3313        SIInstrFlags::VOP3 | SIInstrFlags::VOP3P |
3314        SIInstrFlags::SDWA)) {
3315     // Check special imm operands (used by madmk, etc)
3316     if (AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm) != -1) {
3317       ++NumLiterals;
3318       LiteralSize = 4;
3319     }
3320 
3321     SmallDenseSet<unsigned> SGPRsUsed;
3322     unsigned SGPRUsed = findImplicitSGPRReadInVOP(Inst);
3323     if (SGPRUsed != AMDGPU::NoRegister) {
3324       SGPRsUsed.insert(SGPRUsed);
3325       ++ConstantBusUseCount;
3326     }
3327 
3328     const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
3329     const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
3330     const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
3331 
3332     const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx };
3333 
3334     for (int OpIdx : OpIndices) {
3335       if (OpIdx == -1) break;
3336 
3337       const MCOperand &MO = Inst.getOperand(OpIdx);
3338       if (usesConstantBus(Inst, OpIdx)) {
3339         if (MO.isReg()) {
3340           LastSGPR = mc2PseudoReg(MO.getReg());
3341           // Pairs of registers with a partial intersection like these
3342           //   s0, s[0:1]
3343           //   flat_scratch_lo, flat_scratch
3344           //   flat_scratch_lo, flat_scratch_hi
3345           // are theoretically valid but they are disabled anyway.
3346           // Note that this code mimics SIInstrInfo::verifyInstruction
3347           if (!SGPRsUsed.count(LastSGPR)) {
3348             SGPRsUsed.insert(LastSGPR);
3349             ++ConstantBusUseCount;
3350           }
3351         } else { // Expression or a literal
3352 
3353           if (Desc.OpInfo[OpIdx].OperandType == MCOI::OPERAND_IMMEDIATE)
3354             continue; // special operand like VINTERP attr_chan
3355 
3356           // An instruction may use only one literal.
3357           // This has been validated on the previous step.
3358           // See validateVOPLiteral.
3359           // This literal may be used as more than one operand.
3360           // If all these operands are of the same size,
3361           // this literal counts as one scalar value.
3362           // Otherwise it counts as 2 scalar values.
3363           // See "GFX10 Shader Programming", section 3.6.2.3.
3364 
3365           unsigned Size = AMDGPU::getOperandSize(Desc, OpIdx);
3366           if (Size < 4) Size = 4;
3367 
3368           if (NumLiterals == 0) {
3369             NumLiterals = 1;
3370             LiteralSize = Size;
3371           } else if (LiteralSize != Size) {
3372             NumLiterals = 2;
3373           }
3374         }
3375       }
3376     }
3377   }
3378   ConstantBusUseCount += NumLiterals;
3379 
3380   if (ConstantBusUseCount <= getConstantBusLimit(Opcode))
3381     return true;
3382 
3383   SMLoc LitLoc = getLitLoc(Operands);
3384   SMLoc RegLoc = getRegLoc(LastSGPR, Operands);
3385   SMLoc Loc = (LitLoc.getPointer() < RegLoc.getPointer()) ? RegLoc : LitLoc;
3386   Error(Loc, "invalid operand (violates constant bus restrictions)");
3387   return false;
3388 }
3389 
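// For instructions whose vdst is early-clobber, check that no source
// register overlaps the destination.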
3390 bool
3391 AMDGPUAsmParser::validateEarlyClobberLimitations(const MCInst &Inst,
3392                                                  const OperandVector &Operands) {
3393   const unsigned Opcode = Inst.getOpcode();
3394   const MCInstrDesc &Desc = MII.get(Opcode);
3395 
3396   const int DstIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vdst);
3397   if (DstIdx == -1 ||
3398       Desc.getOperandConstraint(DstIdx, MCOI::EARLY_CLOBBER) == -1) {
3399     return true;
3400   }
3401 
3402   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3403 
3404   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
3405   const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
3406   const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
3407 
3408   assert(DstIdx != -1);
3409   const MCOperand &Dst = Inst.getOperand(DstIdx);
3410   assert(Dst.isReg());
3411 
3412   const int SrcIndices[] = { Src0Idx, Src1Idx, Src2Idx };
3413 
3414   for (int SrcIdx : SrcIndices) {
3415     if (SrcIdx == -1) break;
3416     const MCOperand &Src = Inst.getOperand(SrcIdx);
3417     if (Src.isReg()) {
3418       if (TRI->regsOverlap(Dst.getReg(), Src.getReg())) {
3419         const unsigned SrcReg = mc2PseudoReg(Src.getReg());
3420         Error(getRegLoc(SrcReg, Operands),
3421           "destination must be different than all sources");
3422         return false;
3423       }
3424     }
3425   }
3426 
3427   return true;
3428 }
3429 
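// On targets without integer clamp support, the clamp modifier of integer
// instructions must be zero.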
3430 bool AMDGPUAsmParser::validateIntClampSupported(const MCInst &Inst) {
3431 
3432   const unsigned Opc = Inst.getOpcode();
3433   const MCInstrDesc &Desc = MII.get(Opc);
3434 
3435   if ((Desc.TSFlags & SIInstrFlags::IntClamp) != 0 && !hasIntClamp()) {
3436     int ClampIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp);
3437     assert(ClampIdx != -1);
3438     return Inst.getOperand(ClampIdx).getImm() == 0;
3439   }
3440 
3441   return true;
3442 }
3443 
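// Check that the MIMG vdata size matches the number of enabled dmask
// channels plus tfe, taking packed d16 into account.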
3444 bool AMDGPUAsmParser::validateMIMGDataSize(const MCInst &Inst) {
3445 
3446   const unsigned Opc = Inst.getOpcode();
3447   const MCInstrDesc &Desc = MII.get(Opc);
3448 
3449   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3450     return true;
3451 
3452   int VDataIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdata);
3453   int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
3454   int TFEIdx   = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::tfe);
3455 
3456   assert(VDataIdx != -1);
3457 
3458   if (DMaskIdx == -1 || TFEIdx == -1) // intersect_ray
3459     return true;
3460 
3461   unsigned VDataSize = AMDGPU::getRegOperandSize(getMRI(), Desc, VDataIdx);
3462   unsigned TFESize = (TFEIdx != -1 && Inst.getOperand(TFEIdx).getImm()) ? 1 : 0;
3463   unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
3464   if (DMask == 0)
3465     DMask = 1;
3466 
3467   unsigned DataSize =
3468     (Desc.TSFlags & SIInstrFlags::Gather4) ? 4 : countPopulation(DMask);
3469   if (hasPackedD16()) {
3470     int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
3471     if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm())
3472       DataSize = (DataSize + 1) / 2;
3473   }
3474 
3475   return (VDataSize / 4) == DataSize + TFESize;
3476 }
3477 
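// On GFX10+, check that the MIMG address size matches what the dim and a16
// modifiers require, for both NSA and contiguous vaddr layouts.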
3478 bool AMDGPUAsmParser::validateMIMGAddrSize(const MCInst &Inst) {
3479   const unsigned Opc = Inst.getOpcode();
3480   const MCInstrDesc &Desc = MII.get(Opc);
3481 
3482   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0 || !isGFX10Plus())
3483     return true;
3484 
3485   const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc);
3486 
3487   const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
3488       AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode);
3489   int VAddr0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr0);
3490   int SrsrcIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::srsrc);
3491   int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
3492   int A16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::a16);
3493 
3494   assert(VAddr0Idx != -1);
3495   assert(SrsrcIdx != -1);
3496   assert(SrsrcIdx > VAddr0Idx);
3497 
3498   if (DimIdx == -1)
3499     return true; // intersect_ray
3500 
3501   unsigned Dim = Inst.getOperand(DimIdx).getImm();
3502   const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim);
3503   bool IsNSA = SrsrcIdx - VAddr0Idx > 1;
3504   unsigned ActualAddrSize =
3505       IsNSA ? SrsrcIdx - VAddr0Idx
3506             : AMDGPU::getRegOperandSize(getMRI(), Desc, VAddr0Idx) / 4;
3507   bool IsA16 = (A16Idx != -1 && Inst.getOperand(A16Idx).getImm());
3508 
3509   unsigned ExpectedAddrSize =
3510       AMDGPU::getAddrSizeMIMGOp(BaseOpcode, DimInfo, IsA16, hasG16());
3511 
3512   if (!IsNSA) {
3513     if (ExpectedAddrSize > 8)
3514       ExpectedAddrSize = 16;
3515 
3516     // Allow oversized 8 VGPR vaddr when only 5/6/7 VGPRs are required.
3517     // This provides backward compatibility for assembly created
3518     // before 160b/192b/224b types were directly supported.
3519     if (ActualAddrSize == 8 && (ExpectedAddrSize >= 5 && ExpectedAddrSize <= 7))
3520       return true;
3521   }
3522 
3523   return ActualAddrSize == ExpectedAddrSize;
3524 }
3525 
3526 bool AMDGPUAsmParser::validateMIMGAtomicDMask(const MCInst &Inst) {
3527 
3528   const unsigned Opc = Inst.getOpcode();
3529   const MCInstrDesc &Desc = MII.get(Opc);
3530 
3531   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3532     return true;
3533   if (!Desc.mayLoad() || !Desc.mayStore())
3534     return true; // Not atomic
3535 
3536   int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
3537   unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
3538 
3539   // This is an incomplete check because image_atomic_cmpswap
3540   // may only use 0x3 and 0xf while other atomic operations
3541   // may use 0x1 and 0x3. However these limitations are
3542   // verified when we check that dmask matches dst size.
3543   return DMask == 0x1 || DMask == 0x3 || DMask == 0xf;
3544 }
3545 
3546 bool AMDGPUAsmParser::validateMIMGGatherDMask(const MCInst &Inst) {
3547 
3548   const unsigned Opc = Inst.getOpcode();
3549   const MCInstrDesc &Desc = MII.get(Opc);
3550 
3551   if ((Desc.TSFlags & SIInstrFlags::Gather4) == 0)
3552     return true;
3553 
3554   int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
3555   unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
3556 
3557   // GATHER4 instructions use dmask in a different fashion compared to
3558   // other MIMG instructions. The only useful DMASK values are
3559   // 1=red, 2=green, 4=blue, 8=alpha. (e.g. 1 returns
3560   // (red,red,red,red) etc.) The ISA document doesn't mention
3561   // this.
3562   return DMask == 0x1 || DMask == 0x2 || DMask == 0x4 || DMask == 0x8;
3563 }
3564 
3565 bool AMDGPUAsmParser::validateMIMGMSAA(const MCInst &Inst) {
3566   const unsigned Opc = Inst.getOpcode();
3567   const MCInstrDesc &Desc = MII.get(Opc);
3568 
3569   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3570     return true;
3571 
3572   const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc);
3573   const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
3574       AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode);
3575 
3576   if (!BaseOpcode->MSAA)
3577     return true;
3578 
3579   int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
3580   assert(DimIdx != -1);
3581 
3582   unsigned Dim = Inst.getOperand(DimIdx).getImm();
3583   const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim);
3584 
3585   return DimInfo->MSAA;
3586 }
3587 
3588 static bool IsMovrelsSDWAOpcode(const unsigned Opcode)
3589 {
3590   switch (Opcode) {
3591   case AMDGPU::V_MOVRELS_B32_sdwa_gfx10:
3592   case AMDGPU::V_MOVRELSD_B32_sdwa_gfx10:
3593   case AMDGPU::V_MOVRELSD_2_B32_sdwa_gfx10:
3594     return true;
3595   default:
3596     return false;
3597   }
3598 }
3599 
3600 // movrels* opcodes should only allow VGPRs as src0.
3601 // This is specified in .td description for vop1/vop3,
3602 // but sdwa is handled differently. See isSDWAOperand.
3603 bool AMDGPUAsmParser::validateMovrels(const MCInst &Inst,
3604                                       const OperandVector &Operands) {
3605 
3606   const unsigned Opc = Inst.getOpcode();
3607   const MCInstrDesc &Desc = MII.get(Opc);
3608 
3609   if ((Desc.TSFlags & SIInstrFlags::SDWA) == 0 || !IsMovrelsSDWAOpcode(Opc))
3610     return true;
3611 
3612   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
3613   assert(Src0Idx != -1);
3614 
3615   SMLoc ErrLoc;
3616   const MCOperand &Src0 = Inst.getOperand(Src0Idx);
3617   if (Src0.isReg()) {
3618     auto Reg = mc2PseudoReg(Src0.getReg());
3619     const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3620     if (!isSGPR(Reg, TRI))
3621       return true;
3622     ErrLoc = getRegLoc(Reg, Operands);
3623   } else {
3624     ErrLoc = getConstLoc(Operands);
3625   }
3626 
3627   Error(ErrLoc, "source operand must be a VGPR");
3628   return false;
3629 }
3630 
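// v_accvgpr_write accepts only VGPRs and inline constants as src0;
// reject SGPR sources.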
3631 bool AMDGPUAsmParser::validateMAIAccWrite(const MCInst &Inst,
3632                                           const OperandVector &Operands) {
3633 
3634   const unsigned Opc = Inst.getOpcode();
3635 
3636   if (Opc != AMDGPU::V_ACCVGPR_WRITE_B32_vi)
3637     return true;
3638 
3639   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
3640   assert(Src0Idx != -1);
3641 
3642   const MCOperand &Src0 = Inst.getOperand(Src0Idx);
3643   if (!Src0.isReg())
3644     return true;
3645 
3646   auto Reg = mc2PseudoReg(Src0.getReg());
3647   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3648   if (isSGPR(Reg, TRI)) {
3649     Error(getRegLoc(Reg, Operands),
3650           "source operand must be either a VGPR or an inline constant");
3651     return false;
3652   }
3653 
3654   return true;
3655 }
3656 
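// For MFMA instructions with a result wider than 128 bits, src2 must either
// equal the destination register or not overlap it at all.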
3657 bool AMDGPUAsmParser::validateMFMA(const MCInst &Inst,
3658                                    const OperandVector &Operands) {
3659   const unsigned Opc = Inst.getOpcode();
3660   const MCInstrDesc &Desc = MII.get(Opc);
3661 
3662   if ((Desc.TSFlags & SIInstrFlags::IsMAI) == 0)
3663     return true;
3664 
3665   const int Src2Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2);
3666   if (Src2Idx == -1)
3667     return true;
3668 
3669   const MCOperand &Src2 = Inst.getOperand(Src2Idx);
3670   if (!Src2.isReg())
3671     return true;
3672 
3673   MCRegister Src2Reg = Src2.getReg();
3674   MCRegister DstReg = Inst.getOperand(0).getReg();
3675   if (Src2Reg == DstReg)
3676     return true;
3677 
3678   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3679   if (TRI->getRegClass(Desc.OpInfo[0].RegClass).getSizeInBits() <= 128)
3680     return true;
3681 
3682   if (TRI->regsOverlap(Src2Reg, DstReg)) {
3683     Error(getRegLoc(mc2PseudoReg(Src2Reg), Operands),
3684           "source 2 operand must not partially overlap with dst");
3685     return false;
3686   }
3687 
3688   return true;
3689 }
3690 
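// v_div_scale_* (VOP3B) instructions do not accept the abs source modifier.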
3691 bool AMDGPUAsmParser::validateDivScale(const MCInst &Inst) {
3692   switch (Inst.getOpcode()) {
3693   default:
3694     return true;
3695   case V_DIV_SCALE_F32_gfx6_gfx7:
3696   case V_DIV_SCALE_F32_vi:
3697   case V_DIV_SCALE_F32_gfx10:
3698   case V_DIV_SCALE_F64_gfx6_gfx7:
3699   case V_DIV_SCALE_F64_vi:
3700   case V_DIV_SCALE_F64_gfx10:
3701     break;
3702   }
3703 
3704   // TODO: Check that src0 = src1 or src2.
3705 
3706   for (auto Name : {AMDGPU::OpName::src0_modifiers,
3707                     AMDGPU::OpName::src1_modifiers,
3708                     AMDGPU::OpName::src2_modifiers}) {
3709     if (Inst.getOperand(AMDGPU::getNamedOperandIdx(Inst.getOpcode(), Name))
3710             .getImm() &
3711         SISrcMods::ABS) {
3712       return false;
3713     }
3714   }
3715 
3716   return true;
3717 }
3718 
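// The MIMG d16 modifier is not supported on SI/CI.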
3719 bool AMDGPUAsmParser::validateMIMGD16(const MCInst &Inst) {
3720 
3721   const unsigned Opc = Inst.getOpcode();
3722   const MCInstrDesc &Desc = MII.get(Opc);
3723 
3724   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3725     return true;
3726 
3727   int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
3728   if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm()) {
3729     if (isCI() || isSI())
3730       return false;
3731   }
3732 
3733   return true;
3734 }
3735 
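// The MIMG dim operand, when present, must encode one of the eight valid
// dimensions.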
3736 bool AMDGPUAsmParser::validateMIMGDim(const MCInst &Inst) {
3737   const unsigned Opc = Inst.getOpcode();
3738   const MCInstrDesc &Desc = MII.get(Opc);
3739 
3740   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3741     return true;
3742 
3743   int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
3744   if (DimIdx < 0)
3745     return true;
3746 
3747   long Imm = Inst.getOperand(DimIdx).getImm();
3748   if (Imm < 0 || Imm >= 8)
3749     return false;
3750 
3751   return true;
3752 }
3753 
3754 static bool IsRevOpcode(const unsigned Opcode)
3755 {
3756   switch (Opcode) {
3757   case AMDGPU::V_SUBREV_F32_e32:
3758   case AMDGPU::V_SUBREV_F32_e64:
3759   case AMDGPU::V_SUBREV_F32_e32_gfx10:
3760   case AMDGPU::V_SUBREV_F32_e32_gfx6_gfx7:
3761   case AMDGPU::V_SUBREV_F32_e32_vi:
3762   case AMDGPU::V_SUBREV_F32_e64_gfx10:
3763   case AMDGPU::V_SUBREV_F32_e64_gfx6_gfx7:
3764   case AMDGPU::V_SUBREV_F32_e64_vi:
3765 
3766   case AMDGPU::V_SUBREV_CO_U32_e32:
3767   case AMDGPU::V_SUBREV_CO_U32_e64:
3768   case AMDGPU::V_SUBREV_I32_e32_gfx6_gfx7:
3769   case AMDGPU::V_SUBREV_I32_e64_gfx6_gfx7:
3770 
3771   case AMDGPU::V_SUBBREV_U32_e32:
3772   case AMDGPU::V_SUBBREV_U32_e64:
3773   case AMDGPU::V_SUBBREV_U32_e32_gfx6_gfx7:
3774   case AMDGPU::V_SUBBREV_U32_e32_vi:
3775   case AMDGPU::V_SUBBREV_U32_e64_gfx6_gfx7:
3776   case AMDGPU::V_SUBBREV_U32_e64_vi:
3777 
3778   case AMDGPU::V_SUBREV_U32_e32:
3779   case AMDGPU::V_SUBREV_U32_e64:
3780   case AMDGPU::V_SUBREV_U32_e32_gfx9:
3781   case AMDGPU::V_SUBREV_U32_e32_vi:
3782   case AMDGPU::V_SUBREV_U32_e64_gfx9:
3783   case AMDGPU::V_SUBREV_U32_e64_vi:
3784 
3785   case AMDGPU::V_SUBREV_F16_e32:
3786   case AMDGPU::V_SUBREV_F16_e64:
3787   case AMDGPU::V_SUBREV_F16_e32_gfx10:
3788   case AMDGPU::V_SUBREV_F16_e32_vi:
3789   case AMDGPU::V_SUBREV_F16_e64_gfx10:
3790   case AMDGPU::V_SUBREV_F16_e64_vi:
3791 
3792   case AMDGPU::V_SUBREV_U16_e32:
3793   case AMDGPU::V_SUBREV_U16_e64:
3794   case AMDGPU::V_SUBREV_U16_e32_vi:
3795   case AMDGPU::V_SUBREV_U16_e64_vi:
3796 
3797   case AMDGPU::V_SUBREV_CO_U32_e32_gfx9:
3798   case AMDGPU::V_SUBREV_CO_U32_e64_gfx10:
3799   case AMDGPU::V_SUBREV_CO_U32_e64_gfx9:
3800 
3801   case AMDGPU::V_SUBBREV_CO_U32_e32_gfx9:
3802   case AMDGPU::V_SUBBREV_CO_U32_e64_gfx9:
3803 
3804   case AMDGPU::V_SUBREV_NC_U32_e32_gfx10:
3805   case AMDGPU::V_SUBREV_NC_U32_e64_gfx10:
3806 
3807   case AMDGPU::V_SUBREV_CO_CI_U32_e32_gfx10:
3808   case AMDGPU::V_SUBREV_CO_CI_U32_e64_gfx10:
3809 
3810   case AMDGPU::V_LSHRREV_B32_e32:
3811   case AMDGPU::V_LSHRREV_B32_e64:
3812   case AMDGPU::V_LSHRREV_B32_e32_gfx6_gfx7:
3813   case AMDGPU::V_LSHRREV_B32_e64_gfx6_gfx7:
3814   case AMDGPU::V_LSHRREV_B32_e32_vi:
3815   case AMDGPU::V_LSHRREV_B32_e64_vi:
3816   case AMDGPU::V_LSHRREV_B32_e32_gfx10:
3817   case AMDGPU::V_LSHRREV_B32_e64_gfx10:
3818 
3819   case AMDGPU::V_ASHRREV_I32_e32:
3820   case AMDGPU::V_ASHRREV_I32_e64:
3821   case AMDGPU::V_ASHRREV_I32_e32_gfx10:
3822   case AMDGPU::V_ASHRREV_I32_e32_gfx6_gfx7:
3823   case AMDGPU::V_ASHRREV_I32_e32_vi:
3824   case AMDGPU::V_ASHRREV_I32_e64_gfx10:
3825   case AMDGPU::V_ASHRREV_I32_e64_gfx6_gfx7:
3826   case AMDGPU::V_ASHRREV_I32_e64_vi:
3827 
3828   case AMDGPU::V_LSHLREV_B32_e32:
3829   case AMDGPU::V_LSHLREV_B32_e64:
3830   case AMDGPU::V_LSHLREV_B32_e32_gfx10:
3831   case AMDGPU::V_LSHLREV_B32_e32_gfx6_gfx7:
3832   case AMDGPU::V_LSHLREV_B32_e32_vi:
3833   case AMDGPU::V_LSHLREV_B32_e64_gfx10:
3834   case AMDGPU::V_LSHLREV_B32_e64_gfx6_gfx7:
3835   case AMDGPU::V_LSHLREV_B32_e64_vi:
3836 
3837   case AMDGPU::V_LSHLREV_B16_e32:
3838   case AMDGPU::V_LSHLREV_B16_e64:
3839   case AMDGPU::V_LSHLREV_B16_e32_vi:
3840   case AMDGPU::V_LSHLREV_B16_e64_vi:
3841   case AMDGPU::V_LSHLREV_B16_gfx10:
3842 
3843   case AMDGPU::V_LSHRREV_B16_e32:
3844   case AMDGPU::V_LSHRREV_B16_e64:
3845   case AMDGPU::V_LSHRREV_B16_e32_vi:
3846   case AMDGPU::V_LSHRREV_B16_e64_vi:
3847   case AMDGPU::V_LSHRREV_B16_gfx10:
3848 
3849   case AMDGPU::V_ASHRREV_I16_e32:
3850   case AMDGPU::V_ASHRREV_I16_e64:
3851   case AMDGPU::V_ASHRREV_I16_e32_vi:
3852   case AMDGPU::V_ASHRREV_I16_e64_vi:
3853   case AMDGPU::V_ASHRREV_I16_gfx10:
3854 
3855   case AMDGPU::V_LSHLREV_B64_e64:
3856   case AMDGPU::V_LSHLREV_B64_gfx10:
3857   case AMDGPU::V_LSHLREV_B64_vi:
3858 
3859   case AMDGPU::V_LSHRREV_B64_e64:
3860   case AMDGPU::V_LSHRREV_B64_gfx10:
3861   case AMDGPU::V_LSHRREV_B64_vi:
3862 
3863   case AMDGPU::V_ASHRREV_I64_e64:
3864   case AMDGPU::V_ASHRREV_I64_gfx10:
3865   case AMDGPU::V_ASHRREV_I64_vi:
3866 
3867   case AMDGPU::V_PK_LSHLREV_B16:
3868   case AMDGPU::V_PK_LSHLREV_B16_gfx10:
3869   case AMDGPU::V_PK_LSHLREV_B16_vi:
3870 
3871   case AMDGPU::V_PK_LSHRREV_B16:
3872   case AMDGPU::V_PK_LSHRREV_B16_gfx10:
3873   case AMDGPU::V_PK_LSHRREV_B16_vi:
3874   case AMDGPU::V_PK_ASHRREV_I16:
3875   case AMDGPU::V_PK_ASHRREV_I16_gfx10:
3876   case AMDGPU::V_PK_ASHRREV_I16_vi:
3877     return true;
3878   default:
3879     return false;
3880   }
3881 }
3882 
3883 Optional<StringRef> AMDGPUAsmParser::validateLdsDirect(const MCInst &Inst) {
3884 
3885   using namespace SIInstrFlags;
3886   const unsigned Opcode = Inst.getOpcode();
3887   const MCInstrDesc &Desc = MII.get(Opcode);
3888 
3889   // lds_direct register is defined so that it can be used
3890   // with 9-bit operands only. Ignore encodings which do not accept these.
3891   const auto Enc = VOP1 | VOP2 | VOP3 | VOPC | VOP3P | SIInstrFlags::SDWA;
3892   if ((Desc.TSFlags & Enc) == 0)
3893     return None;
3894 
3895   for (auto SrcName : {OpName::src0, OpName::src1, OpName::src2}) {
3896     auto SrcIdx = getNamedOperandIdx(Opcode, SrcName);
3897     if (SrcIdx == -1)
3898       break;
3899     const auto &Src = Inst.getOperand(SrcIdx);
3900     if (Src.isReg() && Src.getReg() == LDS_DIRECT) {
3901 
3902       if (isGFX90A())
3903         return StringRef("lds_direct is not supported on this GPU");
3904 
3905       if (IsRevOpcode(Opcode) || (Desc.TSFlags & SIInstrFlags::SDWA))
3906         return StringRef("lds_direct cannot be used with this instruction");
3907 
3908       if (SrcName != OpName::src0)
3909         return StringRef("lds_direct may be used as src0 only");
3910     }
3911   }
3912 
3913   return None;
3914 }
3915 
3916 SMLoc AMDGPUAsmParser::getFlatOffsetLoc(const OperandVector &Operands) const {
3917   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
3918     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
3919     if (Op.isFlatOffset())
3920       return Op.getStartLoc();
3921   }
3922   return getLoc();
3923 }
3924 
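// Check that a FLAT offset is supported on this target and fits the
// encoding: signed for global/scratch segments, unsigned otherwise.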
3925 bool AMDGPUAsmParser::validateFlatOffset(const MCInst &Inst,
3926                                          const OperandVector &Operands) {
3927   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
3928   if ((TSFlags & SIInstrFlags::FLAT) == 0)
3929     return true;
3930 
3931   auto Opcode = Inst.getOpcode();
3932   auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
3933   assert(OpNum != -1);
3934 
3935   const auto &Op = Inst.getOperand(OpNum);
3936   if (!hasFlatOffsets() && Op.getImm() != 0) {
3937     Error(getFlatOffsetLoc(Operands),
3938           "flat offset modifier is not supported on this GPU");
3939     return false;
3940   }
3941 
3942   // For FLAT segment the offset must be positive;
3943   // MSB is ignored and forced to zero.
3944   if (TSFlags & (SIInstrFlags::FlatGlobal | SIInstrFlags::FlatScratch)) {
3945     unsigned OffsetSize = AMDGPU::getNumFlatOffsetBits(getSTI(), true);
3946     if (!isIntN(OffsetSize, Op.getImm())) {
3947       Error(getFlatOffsetLoc(Operands),
3948             Twine("expected a ") + Twine(OffsetSize) + "-bit signed offset");
3949       return false;
3950     }
3951   } else {
3952     unsigned OffsetSize = AMDGPU::getNumFlatOffsetBits(getSTI(), false);
3953     if (!isUIntN(OffsetSize, Op.getImm())) {
3954       Error(getFlatOffsetLoc(Operands),
3955             Twine("expected a ") + Twine(OffsetSize) + "-bit unsigned offset");
3956       return false;
3957     }
3958   }
3959 
3960   return true;
3961 }
3962 
3963 SMLoc AMDGPUAsmParser::getSMEMOffsetLoc(const OperandVector &Operands) const {
3964   // Start with second operand because SMEM Offset cannot be dst or src0.
3965   for (unsigned i = 2, e = Operands.size(); i != e; ++i) {
3966     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
3967     if (Op.isSMEMOffset())
3968       return Op.getStartLoc();
3969   }
3970   return getLoc();
3971 }
3972 
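// Check that an SMEM offset fits the encoding for this target: a 20-bit
// unsigned value on VI and for buffers, otherwise a 21-bit signed value.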
3973 bool AMDGPUAsmParser::validateSMEMOffset(const MCInst &Inst,
3974                                          const OperandVector &Operands) {
3975   if (isCI() || isSI())
3976     return true;
3977 
3978   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
3979   if ((TSFlags & SIInstrFlags::SMRD) == 0)
3980     return true;
3981 
3982   auto Opcode = Inst.getOpcode();
3983   auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
3984   if (OpNum == -1)
3985     return true;
3986 
3987   const auto &Op = Inst.getOperand(OpNum);
3988   if (!Op.isImm())
3989     return true;
3990 
3991   uint64_t Offset = Op.getImm();
3992   bool IsBuffer = AMDGPU::getSMEMIsBuffer(Opcode);
3993   if (AMDGPU::isLegalSMRDEncodedUnsignedOffset(getSTI(), Offset) ||
3994       AMDGPU::isLegalSMRDEncodedSignedOffset(getSTI(), Offset, IsBuffer))
3995     return true;
3996 
3997   Error(getSMEMOffsetLoc(Operands),
3998         (isVI() || IsBuffer) ? "expected a 20-bit unsigned offset" :
3999                                "expected a 21-bit signed offset");
4000 
4001   return false;
4002 }
4003 
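// SOP2/SOPC instructions may use at most one literal or expression operand.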
4004 bool AMDGPUAsmParser::validateSOPLiteral(const MCInst &Inst) const {
4005   unsigned Opcode = Inst.getOpcode();
4006   const MCInstrDesc &Desc = MII.get(Opcode);
4007   if (!(Desc.TSFlags & (SIInstrFlags::SOP2 | SIInstrFlags::SOPC)))
4008     return true;
4009 
4010   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
4011   const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
4012 
4013   const int OpIndices[] = { Src0Idx, Src1Idx };
4014 
4015   unsigned NumExprs = 0;
4016   unsigned NumLiterals = 0;
4017   uint32_t LiteralValue;
4018 
4019   for (int OpIdx : OpIndices) {
4020     if (OpIdx == -1) break;
4021 
4022     const MCOperand &MO = Inst.getOperand(OpIdx);
4023     // Exclude special imm operands (like that used by s_set_gpr_idx_on)
4024     if (AMDGPU::isSISrcOperand(Desc, OpIdx)) {
4025       if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) {
4026         uint32_t Value = static_cast<uint32_t>(MO.getImm());
4027         if (NumLiterals == 0 || LiteralValue != Value) {
4028           LiteralValue = Value;
4029           ++NumLiterals;
4030         }
4031       } else if (MO.isExpr()) {
4032         ++NumExprs;
4033       }
4034     }
4035   }
4036 
4037   return NumLiterals + NumExprs <= 1;
4038 }
4039 
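// Check op_sel restrictions: v_permlane16/x16 may use only the low two
// bits, and GFX940 DOT instructions must use the default op_sel/op_sel_hi.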
4040 bool AMDGPUAsmParser::validateOpSel(const MCInst &Inst) {
4041   const unsigned Opc = Inst.getOpcode();
4042   if (Opc == AMDGPU::V_PERMLANE16_B32_gfx10 ||
4043       Opc == AMDGPU::V_PERMLANEX16_B32_gfx10) {
4044     int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
4045     unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
4046 
4047     if (OpSel & ~3)
4048       return false;
4049   }
4050 
4051   if (isGFX940() && (MII.get(Opc).TSFlags & SIInstrFlags::IsDOT)) {
4052     int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
4053     if (OpSelIdx != -1) {
4054       if (Inst.getOperand(OpSelIdx).getImm() != 0)
4055         return false;
4056     }
4057     int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi);
4058     if (OpSelHiIdx != -1) {
4059       if (Inst.getOperand(OpSelHiIdx).getImm() != -1)
4060         return false;
4061     }
4062   }
4063 
4064   return true;
4065 }
4066 
4067 bool AMDGPUAsmParser::validateDPP(const MCInst &Inst,
4068                                   const OperandVector &Operands) {
4069   const unsigned Opc = Inst.getOpcode();
4070   int DppCtrlIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dpp_ctrl);
4071   if (DppCtrlIdx < 0)
4072     return true;
4073   unsigned DppCtrl = Inst.getOperand(DppCtrlIdx).getImm();
4074 
4075   if (!AMDGPU::isLegal64BitDPPControl(DppCtrl)) {
4076     // DPP64 is supported for row_newbcast only.
4077     int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
4078     if (Src0Idx >= 0 &&
4079         getMRI()->getSubReg(Inst.getOperand(Src0Idx).getReg(), AMDGPU::sub1)) {
4080       SMLoc S = getImmLoc(AMDGPUOperand::ImmTyDppCtrl, Operands);
4081       Error(S, "64 bit dpp only supports row_newbcast");
4082       return false;
4083     }
4084   }
4085 
4086   return true;
4087 }
4088 
4089 // Check if VCC register matches wavefront size
4090 bool AMDGPUAsmParser::validateVccOperand(unsigned Reg) const {
4091   auto FB = getFeatureBits();
4092   return (FB[AMDGPU::FeatureWavefrontSize64] && Reg == AMDGPU::VCC) ||
4093     (FB[AMDGPU::FeatureWavefrontSize32] && Reg == AMDGPU::VCC_LO);
4094 }
4095 
4096 // Only one unique literal may be used. A VOP3 literal is only allowed on GFX10+.
4097 bool AMDGPUAsmParser::validateVOPLiteral(const MCInst &Inst,
4098                                          const OperandVector &Operands) {
4099   unsigned Opcode = Inst.getOpcode();
4100   const MCInstrDesc &Desc = MII.get(Opcode);
4101   const int ImmIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm);
4102   if (!(Desc.TSFlags & (SIInstrFlags::VOP3 | SIInstrFlags::VOP3P)) &&
4103       ImmIdx == -1)
4104     return true;
4105 
4106   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
4107   const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
4108   const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
4109 
4110   const int OpIndices[] = {Src0Idx, Src1Idx, Src2Idx, ImmIdx};
4111 
4112   unsigned NumExprs = 0;
4113   unsigned NumLiterals = 0;
4114   uint32_t LiteralValue;
4115 
4116   for (int OpIdx : OpIndices) {
4117     if (OpIdx == -1)
4118       continue;
4119 
4120     const MCOperand &MO = Inst.getOperand(OpIdx);
4121     if (!MO.isImm() && !MO.isExpr())
4122       continue;
4123     if (!AMDGPU::isSISrcOperand(Desc, OpIdx))
4124       continue;
4125 
4126     if (OpIdx == Src2Idx && (Desc.TSFlags & SIInstrFlags::IsMAI) &&
4127         getFeatureBits()[AMDGPU::FeatureMFMAInlineLiteralBug]) {
4128       Error(getConstLoc(Operands),
4129             "inline constants are not allowed for this operand");
4130       return false;
4131     }
4132 
4133     if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) {
4134       uint32_t Value = static_cast<uint32_t>(MO.getImm());
4135       if (NumLiterals == 0 || LiteralValue != Value) {
4136         LiteralValue = Value;
4137         ++NumLiterals;
4138       }
4139     } else if (MO.isExpr()) {
4140       ++NumExprs;
4141     }
4142   }
4143   NumLiterals += NumExprs;
4144 
4145   if (!NumLiterals)
4146     return true;
4147 
4148   if (ImmIdx == -1 && !getFeatureBits()[AMDGPU::FeatureVOP3Literal]) {
4149     Error(getLitLoc(Operands), "literal operands are not supported");
4150     return false;
4151   }
4152 
4153   if (NumLiterals > 1) {
4154     Error(getLitLoc(Operands), "only one literal operand is allowed");
4155     return false;
4156   }
4157 
4158   return true;
4159 }
4160 
4161 // Returns -1 if not a register, 0 if VGPR and 1 if AGPR.
4162 static int IsAGPROperand(const MCInst &Inst, uint16_t NameIdx,
4163                          const MCRegisterInfo *MRI) {
4164   int OpIdx = AMDGPU::getNamedOperandIdx(Inst.getOpcode(), NameIdx);
4165   if (OpIdx < 0)
4166     return -1;
4167 
4168   const MCOperand &Op = Inst.getOperand(OpIdx);
4169   if (!Op.isReg())
4170     return -1;
4171 
4172   unsigned Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0);
4173   auto Reg = Sub ? Sub : Op.getReg();
4174   const MCRegisterClass &AGPR32 = MRI->getRegClass(AMDGPU::AGPR_32RegClassID);
4175   return AGPR32.contains(Reg) ? 1 : 0;
4176 }
4177 
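// For memory instructions, data and dst operands must be uniformly VGPRs or
// uniformly AGPRs; AGPR forms are only allowed with gfx90a.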
4178 bool AMDGPUAsmParser::validateAGPRLdSt(const MCInst &Inst) const {
4179   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4180   if ((TSFlags & (SIInstrFlags::FLAT | SIInstrFlags::MUBUF |
4181                   SIInstrFlags::MTBUF | SIInstrFlags::MIMG |
4182                   SIInstrFlags::DS)) == 0)
4183     return true;
4184 
4185   uint16_t DataNameIdx = (TSFlags & SIInstrFlags::DS) ? AMDGPU::OpName::data0
4186                                                       : AMDGPU::OpName::vdata;
4187 
4188   const MCRegisterInfo *MRI = getMRI();
4189   int DstAreg = IsAGPROperand(Inst, AMDGPU::OpName::vdst, MRI);
4190   int DataAreg = IsAGPROperand(Inst, DataNameIdx, MRI);
4191 
4192   if ((TSFlags & SIInstrFlags::DS) && DataAreg >= 0) {
4193     int Data2Areg = IsAGPROperand(Inst, AMDGPU::OpName::data1, MRI);
4194     if (Data2Areg >= 0 && Data2Areg != DataAreg)
4195       return false;
4196   }
4197 
4198   auto FB = getFeatureBits();
4199   if (FB[AMDGPU::FeatureGFX90AInsts]) {
4200     if (DataAreg < 0 || DstAreg < 0)
4201       return true;
4202     return DstAreg == DataAreg;
4203   }
4204 
4205   return DstAreg < 1 && DataAreg < 1;
4206 }
4207 
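// On gfx90a, VGPR and AGPR tuples must start at an even-numbered register.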
4208 bool AMDGPUAsmParser::validateVGPRAlign(const MCInst &Inst) const {
4209   auto FB = getFeatureBits();
4210   if (!FB[AMDGPU::FeatureGFX90AInsts])
4211     return true;
4212 
4213   const MCRegisterInfo *MRI = getMRI();
4214   const MCRegisterClass &VGPR32 = MRI->getRegClass(AMDGPU::VGPR_32RegClassID);
4215   const MCRegisterClass &AGPR32 = MRI->getRegClass(AMDGPU::AGPR_32RegClassID);
4216   for (unsigned I = 0, E = Inst.getNumOperands(); I != E; ++I) {
4217     const MCOperand &Op = Inst.getOperand(I);
4218     if (!Op.isReg())
4219       continue;
4220 
4221     unsigned Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0);
4222     if (!Sub)
4223       continue;
4224 
4225     if (VGPR32.contains(Sub) && ((Sub - AMDGPU::VGPR0) & 1))
4226       return false;
4227     if (AGPR32.contains(Sub) && ((Sub - AMDGPU::AGPR0) & 1))
4228       return false;
4229   }
4230 
4231   return true;
4232 }
4233 
4234 // gfx90a has an undocumented limitation:
4235 // DS_GWS opcodes must use even aligned registers.
4236 bool AMDGPUAsmParser::validateGWS(const MCInst &Inst,
4237                                   const OperandVector &Operands) {
4238   if (!getFeatureBits()[AMDGPU::FeatureGFX90AInsts])
4239     return true;
4240 
4241   int Opc = Inst.getOpcode();
4242   if (Opc != AMDGPU::DS_GWS_INIT_vi && Opc != AMDGPU::DS_GWS_BARRIER_vi &&
4243       Opc != AMDGPU::DS_GWS_SEMA_BR_vi)
4244     return true;
4245 
4246   const MCRegisterInfo *MRI = getMRI();
4247   const MCRegisterClass &VGPR32 = MRI->getRegClass(AMDGPU::VGPR_32RegClassID);
4248   int Data0Pos =
4249       AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::data0);
4250   assert(Data0Pos != -1);
4251   auto Reg = Inst.getOperand(Data0Pos).getReg();
4252   auto RegIdx = Reg - (VGPR32.contains(Reg) ? AMDGPU::VGPR0 : AMDGPU::AGPR0);
4253   if (RegIdx & 1) {
4254     SMLoc RegLoc = getRegLoc(Reg, Operands);
4255     Error(RegLoc, "vgpr must be even aligned");
4256     return false;
4257   }
4258 
4259   return true;
4260 }
4261 
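// Validate the cache policy (cpol) operand: SMRD accepts only glc and dlc,
// scc is rejected on gfx90a (other than gfx940), and returning atomics must
// set glc/sc0 while non-returning atomics must not.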
4262 bool AMDGPUAsmParser::validateCoherencyBits(const MCInst &Inst,
4263                                             const OperandVector &Operands,
4264                                             const SMLoc &IDLoc) {
4265   int CPolPos = AMDGPU::getNamedOperandIdx(Inst.getOpcode(),
4266                                            AMDGPU::OpName::cpol);
4267   if (CPolPos == -1)
4268     return true;
4269 
4270   unsigned CPol = Inst.getOperand(CPolPos).getImm();
4271 
4272   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4273   if ((TSFlags & (SIInstrFlags::SMRD)) &&
4274       (CPol & ~(AMDGPU::CPol::GLC | AMDGPU::CPol::DLC))) {
4275     Error(IDLoc, "invalid cache policy for SMRD instruction");
4276     return false;
4277   }
4278 
4279   if (isGFX90A() && !isGFX940() && (CPol & CPol::SCC)) {
4280     SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
4281     StringRef CStr(S.getPointer());
4282     S = SMLoc::getFromPointer(&CStr.data()[CStr.find("scc")]);
4283     Error(S, "scc is not supported on this GPU");
4284     return false;
4285   }
4286 
4287   if (!(TSFlags & (SIInstrFlags::IsAtomicNoRet | SIInstrFlags::IsAtomicRet)))
4288     return true;
4289 
4290   if (TSFlags & SIInstrFlags::IsAtomicRet) {
4291     if (!(TSFlags & SIInstrFlags::MIMG) && !(CPol & CPol::GLC)) {
4292       Error(IDLoc, isGFX940() ? "instruction must use sc0"
4293                               : "instruction must use glc");
4294       return false;
4295     }
4296   } else {
4297     if (CPol & CPol::GLC) {
4298       SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
4299       StringRef CStr(S.getPointer());
4300       S = SMLoc::getFromPointer(&CStr.data()[CStr.find("glc")]);
4301       Error(S, isGFX940() ? "instruction must not use sc0"
4302                           : "instruction must not use glc");
4303       return false;
4304     }
4305   }
4306 
4307   return true;
4308 }
4309 
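// Run all target-specific semantic checks on a matched instruction and
// report a diagnostic if any of them fails.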
4310 bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst,
4311                                           const SMLoc &IDLoc,
4312                                           const OperandVector &Operands) {
4313   if (auto ErrMsg = validateLdsDirect(Inst)) {
4314     Error(getRegLoc(LDS_DIRECT, Operands), *ErrMsg);
4315     return false;
4316   }
4317   if (!validateSOPLiteral(Inst)) {
4318     Error(getLitLoc(Operands),
4319       "only one literal operand is allowed");
4320     return false;
4321   }
4322   if (!validateVOPLiteral(Inst, Operands)) {
4323     return false;
4324   }
4325   if (!validateConstantBusLimitations(Inst, Operands)) {
4326     return false;
4327   }
4328   if (!validateEarlyClobberLimitations(Inst, Operands)) {
4329     return false;
4330   }
4331   if (!validateIntClampSupported(Inst)) {
4332     Error(getImmLoc(AMDGPUOperand::ImmTyClampSI, Operands),
4333       "integer clamping is not supported on this GPU");
4334     return false;
4335   }
4336   if (!validateOpSel(Inst)) {
4337     Error(getImmLoc(AMDGPUOperand::ImmTyOpSel, Operands),
4338       "invalid op_sel operand");
4339     return false;
4340   }
4341   if (!validateDPP(Inst, Operands)) {
4342     return false;
4343   }
4344   // For MUBUF/MTBUF d16 is a part of opcode, so there is nothing to validate.
4345   if (!validateMIMGD16(Inst)) {
4346     Error(getImmLoc(AMDGPUOperand::ImmTyD16, Operands),
4347       "d16 modifier is not supported on this GPU");
4348     return false;
4349   }
4350   if (!validateMIMGDim(Inst)) {
4351     Error(IDLoc, "dim modifier is required on this GPU");
4352     return false;
4353   }
4354   if (!validateMIMGMSAA(Inst)) {
4355     Error(getImmLoc(AMDGPUOperand::ImmTyDim, Operands),
4356           "invalid dim; must be MSAA type");
4357     return false;
4358   }
4359   if (!validateMIMGDataSize(Inst)) {
4360     Error(IDLoc,
4361       "image data size does not match dmask and tfe");
4362     return false;
4363   }
4364   if (!validateMIMGAddrSize(Inst)) {
4365     Error(IDLoc,
4366       "image address size does not match dim and a16");
4367     return false;
4368   }
4369   if (!validateMIMGAtomicDMask(Inst)) {
4370     Error(getImmLoc(AMDGPUOperand::ImmTyDMask, Operands),
4371       "invalid atomic image dmask");
4372     return false;
4373   }
4374   if (!validateMIMGGatherDMask(Inst)) {
4375     Error(getImmLoc(AMDGPUOperand::ImmTyDMask, Operands),
4376       "invalid image_gather dmask: only one bit must be set");
4377     return false;
4378   }
4379   if (!validateMovrels(Inst, Operands)) {
4380     return false;
4381   }
4382   if (!validateFlatOffset(Inst, Operands)) {
4383     return false;
4384   }
4385   if (!validateSMEMOffset(Inst, Operands)) {
4386     return false;
4387   }
4388   if (!validateMAIAccWrite(Inst, Operands)) {
4389     return false;
4390   }
4391   if (!validateMFMA(Inst, Operands)) {
4392     return false;
4393   }
4394   if (!validateCoherencyBits(Inst, Operands, IDLoc)) {
4395     return false;
4396   }
4397 
4398   if (!validateAGPRLdSt(Inst)) {
4399     Error(IDLoc, getFeatureBits()[AMDGPU::FeatureGFX90AInsts]
4400     ? "invalid register class: data and dst should be all VGPR or AGPR"
4401     : "invalid register class: agpr loads and stores not supported on this GPU"
4402     );
4403     return false;
4404   }
4405   if (!validateVGPRAlign(Inst)) {
4406     Error(IDLoc,
4407       "invalid register class: vgpr tuples must be 64 bit aligned");
4408     return false;
4409   }
4410   if (!validateGWS(Inst, Operands)) {
4411     return false;
4412   }
4413 
4414   if (!validateDivScale(Inst)) {
4415     Error(IDLoc, "ABS not allowed in VOP3B instructions");
4416     return false;
4417   }
4421 
4422   return true;
4423 }
4424 
4425 static std::string AMDGPUMnemonicSpellCheck(StringRef S,
4426                                             const FeatureBitset &FBS,
4427                                             unsigned VariantID = 0);
4428 
4429 static bool AMDGPUCheckMnemonic(StringRef Mnemonic,
4430                                 const FeatureBitset &AvailableFeatures,
4431                                 unsigned VariantID);
4432 
4433 bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo,
4434                                        const FeatureBitset &FBS) {
4435   return isSupportedMnemo(Mnemo, FBS, getAllVariants());
4436 }
4437 
4438 bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo,
4439                                        const FeatureBitset &FBS,
4440                                        ArrayRef<unsigned> Variants) {
4441   for (auto Variant : Variants) {
4442     if (AMDGPUCheckMnemonic(Mnemo, FBS, Variant))
4443       return true;
4444   }
4445 
4446   return false;
4447 }
4448 
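// Report why a mnemonic failed to match: it may need a different encoding
// variant, a different GPU, or it may simply be invalid.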
4449 bool AMDGPUAsmParser::checkUnsupportedInstruction(StringRef Mnemo,
4450                                                   const SMLoc &IDLoc) {
4451   FeatureBitset FBS = ComputeAvailableFeatures(getSTI().getFeatureBits());
4452 
4453   // Check if requested instruction variant is supported.
4454   if (isSupportedMnemo(Mnemo, FBS, getMatchedVariants()))
4455     return false;
4456 
4457   // This instruction is not supported.
4458   // Clear any other pending errors because they are no longer relevant.
4459   getParser().clearPendingErrors();
4460 
4461   // Requested instruction variant is not supported.
4462   // Check if any other variants are supported.
4463   StringRef VariantName = getMatchedVariantName();
4464   if (!VariantName.empty() && isSupportedMnemo(Mnemo, FBS)) {
4465     return Error(IDLoc,
4466                  Twine(VariantName,
4467                        " variant of this instruction is not supported"));
4468   }
4469 
4470   // Finally check if this instruction is supported on any other GPU.
4471   if (isSupportedMnemo(Mnemo, FeatureBitset().set())) {
4472     return Error(IDLoc, "instruction not supported on this GPU");
4473   }
4474 
4475   // Instruction not supported on any GPU. Probably a typo.
4476   std::string Suggestion = AMDGPUMnemonicSpellCheck(Mnemo, FBS);
4477   return Error(IDLoc, "invalid instruction" + Suggestion);
4478 }
4479 
4480 bool AMDGPUAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
4481                                               OperandVector &Operands,
4482                                               MCStreamer &Out,
4483                                               uint64_t &ErrorInfo,
4484                                               bool MatchingInlineAsm) {
4485   MCInst Inst;
4486   unsigned Result = Match_Success;
4487   for (auto Variant : getMatchedVariants()) {
4488     uint64_t EI;
4489     auto R = MatchInstructionImpl(Operands, Inst, EI, MatchingInlineAsm,
4490                                   Variant);
4491     // We order match statuses from least to most specific and keep the most
4492     // specific status as the result:
4493     // Match_MnemonicFail < Match_InvalidOperand < Match_MissingFeature < Match_PreferE32
4494     if ((R == Match_Success) ||
4495         (R == Match_PreferE32) ||
4496         (R == Match_MissingFeature && Result != Match_PreferE32) ||
4497         (R == Match_InvalidOperand && Result != Match_MissingFeature
4498                                    && Result != Match_PreferE32) ||
4499         (R == Match_MnemonicFail   && Result != Match_InvalidOperand
4500                                    && Result != Match_MissingFeature
4501                                    && Result != Match_PreferE32)) {
4502       Result = R;
4503       ErrorInfo = EI;
4504     }
4505     if (R == Match_Success)
4506       break;
4507   }
4508 
4509   if (Result == Match_Success) {
4510     if (!validateInstruction(Inst, IDLoc, Operands)) {
4511       return true;
4512     }
4513     Inst.setLoc(IDLoc);
4514     Out.emitInstruction(Inst, getSTI());
4515     return false;
4516   }
4517 
4518   StringRef Mnemo = ((AMDGPUOperand &)*Operands[0]).getToken();
4519   if (checkUnsupportedInstruction(Mnemo, IDLoc)) {
4520     return true;
4521   }
4522 
4523   switch (Result) {
4524   default: break;
4525   case Match_MissingFeature:
4526     // It has been verified that the specified instruction
4527     // mnemonic is valid. A match was found but it requires
4528     // features which are not supported on this GPU.
4529     return Error(IDLoc, "operands are not valid for this GPU or mode");
4530 
4531   case Match_InvalidOperand: {
4532     SMLoc ErrorLoc = IDLoc;
4533     if (ErrorInfo != ~0ULL) {
4534       if (ErrorInfo >= Operands.size()) {
4535         return Error(IDLoc, "too few operands for instruction");
4536       }
4537       ErrorLoc = ((AMDGPUOperand &)*Operands[ErrorInfo]).getStartLoc();
4538       if (ErrorLoc == SMLoc())
4539         ErrorLoc = IDLoc;
4540     }
4541     return Error(ErrorLoc, "invalid operand for instruction");
4542   }
4543 
4544   case Match_PreferE32:
4545     return Error(IDLoc, "internal error: instruction without _e64 suffix "
4546                         "should be encoded as e32");
4547   case Match_MnemonicFail:
4548     llvm_unreachable("Invalid instructions should have been handled already");
4549   }
4550   llvm_unreachable("Implement any new match types added!");
4551 }
4552 
4553 bool AMDGPUAsmParser::ParseAsAbsoluteExpression(uint32_t &Ret) {
4554   int64_t Tmp = -1;
4555   if (!isToken(AsmToken::Integer) && !isToken(AsmToken::Identifier)) {
4556     return true;
4557   }
4558   if (getParser().parseAbsoluteExpression(Tmp)) {
4559     return true;
4560   }
4561   Ret = static_cast<uint32_t>(Tmp);
4562   return false;
4563 }
4564 
4565 bool AMDGPUAsmParser::ParseDirectiveMajorMinor(uint32_t &Major,
4566                                                uint32_t &Minor) {
4567   if (ParseAsAbsoluteExpression(Major))
4568     return TokError("invalid major version");
4569 
4570   if (!trySkipToken(AsmToken::Comma))
4571     return TokError("minor version number required, comma expected");
4572 
4573   if (ParseAsAbsoluteExpression(Minor))
4574     return TokError("invalid minor version");
4575 
4576   return false;
4577 }
4578 
4579 bool AMDGPUAsmParser::ParseDirectiveAMDGCNTarget() {
4580   if (getSTI().getTargetTriple().getArch() != Triple::amdgcn)
4581     return TokError("directive only supported for amdgcn architecture");
4582 
4583   std::string TargetIDDirective;
4584   SMLoc TargetStart = getTok().getLoc();
4585   if (getParser().parseEscapedString(TargetIDDirective))
4586     return true;
4587 
4588   SMRange TargetRange = SMRange(TargetStart, getTok().getLoc());
4589   if (getTargetStreamer().getTargetID()->toString() != TargetIDDirective)
4590     return getParser().Error(TargetRange.Start,
4591         (Twine(".amdgcn_target directive's target id ") +
4592          Twine(TargetIDDirective) +
4593          Twine(" does not match the specified target id ") +
4594          Twine(getTargetStreamer().getTargetID()->toString())).str());
4595 
4596   return false;
4597 }
4598 
4599 bool AMDGPUAsmParser::OutOfRangeError(SMRange Range) {
4600   return Error(Range.Start, "value out of range", Range);
4601 }
4602 
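// Compute the VGPR and SGPR block counts recorded in the kernel descriptor
// from the highest used registers and the enabled features.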
4603 bool AMDGPUAsmParser::calculateGPRBlocks(
4604     const FeatureBitset &Features, bool VCCUsed, bool FlatScrUsed,
4605     bool XNACKUsed, Optional<bool> EnableWavefrontSize32, unsigned NextFreeVGPR,
4606     SMRange VGPRRange, unsigned NextFreeSGPR, SMRange SGPRRange,
4607     unsigned &VGPRBlocks, unsigned &SGPRBlocks) {
4608   // TODO(scott.linder): These calculations are duplicated from
4609   // AMDGPUAsmPrinter::getSIProgramInfo and could be unified.
4610   IsaVersion Version = getIsaVersion(getSTI().getCPU());
4611 
4612   unsigned NumVGPRs = NextFreeVGPR;
4613   unsigned NumSGPRs = NextFreeSGPR;
4614 
4615   if (Version.Major >= 10)
4616     NumSGPRs = 0;
4617   else {
4618     unsigned MaxAddressableNumSGPRs =
4619         IsaInfo::getAddressableNumSGPRs(&getSTI());
4620 
4621     if (Version.Major >= 8 && !Features.test(FeatureSGPRInitBug) &&
4622         NumSGPRs > MaxAddressableNumSGPRs)
4623       return OutOfRangeError(SGPRRange);
4624 
4625     NumSGPRs +=
4626         IsaInfo::getNumExtraSGPRs(&getSTI(), VCCUsed, FlatScrUsed, XNACKUsed);
4627 
4628     if ((Version.Major <= 7 || Features.test(FeatureSGPRInitBug)) &&
4629         NumSGPRs > MaxAddressableNumSGPRs)
4630       return OutOfRangeError(SGPRRange);
4631 
4632     if (Features.test(FeatureSGPRInitBug))
4633       NumSGPRs = IsaInfo::FIXED_NUM_SGPRS_FOR_INIT_BUG;
4634   }
4635 
4636   VGPRBlocks =
4637       IsaInfo::getNumVGPRBlocks(&getSTI(), NumVGPRs, EnableWavefrontSize32);
4638   SGPRBlocks = IsaInfo::getNumSGPRBlocks(&getSTI(), NumSGPRs);
4639 
4640   return false;
4641 }
4642 
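// Parse the .amdhsa_kernel directive and its .amdhsa_* fields up to
// .end_amdhsa_kernel, filling in the kernel descriptor.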
4643 bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
4644   if (getSTI().getTargetTriple().getArch() != Triple::amdgcn)
4645     return TokError("directive only supported for amdgcn architecture");
4646 
4647   if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA)
4648     return TokError("directive only supported for amdhsa OS");
4649 
4650   StringRef KernelName;
4651   if (getParser().parseIdentifier(KernelName))
4652     return true;
4653 
4654   kernel_descriptor_t KD = getDefaultAmdhsaKernelDescriptor(&getSTI());
4655 
4656   StringSet<> Seen;
4657 
4658   IsaVersion IVersion = getIsaVersion(getSTI().getCPU());
4659 
4660   SMRange VGPRRange;
4661   uint64_t NextFreeVGPR = 0;
4662   uint64_t AccumOffset = 0;
4663   uint64_t SharedVGPRCount = 0;
4664   SMRange SGPRRange;
4665   uint64_t NextFreeSGPR = 0;
4666 
4667   // Count the number of user SGPRs implied from the enabled feature bits.
4668   unsigned ImpliedUserSGPRCount = 0;
4669 
4670   // Track if the asm explicitly contains the directive for the user SGPR
4671   // count.
4672   Optional<unsigned> ExplicitUserSGPRCount;
4673   bool ReserveVCC = true;
4674   bool ReserveFlatScr = true;
4675   Optional<bool> EnableWavefrontSize32;
4676 
4677   while (true) {
4678     while (trySkipToken(AsmToken::EndOfStatement));
4679 
4680     StringRef ID;
4681     SMRange IDRange = getTok().getLocRange();
4682     if (!parseId(ID, "expected .amdhsa_ directive or .end_amdhsa_kernel"))
4683       return true;
4684 
4685     if (ID == ".end_amdhsa_kernel")
4686       break;
4687 
4688     if (Seen.find(ID) != Seen.end())
4689       return TokError(".amdhsa_ directives cannot be repeated");
4690     Seen.insert(ID);
4691 
4692     SMLoc ValStart = getLoc();
4693     int64_t IVal;
4694     if (getParser().parseAbsoluteExpression(IVal))
4695       return true;
4696     SMLoc ValEnd = getLoc();
4697     SMRange ValRange = SMRange(ValStart, ValEnd);
4698 
4699     if (IVal < 0)
4700       return OutOfRangeError(ValRange);
4701 
4702     uint64_t Val = IVal;
4703 
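// Range-check VALUE against the width of the descriptor field ENTRY and, if it
// fits, set the corresponding bits in FIELD.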
4704 #define PARSE_BITS_ENTRY(FIELD, ENTRY, VALUE, RANGE)                           \
4705   if (!isUInt<ENTRY##_WIDTH>(VALUE))                                           \
4706     return OutOfRangeError(RANGE);                                             \
4707   AMDHSA_BITS_SET(FIELD, ENTRY, VALUE);
4708 
4709     if (ID == ".amdhsa_group_segment_fixed_size") {
4710       if (!isUInt<sizeof(KD.group_segment_fixed_size) * CHAR_BIT>(Val))
4711         return OutOfRangeError(ValRange);
4712       KD.group_segment_fixed_size = Val;
4713     } else if (ID == ".amdhsa_private_segment_fixed_size") {
4714       if (!isUInt<sizeof(KD.private_segment_fixed_size) * CHAR_BIT>(Val))
4715         return OutOfRangeError(ValRange);
4716       KD.private_segment_fixed_size = Val;
4717     } else if (ID == ".amdhsa_kernarg_size") {
4718       if (!isUInt<sizeof(KD.kernarg_size) * CHAR_BIT>(Val))
4719         return OutOfRangeError(ValRange);
4720       KD.kernarg_size = Val;
4721     } else if (ID == ".amdhsa_user_sgpr_count") {
4722       ExplicitUserSGPRCount = Val;
4723     } else if (ID == ".amdhsa_user_sgpr_private_segment_buffer") {
4724       if (hasArchitectedFlatScratch())
4725         return Error(IDRange.Start,
4726                      "directive is not supported with architected flat scratch",
4727                      IDRange);
4728       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4729                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER,
4730                        Val, ValRange);
4731       if (Val)
4732         ImpliedUserSGPRCount += 4;
4733     } else if (ID == ".amdhsa_user_sgpr_dispatch_ptr") {
4734       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4735                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR, Val,
4736                        ValRange);
4737       if (Val)
4738         ImpliedUserSGPRCount += 2;
4739     } else if (ID == ".amdhsa_user_sgpr_queue_ptr") {
4740       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4741                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR, Val,
4742                        ValRange);
4743       if (Val)
4744         ImpliedUserSGPRCount += 2;
4745     } else if (ID == ".amdhsa_user_sgpr_kernarg_segment_ptr") {
4746       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4747                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR,
4748                        Val, ValRange);
4749       if (Val)
4750         ImpliedUserSGPRCount += 2;
4751     } else if (ID == ".amdhsa_user_sgpr_dispatch_id") {
4752       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4753                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID, Val,
4754                        ValRange);
4755       if (Val)
4756         ImpliedUserSGPRCount += 2;
4757     } else if (ID == ".amdhsa_user_sgpr_flat_scratch_init") {
4758       if (hasArchitectedFlatScratch())
4759         return Error(IDRange.Start,
4760                      "directive is not supported with architected flat scratch",
4761                      IDRange);
4762       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4763                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT, Val,
4764                        ValRange);
4765       if (Val)
4766         ImpliedUserSGPRCount += 2;
4767     } else if (ID == ".amdhsa_user_sgpr_private_segment_size") {
4768       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4769                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE,
4770                        Val, ValRange);
4771       if (Val)
4772         ImpliedUserSGPRCount += 1;
4773     } else if (ID == ".amdhsa_wavefront_size32") {
4774       if (IVersion.Major < 10)
4775         return Error(IDRange.Start, "directive requires gfx10+", IDRange);
4776       EnableWavefrontSize32 = Val;
4777       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4778                        KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32,
4779                        Val, ValRange);
4780     } else if (ID == ".amdhsa_system_sgpr_private_segment_wavefront_offset") {
4781       if (hasArchitectedFlatScratch())
4782         return Error(IDRange.Start,
4783                      "directive is not supported with architected flat scratch",
4784                      IDRange);
4785       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4786                        COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, Val, ValRange);
4787     } else if (ID == ".amdhsa_enable_private_segment") {
4788       if (!hasArchitectedFlatScratch())
4789         return Error(
4790             IDRange.Start,
4791             "directive is not supported without architected flat scratch",
4792             IDRange);
4793       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4794                        COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, Val, ValRange);
4795     } else if (ID == ".amdhsa_system_sgpr_workgroup_id_x") {
4796       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4797                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, Val,
4798                        ValRange);
4799     } else if (ID == ".amdhsa_system_sgpr_workgroup_id_y") {
4800       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4801                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y, Val,
4802                        ValRange);
4803     } else if (ID == ".amdhsa_system_sgpr_workgroup_id_z") {
4804       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4805                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z, Val,
4806                        ValRange);
4807     } else if (ID == ".amdhsa_system_sgpr_workgroup_info") {
4808       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4809                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO, Val,
4810                        ValRange);
4811     } else if (ID == ".amdhsa_system_vgpr_workitem_id") {
4812       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4813                        COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID, Val,
4814                        ValRange);
4815     } else if (ID == ".amdhsa_next_free_vgpr") {
4816       VGPRRange = ValRange;
4817       NextFreeVGPR = Val;
4818     } else if (ID == ".amdhsa_next_free_sgpr") {
4819       SGPRRange = ValRange;
4820       NextFreeSGPR = Val;
4821     } else if (ID == ".amdhsa_accum_offset") {
4822       if (!isGFX90A())
4823         return Error(IDRange.Start, "directive requires gfx90a+", IDRange);
4824       AccumOffset = Val;
4825     } else if (ID == ".amdhsa_reserve_vcc") {
4826       if (!isUInt<1>(Val))
4827         return OutOfRangeError(ValRange);
4828       ReserveVCC = Val;
4829     } else if (ID == ".amdhsa_reserve_flat_scratch") {
4830       if (IVersion.Major < 7)
4831         return Error(IDRange.Start, "directive requires gfx7+", IDRange);
4832       if (hasArchitectedFlatScratch())
4833         return Error(IDRange.Start,
4834                      "directive is not supported with architected flat scratch",
4835                      IDRange);
4836       if (!isUInt<1>(Val))
4837         return OutOfRangeError(ValRange);
4838       ReserveFlatScr = Val;
4839     } else if (ID == ".amdhsa_reserve_xnack_mask") {
4840       if (IVersion.Major < 8)
4841         return Error(IDRange.Start, "directive requires gfx8+", IDRange);
4842       if (!isUInt<1>(Val))
4843         return OutOfRangeError(ValRange);
4844       if (Val != getTargetStreamer().getTargetID()->isXnackOnOrAny())
4845         return getParser().Error(IDRange.Start, ".amdhsa_reserve_xnack_mask does not match target id",
4846                                  IDRange);
4847     } else if (ID == ".amdhsa_float_round_mode_32") {
4848       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
4849                        COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32, Val, ValRange);
4850     } else if (ID == ".amdhsa_float_round_mode_16_64") {
4851       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
4852                        COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64, Val, ValRange);
4853     } else if (ID == ".amdhsa_float_denorm_mode_32") {
4854       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
4855                        COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32, Val, ValRange);
4856     } else if (ID == ".amdhsa_float_denorm_mode_16_64") {
4857       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
4858                        COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64, Val,
4859                        ValRange);
4860     } else if (ID == ".amdhsa_dx10_clamp") {
4861       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
4862                        COMPUTE_PGM_RSRC1_ENABLE_DX10_CLAMP, Val, ValRange);
4863     } else if (ID == ".amdhsa_ieee_mode") {
4864       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_ENABLE_IEEE_MODE,
4865                        Val, ValRange);
4866     } else if (ID == ".amdhsa_fp16_overflow") {
4867       if (IVersion.Major < 9)
4868         return Error(IDRange.Start, "directive requires gfx9+", IDRange);
4869       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FP16_OVFL, Val,
4870                        ValRange);
4871     } else if (ID == ".amdhsa_tg_split") {
4872       if (!isGFX90A())
4873         return Error(IDRange.Start, "directive requires gfx90a+", IDRange);
4874       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc3, COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT, Val,
4875                        ValRange);
4876     } else if (ID == ".amdhsa_workgroup_processor_mode") {
4877       if (IVersion.Major < 10)
4878         return Error(IDRange.Start, "directive requires gfx10+", IDRange);
4879       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_WGP_MODE, Val,
4880                        ValRange);
4881     } else if (ID == ".amdhsa_memory_ordered") {
4882       if (IVersion.Major < 10)
4883         return Error(IDRange.Start, "directive requires gfx10+", IDRange);
4884       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_MEM_ORDERED, Val,
4885                        ValRange);
4886     } else if (ID == ".amdhsa_forward_progress") {
4887       if (IVersion.Major < 10)
4888         return Error(IDRange.Start, "directive requires gfx10+", IDRange);
4889       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FWD_PROGRESS, Val,
4890                        ValRange);
4891     } else if (ID == ".amdhsa_shared_vgpr_count") {
4892       if (IVersion.Major < 10)
4893         return Error(IDRange.Start, "directive requires gfx10+", IDRange);
4894       SharedVGPRCount = Val;
4895       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc3,
4896                        COMPUTE_PGM_RSRC3_GFX10_SHARED_VGPR_COUNT, Val,
4897                        ValRange);
4898     } else if (ID == ".amdhsa_exception_fp_ieee_invalid_op") {
4899       PARSE_BITS_ENTRY(
4900           KD.compute_pgm_rsrc2,
4901           COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION, Val,
4902           ValRange);
4903     } else if (ID == ".amdhsa_exception_fp_denorm_src") {
4904       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4905                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE,
4906                        Val, ValRange);
4907     } else if (ID == ".amdhsa_exception_fp_ieee_div_zero") {
4908       PARSE_BITS_ENTRY(
4909           KD.compute_pgm_rsrc2,
4910           COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO, Val,
4911           ValRange);
4912     } else if (ID == ".amdhsa_exception_fp_ieee_overflow") {
4913       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4914                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW,
4915                        Val, ValRange);
4916     } else if (ID == ".amdhsa_exception_fp_ieee_underflow") {
4917       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4918                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW,
4919                        Val, ValRange);
4920     } else if (ID == ".amdhsa_exception_fp_ieee_inexact") {
4921       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4922                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT,
4923                        Val, ValRange);
4924     } else if (ID == ".amdhsa_exception_int_div_zero") {
4925       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4926                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO,
4927                        Val, ValRange);
4928     } else {
4929       return Error(IDRange.Start, "unknown .amdhsa_kernel directive", IDRange);
4930     }
4931 
4932 #undef PARSE_BITS_ENTRY
4933   }
4934 
4935   if (Seen.find(".amdhsa_next_free_vgpr") == Seen.end())
4936     return TokError(".amdhsa_next_free_vgpr directive is required");
4937 
4938   if (Seen.find(".amdhsa_next_free_sgpr") == Seen.end())
4939     return TokError(".amdhsa_next_free_sgpr directive is required");
4940 
4941   unsigned VGPRBlocks;
4942   unsigned SGPRBlocks;
4943   if (calculateGPRBlocks(getFeatureBits(), ReserveVCC, ReserveFlatScr,
4944                          getTargetStreamer().getTargetID()->isXnackOnOrAny(),
4945                          EnableWavefrontSize32, NextFreeVGPR,
4946                          VGPRRange, NextFreeSGPR, SGPRRange, VGPRBlocks,
4947                          SGPRBlocks))
4948     return true;
4949 
4950   if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_WIDTH>(
4951           VGPRBlocks))
4952     return OutOfRangeError(VGPRRange);
4953   AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
4954                   COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT, VGPRBlocks);
4955 
4956   if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_WIDTH>(
4957           SGPRBlocks))
4958     return OutOfRangeError(SGPRRange);
4959   AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
4960                   COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT,
4961                   SGPRBlocks);
4962 
4963   if (ExplicitUserSGPRCount && ImpliedUserSGPRCount > *ExplicitUserSGPRCount)
4964     return TokError(".amdhsa_user_sgpr_count smaller than implied by "
4965                     "enabled user SGPRs");
4966 
4967   unsigned UserSGPRCount =
4968       ExplicitUserSGPRCount ? *ExplicitUserSGPRCount : ImpliedUserSGPRCount;
4969 
4970   if (!isUInt<COMPUTE_PGM_RSRC2_USER_SGPR_COUNT_WIDTH>(UserSGPRCount))
4971     return TokError("too many user SGPRs enabled");
4972   AMDHSA_BITS_SET(KD.compute_pgm_rsrc2, COMPUTE_PGM_RSRC2_USER_SGPR_COUNT,
4973                   UserSGPRCount);
4974 
4975   if (isGFX90A()) {
4976     if (Seen.find(".amdhsa_accum_offset") == Seen.end())
4977       return TokError(".amdhsa_accum_offset directive is required");
4978     if (AccumOffset < 4 || AccumOffset > 256 || (AccumOffset & 3))
4979       return TokError("accum_offset should be in range [4..256] in "
4980                       "increments of 4");
4981     if (AccumOffset > alignTo(std::max((uint64_t)1, NextFreeVGPR), 4))
4982       return TokError("accum_offset exceeds total VGPR allocation");
4983     AMDHSA_BITS_SET(KD.compute_pgm_rsrc3, COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET,
4984                     (AccumOffset / 4 - 1));
4985   }
4986 
4987   if (IVersion.Major == 10) {
4988     // SharedVGPRCount < 16 checked by PARSE_BITS_ENTRY
4989     if (SharedVGPRCount && EnableWavefrontSize32 && *EnableWavefrontSize32) {
4990       return TokError("shared_vgpr_count directive not valid on "
4991                       "wavefront size 32");
4992     }
4993     if (SharedVGPRCount * 2 + VGPRBlocks > 63) {
4994       return TokError("shared_vgpr_count*2 + "
4995                       "compute_pgm_rsrc1.GRANULATED_WORKITEM_VGPR_COUNT cannot "
4996                       "exceed 63");
4997     }
4998   }
4999 
5000   getTargetStreamer().EmitAmdhsaKernelDescriptor(
5001       getSTI(), KernelName, KD, NextFreeVGPR, NextFreeSGPR, ReserveVCC,
5002       ReserveFlatScr);
5003   return false;
5004 }
5005 
5006 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectVersion() {
5007   uint32_t Major;
5008   uint32_t Minor;
5009 
5010   if (ParseDirectiveMajorMinor(Major, Minor))
5011     return true;
5012 
5013   getTargetStreamer().EmitDirectiveHSACodeObjectVersion(Major, Minor);
5014   return false;
5015 }
5016 
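/// Parse the .hsa_code_object_isa directive. With no arguments, the ISA
/// version of the current subtarget is emitted with the default "AMD"/"AMDGPU"
/// vendor and arch names; otherwise the form is (illustrative):
///   .hsa_code_object_isa <major>, <minor>, <stepping>, "<vendor>", "<arch>"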
5017 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectISA() {
5018   uint32_t Major;
5019   uint32_t Minor;
5020   uint32_t Stepping;
5021   StringRef VendorName;
5022   StringRef ArchName;
5023 
5024   // If this directive has no arguments, then use the ISA version for the
5025   // targeted GPU.
5026   if (isToken(AsmToken::EndOfStatement)) {
5027     AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
5028     getTargetStreamer().EmitDirectiveHSACodeObjectISAV2(ISA.Major, ISA.Minor,
5029                                                         ISA.Stepping,
5030                                                         "AMD", "AMDGPU");
5031     return false;
5032   }
5033 
5034   if (ParseDirectiveMajorMinor(Major, Minor))
5035     return true;
5036 
5037   if (!trySkipToken(AsmToken::Comma))
5038     return TokError("stepping version number required, comma expected");
5039 
5040   if (ParseAsAbsoluteExpression(Stepping))
5041     return TokError("invalid stepping version");
5042 
5043   if (!trySkipToken(AsmToken::Comma))
5044     return TokError("vendor name required, comma expected");
5045 
5046   if (!parseString(VendorName, "invalid vendor name"))
5047     return true;
5048 
5049   if (!trySkipToken(AsmToken::Comma))
5050     return TokError("arch name required, comma expected");
5051 
5052   if (!parseString(ArchName, "invalid arch name"))
5053     return true;
5054 
5055   getTargetStreamer().EmitDirectiveHSACodeObjectISAV2(Major, Minor, Stepping,
5056                                                       VendorName, ArchName);
5057   return false;
5058 }
5059 
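/// Parse the value of a single amd_kernel_code_t field named by ID (already
/// lexed by the caller), then apply wavefront-size and gfx10+ consistency
/// checks that the generic field parser does not perform.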
5060 bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID,
5061                                                amd_kernel_code_t &Header) {
5062   // max_scratch_backing_memory_byte_size is deprecated. Ignore it while parsing
5063   // assembly for backwards compatibility.
5064   if (ID == "max_scratch_backing_memory_byte_size") {
5065     Parser.eatToEndOfStatement();
5066     return false;
5067   }
5068 
5069   SmallString<40> ErrStr;
5070   raw_svector_ostream Err(ErrStr);
5071   if (!parseAmdKernelCodeField(ID, getParser(), Header, Err)) {
5072     return TokError(Err.str());
5073   }
5074   Lex();
5075 
5076   if (ID == "enable_wavefront_size32") {
5077     if (Header.code_properties & AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32) {
5078       if (!isGFX10Plus())
5079         return TokError("enable_wavefront_size32=1 is only allowed on GFX10+");
5080       if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32])
5081         return TokError("enable_wavefront_size32=1 requires +WavefrontSize32");
5082     } else {
5083       if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64])
5084         return TokError("enable_wavefront_size32=0 requires +WavefrontSize64");
5085     }
5086   }
5087 
5088   if (ID == "wavefront_size") {
5089     if (Header.wavefront_size == 5) {
5090       if (!isGFX10Plus())
5091         return TokError("wavefront_size=5 is only allowed on GFX10+");
5092       if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32])
5093         return TokError("wavefront_size=5 requires +WavefrontSize32");
5094     } else if (Header.wavefront_size == 6) {
5095       if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64])
5096         return TokError("wavefront_size=6 requires +WavefrontSize64");
5097     }
5098   }
5099 
5100   if (ID == "enable_wgp_mode") {
5101     if (G_00B848_WGP_MODE(Header.compute_pgm_resource_registers) &&
5102         !isGFX10Plus())
5103       return TokError("enable_wgp_mode=1 is only allowed on GFX10+");
5104   }
5105 
5106   if (ID == "enable_mem_ordered") {
5107     if (G_00B848_MEM_ORDERED(Header.compute_pgm_resource_registers) &&
5108         !isGFX10Plus())
5109       return TokError("enable_mem_ordered=1 is only allowed on GFX10+");
5110   }
5111 
5112   if (ID == "enable_fwd_progress") {
5113     if (G_00B848_FWD_PROGRESS(Header.compute_pgm_resource_registers) &&
5114         !isGFX10Plus())
5115       return TokError("enable_fwd_progress=1 is only allowed on GFX10+");
5116   }
5117 
5118   return false;
5119 }
5120 
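/// Parse an .amd_kernel_code_t ... .end_amd_kernel_code_t block. The header is
/// initialized to the target's defaults and individual fields are overridden
/// one statement at a time.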
5121 bool AMDGPUAsmParser::ParseDirectiveAMDKernelCodeT() {
5122   amd_kernel_code_t Header;
5123   AMDGPU::initDefaultAMDKernelCodeT(Header, &getSTI());
5124 
5125   while (true) {
5126     // Lex EndOfStatement.  This is in a while loop, because lexing a comment
5127     // will set the current token to EndOfStatement.
5128     while(trySkipToken(AsmToken::EndOfStatement));
5129 
5130     StringRef ID;
5131     if (!parseId(ID, "expected value identifier or .end_amd_kernel_code_t"))
5132       return true;
5133 
5134     if (ID == ".end_amd_kernel_code_t")
5135       break;
5136 
5137     if (ParseAMDKernelCodeTValue(ID, Header))
5138       return true;
5139   }
5140 
5141   getTargetStreamer().EmitAMDKernelCodeT(Header);
5142 
5143   return false;
5144 }
5145 
5146 bool AMDGPUAsmParser::ParseDirectiveAMDGPUHsaKernel() {
5147   StringRef KernelName;
5148   if (!parseId(KernelName, "expected symbol name"))
5149     return true;
5150 
5151   getTargetStreamer().EmitAMDGPUSymbolType(KernelName,
5152                                            ELF::STT_AMDGPU_HSA_KERNEL);
5153 
5154   KernelScope.initialize(getContext());
5155   return false;
5156 }
5157 
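/// Parse the .amd_amdgpu_isa directive; its quoted target id string must match
/// the target id selected by the assembler options.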
5158 bool AMDGPUAsmParser::ParseDirectiveISAVersion() {
5159   if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) {
5160     return Error(getLoc(),
5161                  ".amd_amdgpu_isa directive is not available on non-amdgcn "
5162                  "architectures");
5163   }
5164 
5165   auto TargetIDDirective = getLexer().getTok().getStringContents();
5166   if (getTargetStreamer().getTargetID()->toString() != TargetIDDirective)
5167     return Error(getParser().getTok().getLoc(), "target id must match options");
5168 
5169   getTargetStreamer().EmitISAVersion();
5170   Lex();
5171 
5172   return false;
5173 }
5174 
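/// Parse the HSA metadata block between the ABI-specific begin/end directives
/// and hand the collected text to the target streamer for validation and
/// emission.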
5175 bool AMDGPUAsmParser::ParseDirectiveHSAMetadata() {
5176   const char *AssemblerDirectiveBegin;
5177   const char *AssemblerDirectiveEnd;
5178   std::tie(AssemblerDirectiveBegin, AssemblerDirectiveEnd) =
5179       isHsaAbiVersion3AndAbove(&getSTI())
5180           ? std::make_tuple(HSAMD::V3::AssemblerDirectiveBegin,
5181                             HSAMD::V3::AssemblerDirectiveEnd)
5182           : std::make_tuple(HSAMD::AssemblerDirectiveBegin,
5183                             HSAMD::AssemblerDirectiveEnd);
5184 
5185   if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA) {
5186     return Error(getLoc(),
5187                  (Twine(AssemblerDirectiveBegin) + Twine(" directive is "
5188                  "not available on non-amdhsa OSes")).str());
5189   }
5190 
5191   std::string HSAMetadataString;
5192   if (ParseToEndDirective(AssemblerDirectiveBegin, AssemblerDirectiveEnd,
5193                           HSAMetadataString))
5194     return true;
5195 
5196   if (isHsaAbiVersion3AndAbove(&getSTI())) {
5197     if (!getTargetStreamer().EmitHSAMetadataV3(HSAMetadataString))
5198       return Error(getLoc(), "invalid HSA metadata");
5199   } else {
5200     if (!getTargetStreamer().EmitHSAMetadataV2(HSAMetadataString))
5201       return Error(getLoc(), "invalid HSA metadata");
5202   }
5203 
5204   return false;
5205 }
5206 
5207 /// Common code to parse out a block of text (typically YAML) between start and
5208 /// end directives.
5209 bool AMDGPUAsmParser::ParseToEndDirective(const char *AssemblerDirectiveBegin,
5210                                           const char *AssemblerDirectiveEnd,
5211                                           std::string &CollectString) {
5212 
5213   raw_string_ostream CollectStream(CollectString);
5214 
5215   getLexer().setSkipSpace(false);
5216 
5217   bool FoundEnd = false;
5218   while (!isToken(AsmToken::Eof)) {
5219     while (isToken(AsmToken::Space)) {
5220       CollectStream << getTokenStr();
5221       Lex();
5222     }
5223 
5224     if (trySkipId(AssemblerDirectiveEnd)) {
5225       FoundEnd = true;
5226       break;
5227     }
5228 
5229     CollectStream << Parser.parseStringToEndOfStatement()
5230                   << getContext().getAsmInfo()->getSeparatorString();
5231 
5232     Parser.eatToEndOfStatement();
5233   }
5234 
5235   getLexer().setSkipSpace(true);
5236 
5237   if (isToken(AsmToken::Eof) && !FoundEnd) {
5238     return TokError(Twine("expected directive ") +
5239                     Twine(AssemblerDirectiveEnd) + Twine(" not found"));
5240   }
5241 
5242   CollectStream.flush();
5243   return false;
5244 }
5245 
5246 /// Parse the assembler directive for new MsgPack-format PAL metadata.
5247 bool AMDGPUAsmParser::ParseDirectivePALMetadataBegin() {
5248   std::string String;
5249   if (ParseToEndDirective(AMDGPU::PALMD::AssemblerDirectiveBegin,
5250                           AMDGPU::PALMD::AssemblerDirectiveEnd, String))
5251     return true;
5252 
5253   auto PALMetadata = getTargetStreamer().getPALMetadata();
5254   if (!PALMetadata->setFromString(String))
5255     return Error(getLoc(), "invalid PAL metadata");
5256   return false;
5257 }
5258 
5259 /// Parse the assembler directive for old linear-format PAL metadata.
5260 bool AMDGPUAsmParser::ParseDirectivePALMetadata() {
5261   if (getSTI().getTargetTriple().getOS() != Triple::AMDPAL) {
5262     return Error(getLoc(),
5263                  (Twine(PALMD::AssemblerDirective) + Twine(" directive is "
5264                  "not available on non-amdpal OSes")).str());
5265   }
5266 
5267   auto PALMetadata = getTargetStreamer().getPALMetadata();
5268   PALMetadata->setLegacy();
5269   for (;;) {
5270     uint32_t Key, Value;
5271     if (ParseAsAbsoluteExpression(Key)) {
5272       return TokError(Twine("invalid value in ") +
5273                       Twine(PALMD::AssemblerDirective));
5274     }
5275     if (!trySkipToken(AsmToken::Comma)) {
5276       return TokError(Twine("expected an even number of values in ") +
5277                       Twine(PALMD::AssemblerDirective));
5278     }
5279     if (ParseAsAbsoluteExpression(Value)) {
5280       return TokError(Twine("invalid value in ") +
5281                       Twine(PALMD::AssemblerDirective));
5282     }
5283     PALMetadata->setRegister(Key, Value);
5284     if (!trySkipToken(AsmToken::Comma))
5285       break;
5286   }
5287   return false;
5288 }
5289 
5290 /// ParseDirectiveAMDGPULDS
5291 ///  ::= .amdgpu_lds identifier ',' size_expression [',' align_expression]
5292 bool AMDGPUAsmParser::ParseDirectiveAMDGPULDS() {
5293   if (getParser().checkForValidSection())
5294     return true;
5295 
5296   StringRef Name;
5297   SMLoc NameLoc = getLoc();
5298   if (getParser().parseIdentifier(Name))
5299     return TokError("expected identifier in directive");
5300 
5301   MCSymbol *Symbol = getContext().getOrCreateSymbol(Name);
5302   if (parseToken(AsmToken::Comma, "expected ','"))
5303     return true;
5304 
5305   unsigned LocalMemorySize = AMDGPU::IsaInfo::getLocalMemorySize(&getSTI());
5306 
5307   int64_t Size;
5308   SMLoc SizeLoc = getLoc();
5309   if (getParser().parseAbsoluteExpression(Size))
5310     return true;
5311   if (Size < 0)
5312     return Error(SizeLoc, "size must be non-negative");
5313   if (Size > LocalMemorySize)
5314     return Error(SizeLoc, "size is too large");
5315 
5316   int64_t Alignment = 4;
5317   if (trySkipToken(AsmToken::Comma)) {
5318     SMLoc AlignLoc = getLoc();
5319     if (getParser().parseAbsoluteExpression(Alignment))
5320       return true;
5321     if (Alignment < 0 || !isPowerOf2_64(Alignment))
5322       return Error(AlignLoc, "alignment must be a power of two");
5323 
5324     // Alignment larger than the size of LDS is possible in theory, as long
5325     // as the linker manages to place the symbol at address 0, but we do want
5326     // to make sure the alignment fits nicely into a 32-bit integer.
5327     if (Alignment >= 1u << 31)
5328       return Error(AlignLoc, "alignment is too large");
5329   }
5330 
5331   if (parseToken(AsmToken::EndOfStatement,
5332                  "unexpected token in '.amdgpu_lds' directive"))
5333     return true;
5334 
5335   Symbol->redefineIfPossible();
5336   if (!Symbol->isUndefined())
5337     return Error(NameLoc, "invalid symbol redefinition");
5338 
5339   getTargetStreamer().emitAMDGPULDS(Symbol, Size, Align(Alignment));
5340   return false;
5341 }
5342 
5343 bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) {
5344   StringRef IDVal = DirectiveID.getString();
5345 
5346   if (isHsaAbiVersion3AndAbove(&getSTI())) {
5347     if (IDVal == ".amdhsa_kernel")
5348      return ParseDirectiveAMDHSAKernel();
5349 
5350     // TODO: Restructure/combine with PAL metadata directive.
5351     if (IDVal == AMDGPU::HSAMD::V3::AssemblerDirectiveBegin)
5352       return ParseDirectiveHSAMetadata();
5353   } else {
5354     if (IDVal == ".hsa_code_object_version")
5355       return ParseDirectiveHSACodeObjectVersion();
5356 
5357     if (IDVal == ".hsa_code_object_isa")
5358       return ParseDirectiveHSACodeObjectISA();
5359 
5360     if (IDVal == ".amd_kernel_code_t")
5361       return ParseDirectiveAMDKernelCodeT();
5362 
5363     if (IDVal == ".amdgpu_hsa_kernel")
5364       return ParseDirectiveAMDGPUHsaKernel();
5365 
5366     if (IDVal == ".amd_amdgpu_isa")
5367       return ParseDirectiveISAVersion();
5368 
5369     if (IDVal == AMDGPU::HSAMD::AssemblerDirectiveBegin)
5370       return ParseDirectiveHSAMetadata();
5371   }
5372 
5373   if (IDVal == ".amdgcn_target")
5374     return ParseDirectiveAMDGCNTarget();
5375 
5376   if (IDVal == ".amdgpu_lds")
5377     return ParseDirectiveAMDGPULDS();
5378 
5379   if (IDVal == PALMD::AssemblerDirectiveBegin)
5380     return ParseDirectivePALMetadataBegin();
5381 
5382   if (IDVal == PALMD::AssemblerDirective)
5383     return ParseDirectivePALMetadata();
5384 
5385   return true;
5386 }
5387 
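/// Return true if \p RegNo names a register that actually exists on the
/// current subtarget, e.g. ttmp12-ttmp15 and the src_* registers require
/// gfx9+, while the availability of SGPRs 102-105 and flat_scratch depends on
/// the generation.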
5388 bool AMDGPUAsmParser::subtargetHasRegister(const MCRegisterInfo &MRI,
5389                                            unsigned RegNo) {
5390 
5391   if (MRI.regsOverlap(AMDGPU::TTMP12_TTMP13_TTMP14_TTMP15, RegNo))
5392     return isGFX9Plus();
5393 
5394   // GFX10 has 2 more SGPRs 104 and 105.
5395   if (MRI.regsOverlap(AMDGPU::SGPR104_SGPR105, RegNo))
5396     return hasSGPR104_SGPR105();
5397 
5398   switch (RegNo) {
5399   case AMDGPU::SRC_SHARED_BASE:
5400   case AMDGPU::SRC_SHARED_LIMIT:
5401   case AMDGPU::SRC_PRIVATE_BASE:
5402   case AMDGPU::SRC_PRIVATE_LIMIT:
5403   case AMDGPU::SRC_POPS_EXITING_WAVE_ID:
5404     return isGFX9Plus();
5405   case AMDGPU::TBA:
5406   case AMDGPU::TBA_LO:
5407   case AMDGPU::TBA_HI:
5408   case AMDGPU::TMA:
5409   case AMDGPU::TMA_LO:
5410   case AMDGPU::TMA_HI:
5411     return !isGFX9Plus();
5412   case AMDGPU::XNACK_MASK:
5413   case AMDGPU::XNACK_MASK_LO:
5414   case AMDGPU::XNACK_MASK_HI:
5415     return (isVI() || isGFX9()) && getTargetStreamer().getTargetID()->isXnackSupported();
5416   case AMDGPU::SGPR_NULL:
5417     return isGFX10Plus();
5418   default:
5419     break;
5420   }
5421 
5422   if (isCI())
5423     return true;
5424 
5425   if (isSI() || isGFX10Plus()) {
5426     // No flat_scr on SI.
5427     // On GFX10 flat scratch is not a valid register operand and can only be
5428     // accessed with s_setreg/s_getreg.
5429     switch (RegNo) {
5430     case AMDGPU::FLAT_SCR:
5431     case AMDGPU::FLAT_SCR_LO:
5432     case AMDGPU::FLAT_SCR_HI:
5433       return false;
5434     default:
5435       return true;
5436     }
5437   }
5438 
5439   // VI only has 102 SGPRs, so make sure we aren't trying to use the 2 more that
5440   // SI/CI have.
5441   if (MRI.regsOverlap(AMDGPU::SGPR102_SGPR103, RegNo))
5442     return hasSGPR102_SGPR103();
5443 
5444   return true;
5445 }
5446 
5447 OperandMatchResultTy
5448 AMDGPUAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic,
5449                               OperandMode Mode) {
5450   // Try to parse with a custom parser
5451   OperandMatchResultTy ResTy = MatchOperandParserImpl(Operands, Mnemonic);
5452 
5453   // If we successfully parsed the operand or if there was an error parsing,
5454   // we are done.
5455   //
5456   // If we are parsing after we reach EndOfStatement then this means we
5457   // are appending default values to the Operands list.  This is only done
5458   // by custom parser, so we shouldn't continue on to the generic parsing.
5459   if (ResTy == MatchOperand_Success || ResTy == MatchOperand_ParseFail ||
5460       isToken(AsmToken::EndOfStatement))
5461     return ResTy;
5462 
5463   SMLoc RBraceLoc;
5464   SMLoc LBraceLoc = getLoc();
5465   if (Mode == OperandMode_NSA && trySkipToken(AsmToken::LBrac)) {
5466     unsigned Prefix = Operands.size();
5467 
5468     for (;;) {
5469       auto Loc = getLoc();
5470       ResTy = parseReg(Operands);
5471       if (ResTy == MatchOperand_NoMatch)
5472         Error(Loc, "expected a register");
5473       if (ResTy != MatchOperand_Success)
5474         return MatchOperand_ParseFail;
5475 
5476       RBraceLoc = getLoc();
5477       if (trySkipToken(AsmToken::RBrac))
5478         break;
5479 
5480       if (!skipToken(AsmToken::Comma,
5481                      "expected a comma or a closing square bracket")) {
5482         return MatchOperand_ParseFail;
5483       }
5484     }
5485 
5486     if (Operands.size() - Prefix > 1) {
5487       Operands.insert(Operands.begin() + Prefix,
5488                       AMDGPUOperand::CreateToken(this, "[", LBraceLoc));
5489       Operands.push_back(AMDGPUOperand::CreateToken(this, "]", RBraceLoc));
5490     }
5491 
5492     return MatchOperand_Success;
5493   }
5494 
5495   return parseRegOrImm(Operands);
5496 }
5497 
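/// Strip a trailing _e32/_e64/_dpp/_sdwa suffix from \p Name and record the
/// corresponding forced encoding for instruction matching.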
5498 StringRef AMDGPUAsmParser::parseMnemonicSuffix(StringRef Name) {
5499   // Clear any forced encodings from the previous instruction.
5500   setForcedEncodingSize(0);
5501   setForcedDPP(false);
5502   setForcedSDWA(false);
5503 
5504   if (Name.endswith("_e64")) {
5505     setForcedEncodingSize(64);
5506     return Name.substr(0, Name.size() - 4);
5507   } else if (Name.endswith("_e32")) {
5508     setForcedEncodingSize(32);
5509     return Name.substr(0, Name.size() - 4);
5510   } else if (Name.endswith("_dpp")) {
5511     setForcedDPP(true);
5512     return Name.substr(0, Name.size() - 4);
5513   } else if (Name.endswith("_sdwa")) {
5514     setForcedSDWA(true);
5515     return Name.substr(0, Name.size() - 5);
5516   }
5517   return Name;
5518 }
5519 
5520 bool AMDGPUAsmParser::ParseInstruction(ParseInstructionInfo &Info,
5521                                        StringRef Name,
5522                                        SMLoc NameLoc, OperandVector &Operands) {
5523   // Add the instruction mnemonic
5524   Name = parseMnemonicSuffix(Name);
5525   Operands.push_back(AMDGPUOperand::CreateToken(this, Name, NameLoc));
5526 
5527   bool IsMIMG = Name.startswith("image_");
5528 
5529   while (!trySkipToken(AsmToken::EndOfStatement)) {
5530     OperandMode Mode = OperandMode_Default;
5531     if (IsMIMG && isGFX10Plus() && Operands.size() == 2)
5532       Mode = OperandMode_NSA;
5533     CPolSeen = 0;
5534     OperandMatchResultTy Res = parseOperand(Operands, Name, Mode);
5535 
5536     if (Res != MatchOperand_Success) {
5537       checkUnsupportedInstruction(Name, NameLoc);
5538       if (!Parser.hasPendingError()) {
5539         // FIXME: use real operand location rather than the current location.
5540         StringRef Msg =
5541           (Res == MatchOperand_ParseFail) ? "failed parsing operand." :
5542                                             "not a valid operand.";
5543         Error(getLoc(), Msg);
5544       }
5545       while (!trySkipToken(AsmToken::EndOfStatement)) {
5546         lex();
5547       }
5548       return true;
5549     }
5550 
5551     // Eat the comma or space if there is one.
5552     trySkipToken(AsmToken::Comma);
5553   }
5554 
5555   return false;
5556 }
5557 
5558 //===----------------------------------------------------------------------===//
5559 // Utility functions
5560 //===----------------------------------------------------------------------===//
5561 
5562 OperandMatchResultTy
5563 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, int64_t &IntVal) {
5564 
5565   if (!trySkipId(Prefix, AsmToken::Colon))
5566     return MatchOperand_NoMatch;
5567 
5568   return parseExpr(IntVal) ? MatchOperand_Success : MatchOperand_ParseFail;
5569 }
5570 
5571 OperandMatchResultTy
5572 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
5573                                     AMDGPUOperand::ImmTy ImmTy,
5574                                     bool (*ConvertResult)(int64_t&)) {
5575   SMLoc S = getLoc();
5576   int64_t Value = 0;
5577 
5578   OperandMatchResultTy Res = parseIntWithPrefix(Prefix, Value);
5579   if (Res != MatchOperand_Success)
5580     return Res;
5581 
5582   if (ConvertResult && !ConvertResult(Value)) {
5583     Error(S, "invalid " + StringRef(Prefix) + " value.");
5584   }
5585 
5586   Operands.push_back(AMDGPUOperand::CreateImm(this, Value, S, ImmTy));
5587   return MatchOperand_Success;
5588 }
5589 
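/// Parse "<Prefix>:[b0,b1,...]" where every element must be 0 or 1; up to four
/// bits are packed, LSB first, into a single immediate operand.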
5590 OperandMatchResultTy
5591 AMDGPUAsmParser::parseOperandArrayWithPrefix(const char *Prefix,
5592                                              OperandVector &Operands,
5593                                              AMDGPUOperand::ImmTy ImmTy,
5594                                              bool (*ConvertResult)(int64_t&)) {
5595   SMLoc S = getLoc();
5596   if (!trySkipId(Prefix, AsmToken::Colon))
5597     return MatchOperand_NoMatch;
5598 
5599   if (!skipToken(AsmToken::LBrac, "expected a left square bracket"))
5600     return MatchOperand_ParseFail;
5601 
5602   unsigned Val = 0;
5603   const unsigned MaxSize = 4;
5604 
5605   // FIXME: How to verify the number of elements matches the number of src
5606   // operands?
5607   for (int I = 0; ; ++I) {
5608     int64_t Op;
5609     SMLoc Loc = getLoc();
5610     if (!parseExpr(Op))
5611       return MatchOperand_ParseFail;
5612 
5613     if (Op != 0 && Op != 1) {
5614       Error(Loc, "invalid " + StringRef(Prefix) + " value.");
5615       return MatchOperand_ParseFail;
5616     }
5617 
5618     Val |= (Op << I);
5619 
5620     if (trySkipToken(AsmToken::RBrac))
5621       break;
5622 
5623     if (I + 1 == MaxSize) {
5624       Error(getLoc(), "expected a closing square bracket");
5625       return MatchOperand_ParseFail;
5626     }
5627 
5628     if (!skipToken(AsmToken::Comma, "expected a comma"))
5629       return MatchOperand_ParseFail;
5630   }
5631 
5632   Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S, ImmTy));
5633   return MatchOperand_Success;
5634 }
5635 
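/// Parse a named bit modifier: "<Name>" yields an immediate of 1 and
/// "no<Name>" yields 0, with additional per-GPU checks for r128 and a16.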
5636 OperandMatchResultTy
5637 AMDGPUAsmParser::parseNamedBit(StringRef Name, OperandVector &Operands,
5638                                AMDGPUOperand::ImmTy ImmTy) {
5639   int64_t Bit;
5640   SMLoc S = getLoc();
5641 
5642   if (trySkipId(Name)) {
5643     Bit = 1;
5644   } else if (trySkipId("no", Name)) {
5645     Bit = 0;
5646   } else {
5647     return MatchOperand_NoMatch;
5648   }
5649 
5650   if (Name == "r128" && !hasMIMG_R128()) {
5651     Error(S, "r128 modifier is not supported on this GPU");
5652     return MatchOperand_ParseFail;
5653   }
5654   if (Name == "a16" && !isGFX9() && !hasGFX10A16()) {
5655     Error(S, "a16 modifier is not supported on this GPU");
5656     return MatchOperand_ParseFail;
5657   }
5658 
5659   if (isGFX9() && ImmTy == AMDGPUOperand::ImmTyA16)
5660     ImmTy = AMDGPUOperand::ImmTyR128A16;
5661 
5662   Operands.push_back(AMDGPUOperand::CreateImm(this, Bit, S, ImmTy));
5663   return MatchOperand_Success;
5664 }
5665 
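/// Parse cache policy modifiers (glc/slc/dlc/scc and their "no" forms, or
/// sc0/sc1/nt for non-scalar instructions on gfx940) and fold them into a
/// single CPol immediate operand, rejecting duplicates.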
5666 OperandMatchResultTy
5667 AMDGPUAsmParser::parseCPol(OperandVector &Operands) {
5668   unsigned CPolOn = 0;
5669   unsigned CPolOff = 0;
5670   SMLoc S = getLoc();
5671 
5672   StringRef Mnemo = ((AMDGPUOperand &)*Operands[0]).getToken();
5673   if (isGFX940() && !Mnemo.startswith("s_")) {
5674     if (trySkipId("sc0"))
5675       CPolOn = AMDGPU::CPol::SC0;
5676     else if (trySkipId("nosc0"))
5677       CPolOff = AMDGPU::CPol::SC0;
5678     else if (trySkipId("nt"))
5679       CPolOn = AMDGPU::CPol::NT;
5680     else if (trySkipId("nont"))
5681       CPolOff = AMDGPU::CPol::NT;
5682     else if (trySkipId("sc1"))
5683       CPolOn = AMDGPU::CPol::SC1;
5684     else if (trySkipId("nosc1"))
5685       CPolOff = AMDGPU::CPol::SC1;
5686     else
5687       return MatchOperand_NoMatch;
5688   }
5689   else if (trySkipId("glc"))
5690     CPolOn = AMDGPU::CPol::GLC;
5691   else if (trySkipId("noglc"))
5692     CPolOff = AMDGPU::CPol::GLC;
5693   else if (trySkipId("slc"))
5694     CPolOn = AMDGPU::CPol::SLC;
5695   else if (trySkipId("noslc"))
5696     CPolOff = AMDGPU::CPol::SLC;
5697   else if (trySkipId("dlc"))
5698     CPolOn = AMDGPU::CPol::DLC;
5699   else if (trySkipId("nodlc"))
5700     CPolOff = AMDGPU::CPol::DLC;
5701   else if (trySkipId("scc"))
5702     CPolOn = AMDGPU::CPol::SCC;
5703   else if (trySkipId("noscc"))
5704     CPolOff = AMDGPU::CPol::SCC;
5705   else
5706     return MatchOperand_NoMatch;
5707 
5708   if (!isGFX10Plus() && ((CPolOn | CPolOff) & AMDGPU::CPol::DLC)) {
5709     Error(S, "dlc modifier is not supported on this GPU");
5710     return MatchOperand_ParseFail;
5711   }
5712 
5713   if (!isGFX90A() && ((CPolOn | CPolOff) & AMDGPU::CPol::SCC)) {
5714     Error(S, "scc modifier is not supported on this GPU");
5715     return MatchOperand_ParseFail;
5716   }
5717 
5718   if (CPolSeen & (CPolOn | CPolOff)) {
5719     Error(S, "duplicate cache policy modifier");
5720     return MatchOperand_ParseFail;
5721   }
5722 
5723   CPolSeen |= (CPolOn | CPolOff);
5724 
5725   for (unsigned I = 1; I != Operands.size(); ++I) {
5726     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
5727     if (Op.isCPol()) {
5728       Op.setImm((Op.getImm() | CPolOn) & ~CPolOff);
5729       return MatchOperand_Success;
5730     }
5731   }
5732 
5733   Operands.push_back(AMDGPUOperand::CreateImm(this, CPolOn, S,
5734                                               AMDGPUOperand::ImmTyCPol));
5735 
5736   return MatchOperand_Success;
5737 }
5738 
5739 static void addOptionalImmOperand(
5740   MCInst& Inst, const OperandVector& Operands,
5741   AMDGPUAsmParser::OptionalImmIndexMap& OptionalIdx,
5742   AMDGPUOperand::ImmTy ImmT,
5743   int64_t Default = 0) {
5744   auto i = OptionalIdx.find(ImmT);
5745   if (i != OptionalIdx.end()) {
5746     unsigned Idx = i->second;
5747     ((AMDGPUOperand &)*Operands[Idx]).addImmOperands(Inst, 1);
5748   } else {
5749     Inst.addOperand(MCOperand::createImm(Default));
5750   }
5751 }
5752 
5753 OperandMatchResultTy
5754 AMDGPUAsmParser::parseStringWithPrefix(StringRef Prefix,
5755                                        StringRef &Value,
5756                                        SMLoc &StringLoc) {
5757   if (!trySkipId(Prefix, AsmToken::Colon))
5758     return MatchOperand_NoMatch;
5759 
5760   StringLoc = getLoc();
5761   return parseId(Value, "expected an identifier") ? MatchOperand_Success
5762                                                   : MatchOperand_ParseFail;
5763 }
5764 
5765 //===----------------------------------------------------------------------===//
5766 // MTBUF format
5767 //===----------------------------------------------------------------------===//
5768 
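// Parse "<Pref>:<value>" and range-check the value against MaxVal. Returns
// false only on a parse or range error; if the prefix is absent, Fmt is left
// unchanged and true is returned.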
5769 bool AMDGPUAsmParser::tryParseFmt(const char *Pref,
5770                                   int64_t MaxVal,
5771                                   int64_t &Fmt) {
5772   int64_t Val;
5773   SMLoc Loc = getLoc();
5774 
5775   auto Res = parseIntWithPrefix(Pref, Val);
5776   if (Res == MatchOperand_ParseFail)
5777     return false;
5778   if (Res == MatchOperand_NoMatch)
5779     return true;
5780 
5781   if (Val < 0 || Val > MaxVal) {
5782     Error(Loc, Twine("out of range ", StringRef(Pref)));
5783     return false;
5784   }
5785 
5786   Fmt = Val;
5787   return true;
5788 }
5789 
5790 // dfmt and nfmt (in a tbuffer instruction) are parsed as one to allow their
5791 // values to live in a joint format operand in the MCInst encoding.
5792 OperandMatchResultTy
5793 AMDGPUAsmParser::parseDfmtNfmt(int64_t &Format) {
5794   using namespace llvm::AMDGPU::MTBUFFormat;
5795 
5796   int64_t Dfmt = DFMT_UNDEF;
5797   int64_t Nfmt = NFMT_UNDEF;
5798 
5799   // dfmt and nfmt can appear in either order, and each is optional.
5800   for (int I = 0; I < 2; ++I) {
5801     if (Dfmt == DFMT_UNDEF && !tryParseFmt("dfmt", DFMT_MAX, Dfmt))
5802       return MatchOperand_ParseFail;
5803 
5804     if (Nfmt == NFMT_UNDEF && !tryParseFmt("nfmt", NFMT_MAX, Nfmt)) {
5805       return MatchOperand_ParseFail;
5806     }
5807     // Skip optional comma between dfmt/nfmt
5808     // but guard against 2 commas following each other.
5809     if ((Dfmt == DFMT_UNDEF) != (Nfmt == NFMT_UNDEF) &&
5810         !peekToken().is(AsmToken::Comma)) {
5811       trySkipToken(AsmToken::Comma);
5812     }
5813   }
5814 
5815   if (Dfmt == DFMT_UNDEF && Nfmt == NFMT_UNDEF)
5816     return MatchOperand_NoMatch;
5817 
5818   Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt;
5819   Nfmt = (Nfmt == NFMT_UNDEF) ? NFMT_DEFAULT : Nfmt;
5820 
5821   Format = encodeDfmtNfmt(Dfmt, Nfmt);
5822   return MatchOperand_Success;
5823 }
5824 
5825 OperandMatchResultTy
5826 AMDGPUAsmParser::parseUfmt(int64_t &Format) {
5827   using namespace llvm::AMDGPU::MTBUFFormat;
5828 
5829   int64_t Fmt = UFMT_UNDEF;
5830 
5831   if (!tryParseFmt("format", UFMT_MAX, Fmt))
5832     return MatchOperand_ParseFail;
5833 
5834   if (Fmt == UFMT_UNDEF)
5835     return MatchOperand_NoMatch;
5836 
5837   Format = Fmt;
5838   return MatchOperand_Success;
5839 }
5840 
5841 bool AMDGPUAsmParser::matchDfmtNfmt(int64_t &Dfmt,
5842                                     int64_t &Nfmt,
5843                                     StringRef FormatStr,
5844                                     SMLoc Loc) {
5845   using namespace llvm::AMDGPU::MTBUFFormat;
5846   int64_t Format;
5847 
5848   Format = getDfmt(FormatStr);
5849   if (Format != DFMT_UNDEF) {
5850     Dfmt = Format;
5851     return true;
5852   }
5853 
5854   Format = getNfmt(FormatStr, getSTI());
5855   if (Format != NFMT_UNDEF) {
5856     Nfmt = Format;
5857     return true;
5858   }
5859 
5860   Error(Loc, "unsupported format");
5861   return false;
5862 }
5863 
5864 OperandMatchResultTy
5865 AMDGPUAsmParser::parseSymbolicSplitFormat(StringRef FormatStr,
5866                                           SMLoc FormatLoc,
5867                                           int64_t &Format) {
5868   using namespace llvm::AMDGPU::MTBUFFormat;
5869 
5870   int64_t Dfmt = DFMT_UNDEF;
5871   int64_t Nfmt = NFMT_UNDEF;
5872   if (!matchDfmtNfmt(Dfmt, Nfmt, FormatStr, FormatLoc))
5873     return MatchOperand_ParseFail;
5874 
5875   if (trySkipToken(AsmToken::Comma)) {
5876     StringRef Str;
5877     SMLoc Loc = getLoc();
5878     if (!parseId(Str, "expected a format string") ||
5879         !matchDfmtNfmt(Dfmt, Nfmt, Str, Loc)) {
5880       return MatchOperand_ParseFail;
5881     }
5882     if (Dfmt == DFMT_UNDEF) {
5883       Error(Loc, "duplicate numeric format");
5884       return MatchOperand_ParseFail;
5885     } else if (Nfmt == NFMT_UNDEF) {
5886       Error(Loc, "duplicate data format");
5887       return MatchOperand_ParseFail;
5888     }
5889   }
5890 
5891   Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt;
5892   Nfmt = (Nfmt == NFMT_UNDEF) ? NFMT_DEFAULT : Nfmt;
5893 
5894   if (isGFX10Plus()) {
5895     auto Ufmt = convertDfmtNfmt2Ufmt(Dfmt, Nfmt);
5896     if (Ufmt == UFMT_UNDEF) {
5897       Error(FormatLoc, "unsupported format");
5898       return MatchOperand_ParseFail;
5899     }
5900     Format = Ufmt;
5901   } else {
5902     Format = encodeDfmtNfmt(Dfmt, Nfmt);
5903   }
5904 
5905   return MatchOperand_Success;
5906 }
5907 
5908 OperandMatchResultTy
5909 AMDGPUAsmParser::parseSymbolicUnifiedFormat(StringRef FormatStr,
5910                                             SMLoc Loc,
5911                                             int64_t &Format) {
5912   using namespace llvm::AMDGPU::MTBUFFormat;
5913 
5914   auto Id = getUnifiedFormat(FormatStr);
5915   if (Id == UFMT_UNDEF)
5916     return MatchOperand_NoMatch;
5917 
5918   if (!isGFX10Plus()) {
5919     Error(Loc, "unified format is not supported on this GPU");
5920     return MatchOperand_ParseFail;
5921   }
5922 
5923   Format = Id;
5924   return MatchOperand_Success;
5925 }
5926 
5927 OperandMatchResultTy
5928 AMDGPUAsmParser::parseNumericFormat(int64_t &Format) {
5929   using namespace llvm::AMDGPU::MTBUFFormat;
5930   SMLoc Loc = getLoc();
5931 
5932   if (!parseExpr(Format))
5933     return MatchOperand_ParseFail;
5934   if (!isValidFormatEncoding(Format, getSTI())) {
5935     Error(Loc, "out of range format");
5936     return MatchOperand_ParseFail;
5937   }
5938 
5939   return MatchOperand_Success;
5940 }
5941 
5942 OperandMatchResultTy
5943 AMDGPUAsmParser::parseSymbolicOrNumericFormat(int64_t &Format) {
5944   using namespace llvm::AMDGPU::MTBUFFormat;
5945 
5946   if (!trySkipId("format", AsmToken::Colon))
5947     return MatchOperand_NoMatch;
5948 
5949   if (trySkipToken(AsmToken::LBrac)) {
5950     StringRef FormatStr;
5951     SMLoc Loc = getLoc();
5952     if (!parseId(FormatStr, "expected a format string"))
5953       return MatchOperand_ParseFail;
5954 
5955     auto Res = parseSymbolicUnifiedFormat(FormatStr, Loc, Format);
5956     if (Res == MatchOperand_NoMatch)
5957       Res = parseSymbolicSplitFormat(FormatStr, Loc, Format);
5958     if (Res != MatchOperand_Success)
5959       return Res;
5960 
5961     if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
5962       return MatchOperand_ParseFail;
5963 
5964     return MatchOperand_Success;
5965   }
5966 
5967   return parseNumericFormat(Format);
5968 }
5969 
5970 OperandMatchResultTy
5971 AMDGPUAsmParser::parseFORMAT(OperandVector &Operands) {
5972   using namespace llvm::AMDGPU::MTBUFFormat;
5973 
5974   int64_t Format = getDefaultFormatEncoding(getSTI());
5975   OperandMatchResultTy Res;
5976   SMLoc Loc = getLoc();
5977 
5978   // Parse legacy format syntax.
5979   Res = isGFX10Plus() ? parseUfmt(Format) : parseDfmtNfmt(Format);
5980   if (Res == MatchOperand_ParseFail)
5981     return Res;
5982 
5983   bool FormatFound = (Res == MatchOperand_Success);
5984 
5985   Operands.push_back(
5986     AMDGPUOperand::CreateImm(this, Format, Loc, AMDGPUOperand::ImmTyFORMAT));
5987 
5988   if (FormatFound)
5989     trySkipToken(AsmToken::Comma);
5990 
5991   if (isToken(AsmToken::EndOfStatement)) {
5992     // We are expecting an soffset operand,
5993     // but let matcher handle the error.
5994     return MatchOperand_Success;
5995   }
5996 
5997   // Parse soffset.
5998   Res = parseRegOrImm(Operands);
5999   if (Res != MatchOperand_Success)
6000     return Res;
6001 
6002   trySkipToken(AsmToken::Comma);
6003 
6004   if (!FormatFound) {
6005     Res = parseSymbolicOrNumericFormat(Format);
6006     if (Res == MatchOperand_ParseFail)
6007       return Res;
6008     if (Res == MatchOperand_Success) {
6009       auto Size = Operands.size();
6010       AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands[Size - 2]);
6011       assert(Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyFORMAT);
6012       Op.setImm(Format);
6013     }
6014     return MatchOperand_Success;
6015   }
6016 
6017   if (isId("format") && peekToken().is(AsmToken::Colon)) {
6018     Error(getLoc(), "duplicate format");
6019     return MatchOperand_ParseFail;
6020   }
6021   return MatchOperand_Success;
6022 }
6023 
6024 //===----------------------------------------------------------------------===//
6025 // ds
6026 //===----------------------------------------------------------------------===//
6027 
6028 void AMDGPUAsmParser::cvtDSOffset01(MCInst &Inst,
6029                                     const OperandVector &Operands) {
6030   OptionalImmIndexMap OptionalIdx;
6031 
6032   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
6033     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
6034 
6035     // Add the register arguments
6036     if (Op.isReg()) {
6037       Op.addRegOperands(Inst, 1);
6038       continue;
6039     }
6040 
6041     // Handle optional arguments
6042     OptionalIdx[Op.getImmTy()] = i;
6043   }
6044 
6045   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset0);
6046   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset1);
6047   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS);
6048 
6049   Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0
6050 }
6051 
6052 void AMDGPUAsmParser::cvtDSImpl(MCInst &Inst, const OperandVector &Operands,
6053                                 bool IsGdsHardcoded) {
6054   OptionalImmIndexMap OptionalIdx;
6055 
6056   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
6057     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
6058 
6059     // Add the register arguments
6060     if (Op.isReg()) {
6061       Op.addRegOperands(Inst, 1);
6062       continue;
6063     }
6064 
6065     if (Op.isToken() && Op.getToken() == "gds") {
6066       IsGdsHardcoded = true;
6067       continue;
6068     }
6069 
6070     // Handle optional arguments
6071     OptionalIdx[Op.getImmTy()] = i;
6072   }
6073 
6074   AMDGPUOperand::ImmTy OffsetType =
6075     (Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_gfx10 ||
6076      Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_gfx6_gfx7 ||
6077      Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_vi) ? AMDGPUOperand::ImmTySwizzle :
6078                                                       AMDGPUOperand::ImmTyOffset;
6079 
6080   addOptionalImmOperand(Inst, Operands, OptionalIdx, OffsetType);
6081 
6082   if (!IsGdsHardcoded) {
6083     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS);
6084   }
6085   Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0
6086 }
6087 
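// Convert parsed "exp" operands into an MCInst: "off" sources become
// NoRegister, "compr" collapses the source list, and the trailing immediate is
// the enable mask derived from the sources that remain enabled.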
6088 void AMDGPUAsmParser::cvtExp(MCInst &Inst, const OperandVector &Operands) {
6089   OptionalImmIndexMap OptionalIdx;
6090 
6091   unsigned OperandIdx[4];
6092   unsigned EnMask = 0;
6093   int SrcIdx = 0;
6094 
6095   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
6096     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
6097 
6098     // Add the register arguments
6099     if (Op.isReg()) {
6100       assert(SrcIdx < 4);
6101       OperandIdx[SrcIdx] = Inst.size();
6102       Op.addRegOperands(Inst, 1);
6103       ++SrcIdx;
6104       continue;
6105     }
6106 
6107     if (Op.isOff()) {
6108       assert(SrcIdx < 4);
6109       OperandIdx[SrcIdx] = Inst.size();
6110       Inst.addOperand(MCOperand::createReg(AMDGPU::NoRegister));
6111       ++SrcIdx;
6112       continue;
6113     }
6114 
6115     if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyExpTgt) {
6116       Op.addImmOperands(Inst, 1);
6117       continue;
6118     }
6119 
6120     if (Op.isToken() && Op.getToken() == "done")
6121       continue;
6122 
6123     // Handle optional arguments
6124     OptionalIdx[Op.getImmTy()] = i;
6125   }
6126 
6127   assert(SrcIdx == 4);
6128 
6129   bool Compr = false;
6130   if (OptionalIdx.find(AMDGPUOperand::ImmTyExpCompr) != OptionalIdx.end()) {
6131     Compr = true;
6132     Inst.getOperand(OperandIdx[1]) = Inst.getOperand(OperandIdx[2]);
6133     Inst.getOperand(OperandIdx[2]).setReg(AMDGPU::NoRegister);
6134     Inst.getOperand(OperandIdx[3]).setReg(AMDGPU::NoRegister);
6135   }
6136 
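  // Build the 'en' operand: one bit per enabled source, or a pair of bits
  // per enabled source when the compressed (compr) form is used.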
6137   for (auto i = 0; i < SrcIdx; ++i) {
6138     if (Inst.getOperand(OperandIdx[i]).getReg() != AMDGPU::NoRegister) {
6139       EnMask |= Compr ? (0x3 << (i * 2)) : (0x1 << i);
6140     }
6141   }
6142 
6143   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpVM);
6144   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpCompr);
6145 
6146   Inst.addOperand(MCOperand::createImm(EnMask));
6147 }
6148 
6149 //===----------------------------------------------------------------------===//
6150 // s_waitcnt
6151 //===----------------------------------------------------------------------===//
6152 
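// Encode a single counter value into the combined s_waitcnt immediate.
// CntVal is written into the counter's bit field of IntVal via the encode
// callback; if the value does not survive an encode/decode round trip it
// did not fit into the field. In that case the field is saturated to its
// maximum when Saturate is set, otherwise the value is rejected.
// Returns true on failure.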
6153 static bool
6154 encodeCnt(
6155   const AMDGPU::IsaVersion ISA,
6156   int64_t &IntVal,
6157   int64_t CntVal,
6158   bool Saturate,
6159   unsigned (*encode)(const IsaVersion &Version, unsigned, unsigned),
6160   unsigned (*decode)(const IsaVersion &Version, unsigned))
6161 {
6162   bool Failed = false;
6163 
6164   IntVal = encode(ISA, IntVal, CntVal);
6165   if (CntVal != decode(ISA, IntVal)) {
6166     if (Saturate) {
6167       IntVal = encode(ISA, IntVal, -1);
6168     } else {
6169       Failed = true;
6170     }
6171   }
6172   return Failed;
6173 }
6174 
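// Parse a single "<counter>(<value>)" specification such as vmcnt(0),
// expcnt(1) or lgkmcnt(2), including the saturating *_sat forms, and merge
// the encoded value into IntVal. A trailing '&' or ',' separating it from
// the next counter is consumed here as well.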
6175 bool AMDGPUAsmParser::parseCnt(int64_t &IntVal) {
6176 
6177   SMLoc CntLoc = getLoc();
6178   StringRef CntName = getTokenStr();
6179 
6180   if (!skipToken(AsmToken::Identifier, "expected a counter name") ||
6181       !skipToken(AsmToken::LParen, "expected a left parenthesis"))
6182     return false;
6183 
6184   int64_t CntVal;
6185   SMLoc ValLoc = getLoc();
6186   if (!parseExpr(CntVal))
6187     return false;
6188 
6189   AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
6190 
6191   bool Failed = true;
6192   bool Sat = CntName.endswith("_sat");
6193 
6194   if (CntName == "vmcnt" || CntName == "vmcnt_sat") {
6195     Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeVmcnt, decodeVmcnt);
6196   } else if (CntName == "expcnt" || CntName == "expcnt_sat") {
6197     Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeExpcnt, decodeExpcnt);
6198   } else if (CntName == "lgkmcnt" || CntName == "lgkmcnt_sat") {
6199     Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeLgkmcnt, decodeLgkmcnt);
6200   } else {
6201     Error(CntLoc, "invalid counter name " + CntName);
6202     return false;
6203   }
6204 
6205   if (Failed) {
6206     Error(ValLoc, "too large value for " + CntName);
6207     return false;
6208   }
6209 
6210   if (!skipToken(AsmToken::RParen, "expected a closing parenthesis"))
6211     return false;
6212 
6213   if (trySkipToken(AsmToken::Amp) || trySkipToken(AsmToken::Comma)) {
6214     if (isToken(AsmToken::EndOfStatement)) {
6215       Error(getLoc(), "expected a counter name");
6216       return false;
6217     }
6218   }
6219 
6220   return true;
6221 }
6222 
6223 OperandMatchResultTy
6224 AMDGPUAsmParser::parseSWaitCntOps(OperandVector &Operands) {
6225   AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
6226   int64_t Waitcnt = getWaitcntBitMask(ISA);
6227   SMLoc S = getLoc();
6228 
6229   if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) {
6230     while (!isToken(AsmToken::EndOfStatement)) {
6231       if (!parseCnt(Waitcnt))
6232         return MatchOperand_ParseFail;
6233     }
6234   } else {
6235     if (!parseExpr(Waitcnt))
6236       return MatchOperand_ParseFail;
6237   }
6238 
6239   Operands.push_back(AMDGPUOperand::CreateImm(this, Waitcnt, S));
6240   return MatchOperand_Success;
6241 }
6242 
6243 bool
6244 AMDGPUOperand::isSWaitCnt() const {
6245   return isImm();
6246 }
6247 
6248 //===----------------------------------------------------------------------===//
6249 // hwreg
6250 //===----------------------------------------------------------------------===//
6251 
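// Parse the body of a hwreg(...) operand. The register may be followed by
// an optional bit offset and bit width:
//   hwreg(<name or code>)
//   hwreg(<name or code>, <offset>, <width>)
// The opening parenthesis has already been consumed by the caller.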
6252 bool
6253 AMDGPUAsmParser::parseHwregBody(OperandInfoTy &HwReg,
6254                                 OperandInfoTy &Offset,
6255                                 OperandInfoTy &Width) {
6256   using namespace llvm::AMDGPU::Hwreg;
6257 
6258   // The register may be specified by name or using a numeric code
6259   HwReg.Loc = getLoc();
6260   if (isToken(AsmToken::Identifier) &&
6261       (HwReg.Id = getHwregId(getTokenStr(), getSTI())) >= 0) {
6262     HwReg.IsSymbolic = true;
6263     HwReg.Name = getTokenStr();
6264     lex(); // skip register name
6265   } else if (!parseExpr(HwReg.Id, "a register name")) {
6266     return false;
6267   }
6268 
6269   if (trySkipToken(AsmToken::RParen))
6270     return true;
6271 
6272   // parse optional params
6273   if (!skipToken(AsmToken::Comma, "expected a comma or a closing parenthesis"))
6274     return false;
6275 
6276   Offset.Loc = getLoc();
6277   if (!parseExpr(Offset.Id))
6278     return false;
6279 
6280   if (!skipToken(AsmToken::Comma, "expected a comma"))
6281     return false;
6282 
6283   Width.Loc = getLoc();
6284   return parseExpr(Width.Id) &&
6285          skipToken(AsmToken::RParen, "expected a closing parenthesis");
6286 }
6287 
6288 bool
6289 AMDGPUAsmParser::validateHwreg(const OperandInfoTy &HwReg,
6290                                const OperandInfoTy &Offset,
6291                                const OperandInfoTy &Width) {
6292 
6293   using namespace llvm::AMDGPU::Hwreg;
6294 
6295   if (HwReg.IsSymbolic &&
6296       !isValidHwreg(HwReg.Id, getSTI(), HwReg.Name)) {
6297     Error(HwReg.Loc,
6298           "specified hardware register is not supported on this GPU");
6299     return false;
6300   }
6301   if (!isValidHwreg(HwReg.Id)) {
6302     Error(HwReg.Loc,
6303           "invalid code of hardware register: only 6-bit values are legal");
6304     return false;
6305   }
6306   if (!isValidHwregOffset(Offset.Id)) {
6307     Error(Offset.Loc, "invalid bit offset: only 5-bit values are legal");
6308     return false;
6309   }
6310   if (!isValidHwregWidth(Width.Id)) {
6311     Error(Width.Loc,
6312           "invalid bitfield width: only values from 1 to 32 are legal");
6313     return false;
6314   }
6315   return true;
6316 }
6317 
6318 OperandMatchResultTy
6319 AMDGPUAsmParser::parseHwreg(OperandVector &Operands) {
6320   using namespace llvm::AMDGPU::Hwreg;
6321 
6322   int64_t ImmVal = 0;
6323   SMLoc Loc = getLoc();
6324 
6325   if (trySkipId("hwreg", AsmToken::LParen)) {
6326     OperandInfoTy HwReg(ID_UNKNOWN_);
6327     OperandInfoTy Offset(OFFSET_DEFAULT_);
6328     OperandInfoTy Width(WIDTH_DEFAULT_);
6329     if (parseHwregBody(HwReg, Offset, Width) &&
6330         validateHwreg(HwReg, Offset, Width)) {
6331       ImmVal = encodeHwreg(HwReg.Id, Offset.Id, Width.Id);
6332     } else {
6333       return MatchOperand_ParseFail;
6334     }
6335   } else if (parseExpr(ImmVal, "a hwreg macro")) {
6336     if (ImmVal < 0 || !isUInt<16>(ImmVal)) {
6337       Error(Loc, "invalid immediate: only 16-bit values are legal");
6338       return MatchOperand_ParseFail;
6339     }
6340   } else {
6341     return MatchOperand_ParseFail;
6342   }
6343 
6344   Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTyHwreg));
6345   return MatchOperand_Success;
6346 }
6347 
6348 bool AMDGPUOperand::isHwreg() const {
6349   return isImmTy(ImmTyHwreg);
6350 }
6351 
6352 //===----------------------------------------------------------------------===//
6353 // sendmsg
6354 //===----------------------------------------------------------------------===//
6355 
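// Parse the body of a sendmsg(...) operand. The message may be followed by
// an optional operation and stream id:
//   sendmsg(<msg>)
//   sendmsg(<msg>, <op>)
//   sendmsg(<msg>, <op>, <stream>)
// Names and numeric expressions are both accepted; the opening parenthesis
// has already been consumed by the caller.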
6356 bool
6357 AMDGPUAsmParser::parseSendMsgBody(OperandInfoTy &Msg,
6358                                   OperandInfoTy &Op,
6359                                   OperandInfoTy &Stream) {
6360   using namespace llvm::AMDGPU::SendMsg;
6361 
6362   Msg.Loc = getLoc();
6363   if (isToken(AsmToken::Identifier) && (Msg.Id = getMsgId(getTokenStr())) >= 0) {
6364     Msg.IsSymbolic = true;
6365     lex(); // skip message name
6366   } else if (!parseExpr(Msg.Id, "a message name")) {
6367     return false;
6368   }
6369 
6370   if (trySkipToken(AsmToken::Comma)) {
6371     Op.IsDefined = true;
6372     Op.Loc = getLoc();
6373     if (isToken(AsmToken::Identifier) &&
6374         (Op.Id = getMsgOpId(Msg.Id, getTokenStr())) >= 0) {
6375       lex(); // skip operation name
6376     } else if (!parseExpr(Op.Id, "an operation name")) {
6377       return false;
6378     }
6379 
6380     if (trySkipToken(AsmToken::Comma)) {
6381       Stream.IsDefined = true;
6382       Stream.Loc = getLoc();
6383       if (!parseExpr(Stream.Id))
6384         return false;
6385     }
6386   }
6387 
6388   return skipToken(AsmToken::RParen, "expected a closing parenthesis");
6389 }
6390 
6391 bool
6392 AMDGPUAsmParser::validateSendMsg(const OperandInfoTy &Msg,
6393                                  const OperandInfoTy &Op,
6394                                  const OperandInfoTy &Stream) {
6395   using namespace llvm::AMDGPU::SendMsg;
6396 
6397   // Validation strictness depends on whether the message is specified
6398   // in a symbolic or in a numeric form. In the latter case
6399   // only the possibility of encoding is checked.
6400   bool Strict = Msg.IsSymbolic;
6401 
6402   if (!isValidMsgId(Msg.Id, getSTI(), Strict)) {
6403     Error(Msg.Loc, "invalid message id");
6404     return false;
6405   }
6406   if (Strict && (msgRequiresOp(Msg.Id) != Op.IsDefined)) {
6407     if (Op.IsDefined) {
6408       Error(Op.Loc, "message does not support operations");
6409     } else {
6410       Error(Msg.Loc, "missing message operation");
6411     }
6412     return false;
6413   }
6414   if (!isValidMsgOp(Msg.Id, Op.Id, getSTI(), Strict)) {
6415     Error(Op.Loc, "invalid operation id");
6416     return false;
6417   }
6418   if (Strict && !msgSupportsStream(Msg.Id, Op.Id) && Stream.IsDefined) {
6419     Error(Stream.Loc, "message operation does not support streams");
6420     return false;
6421   }
6422   if (!isValidMsgStream(Msg.Id, Op.Id, Stream.Id, getSTI(), Strict)) {
6423     Error(Stream.Loc, "invalid message stream id");
6424     return false;
6425   }
6426   return true;
6427 }
6428 
6429 OperandMatchResultTy
6430 AMDGPUAsmParser::parseSendMsgOp(OperandVector &Operands) {
6431   using namespace llvm::AMDGPU::SendMsg;
6432 
6433   int64_t ImmVal = 0;
6434   SMLoc Loc = getLoc();
6435 
6436   if (trySkipId("sendmsg", AsmToken::LParen)) {
6437     OperandInfoTy Msg(ID_UNKNOWN_);
6438     OperandInfoTy Op(OP_NONE_);
6439     OperandInfoTy Stream(STREAM_ID_NONE_);
6440     if (parseSendMsgBody(Msg, Op, Stream) &&
6441         validateSendMsg(Msg, Op, Stream)) {
6442       ImmVal = encodeMsg(Msg.Id, Op.Id, Stream.Id);
6443     } else {
6444       return MatchOperand_ParseFail;
6445     }
6446   } else if (parseExpr(ImmVal, "a sendmsg macro")) {
6447     if (ImmVal < 0 || !isUInt<16>(ImmVal)) {
6448       Error(Loc, "invalid immediate: only 16-bit values are legal");
6449       return MatchOperand_ParseFail;
6450     }
6451   } else {
6452     return MatchOperand_ParseFail;
6453   }
6454 
6455   Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTySendMsg));
6456   return MatchOperand_Success;
6457 }
6458 
6459 bool AMDGPUOperand::isSendMsg() const {
6460   return isImmTy(ImmTySendMsg);
6461 }
6462 
6463 //===----------------------------------------------------------------------===//
6464 // v_interp
6465 //===----------------------------------------------------------------------===//
6466 
6467 OperandMatchResultTy AMDGPUAsmParser::parseInterpSlot(OperandVector &Operands) {
6468   StringRef Str;
6469   SMLoc S = getLoc();
6470 
6471   if (!parseId(Str))
6472     return MatchOperand_NoMatch;
6473 
6474   int Slot = StringSwitch<int>(Str)
6475     .Case("p10", 0)
6476     .Case("p20", 1)
6477     .Case("p0", 2)
6478     .Default(-1);
6479 
6480   if (Slot == -1) {
6481     Error(S, "invalid interpolation slot");
6482     return MatchOperand_ParseFail;
6483   }
6484 
6485   Operands.push_back(AMDGPUOperand::CreateImm(this, Slot, S,
6486                                               AMDGPUOperand::ImmTyInterpSlot));
6487   return MatchOperand_Success;
6488 }
6489 
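// Parse an interpolation attribute of the form "attr<N>.<chan>", e.g.
// "attr31.y", where N is an attribute number in the range [0,63] and the
// channel is one of .x, .y, .z or .w. Two immediate operands are created:
// the attribute number and the channel index.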
6490 OperandMatchResultTy AMDGPUAsmParser::parseInterpAttr(OperandVector &Operands) {
6491   StringRef Str;
6492   SMLoc S = getLoc();
6493 
6494   if (!parseId(Str))
6495     return MatchOperand_NoMatch;
6496 
6497   if (!Str.startswith("attr")) {
6498     Error(S, "invalid interpolation attribute");
6499     return MatchOperand_ParseFail;
6500   }
6501 
6502   StringRef Chan = Str.take_back(2);
6503   int AttrChan = StringSwitch<int>(Chan)
6504     .Case(".x", 0)
6505     .Case(".y", 1)
6506     .Case(".z", 2)
6507     .Case(".w", 3)
6508     .Default(-1);
6509   if (AttrChan == -1) {
6510     Error(S, "invalid or missing interpolation attribute channel");
6511     return MatchOperand_ParseFail;
6512   }
6513 
6514   Str = Str.drop_back(2).drop_front(4);
6515 
6516   uint8_t Attr;
6517   if (Str.getAsInteger(10, Attr)) {
6518     Error(S, "invalid or missing interpolation attribute number");
6519     return MatchOperand_ParseFail;
6520   }
6521 
6522   if (Attr > 63) {
6523     Error(S, "out of bounds interpolation attribute number");
6524     return MatchOperand_ParseFail;
6525   }
6526 
6527   SMLoc SChan = SMLoc::getFromPointer(Chan.data());
6528 
6529   Operands.push_back(AMDGPUOperand::CreateImm(this, Attr, S,
6530                                               AMDGPUOperand::ImmTyInterpAttr));
6531   Operands.push_back(AMDGPUOperand::CreateImm(this, AttrChan, SChan,
6532                                               AMDGPUOperand::ImmTyAttrChan));
6533   return MatchOperand_Success;
6534 }
6535 
6536 //===----------------------------------------------------------------------===//
6537 // exp
6538 //===----------------------------------------------------------------------===//
6539 
6540 OperandMatchResultTy AMDGPUAsmParser::parseExpTgt(OperandVector &Operands) {
6541   using namespace llvm::AMDGPU::Exp;
6542 
6543   StringRef Str;
6544   SMLoc S = getLoc();
6545 
6546   if (!parseId(Str))
6547     return MatchOperand_NoMatch;
6548 
6549   unsigned Id = getTgtId(Str);
6550   if (Id == ET_INVALID || !isSupportedTgtId(Id, getSTI())) {
6551     Error(S, (Id == ET_INVALID) ?
6552                 "invalid exp target" :
6553                 "exp target is not supported on this GPU");
6554     return MatchOperand_ParseFail;
6555   }
6556 
6557   Operands.push_back(AMDGPUOperand::CreateImm(this, Id, S,
6558                                               AMDGPUOperand::ImmTyExpTgt));
6559   return MatchOperand_Success;
6560 }
6561 
6562 //===----------------------------------------------------------------------===//
6563 // parser helpers
6564 //===----------------------------------------------------------------------===//
6565 
6566 bool
6567 AMDGPUAsmParser::isId(const AsmToken &Token, const StringRef Id) const {
6568   return Token.is(AsmToken::Identifier) && Token.getString() == Id;
6569 }
6570 
6571 bool
6572 AMDGPUAsmParser::isId(const StringRef Id) const {
6573   return isId(getToken(), Id);
6574 }
6575 
6576 bool
6577 AMDGPUAsmParser::isToken(const AsmToken::TokenKind Kind) const {
6578   return getTokenKind() == Kind;
6579 }
6580 
6581 bool
6582 AMDGPUAsmParser::trySkipId(const StringRef Id) {
6583   if (isId(Id)) {
6584     lex();
6585     return true;
6586   }
6587   return false;
6588 }
6589 
6590 bool
6591 AMDGPUAsmParser::trySkipId(const StringRef Pref, const StringRef Id) {
6592   if (isToken(AsmToken::Identifier)) {
6593     StringRef Tok = getTokenStr();
6594     if (Tok.startswith(Pref) && Tok.drop_front(Pref.size()) == Id) {
6595       lex();
6596       return true;
6597     }
6598   }
6599   return false;
6600 }
6601 
6602 bool
6603 AMDGPUAsmParser::trySkipId(const StringRef Id, const AsmToken::TokenKind Kind) {
6604   if (isId(Id) && peekToken().is(Kind)) {
6605     lex();
6606     lex();
6607     return true;
6608   }
6609   return false;
6610 }
6611 
6612 bool
6613 AMDGPUAsmParser::trySkipToken(const AsmToken::TokenKind Kind) {
6614   if (isToken(Kind)) {
6615     lex();
6616     return true;
6617   }
6618   return false;
6619 }
6620 
6621 bool
6622 AMDGPUAsmParser::skipToken(const AsmToken::TokenKind Kind,
6623                            const StringRef ErrMsg) {
6624   if (!trySkipToken(Kind)) {
6625     Error(getLoc(), ErrMsg);
6626     return false;
6627   }
6628   return true;
6629 }
6630 
6631 bool
6632 AMDGPUAsmParser::parseExpr(int64_t &Imm, StringRef Expected) {
6633   SMLoc S = getLoc();
6634 
6635   const MCExpr *Expr;
6636   if (Parser.parseExpression(Expr))
6637     return false;
6638 
6639   if (Expr->evaluateAsAbsolute(Imm))
6640     return true;
6641 
6642   if (Expected.empty()) {
6643     Error(S, "expected absolute expression");
6644   } else {
6645     Error(S, Twine("expected ", Expected) +
6646              Twine(" or an absolute expression"));
6647   }
6648   return false;
6649 }
6650 
6651 bool
6652 AMDGPUAsmParser::parseExpr(OperandVector &Operands) {
6653   SMLoc S = getLoc();
6654 
6655   const MCExpr *Expr;
6656   if (Parser.parseExpression(Expr))
6657     return false;
6658 
6659   int64_t IntVal;
6660   if (Expr->evaluateAsAbsolute(IntVal)) {
6661     Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S));
6662   } else {
6663     Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S));
6664   }
6665   return true;
6666 }
6667 
6668 bool
6669 AMDGPUAsmParser::parseString(StringRef &Val, const StringRef ErrMsg) {
6670   if (isToken(AsmToken::String)) {
6671     Val = getToken().getStringContents();
6672     lex();
6673     return true;
6674   } else {
6675     Error(getLoc(), ErrMsg);
6676     return false;
6677   }
6678 }
6679 
6680 bool
6681 AMDGPUAsmParser::parseId(StringRef &Val, const StringRef ErrMsg) {
6682   if (isToken(AsmToken::Identifier)) {
6683     Val = getTokenStr();
6684     lex();
6685     return true;
6686   } else {
6687     if (!ErrMsg.empty())
6688       Error(getLoc(), ErrMsg);
6689     return false;
6690   }
6691 }
6692 
6693 AsmToken
6694 AMDGPUAsmParser::getToken() const {
6695   return Parser.getTok();
6696 }
6697 
6698 AsmToken
6699 AMDGPUAsmParser::peekToken() {
6700   return isToken(AsmToken::EndOfStatement) ? getToken() : getLexer().peekTok();
6701 }
6702 
6703 void
6704 AMDGPUAsmParser::peekTokens(MutableArrayRef<AsmToken> Tokens) {
6705   auto TokCount = getLexer().peekTokens(Tokens);
6706 
6707   for (auto Idx = TokCount; Idx < Tokens.size(); ++Idx)
6708     Tokens[Idx] = AsmToken(AsmToken::Error, "");
6709 }
6710 
6711 AsmToken::TokenKind
6712 AMDGPUAsmParser::getTokenKind() const {
6713   return getLexer().getKind();
6714 }
6715 
6716 SMLoc
6717 AMDGPUAsmParser::getLoc() const {
6718   return getToken().getLoc();
6719 }
6720 
6721 StringRef
6722 AMDGPUAsmParser::getTokenStr() const {
6723   return getToken().getString();
6724 }
6725 
6726 void
6727 AMDGPUAsmParser::lex() {
6728   Parser.Lex();
6729 }
6730 
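// Return the location of the last parsed operand that satisfies Test, or
// the location of the mnemonic (Operands[0]) if no operand matches.
// Used to point diagnostics at the offending operand.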
6731 SMLoc
6732 AMDGPUAsmParser::getOperandLoc(std::function<bool(const AMDGPUOperand&)> Test,
6733                                const OperandVector &Operands) const {
6734   for (unsigned i = Operands.size() - 1; i > 0; --i) {
6735     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
6736     if (Test(Op))
6737       return Op.getStartLoc();
6738   }
6739   return ((AMDGPUOperand &)*Operands[0]).getStartLoc();
6740 }
6741 
6742 SMLoc
6743 AMDGPUAsmParser::getImmLoc(AMDGPUOperand::ImmTy Type,
6744                            const OperandVector &Operands) const {
6745   auto Test = [=](const AMDGPUOperand& Op) { return Op.isImmTy(Type); };
6746   return getOperandLoc(Test, Operands);
6747 }
6748 
6749 SMLoc
6750 AMDGPUAsmParser::getRegLoc(unsigned Reg,
6751                            const OperandVector &Operands) const {
6752   auto Test = [=](const AMDGPUOperand& Op) {
6753     return Op.isRegKind() && Op.getReg() == Reg;
6754   };
6755   return getOperandLoc(Test, Operands);
6756 }
6757 
6758 SMLoc
6759 AMDGPUAsmParser::getLitLoc(const OperandVector &Operands) const {
6760   auto Test = [](const AMDGPUOperand& Op) {
6761     return Op.IsImmKindLiteral() || Op.isExpr();
6762   };
6763   return getOperandLoc(Test, Operands);
6764 }
6765 
6766 SMLoc
6767 AMDGPUAsmParser::getConstLoc(const OperandVector &Operands) const {
6768   auto Test = [](const AMDGPUOperand& Op) {
6769     return Op.isImmKindConst();
6770   };
6771   return getOperandLoc(Test, Operands);
6772 }
6773 
6774 //===----------------------------------------------------------------------===//
6775 // swizzle
6776 //===----------------------------------------------------------------------===//
6777 
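// Pack a BITMASK_PERM swizzle descriptor. In this mode each bit of a
// thread's lane id is transformed as ((bit & and) | or) ^ xor, so the three
// masks fully describe the lane permutation; the helpers below build the
// broadcast, swap and reverse patterns on top of this encoding.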
6778 LLVM_READNONE
6779 static unsigned
6780 encodeBitmaskPerm(const unsigned AndMask,
6781                   const unsigned OrMask,
6782                   const unsigned XorMask) {
6783   using namespace llvm::AMDGPU::Swizzle;
6784 
6785   return BITMASK_PERM_ENC |
6786          (AndMask << BITMASK_AND_SHIFT) |
6787          (OrMask  << BITMASK_OR_SHIFT)  |
6788          (XorMask << BITMASK_XOR_SHIFT);
6789 }
6790 
6791 bool
6792 AMDGPUAsmParser::parseSwizzleOperand(int64_t &Op,
6793                                      const unsigned MinVal,
6794                                      const unsigned MaxVal,
6795                                      const StringRef ErrMsg,
6796                                      SMLoc &Loc) {
6797   if (!skipToken(AsmToken::Comma, "expected a comma")) {
6798     return false;
6799   }
6800   Loc = getLoc();
6801   if (!parseExpr(Op)) {
6802     return false;
6803   }
6804   if (Op < MinVal || Op > MaxVal) {
6805     Error(Loc, ErrMsg);
6806     return false;
6807   }
6808 
6809   return true;
6810 }
6811 
6812 bool
6813 AMDGPUAsmParser::parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
6814                                       const unsigned MinVal,
6815                                       const unsigned MaxVal,
6816                                       const StringRef ErrMsg) {
6817   SMLoc Loc;
6818   for (unsigned i = 0; i < OpNum; ++i) {
6819     if (!parseSwizzleOperand(Op[i], MinVal, MaxVal, ErrMsg, Loc))
6820       return false;
6821   }
6822 
6823   return true;
6824 }
6825 
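// swizzle(QUAD_PERM, a, b, c, d): within every group of four lanes, each of
// the four 2-bit selectors picks the source lane that supplies the data for
// the corresponding output lane.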
6826 bool
6827 AMDGPUAsmParser::parseSwizzleQuadPerm(int64_t &Imm) {
6828   using namespace llvm::AMDGPU::Swizzle;
6829 
6830   int64_t Lane[LANE_NUM];
6831   if (parseSwizzleOperands(LANE_NUM, Lane, 0, LANE_MAX,
6832                            "expected a 2-bit lane id")) {
6833     Imm = QUAD_PERM_ENC;
6834     for (unsigned I = 0; I < LANE_NUM; ++I) {
6835       Imm |= Lane[I] << (LANE_SHIFT * I);
6836     }
6837     return true;
6838   }
6839   return false;
6840 }
6841 
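// swizzle(BROADCAST, <group size>, <lane>): every lane of a power-of-two
// group reads from the selected lane of its group. Encoded as a bitmask
// perm whose AND mask keeps only the group-selecting high bits of the lane
// id (BITMASK_MAX - GroupSize + 1, i.e. ~(GroupSize - 1) within the mask
// width) and whose OR mask supplies the lane index within the group.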
6842 bool
6843 AMDGPUAsmParser::parseSwizzleBroadcast(int64_t &Imm) {
6844   using namespace llvm::AMDGPU::Swizzle;
6845 
6846   SMLoc Loc;
6847   int64_t GroupSize;
6848   int64_t LaneIdx;
6849 
6850   if (!parseSwizzleOperand(GroupSize,
6851                            2, 32,
6852                            "group size must be in the interval [2,32]",
6853                            Loc)) {
6854     return false;
6855   }
6856   if (!isPowerOf2_64(GroupSize)) {
6857     Error(Loc, "group size must be a power of two");
6858     return false;
6859   }
6860   if (parseSwizzleOperand(LaneIdx,
6861                           0, GroupSize - 1,
6862                           "lane id must be in the interval [0,group size - 1]",
6863                           Loc)) {
6864     Imm = encodeBitmaskPerm(BITMASK_MAX - GroupSize + 1, LaneIdx, 0);
6865     return true;
6866   }
6867   return false;
6868 }
6869 
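// swizzle(REVERSE, <group size>): reverse the lanes within each
// power-of-two group by XOR-ing the lane id with GroupSize - 1.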
6870 bool
6871 AMDGPUAsmParser::parseSwizzleReverse(int64_t &Imm) {
6872   using namespace llvm::AMDGPU::Swizzle;
6873 
6874   SMLoc Loc;
6875   int64_t GroupSize;
6876 
6877   if (!parseSwizzleOperand(GroupSize,
6878                            2, 32,
6879                            "group size must be in the interval [2,32]",
6880                            Loc)) {
6881     return false;
6882   }
6883   if (!isPowerOf2_64(GroupSize)) {
6884     Error(Loc, "group size must be a power of two");
6885     return false;
6886   }
6887 
6888   Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize - 1);
6889   return true;
6890 }
6891 
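// swizzle(SWAP, <group size>): exchange adjacent groups of <group size>
// lanes by XOR-ing the lane id with GroupSize.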
6892 bool
6893 AMDGPUAsmParser::parseSwizzleSwap(int64_t &Imm) {
6894   using namespace llvm::AMDGPU::Swizzle;
6895 
6896   SMLoc Loc;
6897   int64_t GroupSize;
6898 
6899   if (!parseSwizzleOperand(GroupSize,
6900                            1, 16,
6901                            "group size must be in the interval [1,16]",
6902                            Loc)) {
6903     return false;
6904   }
6905   if (!isPowerOf2_64(GroupSize)) {
6906     Error(Loc, "group size must be a power of two");
6907     return false;
6908   }
6909 
6910   Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize);
6911   return true;
6912 }
6913 
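// swizzle(BITMASK_PERM, "<mask>"): the 5-character mask describes, from the
// most significant lane-id bit down to the least significant one, how each
// bit is transformed: '0' forces it to 0, '1' forces it to 1, 'p' preserves
// it and 'i' inverts it. For example, "pppii" reverses lanes within groups
// of four.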
6914 bool
6915 AMDGPUAsmParser::parseSwizzleBitmaskPerm(int64_t &Imm) {
6916   using namespace llvm::AMDGPU::Swizzle;
6917 
6918   if (!skipToken(AsmToken::Comma, "expected a comma")) {
6919     return false;
6920   }
6921 
6922   StringRef Ctl;
6923   SMLoc StrLoc = getLoc();
6924   if (!parseString(Ctl)) {
6925     return false;
6926   }
6927   if (Ctl.size() != BITMASK_WIDTH) {
6928     Error(StrLoc, "expected a 5-character mask");
6929     return false;
6930   }
6931 
6932   unsigned AndMask = 0;
6933   unsigned OrMask = 0;
6934   unsigned XorMask = 0;
6935 
6936   for (size_t i = 0; i < Ctl.size(); ++i) {
6937     unsigned Mask = 1 << (BITMASK_WIDTH - 1 - i);
6938     switch(Ctl[i]) {
6939     default:
6940       Error(StrLoc, "invalid mask");
6941       return false;
6942     case '0':
6943       break;
6944     case '1':
6945       OrMask |= Mask;
6946       break;
6947     case 'p':
6948       AndMask |= Mask;
6949       break;
6950     case 'i':
6951       AndMask |= Mask;
6952       XorMask |= Mask;
6953       break;
6954     }
6955   }
6956 
6957   Imm = encodeBitmaskPerm(AndMask, OrMask, XorMask);
6958   return true;
6959 }
6960 
6961 bool
6962 AMDGPUAsmParser::parseSwizzleOffset(int64_t &Imm) {
6963 
6964   SMLoc OffsetLoc = getLoc();
6965 
6966   if (!parseExpr(Imm, "a swizzle macro")) {
6967     return false;
6968   }
6969   if (!isUInt<16>(Imm)) {
6970     Error(OffsetLoc, "expected a 16-bit offset");
6971     return false;
6972   }
6973   return true;
6974 }
6975 
6976 bool
6977 AMDGPUAsmParser::parseSwizzleMacro(int64_t &Imm) {
6978   using namespace llvm::AMDGPU::Swizzle;
6979 
6980   if (skipToken(AsmToken::LParen, "expected a left parenthesis")) {
6981 
6982     SMLoc ModeLoc = getLoc();
6983     bool Ok = false;
6984 
6985     if (trySkipId(IdSymbolic[ID_QUAD_PERM])) {
6986       Ok = parseSwizzleQuadPerm(Imm);
6987     } else if (trySkipId(IdSymbolic[ID_BITMASK_PERM])) {
6988       Ok = parseSwizzleBitmaskPerm(Imm);
6989     } else if (trySkipId(IdSymbolic[ID_BROADCAST])) {
6990       Ok = parseSwizzleBroadcast(Imm);
6991     } else if (trySkipId(IdSymbolic[ID_SWAP])) {
6992       Ok = parseSwizzleSwap(Imm);
6993     } else if (trySkipId(IdSymbolic[ID_REVERSE])) {
6994       Ok = parseSwizzleReverse(Imm);
6995     } else {
6996       Error(ModeLoc, "expected a swizzle mode");
6997     }
6998 
6999     return Ok && skipToken(AsmToken::RParen, "expected a closing parenthesis");
7000   }
7001 
7002   return false;
7003 }
7004 
7005 OperandMatchResultTy
7006 AMDGPUAsmParser::parseSwizzleOp(OperandVector &Operands) {
7007   SMLoc S = getLoc();
7008   int64_t Imm = 0;
7009 
7010   if (trySkipId("offset")) {
7011 
7012     bool Ok = false;
7013     if (skipToken(AsmToken::Colon, "expected a colon")) {
7014       if (trySkipId("swizzle")) {
7015         Ok = parseSwizzleMacro(Imm);
7016       } else {
7017         Ok = parseSwizzleOffset(Imm);
7018       }
7019     }
7020 
7021     Operands.push_back(AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTySwizzle));
7022 
7023     return Ok ? MatchOperand_Success : MatchOperand_ParseFail;
7024   } else {
7025     // Swizzle "offset" operand is optional.
7026     // If it is omitted, try parsing other optional operands.
7027     return parseOptionalOpr(Operands);
7028   }
7029 }
7030 
7031 bool
7032 AMDGPUOperand::isSwizzle() const {
7033   return isImmTy(ImmTySwizzle);
7034 }
7035 
7036 //===----------------------------------------------------------------------===//
7037 // VGPR Index Mode
7038 //===----------------------------------------------------------------------===//
7039 
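// Parse the body of a gpr_idx(...) operand: a possibly empty,
// comma-separated list of VGPR index modes (e.g. SRC0 or DST), each of
// which may be given at most once. Returns a bitmask of the selected modes,
// OFF for an empty list, or UNDEF on a parse error. The opening parenthesis
// has already been consumed by the caller.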
7040 int64_t AMDGPUAsmParser::parseGPRIdxMacro() {
7041 
7042   using namespace llvm::AMDGPU::VGPRIndexMode;
7043 
7044   if (trySkipToken(AsmToken::RParen)) {
7045     return OFF;
7046   }
7047 
7048   int64_t Imm = 0;
7049 
7050   while (true) {
7051     unsigned Mode = 0;
7052     SMLoc S = getLoc();
7053 
7054     for (unsigned ModeId = ID_MIN; ModeId <= ID_MAX; ++ModeId) {
7055       if (trySkipId(IdSymbolic[ModeId])) {
7056         Mode = 1 << ModeId;
7057         break;
7058       }
7059     }
7060 
7061     if (Mode == 0) {
7062       Error(S, (Imm == 0) ?
7063                "expected a VGPR index mode or a closing parenthesis" :
7064                "expected a VGPR index mode");
7065       return UNDEF;
7066     }
7067 
7068     if (Imm & Mode) {
7069       Error(S, "duplicate VGPR index mode");
7070       return UNDEF;
7071     }
7072     Imm |= Mode;
7073 
7074     if (trySkipToken(AsmToken::RParen))
7075       break;
7076     if (!skipToken(AsmToken::Comma,
7077                    "expected a comma or a closing parenthesis"))
7078       return UNDEF;
7079   }
7080 
7081   return Imm;
7082 }
7083 
7084 OperandMatchResultTy
7085 AMDGPUAsmParser::parseGPRIdxMode(OperandVector &Operands) {
7086 
7087   using namespace llvm::AMDGPU::VGPRIndexMode;
7088 
7089   int64_t Imm = 0;
7090   SMLoc S = getLoc();
7091 
7092   if (trySkipId("gpr_idx", AsmToken::LParen)) {
7093     Imm = parseGPRIdxMacro();
7094     if (Imm == UNDEF)
7095       return MatchOperand_ParseFail;
7096   } else {
7097     if (getParser().parseAbsoluteExpression(Imm))
7098       return MatchOperand_ParseFail;
7099     if (Imm < 0 || !isUInt<4>(Imm)) {
7100       Error(S, "invalid immediate: only 4-bit values are legal");
7101       return MatchOperand_ParseFail;
7102     }
7103   }
7104 
7105   Operands.push_back(
7106       AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyGprIdxMode));
7107   return MatchOperand_Success;
7108 }
7109 
7110 bool AMDGPUOperand::isGPRIdxMode() const {
7111   return isImmTy(ImmTyGprIdxMode);
7112 }
7113 
7114 //===----------------------------------------------------------------------===//
7115 // sopp branch targets
7116 //===----------------------------------------------------------------------===//
7117 
7118 OperandMatchResultTy
7119 AMDGPUAsmParser::parseSOppBrTarget(OperandVector &Operands) {
7120 
7121   // Make sure we are not parsing something
7122   // that looks like a label or an expression but is not.
7123   // This will improve error messages.
7124   if (isRegister() || isModifier())
7125     return MatchOperand_NoMatch;
7126 
7127   if (!parseExpr(Operands))
7128     return MatchOperand_ParseFail;
7129 
7130   AMDGPUOperand &Opr = ((AMDGPUOperand &)*Operands[Operands.size() - 1]);
7131   assert(Opr.isImm() || Opr.isExpr());
7132   SMLoc Loc = Opr.getStartLoc();
7133 
7134   // Currently we do not support arbitrary expressions as branch targets.
7135   // Only labels and absolute expressions are accepted.
7136   if (Opr.isExpr() && !Opr.isSymbolRefExpr()) {
7137     Error(Loc, "expected an absolute expression or a label");
7138   } else if (Opr.isImm() && !Opr.isS16Imm()) {
7139     Error(Loc, "expected a 16-bit signed jump offset");
7140   }
7141 
7142   return MatchOperand_Success;
7143 }
7144 
7145 //===----------------------------------------------------------------------===//
7146 // Boolean holding registers
7147 //===----------------------------------------------------------------------===//
7148 
7149 OperandMatchResultTy
7150 AMDGPUAsmParser::parseBoolReg(OperandVector &Operands) {
7151   return parseReg(Operands);
7152 }
7153 
7154 //===----------------------------------------------------------------------===//
7155 // mubuf
7156 //===----------------------------------------------------------------------===//
7157 
7158 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultCPol() const {
7159   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyCPol);
7160 }
7161 
7162 void AMDGPUAsmParser::cvtMubufImpl(MCInst &Inst,
7163                                    const OperandVector &Operands,
7164                                    bool IsAtomic,
7165                                    bool IsLds) {
7166   bool IsLdsOpcode = IsLds;
7167   bool HasLdsModifier = false;
7168   OptionalImmIndexMap OptionalIdx;
7169   unsigned FirstOperandIdx = 1;
7170   bool IsAtomicReturn = false;
7171 
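  // For atomics, the glc bit of the cache policy operand tells us whether
  // this is the "return" form of the atomic. If it is not, switch to the
  // no-return opcode variant when one is available.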
7172   if (IsAtomic) {
7173     for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) {
7174       AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
7175       if (!Op.isCPol())
7176         continue;
7177       IsAtomicReturn = Op.getImm() & AMDGPU::CPol::GLC;
7178       break;
7179     }
7180 
7181     if (!IsAtomicReturn) {
7182       int NewOpc = AMDGPU::getAtomicNoRetOp(Inst.getOpcode());
7183       if (NewOpc != -1)
7184         Inst.setOpcode(NewOpc);
7185     }
7186 
7187     IsAtomicReturn = MII.get(Inst.getOpcode()).TSFlags &
7188                      SIInstrFlags::IsAtomicRet;
7189   }
7190 
7191   for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) {
7192     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
7193 
7194     // Add the register arguments
7195     if (Op.isReg()) {
7196       Op.addRegOperands(Inst, 1);
7197       // Insert a tied src for the atomic return dst.
7198       // This cannot be postponed as subsequent calls to
7199       // addImmOperands rely on the correct number of MC operands.
7200       if (IsAtomicReturn && i == FirstOperandIdx)
7201         Op.addRegOperands(Inst, 1);
7202       continue;
7203     }
7204 
7205     // Handle the case where soffset is an immediate
7206     if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
7207       Op.addImmOperands(Inst, 1);
7208       continue;
7209     }
7210 
7211     HasLdsModifier |= Op.isLDS();
7212 
7213     // Handle tokens like 'offen' which are sometimes hard-coded into the
7214     // asm string.  There are no MCInst operands for these.
7215     if (Op.isToken()) {
7216       continue;
7217     }
7218     assert(Op.isImm());
7219 
7220     // Handle optional arguments
7221     OptionalIdx[Op.getImmTy()] = i;
7222   }
7223 
7224   // This is a workaround for an LLVM quirk which may result in
7225   // incorrect instruction selection. The lds and non-lds versions of
7226   // MUBUF instructions are identical except that lds versions
7227   // have a mandatory 'lds' modifier. However, this modifier follows
7228   // the optional modifiers, and the LLVM asm matcher regards the 'lds'
7229   // modifier as optional too. As a result, the lds version
7230   // of an opcode may be selected even if it has no 'lds' modifier.
7231   if (IsLdsOpcode && !HasLdsModifier) {
7232     int NoLdsOpcode = AMDGPU::getMUBUFNoLdsInst(Inst.getOpcode());
7233     if (NoLdsOpcode != -1) { // Got lds version - correct it.
7234       Inst.setOpcode(NoLdsOpcode);
7235       IsLdsOpcode = false;
7236     }
7237   }
7238 
7239   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset);
7240   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol, 0);
7241 
7242   if (!IsLdsOpcode) { // tfe is not legal with lds opcodes
7243     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
7244   }
7245   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySWZ);
7246 }
7247 
7248 void AMDGPUAsmParser::cvtMtbuf(MCInst &Inst, const OperandVector &Operands) {
7249   OptionalImmIndexMap OptionalIdx;
7250 
7251   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
7252     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
7253 
7254     // Add the register arguments
7255     if (Op.isReg()) {
7256       Op.addRegOperands(Inst, 1);
7257       continue;
7258     }
7259 
7260     // Handle the case where soffset is an immediate
7261     if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
7262       Op.addImmOperands(Inst, 1);
7263       continue;
7264     }
7265 
7266     // Handle tokens like 'offen' which are sometimes hard-coded into the
7267     // asm string.  There are no MCInst operands for these.
7268     if (Op.isToken()) {
7269       continue;
7270     }
7271     assert(Op.isImm());
7272 
7273     // Handle optional arguments
7274     OptionalIdx[Op.getImmTy()] = i;
7275   }
7276 
7277   addOptionalImmOperand(Inst, Operands, OptionalIdx,
7278                         AMDGPUOperand::ImmTyOffset);
7279   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyFORMAT);
7280   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol, 0);
7281   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
7282   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySWZ);
7283 }
7284 
7285 //===----------------------------------------------------------------------===//
7286 // mimg
7287 //===----------------------------------------------------------------------===//
7288 
7289 void AMDGPUAsmParser::cvtMIMG(MCInst &Inst, const OperandVector &Operands,
7290                               bool IsAtomic) {
7291   unsigned I = 1;
7292   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
7293   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
7294     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
7295   }
7296 
7297   if (IsAtomic) {
7298     // Add src, same as dst
7299     assert(Desc.getNumDefs() == 1);
7300     ((AMDGPUOperand &)*Operands[I - 1]).addRegOperands(Inst, 1);
7301   }
7302 
7303   OptionalImmIndexMap OptionalIdx;
7304 
7305   for (unsigned E = Operands.size(); I != E; ++I) {
7306     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
7307 
7308     // Add the register arguments
7309     if (Op.isReg()) {
7310       Op.addRegOperands(Inst, 1);
7311     } else if (Op.isImmModifier()) {
7312       OptionalIdx[Op.getImmTy()] = I;
7313     } else if (!Op.isToken()) {
7314       llvm_unreachable("unexpected operand type");
7315     }
7316   }
7317 
7318   bool IsGFX10Plus = isGFX10Plus();
7319 
7320   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDMask);
7321   if (IsGFX10Plus)
7322     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDim, -1);
7323   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyUNorm);
7324   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol);
7325   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyR128A16);
7326   if (IsGFX10Plus)
7327     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyA16);
7328   if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::tfe) != -1)
7329     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
7330   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyLWE);
7331   if (!IsGFX10Plus)
7332     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDA);
7333   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyD16);
7334 }
7335 
7336 void AMDGPUAsmParser::cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands) {
7337   cvtMIMG(Inst, Operands, true);
7338 }
7339 
7340 void AMDGPUAsmParser::cvtSMEMAtomic(MCInst &Inst, const OperandVector &Operands) {
7341   OptionalImmIndexMap OptionalIdx;
7342   bool IsAtomicReturn = false;
7343 
7344   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
7345     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
7346     if (!Op.isCPol())
7347       continue;
7348     IsAtomicReturn = Op.getImm() & AMDGPU::CPol::GLC;
7349     break;
7350   }
7351 
7352   if (!IsAtomicReturn) {
7353     int NewOpc = AMDGPU::getAtomicNoRetOp(Inst.getOpcode());
7354     if (NewOpc != -1)
7355       Inst.setOpcode(NewOpc);
7356   }
7357 
7358   IsAtomicReturn = MII.get(Inst.getOpcode()).TSFlags &
7359                    SIInstrFlags::IsAtomicRet;
7360 
7361   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
7362     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
7363 
7364     // Add the register arguments
7365     if (Op.isReg()) {
7366       Op.addRegOperands(Inst, 1);
7367       if (IsAtomicReturn && i == 1)
7368         Op.addRegOperands(Inst, 1);
7369       continue;
7370     }
7371 
7372     // Handle the case where soffset is an immediate
7373     if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
7374       Op.addImmOperands(Inst, 1);
7375       continue;
7376     }
7377 
7378     // Handle tokens like 'offen' which are sometimes hard-coded into the
7379     // asm string.  There are no MCInst operands for these.
7380     if (Op.isToken()) {
7381       continue;
7382     }
7383     assert(Op.isImm());
7384 
7385     // Handle optional arguments
7386     OptionalIdx[Op.getImmTy()] = i;
7387   }
7388 
7389   if ((int)Inst.getNumOperands() <=
7390       AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::offset))
7391     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset);
7392   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol, 0);
7393 }
7394 
7395 void AMDGPUAsmParser::cvtIntersectRay(MCInst &Inst,
7396                                       const OperandVector &Operands) {
7397   for (unsigned I = 1; I < Operands.size(); ++I) {
7398     auto &Operand = (AMDGPUOperand &)*Operands[I];
7399     if (Operand.isReg())
7400       Operand.addRegOperands(Inst, 1);
7401   }
7402 
7403   Inst.addOperand(MCOperand::createImm(1)); // a16
7404 }
7405 
7406 //===----------------------------------------------------------------------===//
7407 // smrd
7408 //===----------------------------------------------------------------------===//
7409 
7410 bool AMDGPUOperand::isSMRDOffset8() const {
7411   return isImm() && isUInt<8>(getImm());
7412 }
7413 
7414 bool AMDGPUOperand::isSMEMOffset() const {
7415   return isImm(); // Offset range is checked later by validator.
7416 }
7417 
7418 bool AMDGPUOperand::isSMRDLiteralOffset() const {
7419   // 32-bit literals are only supported on CI, and we only want to use them
7420   // when the offset does not fit in 8 bits.
7421   return isImm() && !isUInt<8>(getImm()) && isUInt<32>(getImm());
7422 }
7423 
7424 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDOffset8() const {
7425   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
7426 }
7427 
7428 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMEMOffset() const {
7429   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
7430 }
7431 
7432 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDLiteralOffset() const {
7433   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
7434 }
7435 
7436 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultFlatOffset() const {
7437   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
7438 }
7439 
7440 //===----------------------------------------------------------------------===//
7441 // vop3
7442 //===----------------------------------------------------------------------===//
7443 
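// Map the user-visible omod syntax onto the encoded field value:
// mul:1/2/4 become 0/1/2 (ConvertOmodMul) and div:1/2 become 0/3
// (ConvertOmodDiv).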
7444 static bool ConvertOmodMul(int64_t &Mul) {
7445   if (Mul != 1 && Mul != 2 && Mul != 4)
7446     return false;
7447 
7448   Mul >>= 1;
7449   return true;
7450 }
7451 
7452 static bool ConvertOmodDiv(int64_t &Div) {
7453   if (Div == 1) {
7454     Div = 0;
7455     return true;
7456   }
7457 
7458   if (Div == 2) {
7459     Div = 3;
7460     return true;
7461   }
7462 
7463   return false;
7464 }
7465 
7466 // Both bound_ctrl:0 and bound_ctrl:1 are encoded as 1.
7467 // This is intentional and ensures compatibility with sp3.
7468 // See bug 35397 for details.
7469 static bool ConvertBoundCtrl(int64_t &BoundCtrl) {
7470   if (BoundCtrl == 0 || BoundCtrl == 1) {
7471     BoundCtrl = 1;
7472     return true;
7473   }
7474   return false;
7475 }
7476 
7477 // Note: the order in this table matches the order of operands in AsmString.
7478 static const OptionalOperand AMDGPUOptionalOperandTable[] = {
7479   {"offen",   AMDGPUOperand::ImmTyOffen, true, nullptr},
7480   {"idxen",   AMDGPUOperand::ImmTyIdxen, true, nullptr},
7481   {"addr64",  AMDGPUOperand::ImmTyAddr64, true, nullptr},
7482   {"offset0", AMDGPUOperand::ImmTyOffset0, false, nullptr},
7483   {"offset1", AMDGPUOperand::ImmTyOffset1, false, nullptr},
7484   {"gds",     AMDGPUOperand::ImmTyGDS, true, nullptr},
7485   {"lds",     AMDGPUOperand::ImmTyLDS, true, nullptr},
7486   {"offset",  AMDGPUOperand::ImmTyOffset, false, nullptr},
7487   {"inst_offset", AMDGPUOperand::ImmTyInstOffset, false, nullptr},
7488   {"",        AMDGPUOperand::ImmTyCPol, false, nullptr},
7489   {"swz",     AMDGPUOperand::ImmTySWZ, true, nullptr},
7490   {"tfe",     AMDGPUOperand::ImmTyTFE, true, nullptr},
7491   {"d16",     AMDGPUOperand::ImmTyD16, true, nullptr},
7492   {"high",    AMDGPUOperand::ImmTyHigh, true, nullptr},
7493   {"clamp",   AMDGPUOperand::ImmTyClampSI, true, nullptr},
7494   {"omod",    AMDGPUOperand::ImmTyOModSI, false, ConvertOmodMul},
7495   {"unorm",   AMDGPUOperand::ImmTyUNorm, true, nullptr},
7496   {"da",      AMDGPUOperand::ImmTyDA,    true, nullptr},
7497   {"r128",    AMDGPUOperand::ImmTyR128A16,  true, nullptr},
7498   {"a16",     AMDGPUOperand::ImmTyA16,  true, nullptr},
7499   {"lwe",     AMDGPUOperand::ImmTyLWE,   true, nullptr},
7500   {"d16",     AMDGPUOperand::ImmTyD16,   true, nullptr},
7501   {"dmask",   AMDGPUOperand::ImmTyDMask, false, nullptr},
7502   {"dim",     AMDGPUOperand::ImmTyDim,   false, nullptr},
7503   {"row_mask",   AMDGPUOperand::ImmTyDppRowMask, false, nullptr},
7504   {"bank_mask",  AMDGPUOperand::ImmTyDppBankMask, false, nullptr},
7505   {"bound_ctrl", AMDGPUOperand::ImmTyDppBoundCtrl, false, ConvertBoundCtrl},
7506   {"fi",         AMDGPUOperand::ImmTyDppFi, false, nullptr},
7507   {"dst_sel",    AMDGPUOperand::ImmTySdwaDstSel, false, nullptr},
7508   {"src0_sel",   AMDGPUOperand::ImmTySdwaSrc0Sel, false, nullptr},
7509   {"src1_sel",   AMDGPUOperand::ImmTySdwaSrc1Sel, false, nullptr},
7510   {"dst_unused", AMDGPUOperand::ImmTySdwaDstUnused, false, nullptr},
7511   {"compr", AMDGPUOperand::ImmTyExpCompr, true, nullptr },
7512   {"vm", AMDGPUOperand::ImmTyExpVM, true, nullptr},
7513   {"op_sel", AMDGPUOperand::ImmTyOpSel, false, nullptr},
7514   {"op_sel_hi", AMDGPUOperand::ImmTyOpSelHi, false, nullptr},
7515   {"neg_lo", AMDGPUOperand::ImmTyNegLo, false, nullptr},
7516   {"neg_hi", AMDGPUOperand::ImmTyNegHi, false, nullptr},
7517   {"blgp", AMDGPUOperand::ImmTyBLGP, false, nullptr},
7518   {"cbsz", AMDGPUOperand::ImmTyCBSZ, false, nullptr},
7519   {"abid", AMDGPUOperand::ImmTyABID, false, nullptr}
7520 };
7521 
7522 void AMDGPUAsmParser::onBeginOfFile() {
7523   if (!getParser().getStreamer().getTargetStreamer() ||
7524       getSTI().getTargetTriple().getArch() == Triple::r600)
7525     return;
7526 
7527   if (!getTargetStreamer().getTargetID())
7528     getTargetStreamer().initializeTargetID(getSTI(), getSTI().getFeatureString());
7529 
7530   if (isHsaAbiVersion3AndAbove(&getSTI()))
7531     getTargetStreamer().EmitDirectiveAMDGCNTarget();
7532 }
7533 
7534 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOperand(OperandVector &Operands) {
7535 
7536   OperandMatchResultTy res = parseOptionalOpr(Operands);
7537 
7538   // This is a hack to enable hardcoded mandatory operands which follow
7539   // optional operands.
7540   //
7541   // The current design assumes that all operands after the first optional
7542   // operand are also optional. However, some instructions violate this
7543   // rule (e.g. flat/global atomics, which have a hardcoded 'glc' operand).
7544   //
7545   // To alleviate this problem, we have to (implicitly) parse extra operands
7546   // to make sure the autogenerated parser of custom operands never hits a
7547   // hardcoded mandatory operand.
7548 
7549   for (unsigned i = 0; i < MAX_OPR_LOOKAHEAD; ++i) {
7550     if (res != MatchOperand_Success ||
7551         isToken(AsmToken::EndOfStatement))
7552       break;
7553 
7554     trySkipToken(AsmToken::Comma);
7555     res = parseOptionalOpr(Operands);
7556   }
7557 
7558   return res;
7559 }
7560 
7561 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOpr(OperandVector &Operands) {
7562   OperandMatchResultTy res;
7563   for (const OptionalOperand &Op : AMDGPUOptionalOperandTable) {
7564     // try to parse any optional operand here
7565     if (Op.IsBit) {
7566       res = parseNamedBit(Op.Name, Operands, Op.Type);
7567     } else if (Op.Type == AMDGPUOperand::ImmTyOModSI) {
7568       res = parseOModOperand(Operands);
7569     } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstSel ||
7570                Op.Type == AMDGPUOperand::ImmTySdwaSrc0Sel ||
7571                Op.Type == AMDGPUOperand::ImmTySdwaSrc1Sel) {
7572       res = parseSDWASel(Operands, Op.Name, Op.Type);
7573     } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstUnused) {
7574       res = parseSDWADstUnused(Operands);
7575     } else if (Op.Type == AMDGPUOperand::ImmTyOpSel ||
7576                Op.Type == AMDGPUOperand::ImmTyOpSelHi ||
7577                Op.Type == AMDGPUOperand::ImmTyNegLo ||
7578                Op.Type == AMDGPUOperand::ImmTyNegHi) {
7579       res = parseOperandArrayWithPrefix(Op.Name, Operands, Op.Type,
7580                                         Op.ConvertResult);
7581     } else if (Op.Type == AMDGPUOperand::ImmTyDim) {
7582       res = parseDim(Operands);
7583     } else if (Op.Type == AMDGPUOperand::ImmTyCPol) {
7584       res = parseCPol(Operands);
7585     } else {
7586       res = parseIntWithPrefix(Op.Name, Operands, Op.Type, Op.ConvertResult);
7587     }
7588     if (res != MatchOperand_NoMatch) {
7589       return res;
7590     }
7591   }
7592   return MatchOperand_NoMatch;
7593 }
7594 
7595 OperandMatchResultTy AMDGPUAsmParser::parseOModOperand(OperandVector &Operands) {
7596   StringRef Name = getTokenStr();
7597   if (Name == "mul") {
7598     return parseIntWithPrefix("mul", Operands,
7599                               AMDGPUOperand::ImmTyOModSI, ConvertOmodMul);
7600   }
7601 
7602   if (Name == "div") {
7603     return parseIntWithPrefix("div", Operands,
7604                               AMDGPUOperand::ImmTyOModSI, ConvertOmodDiv);
7605   }
7606 
7607   return MatchOperand_NoMatch;
7608 }
7609 
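// After the common VOP3P conversion, fold the destination op_sel bit (bit
// <number of sources> of the op_sel mask) into src0_modifiers as
// SISrcMods::DST_OP_SEL.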
7610 void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands) {
7611   cvtVOP3P(Inst, Operands);
7612 
7613   int Opc = Inst.getOpcode();
7614 
7615   int SrcNum;
7616   const int Ops[] = { AMDGPU::OpName::src0,
7617                       AMDGPU::OpName::src1,
7618                       AMDGPU::OpName::src2 };
7619   for (SrcNum = 0;
7620        SrcNum < 3 && AMDGPU::getNamedOperandIdx(Opc, Ops[SrcNum]) != -1;
7621        ++SrcNum);
7622   assert(SrcNum > 0);
7623 
7624   int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
7625   unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
7626 
7627   if ((OpSel & (1 << SrcNum)) != 0) {
7628     int ModIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers);
7629     uint32_t ModVal = Inst.getOperand(ModIdx).getImm();
7630     Inst.getOperand(ModIdx).setImm(ModVal | SISrcMods::DST_OP_SEL);
7631   }
7632 }
7633 
7634 static bool isRegOrImmWithInputMods(const MCInstrDesc &Desc, unsigned OpNum) {
7635       // 1. This operand is an input-modifiers operand
7636   return Desc.OpInfo[OpNum].OperandType == AMDGPU::OPERAND_INPUT_MODS
7637       // 2. This is not the last operand
7638       && Desc.NumOperands > (OpNum + 1)
7639       // 3. The next operand is a register operand
7640       && Desc.OpInfo[OpNum + 1].RegClass != -1
7641       // 4. The next operand is not tied to any other operand
7642       && Desc.getOperandConstraint(OpNum + 1, MCOI::OperandConstraint::TIED_TO) == -1;
7643 }
7644 
7645 void AMDGPUAsmParser::cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands)
7646 {
7647   OptionalImmIndexMap OptionalIdx;
7648   unsigned Opc = Inst.getOpcode();
7649 
7650   unsigned I = 1;
7651   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
7652   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
7653     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
7654   }
7655 
7656   for (unsigned E = Operands.size(); I != E; ++I) {
7657     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
7658     if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
7659       Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
7660     } else if (Op.isInterpSlot() ||
7661                Op.isInterpAttr() ||
7662                Op.isAttrChan()) {
7663       Inst.addOperand(MCOperand::createImm(Op.getImm()));
7664     } else if (Op.isImmModifier()) {
7665       OptionalIdx[Op.getImmTy()] = I;
7666     } else {
7667       llvm_unreachable("unhandled operand type");
7668     }
7669   }
7670 
7671   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::high) != -1) {
7672     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyHigh);
7673   }
7674 
7675   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) {
7676     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI);
7677   }
7678 
7679   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) {
7680     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI);
7681   }
7682 }
7683 
7684 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands,
7685                               OptionalImmIndexMap &OptionalIdx) {
7686   unsigned Opc = Inst.getOpcode();
7687 
7688   unsigned I = 1;
7689   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
7690   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
7691     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
7692   }
7693 
7694   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers) != -1) {
7695     // This instruction has src modifiers
7696     for (unsigned E = Operands.size(); I != E; ++I) {
7697       AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
7698       if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
7699         Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
7700       } else if (Op.isImmModifier()) {
7701         OptionalIdx[Op.getImmTy()] = I;
7702       } else if (Op.isRegOrImm()) {
7703         Op.addRegOrImmOperands(Inst, 1);
7704       } else {
7705         llvm_unreachable("unhandled operand type");
7706       }
7707     }
7708   } else {
7709     // No src modifiers
7710     for (unsigned E = Operands.size(); I != E; ++I) {
7711       AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
7712       if (Op.isMod()) {
7713         OptionalIdx[Op.getImmTy()] = I;
7714       } else {
7715         Op.addRegOrImmOperands(Inst, 1);
7716       }
7717     }
7718   }
7719 
7720   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) {
7721     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI);
7722   }
7723 
7724   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) {
7725     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI);
7726   }
7727 
7728   // Special case v_mac_{f16, f32} and v_fmac_{f16, f32} (gfx906/gfx10+):
7729   // they have a src2 register operand that is tied to the dst operand.
7730   // We don't allow modifiers for this operand in the assembler, so
7731   // src2_modifiers should be 0.
7732   if (Opc == AMDGPU::V_MAC_F32_e64_gfx6_gfx7 ||
7733       Opc == AMDGPU::V_MAC_F32_e64_gfx10 ||
7734       Opc == AMDGPU::V_MAC_F32_e64_vi ||
7735       Opc == AMDGPU::V_MAC_LEGACY_F32_e64_gfx6_gfx7 ||
7736       Opc == AMDGPU::V_MAC_LEGACY_F32_e64_gfx10 ||
7737       Opc == AMDGPU::V_MAC_F16_e64_vi ||
7738       Opc == AMDGPU::V_FMAC_F64_e64_gfx90a ||
7739       Opc == AMDGPU::V_FMAC_F32_e64_gfx10 ||
7740       Opc == AMDGPU::V_FMAC_F32_e64_vi ||
7741       Opc == AMDGPU::V_FMAC_LEGACY_F32_e64_gfx10 ||
7742       Opc == AMDGPU::V_FMAC_F16_e64_gfx10) {
7743     auto it = Inst.begin();
7744     std::advance(it, AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2_modifiers));
7745     it = Inst.insert(it, MCOperand::createImm(0)); // no modifiers for src2
7746     ++it;
7747     // Copy the operand to ensure it's not invalidated when Inst grows.
7748     Inst.insert(it, MCOperand(Inst.getOperand(0))); // src2 = dst
7749   }
7750 }
7751 
7752 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands) {
7753   OptionalImmIndexMap OptionalIdx;
7754   cvtVOP3(Inst, Operands, OptionalIdx);
7755 }
7756 
7757 void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands,
7758                                OptionalImmIndexMap &OptIdx) {
7759   const int Opc = Inst.getOpcode();
7760   const MCInstrDesc &Desc = MII.get(Opc);
7761 
7762   const bool IsPacked = (Desc.TSFlags & SIInstrFlags::IsPacked) != 0;
7763 
7764   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst_in) != -1) {
7765     assert(!IsPacked);
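    // vdst_in takes the same register as vdst, so reuse the destination
    // operand that has already been added to the instruction.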
7766     Inst.addOperand(Inst.getOperand(0));
7767   }
7768 
  // FIXME: This is messy. Parse the modifiers as if this were a normal VOP3
  // instruction, and then figure out where to actually put the modifiers.
7771 
7772   int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
7773   if (OpSelIdx != -1) {
7774     addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSel);
7775   }
7776 
7777   int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi);
7778   if (OpSelHiIdx != -1) {
7779     int DefaultVal = IsPacked ? -1 : 0;
7780     addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSelHi,
7781                           DefaultVal);
7782   }
7783 
7784   int NegLoIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_lo);
7785   if (NegLoIdx != -1) {
7786     addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegLo);
7787     addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegHi);
7788   }
7789 
7790   const int Ops[] = { AMDGPU::OpName::src0,
7791                       AMDGPU::OpName::src1,
7792                       AMDGPU::OpName::src2 };
7793   const int ModOps[] = { AMDGPU::OpName::src0_modifiers,
7794                          AMDGPU::OpName::src1_modifiers,
7795                          AMDGPU::OpName::src2_modifiers };
7796 
7797   unsigned OpSel = 0;
7798   unsigned OpSelHi = 0;
7799   unsigned NegLo = 0;
7800   unsigned NegHi = 0;
7801 
7802   if (OpSelIdx != -1)
7803     OpSel = Inst.getOperand(OpSelIdx).getImm();
7804 
7805   if (OpSelHiIdx != -1)
7806     OpSelHi = Inst.getOperand(OpSelHiIdx).getImm();
7807 
7808   if (NegLoIdx != -1) {
7809     int NegHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_hi);
7810     NegLo = Inst.getOperand(NegLoIdx).getImm();
7811     NegHi = Inst.getOperand(NegHiIdx).getImm();
7812   }
7813 
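  // Fold the per-source bits of op_sel, op_sel_hi, neg_lo and neg_hi into the
  // corresponding srcN_modifiers operand: bit J of each packed value controls
  // source operand J.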
7814   for (int J = 0; J < 3; ++J) {
7815     int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]);
7816     if (OpIdx == -1)
7817       break;
7818 
7819     uint32_t ModVal = 0;
7820 
7821     if ((OpSel & (1 << J)) != 0)
7822       ModVal |= SISrcMods::OP_SEL_0;
7823 
7824     if ((OpSelHi & (1 << J)) != 0)
7825       ModVal |= SISrcMods::OP_SEL_1;
7826 
7827     if ((NegLo & (1 << J)) != 0)
7828       ModVal |= SISrcMods::NEG;
7829 
7830     if ((NegHi & (1 << J)) != 0)
7831       ModVal |= SISrcMods::NEG_HI;
7832 
7833     int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]);
7834 
7835     Inst.getOperand(ModIdx).setImm(Inst.getOperand(ModIdx).getImm() | ModVal);
7836   }
7837 }
7838 
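// Convert as a regular VOP3 instruction first, then patch the packed (VOP3P)
// modifier operands in place.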
7839 void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands) {
7840   OptionalImmIndexMap OptIdx;
7841   cvtVOP3(Inst, Operands, OptIdx);
7842   cvtVOP3P(Inst, Operands, OptIdx);
7843 }
7844 
7845 //===----------------------------------------------------------------------===//
7846 // dpp
7847 //===----------------------------------------------------------------------===//
7848 
7849 bool AMDGPUOperand::isDPP8() const {
7850   return isImmTy(ImmTyDPP8);
7851 }
7852 
7853 bool AMDGPUOperand::isDPPCtrl() const {
7854   using namespace AMDGPU::DPP;
7855 
7856   bool result = isImm() && getImmTy() == ImmTyDppCtrl && isUInt<9>(getImm());
7857   if (result) {
7858     int64_t Imm = getImm();
7859     return (Imm >= DppCtrl::QUAD_PERM_FIRST && Imm <= DppCtrl::QUAD_PERM_LAST) ||
7860            (Imm >= DppCtrl::ROW_SHL_FIRST && Imm <= DppCtrl::ROW_SHL_LAST) ||
7861            (Imm >= DppCtrl::ROW_SHR_FIRST && Imm <= DppCtrl::ROW_SHR_LAST) ||
7862            (Imm >= DppCtrl::ROW_ROR_FIRST && Imm <= DppCtrl::ROW_ROR_LAST) ||
7863            (Imm == DppCtrl::WAVE_SHL1) ||
7864            (Imm == DppCtrl::WAVE_ROL1) ||
7865            (Imm == DppCtrl::WAVE_SHR1) ||
7866            (Imm == DppCtrl::WAVE_ROR1) ||
7867            (Imm == DppCtrl::ROW_MIRROR) ||
7868            (Imm == DppCtrl::ROW_HALF_MIRROR) ||
7869            (Imm == DppCtrl::BCAST15) ||
7870            (Imm == DppCtrl::BCAST31) ||
7871            (Imm >= DppCtrl::ROW_SHARE_FIRST && Imm <= DppCtrl::ROW_SHARE_LAST) ||
7872            (Imm >= DppCtrl::ROW_XMASK_FIRST && Imm <= DppCtrl::ROW_XMASK_LAST);
7873   }
7874   return false;
7875 }
7876 
7877 //===----------------------------------------------------------------------===//
7878 // mAI
7879 //===----------------------------------------------------------------------===//
7880 
7881 bool AMDGPUOperand::isBLGP() const {
7882   return isImm() && getImmTy() == ImmTyBLGP && isUInt<3>(getImm());
7883 }
7884 
7885 bool AMDGPUOperand::isCBSZ() const {
7886   return isImm() && getImmTy() == ImmTyCBSZ && isUInt<3>(getImm());
7887 }
7888 
7889 bool AMDGPUOperand::isABID() const {
7890   return isImm() && getImmTy() == ImmTyABID && isUInt<4>(getImm());
7891 }
7892 
7893 bool AMDGPUOperand::isS16Imm() const {
7894   return isImm() && (isInt<16>(getImm()) || isUInt<16>(getImm()));
7895 }
7896 
7897 bool AMDGPUOperand::isU16Imm() const {
7898   return isImm() && isUInt<16>(getImm());
7899 }
7900 
7901 //===----------------------------------------------------------------------===//
7902 // dim
7903 //===----------------------------------------------------------------------===//
7904 
7905 bool AMDGPUAsmParser::parseDimId(unsigned &Encoding) {
7906   // We want to allow "dim:1D" etc.,
7907   // but the initial 1 is tokenized as an integer.
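  // For example, "2D" is lexed as the integer "2" immediately followed by the
  // identifier "D"; the location check below only re-joins the two tokens
  // when they are adjacent (no whitespace in between).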
7908   std::string Token;
7909   if (isToken(AsmToken::Integer)) {
7910     SMLoc Loc = getToken().getEndLoc();
7911     Token = std::string(getTokenStr());
7912     lex();
7913     if (getLoc() != Loc)
7914       return false;
7915   }
7916 
7917   StringRef Suffix;
7918   if (!parseId(Suffix))
7919     return false;
7920   Token += Suffix;
7921 
7922   StringRef DimId = Token;
7923   if (DimId.startswith("SQ_RSRC_IMG_"))
7924     DimId = DimId.drop_front(12);
7925 
7926   const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByAsmSuffix(DimId);
7927   if (!DimInfo)
7928     return false;
7929 
7930   Encoding = DimInfo->Encoding;
7931   return true;
7932 }
7933 
7934 OperandMatchResultTy AMDGPUAsmParser::parseDim(OperandVector &Operands) {
7935   if (!isGFX10Plus())
7936     return MatchOperand_NoMatch;
7937 
7938   SMLoc S = getLoc();
7939 
7940   if (!trySkipId("dim", AsmToken::Colon))
7941     return MatchOperand_NoMatch;
7942 
7943   unsigned Encoding;
7944   SMLoc Loc = getLoc();
7945   if (!parseDimId(Encoding)) {
7946     Error(Loc, "invalid dim value");
7947     return MatchOperand_ParseFail;
7948   }
7949 
7950   Operands.push_back(AMDGPUOperand::CreateImm(this, Encoding, S,
7951                                               AMDGPUOperand::ImmTyDim));
7952   return MatchOperand_Success;
7953 }
7954 
7955 //===----------------------------------------------------------------------===//
7956 // dpp
7957 //===----------------------------------------------------------------------===//
7958 
7959 OperandMatchResultTy AMDGPUAsmParser::parseDPP8(OperandVector &Operands) {
7960   SMLoc S = getLoc();
7961 
7962   if (!isGFX10Plus() || !trySkipId("dpp8", AsmToken::Colon))
7963     return MatchOperand_NoMatch;
7964 
7965   // dpp8:[%d,%d,%d,%d,%d,%d,%d,%d]
7966 
7967   int64_t Sels[8];
7968 
7969   if (!skipToken(AsmToken::LBrac, "expected an opening square bracket"))
7970     return MatchOperand_ParseFail;
7971 
7972   for (size_t i = 0; i < 8; ++i) {
7973     if (i > 0 && !skipToken(AsmToken::Comma, "expected a comma"))
7974       return MatchOperand_ParseFail;
7975 
7976     SMLoc Loc = getLoc();
7977     if (getParser().parseAbsoluteExpression(Sels[i]))
7978       return MatchOperand_ParseFail;
7979     if (0 > Sels[i] || 7 < Sels[i]) {
7980       Error(Loc, "expected a 3-bit value");
7981       return MatchOperand_ParseFail;
7982     }
7983   }
7984 
7985   if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
7986     return MatchOperand_ParseFail;
7987 
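  // Pack the eight 3-bit lane selectors LSB-first: selector i occupies bits
  // [3*i+2 : 3*i] of the dpp8 immediate.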
7988   unsigned DPP8 = 0;
7989   for (size_t i = 0; i < 8; ++i)
7990     DPP8 |= (Sels[i] << (i * 3));
7991 
7992   Operands.push_back(AMDGPUOperand::CreateImm(this, DPP8, S, AMDGPUOperand::ImmTyDPP8));
7993   return MatchOperand_Success;
7994 }
7995 
7996 bool
7997 AMDGPUAsmParser::isSupportedDPPCtrl(StringRef Ctrl,
7998                                     const OperandVector &Operands) {
7999   if (Ctrl == "row_newbcast")
8000     return isGFX90A();
8001 
8002   if (Ctrl == "row_share" ||
8003       Ctrl == "row_xmask")
8004     return isGFX10Plus();
8005 
8006   if (Ctrl == "wave_shl" ||
8007       Ctrl == "wave_shr" ||
8008       Ctrl == "wave_rol" ||
8009       Ctrl == "wave_ror" ||
8010       Ctrl == "row_bcast")
8011     return isVI() || isGFX9();
8012 
8013   return Ctrl == "row_mirror" ||
8014          Ctrl == "row_half_mirror" ||
8015          Ctrl == "quad_perm" ||
8016          Ctrl == "row_shl" ||
8017          Ctrl == "row_shr" ||
8018          Ctrl == "row_ror";
8019 }
8020 
8021 int64_t
8022 AMDGPUAsmParser::parseDPPCtrlPerm() {
8023   // quad_perm:[%d,%d,%d,%d]
8024 
8025   if (!skipToken(AsmToken::LBrac, "expected an opening square bracket"))
8026     return -1;
8027 
8028   int64_t Val = 0;
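  // Each element is a 2-bit lane selector within a group of four lanes;
  // element i is packed into bits [2*i+1 : 2*i] of the quad_perm value.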
8029   for (int i = 0; i < 4; ++i) {
8030     if (i > 0 && !skipToken(AsmToken::Comma, "expected a comma"))
8031       return -1;
8032 
8033     int64_t Temp;
8034     SMLoc Loc = getLoc();
8035     if (getParser().parseAbsoluteExpression(Temp))
8036       return -1;
8037     if (Temp < 0 || Temp > 3) {
8038       Error(Loc, "expected a 2-bit value");
8039       return -1;
8040     }
8041 
8042     Val += (Temp << i * 2);
8043   }
8044 
8045   if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
8046     return -1;
8047 
8048   return Val;
8049 }
8050 
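// Parse the integer argument of a dpp control such as "row_shl:1" or
// "row_share:15" and return the combined control encoding, or -1 on error.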
8051 int64_t
8052 AMDGPUAsmParser::parseDPPCtrlSel(StringRef Ctrl) {
8053   using namespace AMDGPU::DPP;
8054 
8055   // sel:%d
8056 
8057   int64_t Val;
8058   SMLoc Loc = getLoc();
8059 
8060   if (getParser().parseAbsoluteExpression(Val))
8061     return -1;
8062 
8063   struct DppCtrlCheck {
8064     int64_t Ctrl;
8065     int Lo;
8066     int Hi;
8067   };
8068 
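  // For each control, Ctrl is the base encoding and [Lo, Hi] is the inclusive
  // range of accepted values; single-value controls (wave_*) have Lo == Hi
  // and use the base encoding as-is.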
8069   DppCtrlCheck Check = StringSwitch<DppCtrlCheck>(Ctrl)
8070     .Case("wave_shl",  {DppCtrl::WAVE_SHL1,       1,  1})
8071     .Case("wave_rol",  {DppCtrl::WAVE_ROL1,       1,  1})
8072     .Case("wave_shr",  {DppCtrl::WAVE_SHR1,       1,  1})
8073     .Case("wave_ror",  {DppCtrl::WAVE_ROR1,       1,  1})
8074     .Case("row_shl",   {DppCtrl::ROW_SHL0,        1, 15})
8075     .Case("row_shr",   {DppCtrl::ROW_SHR0,        1, 15})
8076     .Case("row_ror",   {DppCtrl::ROW_ROR0,        1, 15})
8077     .Case("row_share", {DppCtrl::ROW_SHARE_FIRST, 0, 15})
8078     .Case("row_xmask", {DppCtrl::ROW_XMASK_FIRST, 0, 15})
8079     .Case("row_newbcast", {DppCtrl::ROW_NEWBCAST_FIRST, 0, 15})
8080     .Default({-1, 0, 0});
8081 
8082   bool Valid;
8083   if (Check.Ctrl == -1) {
8084     Valid = (Ctrl == "row_bcast" && (Val == 15 || Val == 31));
8085     Val = (Val == 15)? DppCtrl::BCAST15 : DppCtrl::BCAST31;
8086   } else {
8087     Valid = Check.Lo <= Val && Val <= Check.Hi;
8088     Val = (Check.Lo == Check.Hi) ? Check.Ctrl : (Check.Ctrl | Val);
8089   }
8090 
8091   if (!Valid) {
8092     Error(Loc, Twine("invalid ", Ctrl) + Twine(" value"));
8093     return -1;
8094   }
8095 
8096   return Val;
8097 }
8098 
8099 OperandMatchResultTy
8100 AMDGPUAsmParser::parseDPPCtrl(OperandVector &Operands) {
8101   using namespace AMDGPU::DPP;
8102 
8103   if (!isToken(AsmToken::Identifier) ||
8104       !isSupportedDPPCtrl(getTokenStr(), Operands))
8105     return MatchOperand_NoMatch;
8106 
8107   SMLoc S = getLoc();
8108   int64_t Val = -1;
8109   StringRef Ctrl;
8110 
8111   parseId(Ctrl);
8112 
8113   if (Ctrl == "row_mirror") {
8114     Val = DppCtrl::ROW_MIRROR;
8115   } else if (Ctrl == "row_half_mirror") {
8116     Val = DppCtrl::ROW_HALF_MIRROR;
8117   } else {
8118     if (skipToken(AsmToken::Colon, "expected a colon")) {
8119       if (Ctrl == "quad_perm") {
8120         Val = parseDPPCtrlPerm();
8121       } else {
8122         Val = parseDPPCtrlSel(Ctrl);
8123       }
8124     }
8125   }
8126 
8127   if (Val == -1)
8128     return MatchOperand_ParseFail;
8129 
8130   Operands.push_back(
8131     AMDGPUOperand::CreateImm(this, Val, S, AMDGPUOperand::ImmTyDppCtrl));
8132   return MatchOperand_Success;
8133 }
8134 
8135 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultRowMask() const {
8136   return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppRowMask);
8137 }
8138 
8139 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultEndpgmImmOperands() const {
8140   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyEndpgm);
8141 }
8142 
8143 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBankMask() const {
8144   return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppBankMask);
8145 }
8146 
8147 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBoundCtrl() const {
8148   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppBoundCtrl);
8149 }
8150 
8151 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultFI() const {
8152   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppFi);
8153 }
8154 
8155 void AMDGPUAsmParser::cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8) {
8156   OptionalImmIndexMap OptionalIdx;
8157 
8158   unsigned Opc = Inst.getOpcode();
8159   bool HasModifiers =
8160       AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers) != -1;
8161   unsigned I = 1;
8162   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
8163   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
8164     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
8165   }
8166 
8167   int Fi = 0;
8168   for (unsigned E = Operands.size(); I != E; ++I) {
8169     auto TiedTo = Desc.getOperandConstraint(Inst.getNumOperands(),
8170                                             MCOI::TIED_TO);
8171     if (TiedTo != -1) {
8172       assert((unsigned)TiedTo < Inst.getNumOperands());
      // Handle the tied "old" or src2 operand for MAC instructions.
8174       Inst.addOperand(Inst.getOperand(TiedTo));
8175     }
8176     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
8177     // Add the register arguments
8178     if (Op.isReg() && validateVccOperand(Op.getReg())) {
      // VOP2b (v_add_u32, v_sub_u32, ...) DPP uses the "vcc" token.
      // Skip it.
8181       continue;
8182     }
8183 
8184     if (IsDPP8) {
8185       if (Op.isDPP8()) {
8186         Op.addImmOperands(Inst, 1);
8187       } else if (HasModifiers &&
8188                  isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
8189         Op.addRegWithFPInputModsOperands(Inst, 2);
8190       } else if (Op.isFI()) {
8191         Fi = Op.getImm();
8192       } else if (Op.isReg()) {
8193         Op.addRegOperands(Inst, 1);
8194       } else {
8195         llvm_unreachable("Invalid operand type");
8196       }
8197     } else {
8198       if (HasModifiers &&
8199           isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
8200         Op.addRegWithFPInputModsOperands(Inst, 2);
8201       } else if (Op.isReg()) {
8202         Op.addRegOperands(Inst, 1);
8203       } else if (Op.isDPPCtrl()) {
8204         Op.addImmOperands(Inst, 1);
8205       } else if (Op.isImm()) {
8206         // Handle optional arguments
8207         OptionalIdx[Op.getImmTy()] = I;
8208       } else {
8209         llvm_unreachable("Invalid operand type");
8210       }
8211     }
8212   }
8213 
8214   if (IsDPP8) {
8215     using namespace llvm::AMDGPU::DPP;
8216     Inst.addOperand(MCOperand::createImm(Fi? DPP8_FI_1 : DPP8_FI_0));
8217   } else {
8218     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppRowMask, 0xf);
8219     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBankMask, 0xf);
8220     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBoundCtrl);
8221     if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::fi) != -1) {
8222       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppFi);
8223     }
8224   }
8225 }
8226 
8227 //===----------------------------------------------------------------------===//
8228 // sdwa
8229 //===----------------------------------------------------------------------===//
8230 
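// Parse an SDWA selector of the form "<Prefix>:<value>",
// e.g. "dst_sel:WORD_1" or "src0_sel:BYTE_0".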
8231 OperandMatchResultTy
8232 AMDGPUAsmParser::parseSDWASel(OperandVector &Operands, StringRef Prefix,
8233                               AMDGPUOperand::ImmTy Type) {
8234   using namespace llvm::AMDGPU::SDWA;
8235 
8236   SMLoc S = getLoc();
8237   StringRef Value;
8238   OperandMatchResultTy res;
8239 
8240   SMLoc StringLoc;
8241   res = parseStringWithPrefix(Prefix, Value, StringLoc);
8242   if (res != MatchOperand_Success) {
8243     return res;
8244   }
8245 
8246   int64_t Int;
8247   Int = StringSwitch<int64_t>(Value)
8248         .Case("BYTE_0", SdwaSel::BYTE_0)
8249         .Case("BYTE_1", SdwaSel::BYTE_1)
8250         .Case("BYTE_2", SdwaSel::BYTE_2)
8251         .Case("BYTE_3", SdwaSel::BYTE_3)
8252         .Case("WORD_0", SdwaSel::WORD_0)
8253         .Case("WORD_1", SdwaSel::WORD_1)
8254         .Case("DWORD", SdwaSel::DWORD)
8255         .Default(0xffffffff);
8256 
8257   if (Int == 0xffffffff) {
8258     Error(StringLoc, "invalid " + Twine(Prefix) + " value");
8259     return MatchOperand_ParseFail;
8260   }
8261 
8262   Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, Type));
8263   return MatchOperand_Success;
8264 }
8265 
8266 OperandMatchResultTy
8267 AMDGPUAsmParser::parseSDWADstUnused(OperandVector &Operands) {
8268   using namespace llvm::AMDGPU::SDWA;
8269 
8270   SMLoc S = getLoc();
8271   StringRef Value;
8272   OperandMatchResultTy res;
8273 
8274   SMLoc StringLoc;
8275   res = parseStringWithPrefix("dst_unused", Value, StringLoc);
8276   if (res != MatchOperand_Success) {
8277     return res;
8278   }
8279 
8280   int64_t Int;
8281   Int = StringSwitch<int64_t>(Value)
8282         .Case("UNUSED_PAD", DstUnused::UNUSED_PAD)
8283         .Case("UNUSED_SEXT", DstUnused::UNUSED_SEXT)
8284         .Case("UNUSED_PRESERVE", DstUnused::UNUSED_PRESERVE)
8285         .Default(0xffffffff);
8286 
8287   if (Int == 0xffffffff) {
8288     Error(StringLoc, "invalid dst_unused value");
8289     return MatchOperand_ParseFail;
8290   }
8291 
8292   Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, AMDGPUOperand::ImmTySdwaDstUnused));
8293   return MatchOperand_Success;
8294 }
8295 
8296 void AMDGPUAsmParser::cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands) {
8297   cvtSDWA(Inst, Operands, SIInstrFlags::VOP1);
8298 }
8299 
8300 void AMDGPUAsmParser::cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands) {
8301   cvtSDWA(Inst, Operands, SIInstrFlags::VOP2);
8302 }
8303 
8304 void AMDGPUAsmParser::cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands) {
8305   cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, true, true);
8306 }
8307 
8308 void AMDGPUAsmParser::cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands) {
8309   cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, false, true);
8310 }
8311 
8312 void AMDGPUAsmParser::cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands) {
8313   cvtSDWA(Inst, Operands, SIInstrFlags::VOPC, isVI());
8314 }
8315 
8316 void AMDGPUAsmParser::cvtSDWA(MCInst &Inst, const OperandVector &Operands,
8317                               uint64_t BasicInstType,
8318                               bool SkipDstVcc,
8319                               bool SkipSrcVcc) {
8320   using namespace llvm::AMDGPU::SDWA;
8321 
8322   OptionalImmIndexMap OptionalIdx;
8323   bool SkipVcc = SkipDstVcc || SkipSrcVcc;
8324   bool SkippedVcc = false;
8325 
8326   unsigned I = 1;
8327   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
8328   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
8329     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
8330   }
8331 
8332   for (unsigned E = Operands.size(); I != E; ++I) {
8333     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
8334     if (SkipVcc && !SkippedVcc && Op.isReg() &&
8335         (Op.getReg() == AMDGPU::VCC || Op.getReg() == AMDGPU::VCC_LO)) {
      // VOP2b (v_add_u32, v_sub_u32, ...) SDWA uses the "vcc" token as dst.
      // Skip it if it is the 2nd (e.g. v_add_i32_sdwa v1, vcc, v2, v3)
      // or the 4th (v_addc_u32_sdwa v1, vcc, v2, v3, vcc) operand.
      // Skip VCC only if we didn't skip it on the previous iteration.
      // Note that src0 and src1 occupy 2 slots each because of modifiers.
8341       if (BasicInstType == SIInstrFlags::VOP2 &&
8342           ((SkipDstVcc && Inst.getNumOperands() == 1) ||
8343            (SkipSrcVcc && Inst.getNumOperands() == 5))) {
8344         SkippedVcc = true;
8345         continue;
8346       } else if (BasicInstType == SIInstrFlags::VOPC &&
8347                  Inst.getNumOperands() == 0) {
8348         SkippedVcc = true;
8349         continue;
8350       }
8351     }
8352     if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
8353       Op.addRegOrImmWithInputModsOperands(Inst, 2);
8354     } else if (Op.isImm()) {
8355       // Handle optional arguments
8356       OptionalIdx[Op.getImmTy()] = I;
8357     } else {
8358       llvm_unreachable("Invalid operand type");
8359     }
8360     SkippedVcc = false;
8361   }
8362 
8363   if (Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx10 &&
8364       Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx9 &&
8365       Inst.getOpcode() != AMDGPU::V_NOP_sdwa_vi) {
    // V_NOP_sdwa_vi/gfx9/gfx10 have no optional SDWA arguments.
8367     switch (BasicInstType) {
8368     case SIInstrFlags::VOP1:
8369       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
8370       if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) {
8371         addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
8372       }
8373       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD);
8374       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE);
8375       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
8376       break;
8377 
8378     case SIInstrFlags::VOP2:
8379       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
8380       if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) {
8381         addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
8382       }
8383       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD);
8384       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE);
8385       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
8386       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD);
8387       break;
8388 
8389     case SIInstrFlags::VOPC:
8390       if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::clamp) != -1)
8391         addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
8392       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
8393       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD);
8394       break;
8395 
8396     default:
8397       llvm_unreachable("Invalid instruction type. Only VOP1, VOP2 and VOPC allowed");
8398     }
8399   }
8400 
  // Special case v_mac_{f16, f32}:
  // these have a src2 register operand that is tied to the dst operand.
8403   if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
8404       Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi)  {
8405     auto it = Inst.begin();
8406     std::advance(
8407       it, AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::src2));
8408     Inst.insert(it, Inst.getOperand(0)); // src2 = dst
8409   }
8410 }
8411 
8412 //===----------------------------------------------------------------------===//
8413 // mAI
8414 //===----------------------------------------------------------------------===//
8415 
8416 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBLGP() const {
8417   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyBLGP);
8418 }
8419 
8420 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultCBSZ() const {
8421   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyCBSZ);
8422 }
8423 
8424 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultABID() const {
8425   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyABID);
8426 }
8427 
8428 /// Force static initialization.
8429 extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUAsmParser() {
8430   RegisterMCAsmParser<AMDGPUAsmParser> A(getTheAMDGPUTarget());
8431   RegisterMCAsmParser<AMDGPUAsmParser> B(getTheGCNTarget());
8432 }
8433 
8434 #define GET_REGISTER_MATCHER
8435 #define GET_MATCHER_IMPLEMENTATION
8436 #define GET_MNEMONIC_SPELL_CHECKER
8437 #define GET_MNEMONIC_CHECKER
8438 #include "AMDGPUGenAsmMatcher.inc"
8439 
// This function must be defined after the auto-generated include so that the
// MatchClassKind enum is defined.
8442 unsigned AMDGPUAsmParser::validateTargetOperandClass(MCParsedAsmOperand &Op,
8443                                                      unsigned Kind) {
  // Tokens like "glc" would be parsed as immediate operands in ParseOperand().
  // But MatchInstructionImpl() expects to see a token and fails to validate the
  // operand. This method checks whether we were given an immediate operand
  // where the matcher expects the corresponding token.
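  // For example, a trailing "gds" on a DS instruction is parsed as an
  // immediate, but the matcher expects the MCK_gds token class.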
8448   AMDGPUOperand &Operand = (AMDGPUOperand&)Op;
8449   switch (Kind) {
8450   case MCK_addr64:
8451     return Operand.isAddr64() ? Match_Success : Match_InvalidOperand;
8452   case MCK_gds:
8453     return Operand.isGDS() ? Match_Success : Match_InvalidOperand;
8454   case MCK_lds:
8455     return Operand.isLDS() ? Match_Success : Match_InvalidOperand;
8456   case MCK_idxen:
8457     return Operand.isIdxen() ? Match_Success : Match_InvalidOperand;
8458   case MCK_offen:
8459     return Operand.isOffen() ? Match_Success : Match_InvalidOperand;
8460   case MCK_SSrcB32:
    // When operands have expression values, they will return true for isToken,
    // because it is not possible to distinguish between a token and an
    // expression at parse time. MatchInstructionImpl() will always try to
    // match an operand as a token when isToken returns true, and when the name
    // of the expression is not a valid token the match will fail, so we need
    // to handle it here.
8467     return Operand.isSSrcB32() ? Match_Success : Match_InvalidOperand;
8468   case MCK_SSrcF32:
8469     return Operand.isSSrcF32() ? Match_Success : Match_InvalidOperand;
8470   case MCK_SoppBrTarget:
8471     return Operand.isSoppBrTarget() ? Match_Success : Match_InvalidOperand;
8472   case MCK_VReg32OrOff:
8473     return Operand.isVReg32OrOff() ? Match_Success : Match_InvalidOperand;
8474   case MCK_InterpSlot:
8475     return Operand.isInterpSlot() ? Match_Success : Match_InvalidOperand;
8476   case MCK_Attr:
8477     return Operand.isInterpAttr() ? Match_Success : Match_InvalidOperand;
8478   case MCK_AttrChan:
8479     return Operand.isAttrChan() ? Match_Success : Match_InvalidOperand;
8480   case MCK_ImmSMEMOffset:
8481     return Operand.isSMEMOffset() ? Match_Success : Match_InvalidOperand;
8482   case MCK_SReg_64:
8483   case MCK_SReg_64_XEXEC:
    // Null is defined as a 32-bit register, but it should also be accepted for
    // 64-bit operands. The following code enables it for SReg_64 operands used
    // as source and destination. Remaining source operands are handled in
    // isInlinableImm.
8489     return Operand.isNull() ? Match_Success : Match_InvalidOperand;
8490   default:
8491     return Match_InvalidOperand;
8492   }
8493 }
8494 
8495 //===----------------------------------------------------------------------===//
8496 // endpgm
8497 //===----------------------------------------------------------------------===//
8498 
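// The endpgm operand is an optional 16-bit immediate; it defaults to 0 when
// omitted.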
8499 OperandMatchResultTy AMDGPUAsmParser::parseEndpgmOp(OperandVector &Operands) {
8500   SMLoc S = getLoc();
8501   int64_t Imm = 0;
8502 
8503   if (!parseExpr(Imm)) {
    // The operand is optional; if not present, default to 0.
8505     Imm = 0;
8506   }
8507 
8508   if (!isUInt<16>(Imm)) {
8509     Error(S, "expected a 16-bit value");
8510     return MatchOperand_ParseFail;
8511   }
8512 
8513   Operands.push_back(
8514       AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyEndpgm));
8515   return MatchOperand_Success;
8516 }
8517 
8518 bool AMDGPUOperand::isEndpgm() const { return isImmTy(ImmTyEndpgm); }
8519