1 //===- AMDGPUAsmParser.cpp - Parse SI asm to MCInst instructions ----------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "AMDKernelCodeT.h"
10 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
11 #include "MCTargetDesc/AMDGPUTargetStreamer.h"
12 #include "SIDefines.h"
13 #include "SIInstrInfo.h"
14 #include "SIRegisterInfo.h"
15 #include "TargetInfo/AMDGPUTargetInfo.h"
16 #include "Utils/AMDGPUAsmUtils.h"
17 #include "Utils/AMDGPUBaseInfo.h"
18 #include "Utils/AMDKernelCodeTUtils.h"
19 #include "llvm/ADT/APFloat.h"
20 #include "llvm/ADT/SmallBitVector.h"
21 #include "llvm/ADT/StringSet.h"
22 #include "llvm/ADT/Twine.h"
23 #include "llvm/BinaryFormat/ELF.h"
24 #include "llvm/MC/MCAsmInfo.h"
25 #include "llvm/MC/MCContext.h"
26 #include "llvm/MC/MCExpr.h"
27 #include "llvm/MC/MCInst.h"
28 #include "llvm/MC/MCParser/MCAsmLexer.h"
29 #include "llvm/MC/MCParser/MCAsmParser.h"
30 #include "llvm/MC/MCParser/MCParsedAsmOperand.h"
31 #include "llvm/MC/MCParser/MCTargetAsmParser.h"
32 #include "llvm/MC/MCSymbol.h"
33 #include "llvm/MC/TargetRegistry.h"
34 #include "llvm/Support/AMDGPUMetadata.h"
35 #include "llvm/Support/AMDHSAKernelDescriptor.h"
36 #include "llvm/Support/Casting.h"
37 #include "llvm/Support/MachineValueType.h"
38 #include "llvm/Support/TargetParser.h"
39 
40 using namespace llvm;
41 using namespace llvm::AMDGPU;
42 using namespace llvm::amdhsa;
43 
44 namespace {
45 
46 class AMDGPUAsmParser;
47 
48 enum RegisterKind { IS_UNKNOWN, IS_VGPR, IS_SGPR, IS_AGPR, IS_TTMP, IS_SPECIAL };
49 
50 //===----------------------------------------------------------------------===//
51 // Operand
52 //===----------------------------------------------------------------------===//
53 
54 class AMDGPUOperand : public MCParsedAsmOperand {
55   enum KindTy {
56     Token,
57     Immediate,
58     Register,
59     Expression
60   } Kind;
61 
62   SMLoc StartLoc, EndLoc;
63   const AMDGPUAsmParser *AsmParser;
64 
65 public:
66   AMDGPUOperand(KindTy Kind_, const AMDGPUAsmParser *AsmParser_)
67       : Kind(Kind_), AsmParser(AsmParser_) {}
68 
69   using Ptr = std::unique_ptr<AMDGPUOperand>;
70 
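  // Operand source modifiers. Abs and Neg are the floating-point modifiers,
  // Sext is the integer modifier; the two groups are mutually exclusive
  // (see getModifiersOperand()).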
71   struct Modifiers {
72     bool Abs = false;
73     bool Neg = false;
74     bool Sext = false;
75 
76     bool hasFPModifiers() const { return Abs || Neg; }
77     bool hasIntModifiers() const { return Sext; }
78     bool hasModifiers() const { return hasFPModifiers() || hasIntModifiers(); }
79 
80     int64_t getFPModifiersOperand() const {
81       int64_t Operand = 0;
82       Operand |= Abs ? SISrcMods::ABS : 0u;
83       Operand |= Neg ? SISrcMods::NEG : 0u;
84       return Operand;
85     }
86 
87     int64_t getIntModifiersOperand() const {
88       int64_t Operand = 0;
89       Operand |= Sext ? SISrcMods::SEXT : 0u;
90       return Operand;
91     }
92 
93     int64_t getModifiersOperand() const {
94       assert(!(hasFPModifiers() && hasIntModifiers())
95            && "fp and int modifiers should not be used simultaneously");
96       if (hasFPModifiers()) {
97         return getFPModifiersOperand();
98       } else if (hasIntModifiers()) {
99         return getIntModifiersOperand();
100       } else {
101         return 0;
102       }
103     }
104 
105     friend raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods);
106   };
107 
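  // Identifies the semantic role of an Immediate operand: named bits and
  // modifiers, offsets, DPP/SDWA controls, etc. ImmTyNone denotes a plain
  // integer or floating-point immediate.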
108   enum ImmTy {
109     ImmTyNone,
110     ImmTyGDS,
111     ImmTyLDS,
112     ImmTyOffen,
113     ImmTyIdxen,
114     ImmTyAddr64,
115     ImmTyOffset,
116     ImmTyInstOffset,
117     ImmTyOffset0,
118     ImmTyOffset1,
119     ImmTyCPol,
120     ImmTySWZ,
121     ImmTyTFE,
122     ImmTyD16,
123     ImmTyClampSI,
124     ImmTyOModSI,
125     ImmTyDPP8,
126     ImmTyDppCtrl,
127     ImmTyDppRowMask,
128     ImmTyDppBankMask,
129     ImmTyDppBoundCtrl,
130     ImmTyDppFi,
131     ImmTySdwaDstSel,
132     ImmTySdwaSrc0Sel,
133     ImmTySdwaSrc1Sel,
134     ImmTySdwaDstUnused,
135     ImmTyDMask,
136     ImmTyDim,
137     ImmTyUNorm,
138     ImmTyDA,
139     ImmTyR128A16,
140     ImmTyA16,
141     ImmTyLWE,
142     ImmTyExpTgt,
143     ImmTyExpCompr,
144     ImmTyExpVM,
145     ImmTyFORMAT,
146     ImmTyHwreg,
147     ImmTyOff,
148     ImmTySendMsg,
149     ImmTyInterpSlot,
150     ImmTyInterpAttr,
151     ImmTyAttrChan,
152     ImmTyOpSel,
153     ImmTyOpSelHi,
154     ImmTyNegLo,
155     ImmTyNegHi,
156     ImmTySwizzle,
157     ImmTyGprIdxMode,
158     ImmTyHigh,
159     ImmTyBLGP,
160     ImmTyCBSZ,
161     ImmTyABID,
162     ImmTyEndpgm,
163   };
164 
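  // Records how an immediate is expected to be encoded: as a literal constant
  // or as an inline constant (see setImmKindLiteral()/setImmKindConst()).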
165   enum ImmKindTy {
166     ImmKindTyNone,
167     ImmKindTyLiteral,
168     ImmKindTyConst,
169   };
170 
171 private:
172   struct TokOp {
173     const char *Data;
174     unsigned Length;
175   };
176 
177   struct ImmOp {
178     int64_t Val;
179     ImmTy Type;
180     bool IsFPImm;
181     mutable ImmKindTy Kind;
182     Modifiers Mods;
183   };
184 
185   struct RegOp {
186     unsigned RegNo;
187     Modifiers Mods;
188   };
189 
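  // Operand payload; the active member is selected by Kind.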
190   union {
191     TokOp Tok;
192     ImmOp Imm;
193     RegOp Reg;
194     const MCExpr *Expr;
195   };
196 
197 public:
198   bool isToken() const override {
199     if (Kind == Token)
200       return true;
201 
202     // When parsing operands, we can't always tell if something was meant to be
203     // a token, like 'gds', or an expression that references a global variable.
204     // In this case, we assume the string is an expression, and if we need to
205     // interpret it as a token, then we treat the symbol name as the token.
206     return isSymbolRefExpr();
207   }
208 
209   bool isSymbolRefExpr() const {
210     return isExpr() && Expr && isa<MCSymbolRefExpr>(Expr);
211   }
212 
213   bool isImm() const override {
214     return Kind == Immediate;
215   }
216 
217   void setImmKindNone() const {
218     assert(isImm());
219     Imm.Kind = ImmKindTyNone;
220   }
221 
222   void setImmKindLiteral() const {
223     assert(isImm());
224     Imm.Kind = ImmKindTyLiteral;
225   }
226 
227   void setImmKindConst() const {
228     assert(isImm());
229     Imm.Kind = ImmKindTyConst;
230   }
231 
232   bool IsImmKindLiteral() const {
233     return isImm() && Imm.Kind == ImmKindTyLiteral;
234   }
235 
236   bool isImmKindConst() const {
237     return isImm() && Imm.Kind == ImmKindTyConst;
238   }
239 
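  // Whether this immediate can be encoded as an inline constant of the given
  // type (isInlinableImm) or as a literal of that type (isLiteralImm).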
240   bool isInlinableImm(MVT type) const;
241   bool isLiteralImm(MVT type) const;
242 
243   bool isRegKind() const {
244     return Kind == Register;
245   }
246 
247   bool isReg() const override {
248     return isRegKind() && !hasModifiers();
249   }
250 
251   bool isRegOrInline(unsigned RCID, MVT type) const {
252     return isRegClass(RCID) || isInlinableImm(type);
253   }
254 
255   bool isRegOrImmWithInputMods(unsigned RCID, MVT type) const {
256     return isRegOrInline(RCID, type) || isLiteralImm(type);
257   }
258 
259   bool isRegOrImmWithInt16InputMods() const {
260     return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i16);
261   }
262 
263   bool isRegOrImmWithInt32InputMods() const {
264     return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i32);
265   }
266 
267   bool isRegOrImmWithInt64InputMods() const {
268     return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::i64);
269   }
270 
271   bool isRegOrImmWithFP16InputMods() const {
272     return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f16);
273   }
274 
275   bool isRegOrImmWithFP32InputMods() const {
276     return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f32);
277   }
278 
279   bool isRegOrImmWithFP64InputMods() const {
280     return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::f64);
281   }
282 
283   bool isVReg() const {
284     return isRegClass(AMDGPU::VGPR_32RegClassID) ||
285            isRegClass(AMDGPU::VReg_64RegClassID) ||
286            isRegClass(AMDGPU::VReg_96RegClassID) ||
287            isRegClass(AMDGPU::VReg_128RegClassID) ||
288            isRegClass(AMDGPU::VReg_160RegClassID) ||
289            isRegClass(AMDGPU::VReg_192RegClassID) ||
290            isRegClass(AMDGPU::VReg_256RegClassID) ||
291            isRegClass(AMDGPU::VReg_512RegClassID) ||
292            isRegClass(AMDGPU::VReg_1024RegClassID);
293   }
294 
295   bool isVReg32() const {
296     return isRegClass(AMDGPU::VGPR_32RegClassID);
297   }
298 
299   bool isVReg32OrOff() const {
300     return isOff() || isVReg32();
301   }
302 
303   bool isNull() const {
304     return isRegKind() && getReg() == AMDGPU::SGPR_NULL;
305   }
306 
307   bool isVRegWithInputMods() const;
308 
309   bool isSDWAOperand(MVT type) const;
310   bool isSDWAFP16Operand() const;
311   bool isSDWAFP32Operand() const;
312   bool isSDWAInt16Operand() const;
313   bool isSDWAInt32Operand() const;
314 
315   bool isImmTy(ImmTy ImmT) const {
316     return isImm() && Imm.Type == ImmT;
317   }
318 
319   bool isImmModifier() const {
320     return isImm() && Imm.Type != ImmTyNone;
321   }
322 
323   bool isClampSI() const { return isImmTy(ImmTyClampSI); }
324   bool isOModSI() const { return isImmTy(ImmTyOModSI); }
325   bool isDMask() const { return isImmTy(ImmTyDMask); }
326   bool isDim() const { return isImmTy(ImmTyDim); }
327   bool isUNorm() const { return isImmTy(ImmTyUNorm); }
328   bool isDA() const { return isImmTy(ImmTyDA); }
329   bool isR128A16() const { return isImmTy(ImmTyR128A16); }
330   bool isGFX10A16() const { return isImmTy(ImmTyA16); }
331   bool isLWE() const { return isImmTy(ImmTyLWE); }
332   bool isOff() const { return isImmTy(ImmTyOff); }
333   bool isExpTgt() const { return isImmTy(ImmTyExpTgt); }
334   bool isExpVM() const { return isImmTy(ImmTyExpVM); }
335   bool isExpCompr() const { return isImmTy(ImmTyExpCompr); }
336   bool isOffen() const { return isImmTy(ImmTyOffen); }
337   bool isIdxen() const { return isImmTy(ImmTyIdxen); }
338   bool isAddr64() const { return isImmTy(ImmTyAddr64); }
339   bool isOffset() const { return isImmTy(ImmTyOffset) && isUInt<16>(getImm()); }
340   bool isOffset0() const { return isImmTy(ImmTyOffset0) && isUInt<8>(getImm()); }
341   bool isOffset1() const { return isImmTy(ImmTyOffset1) && isUInt<8>(getImm()); }
342 
343   bool isFlatOffset() const { return isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset); }
344   bool isGDS() const { return isImmTy(ImmTyGDS); }
345   bool isLDS() const { return isImmTy(ImmTyLDS); }
346   bool isCPol() const { return isImmTy(ImmTyCPol); }
347   bool isSWZ() const { return isImmTy(ImmTySWZ); }
348   bool isTFE() const { return isImmTy(ImmTyTFE); }
349   bool isD16() const { return isImmTy(ImmTyD16); }
350   bool isFORMAT() const { return isImmTy(ImmTyFORMAT) && isUInt<7>(getImm()); }
351   bool isBankMask() const { return isImmTy(ImmTyDppBankMask); }
352   bool isRowMask() const { return isImmTy(ImmTyDppRowMask); }
353   bool isBoundCtrl() const { return isImmTy(ImmTyDppBoundCtrl); }
354   bool isFI() const { return isImmTy(ImmTyDppFi); }
355   bool isSDWADstSel() const { return isImmTy(ImmTySdwaDstSel); }
356   bool isSDWASrc0Sel() const { return isImmTy(ImmTySdwaSrc0Sel); }
357   bool isSDWASrc1Sel() const { return isImmTy(ImmTySdwaSrc1Sel); }
358   bool isSDWADstUnused() const { return isImmTy(ImmTySdwaDstUnused); }
359   bool isInterpSlot() const { return isImmTy(ImmTyInterpSlot); }
360   bool isInterpAttr() const { return isImmTy(ImmTyInterpAttr); }
361   bool isAttrChan() const { return isImmTy(ImmTyAttrChan); }
362   bool isOpSel() const { return isImmTy(ImmTyOpSel); }
363   bool isOpSelHi() const { return isImmTy(ImmTyOpSelHi); }
364   bool isNegLo() const { return isImmTy(ImmTyNegLo); }
365   bool isNegHi() const { return isImmTy(ImmTyNegHi); }
366   bool isHigh() const { return isImmTy(ImmTyHigh); }
367 
368   bool isMod() const {
369     return isClampSI() || isOModSI();
370   }
371 
372   bool isRegOrImm() const {
373     return isReg() || isImm();
374   }
375 
376   bool isRegClass(unsigned RCID) const;
377 
378   bool isInlineValue() const;
379 
380   bool isRegOrInlineNoMods(unsigned RCID, MVT type) const {
381     return isRegOrInline(RCID, type) && !hasModifiers();
382   }
383 
384   bool isSCSrcB16() const {
385     return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i16);
386   }
387 
388   bool isSCSrcV2B16() const {
389     return isSCSrcB16();
390   }
391 
392   bool isSCSrcB32() const {
393     return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i32);
394   }
395 
396   bool isSCSrcB64() const {
397     return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::i64);
398   }
399 
400   bool isBoolReg() const;
401 
402   bool isSCSrcF16() const {
403     return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f16);
404   }
405 
406   bool isSCSrcV2F16() const {
407     return isSCSrcF16();
408   }
409 
410   bool isSCSrcF32() const {
411     return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f32);
412   }
413 
414   bool isSCSrcF64() const {
415     return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::f64);
416   }
417 
418   bool isSSrcB32() const {
419     return isSCSrcB32() || isLiteralImm(MVT::i32) || isExpr();
420   }
421 
422   bool isSSrcB16() const {
423     return isSCSrcB16() || isLiteralImm(MVT::i16);
424   }
425 
426   bool isSSrcV2B16() const {
427     llvm_unreachable("cannot happen");
428     return isSSrcB16();
429   }
430 
431   bool isSSrcB64() const {
432     // TODO: Find out how SALU supports extension of 32-bit literals to 64 bits.
433     // See isVSrc64().
434     return isSCSrcB64() || isLiteralImm(MVT::i64);
435   }
436 
437   bool isSSrcF32() const {
438     return isSCSrcB32() || isLiteralImm(MVT::f32) || isExpr();
439   }
440 
441   bool isSSrcF64() const {
442     return isSCSrcB64() || isLiteralImm(MVT::f64);
443   }
444 
445   bool isSSrcF16() const {
446     return isSCSrcB16() || isLiteralImm(MVT::f16);
447   }
448 
449   bool isSSrcV2F16() const {
450     llvm_unreachable("cannot happen");
451     return isSSrcF16();
452   }
453 
454   bool isSSrcV2FP32() const {
455     llvm_unreachable("cannot happen");
456     return isSSrcF32();
457   }
458 
459   bool isSCSrcV2FP32() const {
460     llvm_unreachable("cannot happen");
461     return isSCSrcF32();
462   }
463 
464   bool isSSrcV2INT32() const {
465     llvm_unreachable("cannot happen");
466     return isSSrcB32();
467   }
468 
469   bool isSCSrcV2INT32() const {
470     llvm_unreachable("cannot happen");
471     return isSCSrcB32();
472   }
473 
474   bool isSSrcOrLdsB32() const {
475     return isRegOrInlineNoMods(AMDGPU::SRegOrLds_32RegClassID, MVT::i32) ||
476            isLiteralImm(MVT::i32) || isExpr();
477   }
478 
479   bool isVCSrcB32() const {
480     return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i32);
481   }
482 
483   bool isVCSrcB64() const {
484     return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::i64);
485   }
486 
487   bool isVCSrcB16() const {
488     return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i16);
489   }
490 
491   bool isVCSrcV2B16() const {
492     return isVCSrcB16();
493   }
494 
495   bool isVCSrcF32() const {
496     return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f32);
497   }
498 
499   bool isVCSrcF64() const {
500     return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::f64);
501   }
502 
503   bool isVCSrcF16() const {
504     return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f16);
505   }
506 
507   bool isVCSrcV2F16() const {
508     return isVCSrcF16();
509   }
510 
511   bool isVSrcB32() const {
512     return isVCSrcF32() || isLiteralImm(MVT::i32) || isExpr();
513   }
514 
515   bool isVSrcB64() const {
516     return isVCSrcF64() || isLiteralImm(MVT::i64);
517   }
518 
519   bool isVSrcB16() const {
520     return isVCSrcB16() || isLiteralImm(MVT::i16);
521   }
522 
523   bool isVSrcV2B16() const {
524     return isVSrcB16() || isLiteralImm(MVT::v2i16);
525   }
526 
527   bool isVCSrcV2FP32() const {
528     return isVCSrcF64();
529   }
530 
531   bool isVSrcV2FP32() const {
532     return isVSrcF64() || isLiteralImm(MVT::v2f32);
533   }
534 
535   bool isVCSrcV2INT32() const {
536     return isVCSrcB64();
537   }
538 
539   bool isVSrcV2INT32() const {
540     return isVSrcB64() || isLiteralImm(MVT::v2i32);
541   }
542 
543   bool isVSrcF32() const {
544     return isVCSrcF32() || isLiteralImm(MVT::f32) || isExpr();
545   }
546 
547   bool isVSrcF64() const {
548     return isVCSrcF64() || isLiteralImm(MVT::f64);
549   }
550 
551   bool isVSrcF16() const {
552     return isVCSrcF16() || isLiteralImm(MVT::f16);
553   }
554 
555   bool isVSrcV2F16() const {
556     return isVSrcF16() || isLiteralImm(MVT::v2f16);
557   }
558 
559   bool isVISrcB32() const {
560     return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i32);
561   }
562 
563   bool isVISrcB16() const {
564     return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i16);
565   }
566 
567   bool isVISrcV2B16() const {
568     return isVISrcB16();
569   }
570 
571   bool isVISrcF32() const {
572     return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f32);
573   }
574 
575   bool isVISrcF16() const {
576     return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f16);
577   }
578 
579   bool isVISrcV2F16() const {
580     return isVISrcF16() || isVISrcB32();
581   }
582 
583   bool isVISrc_64B64() const {
584     return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i64);
585   }
586 
587   bool isVISrc_64F64() const {
588     return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f64);
589   }
590 
591   bool isVISrc_64V2FP32() const {
592     return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f32);
593   }
594 
595   bool isVISrc_64V2INT32() const {
596     return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i32);
597   }
598 
599   bool isVISrc_256B64() const {
600     return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i64);
601   }
602 
603   bool isVISrc_256F64() const {
604     return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f64);
605   }
606 
607   bool isVISrc_128B16() const {
608     return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i16);
609   }
610 
611   bool isVISrc_128V2B16() const {
612     return isVISrc_128B16();
613   }
614 
615   bool isVISrc_128B32() const {
616     return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i32);
617   }
618 
619   bool isVISrc_128F32() const {
620     return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f32);
621   }
622 
623   bool isVISrc_256V2FP32() const {
624     return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f32);
625   }
626 
627   bool isVISrc_256V2INT32() const {
628     return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i32);
629   }
630 
631   bool isVISrc_512B32() const {
632     return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i32);
633   }
634 
635   bool isVISrc_512B16() const {
636     return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i16);
637   }
638 
639   bool isVISrc_512V2B16() const {
640     return isVISrc_512B16();
641   }
642 
643   bool isVISrc_512F32() const {
644     return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f32);
645   }
646 
647   bool isVISrc_512F16() const {
648     return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f16);
649   }
650 
651   bool isVISrc_512V2F16() const {
652     return isVISrc_512F16() || isVISrc_512B32();
653   }
654 
655   bool isVISrc_1024B32() const {
656     return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i32);
657   }
658 
659   bool isVISrc_1024B16() const {
660     return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i16);
661   }
662 
663   bool isVISrc_1024V2B16() const {
664     return isVISrc_1024B16();
665   }
666 
667   bool isVISrc_1024F32() const {
668     return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f32);
669   }
670 
671   bool isVISrc_1024F16() const {
672     return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f16);
673   }
674 
675   bool isVISrc_1024V2F16() const {
676     return isVISrc_1024F16() || isVISrc_1024B32();
677   }
678 
679   bool isAISrcB32() const {
680     return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i32);
681   }
682 
683   bool isAISrcB16() const {
684     return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i16);
685   }
686 
687   bool isAISrcV2B16() const {
688     return isAISrcB16();
689   }
690 
691   bool isAISrcF32() const {
692     return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f32);
693   }
694 
695   bool isAISrcF16() const {
696     return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f16);
697   }
698 
699   bool isAISrcV2F16() const {
700     return isAISrcF16() || isAISrcB32();
701   }
702 
703   bool isAISrc_64B64() const {
704     return isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::i64);
705   }
706 
707   bool isAISrc_64F64() const {
708     return isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::f64);
709   }
710 
711   bool isAISrc_128B32() const {
712     return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i32);
713   }
714 
715   bool isAISrc_128B16() const {
716     return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i16);
717   }
718 
719   bool isAISrc_128V2B16() const {
720     return isAISrc_128B16();
721   }
722 
723   bool isAISrc_128F32() const {
724     return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f32);
725   }
726 
727   bool isAISrc_128F16() const {
728     return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f16);
729   }
730 
731   bool isAISrc_128V2F16() const {
732     return isAISrc_128F16() || isAISrc_128B32();
733   }
734 
735   bool isVISrc_128F16() const {
736     return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f16);
737   }
738 
739   bool isVISrc_128V2F16() const {
740     return isVISrc_128F16() || isVISrc_128B32();
741   }
742 
743   bool isAISrc_256B64() const {
744     return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::i64);
745   }
746 
747   bool isAISrc_256F64() const {
748     return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::f64);
749   }
750 
751   bool isAISrc_512B32() const {
752     return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i32);
753   }
754 
755   bool isAISrc_512B16() const {
756     return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i16);
757   }
758 
759   bool isAISrc_512V2B16() const {
760     return isAISrc_512B16();
761   }
762 
763   bool isAISrc_512F32() const {
764     return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f32);
765   }
766 
767   bool isAISrc_512F16() const {
768     return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f16);
769   }
770 
771   bool isAISrc_512V2F16() const {
772     return isAISrc_512F16() || isAISrc_512B32();
773   }
774 
775   bool isAISrc_1024B32() const {
776     return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i32);
777   }
778 
779   bool isAISrc_1024B16() const {
780     return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i16);
781   }
782 
783   bool isAISrc_1024V2B16() const {
784     return isAISrc_1024B16();
785   }
786 
787   bool isAISrc_1024F32() const {
788     return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f32);
789   }
790 
791   bool isAISrc_1024F16() const {
792     return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f16);
793   }
794 
795   bool isAISrc_1024V2F16() const {
796     return isAISrc_1024F16() || isAISrc_1024B32();
797   }
798 
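  // KImm operands are literal constants encoded directly in the instruction
  // word (e.g. for madmk/madak-style instructions).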
799   bool isKImmFP32() const {
800     return isLiteralImm(MVT::f32);
801   }
802 
803   bool isKImmFP16() const {
804     return isLiteralImm(MVT::f16);
805   }
806 
807   bool isMem() const override {
808     return false;
809   }
810 
811   bool isExpr() const {
812     return Kind == Expression;
813   }
814 
815   bool isSoppBrTarget() const {
816     return isExpr() || isImm();
817   }
818 
819   bool isSWaitCnt() const;
820   bool isHwreg() const;
821   bool isSendMsg() const;
822   bool isSwizzle() const;
823   bool isSMRDOffset8() const;
824   bool isSMEMOffset() const;
825   bool isSMRDLiteralOffset() const;
826   bool isDPP8() const;
827   bool isDPPCtrl() const;
828   bool isBLGP() const;
829   bool isCBSZ() const;
830   bool isABID() const;
831   bool isGPRIdxMode() const;
832   bool isS16Imm() const;
833   bool isU16Imm() const;
834   bool isEndpgm() const;
835 
836   StringRef getExpressionAsToken() const {
837     assert(isExpr());
838     const MCSymbolRefExpr *S = cast<MCSymbolRefExpr>(Expr);
839     return S->getSymbol().getName();
840   }
841 
842   StringRef getToken() const {
843     assert(isToken());
844 
845     if (Kind == Expression)
846       return getExpressionAsToken();
847 
848     return StringRef(Tok.Data, Tok.Length);
849   }
850 
851   int64_t getImm() const {
852     assert(isImm());
853     return Imm.Val;
854   }
855 
856   void setImm(int64_t Val) {
857     assert(isImm());
858     Imm.Val = Val;
859   }
860 
861   ImmTy getImmTy() const {
862     assert(isImm());
863     return Imm.Type;
864   }
865 
866   unsigned getReg() const override {
867     assert(isRegKind());
868     return Reg.RegNo;
869   }
870 
871   SMLoc getStartLoc() const override {
872     return StartLoc;
873   }
874 
875   SMLoc getEndLoc() const override {
876     return EndLoc;
877   }
878 
879   SMRange getLocRange() const {
880     return SMRange(StartLoc, EndLoc);
881   }
882 
883   Modifiers getModifiers() const {
884     assert(isRegKind() || isImmTy(ImmTyNone));
885     return isRegKind() ? Reg.Mods : Imm.Mods;
886   }
887 
888   void setModifiers(Modifiers Mods) {
889     assert(isRegKind() || isImmTy(ImmTyNone));
890     if (isRegKind())
891       Reg.Mods = Mods;
892     else
893       Imm.Mods = Mods;
894   }
895 
896   bool hasModifiers() const {
897     return getModifiers().hasModifiers();
898   }
899 
900   bool hasFPModifiers() const {
901     return getModifiers().hasFPModifiers();
902   }
903 
904   bool hasIntModifiers() const {
905     return getModifiers().hasIntModifiers();
906   }
907 
908   uint64_t applyInputFPModifiers(uint64_t Val, unsigned Size) const;
909 
910   void addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers = true) const;
911 
912   void addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const;
913 
914   template <unsigned Bitwidth>
915   void addKImmFPOperands(MCInst &Inst, unsigned N) const;
916 
917   void addKImmFP16Operands(MCInst &Inst, unsigned N) const {
918     addKImmFPOperands<16>(Inst, N);
919   }
920 
921   void addKImmFP32Operands(MCInst &Inst, unsigned N) const {
922     addKImmFPOperands<32>(Inst, N);
923   }
924 
925   void addRegOperands(MCInst &Inst, unsigned N) const;
926 
927   void addBoolRegOperands(MCInst &Inst, unsigned N) const {
928     addRegOperands(Inst, N);
929   }
930 
931   void addRegOrImmOperands(MCInst &Inst, unsigned N) const {
932     if (isRegKind())
933       addRegOperands(Inst, N);
934     else if (isExpr())
935       Inst.addOperand(MCOperand::createExpr(Expr));
936     else
937       addImmOperands(Inst, N);
938   }
939 
940   void addRegOrImmWithInputModsOperands(MCInst &Inst, unsigned N) const {
941     Modifiers Mods = getModifiers();
942     Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
943     if (isRegKind()) {
944       addRegOperands(Inst, N);
945     } else {
946       addImmOperands(Inst, N, false);
947     }
948   }
949 
950   void addRegOrImmWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
951     assert(!hasIntModifiers());
952     addRegOrImmWithInputModsOperands(Inst, N);
953   }
954 
955   void addRegOrImmWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
956     assert(!hasFPModifiers());
957     addRegOrImmWithInputModsOperands(Inst, N);
958   }
959 
960   void addRegWithInputModsOperands(MCInst &Inst, unsigned N) const {
961     Modifiers Mods = getModifiers();
962     Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
963     assert(isRegKind());
964     addRegOperands(Inst, N);
965   }
966 
967   void addRegWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
968     assert(!hasIntModifiers());
969     addRegWithInputModsOperands(Inst, N);
970   }
971 
972   void addRegWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
973     assert(!hasFPModifiers());
974     addRegWithInputModsOperands(Inst, N);
975   }
976 
977   void addSoppBrTargetOperands(MCInst &Inst, unsigned N) const {
978     if (isImm())
979       addImmOperands(Inst, N);
980     else {
981       assert(isExpr());
982       Inst.addOperand(MCOperand::createExpr(Expr));
983     }
984   }
985 
986   static void printImmTy(raw_ostream& OS, ImmTy Type) {
987     switch (Type) {
988     case ImmTyNone: OS << "None"; break;
989     case ImmTyGDS: OS << "GDS"; break;
990     case ImmTyLDS: OS << "LDS"; break;
991     case ImmTyOffen: OS << "Offen"; break;
992     case ImmTyIdxen: OS << "Idxen"; break;
993     case ImmTyAddr64: OS << "Addr64"; break;
994     case ImmTyOffset: OS << "Offset"; break;
995     case ImmTyInstOffset: OS << "InstOffset"; break;
996     case ImmTyOffset0: OS << "Offset0"; break;
997     case ImmTyOffset1: OS << "Offset1"; break;
998     case ImmTyCPol: OS << "CPol"; break;
999     case ImmTySWZ: OS << "SWZ"; break;
1000     case ImmTyTFE: OS << "TFE"; break;
1001     case ImmTyD16: OS << "D16"; break;
1002     case ImmTyFORMAT: OS << "FORMAT"; break;
1003     case ImmTyClampSI: OS << "ClampSI"; break;
1004     case ImmTyOModSI: OS << "OModSI"; break;
1005     case ImmTyDPP8: OS << "DPP8"; break;
1006     case ImmTyDppCtrl: OS << "DppCtrl"; break;
1007     case ImmTyDppRowMask: OS << "DppRowMask"; break;
1008     case ImmTyDppBankMask: OS << "DppBankMask"; break;
1009     case ImmTyDppBoundCtrl: OS << "DppBoundCtrl"; break;
1010     case ImmTyDppFi: OS << "FI"; break;
1011     case ImmTySdwaDstSel: OS << "SdwaDstSel"; break;
1012     case ImmTySdwaSrc0Sel: OS << "SdwaSrc0Sel"; break;
1013     case ImmTySdwaSrc1Sel: OS << "SdwaSrc1Sel"; break;
1014     case ImmTySdwaDstUnused: OS << "SdwaDstUnused"; break;
1015     case ImmTyDMask: OS << "DMask"; break;
1016     case ImmTyDim: OS << "Dim"; break;
1017     case ImmTyUNorm: OS << "UNorm"; break;
1018     case ImmTyDA: OS << "DA"; break;
1019     case ImmTyR128A16: OS << "R128A16"; break;
1020     case ImmTyA16: OS << "A16"; break;
1021     case ImmTyLWE: OS << "LWE"; break;
1022     case ImmTyOff: OS << "Off"; break;
1023     case ImmTyExpTgt: OS << "ExpTgt"; break;
1024     case ImmTyExpCompr: OS << "ExpCompr"; break;
1025     case ImmTyExpVM: OS << "ExpVM"; break;
1026     case ImmTyHwreg: OS << "Hwreg"; break;
1027     case ImmTySendMsg: OS << "SendMsg"; break;
1028     case ImmTyInterpSlot: OS << "InterpSlot"; break;
1029     case ImmTyInterpAttr: OS << "InterpAttr"; break;
1030     case ImmTyAttrChan: OS << "AttrChan"; break;
1031     case ImmTyOpSel: OS << "OpSel"; break;
1032     case ImmTyOpSelHi: OS << "OpSelHi"; break;
1033     case ImmTyNegLo: OS << "NegLo"; break;
1034     case ImmTyNegHi: OS << "NegHi"; break;
1035     case ImmTySwizzle: OS << "Swizzle"; break;
1036     case ImmTyGprIdxMode: OS << "GprIdxMode"; break;
1037     case ImmTyHigh: OS << "High"; break;
1038     case ImmTyBLGP: OS << "BLGP"; break;
1039     case ImmTyCBSZ: OS << "CBSZ"; break;
1040     case ImmTyABID: OS << "ABID"; break;
1041     case ImmTyEndpgm: OS << "Endpgm"; break;
1042     }
1043   }
1044 
1045   void print(raw_ostream &OS) const override {
1046     switch (Kind) {
1047     case Register:
1048       OS << "<register " << getReg() << " mods: " << Reg.Mods << '>';
1049       break;
1050     case Immediate:
1051       OS << '<' << getImm();
1052       if (getImmTy() != ImmTyNone) {
1053         OS << " type: "; printImmTy(OS, getImmTy());
1054       }
1055       OS << " mods: " << Imm.Mods << '>';
1056       break;
1057     case Token:
1058       OS << '\'' << getToken() << '\'';
1059       break;
1060     case Expression:
1061       OS << "<expr " << *Expr << '>';
1062       break;
1063     }
1064   }
1065 
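  // Factory methods used by the parser to construct each kind of operand.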
1066   static AMDGPUOperand::Ptr CreateImm(const AMDGPUAsmParser *AsmParser,
1067                                       int64_t Val, SMLoc Loc,
1068                                       ImmTy Type = ImmTyNone,
1069                                       bool IsFPImm = false) {
1070     auto Op = std::make_unique<AMDGPUOperand>(Immediate, AsmParser);
1071     Op->Imm.Val = Val;
1072     Op->Imm.IsFPImm = IsFPImm;
1073     Op->Imm.Kind = ImmKindTyNone;
1074     Op->Imm.Type = Type;
1075     Op->Imm.Mods = Modifiers();
1076     Op->StartLoc = Loc;
1077     Op->EndLoc = Loc;
1078     return Op;
1079   }
1080 
1081   static AMDGPUOperand::Ptr CreateToken(const AMDGPUAsmParser *AsmParser,
1082                                         StringRef Str, SMLoc Loc,
1083                                         bool HasExplicitEncodingSize = true) {
1084     auto Res = std::make_unique<AMDGPUOperand>(Token, AsmParser);
1085     Res->Tok.Data = Str.data();
1086     Res->Tok.Length = Str.size();
1087     Res->StartLoc = Loc;
1088     Res->EndLoc = Loc;
1089     return Res;
1090   }
1091 
1092   static AMDGPUOperand::Ptr CreateReg(const AMDGPUAsmParser *AsmParser,
1093                                       unsigned RegNo, SMLoc S,
1094                                       SMLoc E) {
1095     auto Op = std::make_unique<AMDGPUOperand>(Register, AsmParser);
1096     Op->Reg.RegNo = RegNo;
1097     Op->Reg.Mods = Modifiers();
1098     Op->StartLoc = S;
1099     Op->EndLoc = E;
1100     return Op;
1101   }
1102 
1103   static AMDGPUOperand::Ptr CreateExpr(const AMDGPUAsmParser *AsmParser,
1104                                        const class MCExpr *Expr, SMLoc S) {
1105     auto Op = std::make_unique<AMDGPUOperand>(Expression, AsmParser);
1106     Op->Expr = Expr;
1107     Op->StartLoc = S;
1108     Op->EndLoc = S;
1109     return Op;
1110   }
1111 };
1112 
1113 raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods) {
1114   OS << "abs:" << Mods.Abs << " neg:" << Mods.Neg << " sext:" << Mods.Sext;
1115   return OS;
1116 }
1117 
1118 //===----------------------------------------------------------------------===//
1119 // AsmParser
1120 //===----------------------------------------------------------------------===//
1121 
1122 // Holds info related to the current kernel, e.g. count of SGPRs used.
1123 // Kernel scope begins at the .amdgpu_hsa_kernel directive and ends at the
1124 // next .amdgpu_hsa_kernel directive or at EOF.
1125 class KernelScopeInfo {
1126   int SgprIndexUnusedMin = -1;
1127   int VgprIndexUnusedMin = -1;
1128   int AgprIndexUnusedMin = -1;
1129   MCContext *Ctx = nullptr;
1130   MCSubtargetInfo const *MSTI = nullptr;
1131 
1132   void usesSgprAt(int i) {
1133     if (i >= SgprIndexUnusedMin) {
1134       SgprIndexUnusedMin = ++i;
1135       if (Ctx) {
1136         MCSymbol* const Sym =
1137           Ctx->getOrCreateSymbol(Twine(".kernel.sgpr_count"));
1138         Sym->setVariableValue(MCConstantExpr::create(SgprIndexUnusedMin, *Ctx));
1139       }
1140     }
1141   }
1142 
1143   void usesVgprAt(int i) {
1144     if (i >= VgprIndexUnusedMin) {
1145       VgprIndexUnusedMin = ++i;
1146       if (Ctx) {
1147         MCSymbol* const Sym =
1148           Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count"));
1149         int totalVGPR = getTotalNumVGPRs(isGFX90A(*MSTI), AgprIndexUnusedMin,
1150                                          VgprIndexUnusedMin);
1151         Sym->setVariableValue(MCConstantExpr::create(totalVGPR, *Ctx));
1152       }
1153     }
1154   }
1155 
1156   void usesAgprAt(int i) {
1157     // This instruction will be rejected in AMDGPUAsmParser::MatchAndEmitInstruction.
1158     if (!hasMAIInsts(*MSTI))
1159       return;
1160 
1161     if (i >= AgprIndexUnusedMin) {
1162       AgprIndexUnusedMin = ++i;
1163       if (Ctx) {
1164         MCSymbol* const Sym =
1165           Ctx->getOrCreateSymbol(Twine(".kernel.agpr_count"));
1166         Sym->setVariableValue(MCConstantExpr::create(AgprIndexUnusedMin, *Ctx));
1167 
1168         // Also update vgpr_count (dependent on agpr_count for gfx908/gfx90a)
1169         MCSymbol* const vSym =
1170           Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count"));
1171         int totalVGPR = getTotalNumVGPRs(isGFX90A(*MSTI), AgprIndexUnusedMin,
1172                                          VgprIndexUnusedMin);
1173         vSym->setVariableValue(MCConstantExpr::create(totalVGPR, *Ctx));
1174       }
1175     }
1176   }
1177 
1178 public:
1179   KernelScopeInfo() = default;
1180 
1181   void initialize(MCContext &Context) {
1182     Ctx = &Context;
1183     MSTI = Ctx->getSubtargetInfo();
1184 
1185     usesSgprAt(SgprIndexUnusedMin = -1);
1186     usesVgprAt(VgprIndexUnusedMin = -1);
1187     if (hasMAIInsts(*MSTI)) {
1188       usesAgprAt(AgprIndexUnusedMin = -1);
1189     }
1190   }
1191 
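  // Record a use of the given register range so that the .kernel.*_count
  // symbols stay up to date.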
1192   void usesRegister(RegisterKind RegKind, unsigned DwordRegIndex, unsigned RegWidth) {
1193     switch (RegKind) {
1194       case IS_SGPR: usesSgprAt(DwordRegIndex + RegWidth - 1); break;
1195       case IS_AGPR: usesAgprAt(DwordRegIndex + RegWidth - 1); break;
1196       case IS_VGPR: usesVgprAt(DwordRegIndex + RegWidth - 1); break;
1197       default: break;
1198     }
1199   }
1200 };
1201 
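// Target assembly parser for AMDGPU. Parses instructions, registers and named
// operand modifiers, and handles the AMDGPU/AMDHSA assembler directives.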
1202 class AMDGPUAsmParser : public MCTargetAsmParser {
1203   MCAsmParser &Parser;
1204 
1205   // Number of extra operands parsed after the first optional operand.
1206   // This may be necessary to skip hardcoded mandatory operands.
1207   static const unsigned MAX_OPR_LOOKAHEAD = 8;
1208 
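  // Encoding explicitly requested via a mnemonic suffix (e.g. _e64, _dpp,
  // _sdwa), if any; see parseMnemonicSuffix().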
1209   unsigned ForcedEncodingSize = 0;
1210   bool ForcedDPP = false;
1211   bool ForcedSDWA = false;
1212   KernelScopeInfo KernelScope;
1213   unsigned CPolSeen;
1214 
1215   /// @name Auto-generated Match Functions
1216   /// {
1217 
1218 #define GET_ASSEMBLER_HEADER
1219 #include "AMDGPUGenAsmMatcher.inc"
1220 
1221   /// }
1222 
1223 private:
1224   bool ParseAsAbsoluteExpression(uint32_t &Ret);
1225   bool OutOfRangeError(SMRange Range);
1226   /// Calculate VGPR/SGPR blocks required for the given target, reserved
1227   /// registers, and user-specified NextFreeXGPR values.
1228   ///
1229   /// \param Features [in] Target features, used for bug corrections.
1230   /// \param VCCUsed [in] Whether VCC special SGPR is reserved.
1231   /// \param FlatScrUsed [in] Whether FLAT_SCRATCH special SGPR is reserved.
1232   /// \param XNACKUsed [in] Whether XNACK_MASK special SGPR is reserved.
1233   /// \param EnableWavefrontSize32 [in] Value of ENABLE_WAVEFRONT_SIZE32 kernel
1234   /// descriptor field, if valid.
1235   /// \param NextFreeVGPR [in] Max VGPR number referenced, plus one.
1236   /// \param VGPRRange [in] Token range, used for VGPR diagnostics.
1237   /// \param NextFreeSGPR [in] Max SGPR number referenced, plus one.
1238   /// \param SGPRRange [in] Token range, used for SGPR diagnostics.
1239   /// \param VGPRBlocks [out] Result VGPR block count.
1240   /// \param SGPRBlocks [out] Result SGPR block count.
1241   bool calculateGPRBlocks(const FeatureBitset &Features, bool VCCUsed,
1242                           bool FlatScrUsed, bool XNACKUsed,
1243                           Optional<bool> EnableWavefrontSize32, unsigned NextFreeVGPR,
1244                           SMRange VGPRRange, unsigned NextFreeSGPR,
1245                           SMRange SGPRRange, unsigned &VGPRBlocks,
1246                           unsigned &SGPRBlocks);
1247   bool ParseDirectiveAMDGCNTarget();
1248   bool ParseDirectiveAMDHSAKernel();
1249   bool ParseDirectiveMajorMinor(uint32_t &Major, uint32_t &Minor);
1250   bool ParseDirectiveHSACodeObjectVersion();
1251   bool ParseDirectiveHSACodeObjectISA();
1252   bool ParseAMDKernelCodeTValue(StringRef ID, amd_kernel_code_t &Header);
1253   bool ParseDirectiveAMDKernelCodeT();
1254   // TODO: Possibly make subtargetHasRegister const.
1255   bool subtargetHasRegister(const MCRegisterInfo &MRI, unsigned RegNo);
1256   bool ParseDirectiveAMDGPUHsaKernel();
1257 
1258   bool ParseDirectiveISAVersion();
1259   bool ParseDirectiveHSAMetadata();
1260   bool ParseDirectivePALMetadataBegin();
1261   bool ParseDirectivePALMetadata();
1262   bool ParseDirectiveAMDGPULDS();
1263 
1264   /// Common code to parse out a block of text (typically YAML) between start and
1265   /// end directives.
1266   bool ParseToEndDirective(const char *AssemblerDirectiveBegin,
1267                            const char *AssemblerDirectiveEnd,
1268                            std::string &CollectString);
1269 
1270   bool AddNextRegisterToList(unsigned& Reg, unsigned& RegWidth,
1271                              RegisterKind RegKind, unsigned Reg1, SMLoc Loc);
1272   bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
1273                            unsigned &RegNum, unsigned &RegWidth,
1274                            bool RestoreOnFailure = false);
1275   bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
1276                            unsigned &RegNum, unsigned &RegWidth,
1277                            SmallVectorImpl<AsmToken> &Tokens);
1278   unsigned ParseRegularReg(RegisterKind &RegKind, unsigned &RegNum,
1279                            unsigned &RegWidth,
1280                            SmallVectorImpl<AsmToken> &Tokens);
1281   unsigned ParseSpecialReg(RegisterKind &RegKind, unsigned &RegNum,
1282                            unsigned &RegWidth,
1283                            SmallVectorImpl<AsmToken> &Tokens);
1284   unsigned ParseRegList(RegisterKind &RegKind, unsigned &RegNum,
1285                         unsigned &RegWidth, SmallVectorImpl<AsmToken> &Tokens);
1286   bool ParseRegRange(unsigned& Num, unsigned& Width);
1287   unsigned getRegularReg(RegisterKind RegKind,
1288                          unsigned RegNum,
1289                          unsigned RegWidth,
1290                          SMLoc Loc);
1291 
1292   bool isRegister();
1293   bool isRegister(const AsmToken &Token, const AsmToken &NextToken) const;
1294   Optional<StringRef> getGprCountSymbolName(RegisterKind RegKind);
1295   void initializeGprCountSymbol(RegisterKind RegKind);
1296   bool updateGprCountSymbols(RegisterKind RegKind, unsigned DwordRegIndex,
1297                              unsigned RegWidth);
1298   void cvtMubufImpl(MCInst &Inst, const OperandVector &Operands,
1299                     bool IsAtomic, bool IsLds = false);
1300   void cvtDSImpl(MCInst &Inst, const OperandVector &Operands,
1301                  bool IsGdsHardcoded);
1302 
1303 public:
1304   enum AMDGPUMatchResultTy {
1305     Match_PreferE32 = FIRST_TARGET_MATCH_RESULT_TY
1306   };
1307   enum OperandMode {
1308     OperandMode_Default,
1309     OperandMode_NSA,
1310   };
1311 
1312   using OptionalImmIndexMap = std::map<AMDGPUOperand::ImmTy, unsigned>;
1313 
1314   AMDGPUAsmParser(const MCSubtargetInfo &STI, MCAsmParser &_Parser,
1315                const MCInstrInfo &MII,
1316                const MCTargetOptions &Options)
1317       : MCTargetAsmParser(Options, STI, MII), Parser(_Parser) {
1318     MCAsmParserExtension::Initialize(Parser);
1319 
1320     if (getFeatureBits().none()) {
1321       // Set default features.
1322       copySTI().ToggleFeature("southern-islands");
1323     }
1324 
1325     setAvailableFeatures(ComputeAvailableFeatures(getFeatureBits()));
1326 
1327     {
1328       // TODO: make those pre-defined variables read-only.
1329       // Currently there is no suitable machinery in core llvm-mc for this.
1330       // MCSymbol::isRedefinable is intended for another purpose, and
1331       // AsmParser::parseDirectiveSet() cannot be specialized for a specific target.
1332       AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
1333       MCContext &Ctx = getContext();
1334       if (ISA.Major >= 6 && isHsaAbiVersion3AndAbove(&getSTI())) {
1335         MCSymbol *Sym =
1336             Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_number"));
1337         Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
1338         Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_minor"));
1339         Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
1340         Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_stepping"));
1341         Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
1342       } else {
1343         MCSymbol *Sym =
1344             Ctx.getOrCreateSymbol(Twine(".option.machine_version_major"));
1345         Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
1346         Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_minor"));
1347         Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
1348         Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_stepping"));
1349         Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
1350       }
1351       if (ISA.Major >= 6 && isHsaAbiVersion3AndAbove(&getSTI())) {
1352         initializeGprCountSymbol(IS_VGPR);
1353         initializeGprCountSymbol(IS_SGPR);
1354       } else
1355         KernelScope.initialize(getContext());
1356     }
1357   }
1358 
1359   bool hasMIMG_R128() const {
1360     return AMDGPU::hasMIMG_R128(getSTI());
1361   }
1362 
1363   bool hasPackedD16() const {
1364     return AMDGPU::hasPackedD16(getSTI());
1365   }
1366 
1367   bool hasGFX10A16() const {
1368     return AMDGPU::hasGFX10A16(getSTI());
1369   }
1370 
1371   bool hasG16() const { return AMDGPU::hasG16(getSTI()); }
1372 
1373   bool isSI() const {
1374     return AMDGPU::isSI(getSTI());
1375   }
1376 
1377   bool isCI() const {
1378     return AMDGPU::isCI(getSTI());
1379   }
1380 
1381   bool isVI() const {
1382     return AMDGPU::isVI(getSTI());
1383   }
1384 
1385   bool isGFX9() const {
1386     return AMDGPU::isGFX9(getSTI());
1387   }
1388 
1389   bool isGFX90A() const {
1390     return AMDGPU::isGFX90A(getSTI());
1391   }
1392 
1393   bool isGFX9Plus() const {
1394     return AMDGPU::isGFX9Plus(getSTI());
1395   }
1396 
1397   bool isGFX10() const {
1398     return AMDGPU::isGFX10(getSTI());
1399   }
1400 
1401   bool isGFX10Plus() const { return AMDGPU::isGFX10Plus(getSTI()); }
1402 
1403   bool isGFX10_BEncoding() const {
1404     return AMDGPU::isGFX10_BEncoding(getSTI());
1405   }
1406 
1407   bool hasInv2PiInlineImm() const {
1408     return getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm];
1409   }
1410 
1411   bool hasFlatOffsets() const {
1412     return getFeatureBits()[AMDGPU::FeatureFlatInstOffsets];
1413   }
1414 
1415   bool hasArchitectedFlatScratch() const {
1416     return getFeatureBits()[AMDGPU::FeatureArchitectedFlatScratch];
1417   }
1418 
1419   bool hasSGPR102_SGPR103() const {
1420     return !isVI() && !isGFX9();
1421   }
1422 
1423   bool hasSGPR104_SGPR105() const { return isGFX10Plus(); }
1424 
1425   bool hasIntClamp() const {
1426     return getFeatureBits()[AMDGPU::FeatureIntClamp];
1427   }
1428 
1429   AMDGPUTargetStreamer &getTargetStreamer() {
1430     MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer();
1431     return static_cast<AMDGPUTargetStreamer &>(TS);
1432   }
1433 
1434   const MCRegisterInfo *getMRI() const {
1435     // We need this const_cast because for some reason getContext() is not const
1436     // in MCAsmParser.
1437     return const_cast<AMDGPUAsmParser*>(this)->getContext().getRegisterInfo();
1438   }
1439 
1440   const MCInstrInfo *getMII() const {
1441     return &MII;
1442   }
1443 
1444   const FeatureBitset &getFeatureBits() const {
1445     return getSTI().getFeatureBits();
1446   }
1447 
1448   void setForcedEncodingSize(unsigned Size) { ForcedEncodingSize = Size; }
1449   void setForcedDPP(bool ForceDPP_) { ForcedDPP = ForceDPP_; }
1450   void setForcedSDWA(bool ForceSDWA_) { ForcedSDWA = ForceSDWA_; }
1451 
1452   unsigned getForcedEncodingSize() const { return ForcedEncodingSize; }
1453   bool isForcedVOP3() const { return ForcedEncodingSize == 64; }
1454   bool isForcedDPP() const { return ForcedDPP; }
1455   bool isForcedSDWA() const { return ForcedSDWA; }
1456   ArrayRef<unsigned> getMatchedVariants() const;
1457   StringRef getMatchedVariantName() const;
1458 
1459   std::unique_ptr<AMDGPUOperand> parseRegister(bool RestoreOnFailure = false);
1460   bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc,
1461                      bool RestoreOnFailure);
1462   bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override;
1463   OperandMatchResultTy tryParseRegister(unsigned &RegNo, SMLoc &StartLoc,
1464                                         SMLoc &EndLoc) override;
1465   unsigned checkTargetMatchPredicate(MCInst &Inst) override;
1466   unsigned validateTargetOperandClass(MCParsedAsmOperand &Op,
1467                                       unsigned Kind) override;
1468   bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
1469                                OperandVector &Operands, MCStreamer &Out,
1470                                uint64_t &ErrorInfo,
1471                                bool MatchingInlineAsm) override;
1472   bool ParseDirective(AsmToken DirectiveID) override;
1473   OperandMatchResultTy parseOperand(OperandVector &Operands, StringRef Mnemonic,
1474                                     OperandMode Mode = OperandMode_Default);
1475   StringRef parseMnemonicSuffix(StringRef Name);
1476   bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
1477                         SMLoc NameLoc, OperandVector &Operands) override;
1478   //bool ProcessInstruction(MCInst &Inst);
1479 
1480   OperandMatchResultTy parseIntWithPrefix(const char *Prefix, int64_t &Int);
1481 
1482   OperandMatchResultTy
1483   parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
1484                      AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
1485                      bool (*ConvertResult)(int64_t &) = nullptr);
1486 
1487   OperandMatchResultTy
1488   parseOperandArrayWithPrefix(const char *Prefix,
1489                               OperandVector &Operands,
1490                               AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
1491                               bool (*ConvertResult)(int64_t&) = nullptr);
1492 
1493   OperandMatchResultTy
1494   parseNamedBit(StringRef Name, OperandVector &Operands,
1495                 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone);
1496   OperandMatchResultTy parseCPol(OperandVector &Operands);
1497   OperandMatchResultTy parseStringWithPrefix(StringRef Prefix,
1498                                              StringRef &Value,
1499                                              SMLoc &StringLoc);
1500 
1501   bool isModifier();
1502   bool isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1503   bool isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1504   bool isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1505   bool isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const;
1506   bool parseSP3NegModifier();
1507   OperandMatchResultTy parseImm(OperandVector &Operands, bool HasSP3AbsModifier = false);
1508   OperandMatchResultTy parseReg(OperandVector &Operands);
1509   OperandMatchResultTy parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod = false);
1510   OperandMatchResultTy parseRegOrImmWithFPInputMods(OperandVector &Operands, bool AllowImm = true);
1511   OperandMatchResultTy parseRegOrImmWithIntInputMods(OperandVector &Operands, bool AllowImm = true);
1512   OperandMatchResultTy parseRegWithFPInputMods(OperandVector &Operands);
1513   OperandMatchResultTy parseRegWithIntInputMods(OperandVector &Operands);
1514   OperandMatchResultTy parseVReg32OrOff(OperandVector &Operands);
1515   OperandMatchResultTy parseDfmtNfmt(int64_t &Format);
1516   OperandMatchResultTy parseUfmt(int64_t &Format);
1517   OperandMatchResultTy parseSymbolicSplitFormat(StringRef FormatStr, SMLoc Loc, int64_t &Format);
1518   OperandMatchResultTy parseSymbolicUnifiedFormat(StringRef FormatStr, SMLoc Loc, int64_t &Format);
1519   OperandMatchResultTy parseFORMAT(OperandVector &Operands);
1520   OperandMatchResultTy parseSymbolicOrNumericFormat(int64_t &Format);
1521   OperandMatchResultTy parseNumericFormat(int64_t &Format);
1522   bool tryParseFmt(const char *Pref, int64_t MaxVal, int64_t &Val);
1523   bool matchDfmtNfmt(int64_t &Dfmt, int64_t &Nfmt, StringRef FormatStr, SMLoc Loc);
1524 
1525   void cvtDSOffset01(MCInst &Inst, const OperandVector &Operands);
1526   void cvtDS(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, false); }
1527   void cvtDSGds(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, true); }
1528   void cvtExp(MCInst &Inst, const OperandVector &Operands);
1529 
1530   bool parseCnt(int64_t &IntVal);
1531   OperandMatchResultTy parseSWaitCntOps(OperandVector &Operands);
1532   OperandMatchResultTy parseHwreg(OperandVector &Operands);
1533 
1534 private:
1535   struct OperandInfoTy {
1536     SMLoc Loc;
1537     int64_t Id;
1538     bool IsSymbolic = false;
1539     bool IsDefined = false;
1540 
1541     OperandInfoTy(int64_t Id_) : Id(Id_) {}
1542   };
1543 
1544   bool parseSendMsgBody(OperandInfoTy &Msg, OperandInfoTy &Op, OperandInfoTy &Stream);
1545   bool validateSendMsg(const OperandInfoTy &Msg,
1546                        const OperandInfoTy &Op,
1547                        const OperandInfoTy &Stream);
1548 
1549   bool parseHwregBody(OperandInfoTy &HwReg,
1550                       OperandInfoTy &Offset,
1551                       OperandInfoTy &Width);
1552   bool validateHwreg(const OperandInfoTy &HwReg,
1553                      const OperandInfoTy &Offset,
1554                      const OperandInfoTy &Width);
1555 
1556   SMLoc getFlatOffsetLoc(const OperandVector &Operands) const;
1557   SMLoc getSMEMOffsetLoc(const OperandVector &Operands) const;
1558 
1559   SMLoc getOperandLoc(std::function<bool(const AMDGPUOperand&)> Test,
1560                       const OperandVector &Operands) const;
1561   SMLoc getImmLoc(AMDGPUOperand::ImmTy Type, const OperandVector &Operands) const;
1562   SMLoc getRegLoc(unsigned Reg, const OperandVector &Operands) const;
1563   SMLoc getLitLoc(const OperandVector &Operands) const;
1564   SMLoc getConstLoc(const OperandVector &Operands) const;
1565 
1566   bool validateInstruction(const MCInst &Inst, const SMLoc &IDLoc, const OperandVector &Operands);
1567   bool validateFlatOffset(const MCInst &Inst, const OperandVector &Operands);
1568   bool validateSMEMOffset(const MCInst &Inst, const OperandVector &Operands);
1569   bool validateSOPLiteral(const MCInst &Inst) const;
1570   bool validateConstantBusLimitations(const MCInst &Inst, const OperandVector &Operands);
1571   bool validateEarlyClobberLimitations(const MCInst &Inst, const OperandVector &Operands);
1572   bool validateIntClampSupported(const MCInst &Inst);
1573   bool validateMIMGAtomicDMask(const MCInst &Inst);
1574   bool validateMIMGGatherDMask(const MCInst &Inst);
1575   bool validateMovrels(const MCInst &Inst, const OperandVector &Operands);
1576   bool validateMIMGDataSize(const MCInst &Inst);
1577   bool validateMIMGAddrSize(const MCInst &Inst);
1578   bool validateMIMGD16(const MCInst &Inst);
1579   bool validateMIMGDim(const MCInst &Inst);
1580   bool validateMIMGMSAA(const MCInst &Inst);
1581   bool validateOpSel(const MCInst &Inst);
1582   bool validateDPP(const MCInst &Inst, const OperandVector &Operands);
1583   bool validateVccOperand(unsigned Reg) const;
1584   bool validateVOPLiteral(const MCInst &Inst, const OperandVector &Operands);
1585   bool validateMAIAccWrite(const MCInst &Inst, const OperandVector &Operands);
1586   bool validateMFMA(const MCInst &Inst, const OperandVector &Operands);
1587   bool validateAGPRLdSt(const MCInst &Inst) const;
1588   bool validateVGPRAlign(const MCInst &Inst) const;
1589   bool validateGWS(const MCInst &Inst, const OperandVector &Operands);
1590   bool validateDivScale(const MCInst &Inst);
1591   bool validateCoherencyBits(const MCInst &Inst, const OperandVector &Operands,
1592                              const SMLoc &IDLoc);
1593   Optional<StringRef> validateLdsDirect(const MCInst &Inst);
1594   unsigned getConstantBusLimit(unsigned Opcode) const;
1595   bool usesConstantBus(const MCInst &Inst, unsigned OpIdx);
1596   bool isInlineConstant(const MCInst &Inst, unsigned OpIdx) const;
1597   unsigned findImplicitSGPRReadInVOP(const MCInst &Inst) const;
1598 
1599   bool isSupportedMnemo(StringRef Mnemo,
1600                         const FeatureBitset &FBS);
1601   bool isSupportedMnemo(StringRef Mnemo,
1602                         const FeatureBitset &FBS,
1603                         ArrayRef<unsigned> Variants);
1604   bool checkUnsupportedInstruction(StringRef Name, const SMLoc &IDLoc);
1605 
1606   bool isId(const StringRef Id) const;
1607   bool isId(const AsmToken &Token, const StringRef Id) const;
1608   bool isToken(const AsmToken::TokenKind Kind) const;
1609   bool trySkipId(const StringRef Id);
1610   bool trySkipId(const StringRef Pref, const StringRef Id);
1611   bool trySkipId(const StringRef Id, const AsmToken::TokenKind Kind);
1612   bool trySkipToken(const AsmToken::TokenKind Kind);
1613   bool skipToken(const AsmToken::TokenKind Kind, const StringRef ErrMsg);
1614   bool parseString(StringRef &Val, const StringRef ErrMsg = "expected a string");
1615   bool parseId(StringRef &Val, const StringRef ErrMsg = "");
1616 
1617   void peekTokens(MutableArrayRef<AsmToken> Tokens);
1618   AsmToken::TokenKind getTokenKind() const;
1619   bool parseExpr(int64_t &Imm, StringRef Expected = "");
1620   bool parseExpr(OperandVector &Operands);
1621   StringRef getTokenStr() const;
1622   AsmToken peekToken();
1623   AsmToken getToken() const;
1624   SMLoc getLoc() const;
1625   void lex();
1626 
1627 public:
1628   void onBeginOfFile() override;
1629 
1630   OperandMatchResultTy parseOptionalOperand(OperandVector &Operands);
1631   OperandMatchResultTy parseOptionalOpr(OperandVector &Operands);
1632 
1633   OperandMatchResultTy parseExpTgt(OperandVector &Operands);
1634   OperandMatchResultTy parseSendMsgOp(OperandVector &Operands);
1635   OperandMatchResultTy parseInterpSlot(OperandVector &Operands);
1636   OperandMatchResultTy parseInterpAttr(OperandVector &Operands);
1637   OperandMatchResultTy parseSOppBrTarget(OperandVector &Operands);
1638   OperandMatchResultTy parseBoolReg(OperandVector &Operands);
1639 
1640   bool parseSwizzleOperand(int64_t &Op,
1641                            const unsigned MinVal,
1642                            const unsigned MaxVal,
1643                            const StringRef ErrMsg,
1644                            SMLoc &Loc);
1645   bool parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
1646                             const unsigned MinVal,
1647                             const unsigned MaxVal,
1648                             const StringRef ErrMsg);
1649   OperandMatchResultTy parseSwizzleOp(OperandVector &Operands);
1650   bool parseSwizzleOffset(int64_t &Imm);
1651   bool parseSwizzleMacro(int64_t &Imm);
1652   bool parseSwizzleQuadPerm(int64_t &Imm);
1653   bool parseSwizzleBitmaskPerm(int64_t &Imm);
1654   bool parseSwizzleBroadcast(int64_t &Imm);
1655   bool parseSwizzleSwap(int64_t &Imm);
1656   bool parseSwizzleReverse(int64_t &Imm);
1657 
1658   OperandMatchResultTy parseGPRIdxMode(OperandVector &Operands);
1659   int64_t parseGPRIdxMacro();
1660 
1661   void cvtMubuf(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false); }
1662   void cvtMubufAtomic(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true); }
1663   void cvtMubufLds(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, true); }
1664   void cvtMtbuf(MCInst &Inst, const OperandVector &Operands);
1665 
1666   AMDGPUOperand::Ptr defaultCPol() const;
1667 
1668   AMDGPUOperand::Ptr defaultSMRDOffset8() const;
1669   AMDGPUOperand::Ptr defaultSMEMOffset() const;
1670   AMDGPUOperand::Ptr defaultSMRDLiteralOffset() const;
1671   AMDGPUOperand::Ptr defaultFlatOffset() const;
1672 
1673   OperandMatchResultTy parseOModOperand(OperandVector &Operands);
1674 
1675   void cvtVOP3(MCInst &Inst, const OperandVector &Operands,
1676                OptionalImmIndexMap &OptionalIdx);
1677   void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands);
1678   void cvtVOP3(MCInst &Inst, const OperandVector &Operands);
1679   void cvtVOP3P(MCInst &Inst, const OperandVector &Operands);
1680   void cvtVOP3P(MCInst &Inst, const OperandVector &Operands,
1681                 OptionalImmIndexMap &OptionalIdx);
1682 
1683   void cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands);
1684 
1685   void cvtMIMG(MCInst &Inst, const OperandVector &Operands,
1686                bool IsAtomic = false);
1687   void cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands);
1688   void cvtIntersectRay(MCInst &Inst, const OperandVector &Operands);
1689 
1690   void cvtSMEMAtomic(MCInst &Inst, const OperandVector &Operands);
1691 
1692   bool parseDimId(unsigned &Encoding);
1693   OperandMatchResultTy parseDim(OperandVector &Operands);
1694   OperandMatchResultTy parseDPP8(OperandVector &Operands);
1695   OperandMatchResultTy parseDPPCtrl(OperandVector &Operands);
1696   bool isSupportedDPPCtrl(StringRef Ctrl, const OperandVector &Operands);
1697   int64_t parseDPPCtrlSel(StringRef Ctrl);
1698   int64_t parseDPPCtrlPerm();
1699   AMDGPUOperand::Ptr defaultRowMask() const;
1700   AMDGPUOperand::Ptr defaultBankMask() const;
1701   AMDGPUOperand::Ptr defaultBoundCtrl() const;
1702   AMDGPUOperand::Ptr defaultFI() const;
1703   void cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8 = false);
1704   void cvtDPP8(MCInst &Inst, const OperandVector &Operands) { cvtDPP(Inst, Operands, true); }
1705 
1706   OperandMatchResultTy parseSDWASel(OperandVector &Operands, StringRef Prefix,
1707                                     AMDGPUOperand::ImmTy Type);
1708   OperandMatchResultTy parseSDWADstUnused(OperandVector &Operands);
1709   void cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands);
1710   void cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands);
1711   void cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands);
1712   void cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands);
1713   void cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands);
1714   void cvtSDWA(MCInst &Inst, const OperandVector &Operands,
1715                uint64_t BasicInstType,
1716                bool SkipDstVcc = false,
1717                bool SkipSrcVcc = false);
1718 
1719   AMDGPUOperand::Ptr defaultBLGP() const;
1720   AMDGPUOperand::Ptr defaultCBSZ() const;
1721   AMDGPUOperand::Ptr defaultABID() const;
1722 
1723   OperandMatchResultTy parseEndpgmOp(OperandVector &Operands);
1724   AMDGPUOperand::Ptr defaultEndpgmImmOperands() const;
1725 };
1726 
1727 struct OptionalOperand {
1728   const char *Name;
1729   AMDGPUOperand::ImmTy Type;
1730   bool IsBit;
1731   bool (*ConvertResult)(int64_t&);
1732 };
1733 
1734 } // end anonymous namespace
1735 
// May be called with an integer type of equivalent bitwidth.
1737 static const fltSemantics *getFltSemantics(unsigned Size) {
1738   switch (Size) {
1739   case 4:
1740     return &APFloat::IEEEsingle();
1741   case 8:
1742     return &APFloat::IEEEdouble();
1743   case 2:
1744     return &APFloat::IEEEhalf();
1745   default:
1746     llvm_unreachable("unsupported fp type");
1747   }
1748 }
1749 
1750 static const fltSemantics *getFltSemantics(MVT VT) {
1751   return getFltSemantics(VT.getSizeInBits() / 8);
1752 }
1753 
1754 static const fltSemantics *getOpFltSemantics(uint8_t OperandType) {
1755   switch (OperandType) {
1756   case AMDGPU::OPERAND_REG_IMM_INT32:
1757   case AMDGPU::OPERAND_REG_IMM_FP32:
1758   case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED:
1759   case AMDGPU::OPERAND_REG_INLINE_C_INT32:
1760   case AMDGPU::OPERAND_REG_INLINE_C_FP32:
1761   case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
1762   case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
1763   case AMDGPU::OPERAND_REG_INLINE_C_V2FP32:
1764   case AMDGPU::OPERAND_REG_IMM_V2FP32:
1765   case AMDGPU::OPERAND_REG_INLINE_C_V2INT32:
1766   case AMDGPU::OPERAND_REG_IMM_V2INT32:
1767   case AMDGPU::OPERAND_KIMM32:
1768     return &APFloat::IEEEsingle();
1769   case AMDGPU::OPERAND_REG_IMM_INT64:
1770   case AMDGPU::OPERAND_REG_IMM_FP64:
1771   case AMDGPU::OPERAND_REG_INLINE_C_INT64:
1772   case AMDGPU::OPERAND_REG_INLINE_C_FP64:
1773   case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
1774     return &APFloat::IEEEdouble();
1775   case AMDGPU::OPERAND_REG_IMM_INT16:
1776   case AMDGPU::OPERAND_REG_IMM_FP16:
1777   case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED:
1778   case AMDGPU::OPERAND_REG_INLINE_C_INT16:
1779   case AMDGPU::OPERAND_REG_INLINE_C_FP16:
1780   case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
1781   case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
1782   case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
1783   case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
1784   case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
1785   case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
1786   case AMDGPU::OPERAND_REG_IMM_V2INT16:
1787   case AMDGPU::OPERAND_REG_IMM_V2FP16:
1788   case AMDGPU::OPERAND_KIMM16:
1789     return &APFloat::IEEEhalf();
1790   default:
1791     llvm_unreachable("unsupported fp type");
1792   }
1793 }
1794 
1795 //===----------------------------------------------------------------------===//
1796 // Operand
1797 //===----------------------------------------------------------------------===//
1798 
1799 static bool canLosslesslyConvertToFPType(APFloat &FPLiteral, MVT VT) {
1800   bool Lost;
1801 
  // Convert the literal to the FP semantics of the given type
1803   APFloat::opStatus Status = FPLiteral.convert(*getFltSemantics(VT),
1804                                                APFloat::rmNearestTiesToEven,
1805                                                &Lost);
  // We allow precision loss but not overflow or underflow
1807   if (Status != APFloat::opOK &&
1808       Lost &&
1809       ((Status & APFloat::opOverflow)  != 0 ||
1810        (Status & APFloat::opUnderflow) != 0)) {
1811     return false;
1812   }
1813 
1814   return true;
1815 }
1816 
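// Returns true if Val can be truncated to Size bits without losing
// information, i.e. it fits either as an unsigned or as a signed value.
// For illustration: with Size == 16, both 0xFFFF and -1 are safe, while
// 0x1FFFF is not.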
1817 static bool isSafeTruncation(int64_t Val, unsigned Size) {
1818   return isUIntN(Size, Val) || isIntN(Size, Val);
1819 }
1820 
1821 static bool isInlineableLiteralOp16(int64_t Val, MVT VT, bool HasInv2Pi) {
1822   if (VT.getScalarType() == MVT::i16) {
1823     // FP immediate values are broken.
1824     return isInlinableIntLiteral(Val);
1825   }
1826 
1827   // f16/v2f16 operands work correctly for all values.
1828   return AMDGPU::isInlinableLiteral16(Val, HasInv2Pi);
1829 }
1830 
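// Check whether this immediate can be encoded as an inline constant of the
// given type. As a rough illustration: for a 32-bit operand, values such as
// 64 or 0.5 are inlinable, while 100 or 0.3 must be emitted as literals.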
1831 bool AMDGPUOperand::isInlinableImm(MVT type) const {
1832 
1833   // This is a hack to enable named inline values like
1834   // shared_base with both 32-bit and 64-bit operands.
1835   // Note that these values are defined as
1836   // 32-bit operands only.
1837   if (isInlineValue()) {
1838     return true;
1839   }
1840 
1841   if (!isImmTy(ImmTyNone)) {
1842     // Only plain immediates are inlinable (e.g. "clamp" attribute is not)
1843     return false;
1844   }
1845   // TODO: We should avoid using host float here. It would be better to
1846   // check the float bit values which is what a few other places do.
1847   // We've had bot failures before due to weird NaN support on mips hosts.
1848 
1849   APInt Literal(64, Imm.Val);
1850 
1851   if (Imm.IsFPImm) { // We got fp literal token
1852     if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
1853       return AMDGPU::isInlinableLiteral64(Imm.Val,
1854                                           AsmParser->hasInv2PiInlineImm());
1855     }
1856 
1857     APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
1858     if (!canLosslesslyConvertToFPType(FPLiteral, type))
1859       return false;
1860 
1861     if (type.getScalarSizeInBits() == 16) {
1862       return isInlineableLiteralOp16(
1863         static_cast<int16_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
1864         type, AsmParser->hasInv2PiInlineImm());
1865     }
1866 
1867     // Check if single precision literal is inlinable
1868     return AMDGPU::isInlinableLiteral32(
1869       static_cast<int32_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
1870       AsmParser->hasInv2PiInlineImm());
1871   }
1872 
1873   // We got int literal token.
1874   if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
1875     return AMDGPU::isInlinableLiteral64(Imm.Val,
1876                                         AsmParser->hasInv2PiInlineImm());
1877   }
1878 
1879   if (!isSafeTruncation(Imm.Val, type.getScalarSizeInBits())) {
1880     return false;
1881   }
1882 
1883   if (type.getScalarSizeInBits() == 16) {
1884     return isInlineableLiteralOp16(
1885       static_cast<int16_t>(Literal.getLoBits(16).getSExtValue()),
1886       type, AsmParser->hasInv2PiInlineImm());
1887   }
1888 
1889   return AMDGPU::isInlinableLiteral32(
1890     static_cast<int32_t>(Literal.getLoBits(32).getZExtValue()),
1891     AsmParser->hasInv2PiInlineImm());
1892 }
1893 
1894 bool AMDGPUOperand::isLiteralImm(MVT type) const {
1895   // Check that this immediate can be added as literal
1896   if (!isImmTy(ImmTyNone)) {
1897     return false;
1898   }
1899 
1900   if (!Imm.IsFPImm) {
1901     // We got int literal token.
1902 
1903     if (type == MVT::f64 && hasFPModifiers()) {
      // Cannot apply fp modifiers to int literals while preserving the same
      // semantics for VOP1/2/C and VOP3 because of integer truncation. To
      // avoid ambiguity, disable these cases.
1907       return false;
1908     }
1909 
1910     unsigned Size = type.getSizeInBits();
1911     if (Size == 64)
1912       Size = 32;
1913 
1914     // FIXME: 64-bit operands can zero extend, sign extend, or pad zeroes for FP
1915     // types.
1916     return isSafeTruncation(Imm.Val, Size);
1917   }
1918 
1919   // We got fp literal token
1920   if (type == MVT::f64) { // Expected 64-bit fp operand
    // We would set the low 32 bits of the literal to zeroes, but we accept such literals.
1922     return true;
1923   }
1924 
1925   if (type == MVT::i64) { // Expected 64-bit int operand
1926     // We don't allow fp literals in 64-bit integer instructions. It is
1927     // unclear how we should encode them.
1928     return false;
1929   }
1930 
1931   // We allow fp literals with f16x2 operands assuming that the specified
1932   // literal goes into the lower half and the upper half is zero. We also
1933   // require that the literal may be losslessly converted to f16.
1934   MVT ExpectedType = (type == MVT::v2f16)? MVT::f16 :
1935                      (type == MVT::v2i16)? MVT::i16 :
1936                      (type == MVT::v2f32)? MVT::f32 : type;
1937 
1938   APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
1939   return canLosslesslyConvertToFPType(FPLiteral, ExpectedType);
1940 }
1941 
1942 bool AMDGPUOperand::isRegClass(unsigned RCID) const {
1943   return isRegKind() && AsmParser->getMRI()->getRegClass(RCID).contains(getReg());
1944 }
1945 
1946 bool AMDGPUOperand::isVRegWithInputMods() const {
1947   return isRegClass(AMDGPU::VGPR_32RegClassID) ||
1948          // GFX90A allows DPP on 64-bit operands.
1949          (isRegClass(AMDGPU::VReg_64RegClassID) &&
1950           AsmParser->getFeatureBits()[AMDGPU::Feature64BitDPP]);
1951 }
1952 
1953 bool AMDGPUOperand::isSDWAOperand(MVT type) const {
1954   if (AsmParser->isVI())
1955     return isVReg32();
1956   else if (AsmParser->isGFX9Plus())
1957     return isRegClass(AMDGPU::VS_32RegClassID) || isInlinableImm(type);
1958   else
1959     return false;
1960 }
1961 
1962 bool AMDGPUOperand::isSDWAFP16Operand() const {
1963   return isSDWAOperand(MVT::f16);
1964 }
1965 
1966 bool AMDGPUOperand::isSDWAFP32Operand() const {
1967   return isSDWAOperand(MVT::f32);
1968 }
1969 
1970 bool AMDGPUOperand::isSDWAInt16Operand() const {
1971   return isSDWAOperand(MVT::i16);
1972 }
1973 
1974 bool AMDGPUOperand::isSDWAInt32Operand() const {
1975   return isSDWAOperand(MVT::i32);
1976 }
1977 
1978 bool AMDGPUOperand::isBoolReg() const {
1979   auto FB = AsmParser->getFeatureBits();
1980   return isReg() && ((FB[AMDGPU::FeatureWavefrontSize64] && isSCSrcB64()) ||
1981                      (FB[AMDGPU::FeatureWavefrontSize32] && isSCSrcB32()));
1982 }
1983 
1984 uint64_t AMDGPUOperand::applyInputFPModifiers(uint64_t Val, unsigned Size) const
1985 {
1986   assert(isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
1987   assert(Size == 2 || Size == 4 || Size == 8);
1988 
1989   const uint64_t FpSignMask = (1ULL << (Size * 8 - 1));
1990 
1991   if (Imm.Mods.Abs) {
1992     Val &= ~FpSignMask;
1993   }
1994   if (Imm.Mods.Neg) {
1995     Val ^= FpSignMask;
1996   }
1997 
1998   return Val;
1999 }
2000 
2001 void AMDGPUOperand::addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers) const {
2002   if (AMDGPU::isSISrcOperand(AsmParser->getMII()->get(Inst.getOpcode()),
2003                              Inst.getNumOperands())) {
    addLiteralImmOperand(Inst, Imm.Val,
                         ApplyModifiers &&
                         isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
2007   } else {
2008     assert(!isImmTy(ImmTyNone) || !hasModifiers());
2009     Inst.addOperand(MCOperand::createImm(Imm.Val));
2010     setImmKindNone();
2011   }
2012 }
2013 
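// Add an immediate that the matcher classified as a source operand literal.
// Depending on the operand type and the value it is emitted either as an
// inline constant or as a 32-bit literal; e.g. 0.5 used with an f32 operand
// becomes an inline constant, whereas 0.3 has to be encoded as a literal.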
2014 void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const {
2015   const auto& InstDesc = AsmParser->getMII()->get(Inst.getOpcode());
2016   auto OpNum = Inst.getNumOperands();
2017   // Check that this operand accepts literals
2018   assert(AMDGPU::isSISrcOperand(InstDesc, OpNum));
2019 
2020   if (ApplyModifiers) {
2021     assert(AMDGPU::isSISrcFPOperand(InstDesc, OpNum));
2022     const unsigned Size = Imm.IsFPImm ? sizeof(double) : getOperandSize(InstDesc, OpNum);
2023     Val = applyInputFPModifiers(Val, Size);
2024   }
2025 
2026   APInt Literal(64, Val);
2027   uint8_t OpTy = InstDesc.OpInfo[OpNum].OperandType;
2028 
2029   if (Imm.IsFPImm) { // We got fp literal token
2030     switch (OpTy) {
2031     case AMDGPU::OPERAND_REG_IMM_INT64:
2032     case AMDGPU::OPERAND_REG_IMM_FP64:
2033     case AMDGPU::OPERAND_REG_INLINE_C_INT64:
2034     case AMDGPU::OPERAND_REG_INLINE_C_FP64:
2035     case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
2036       if (AMDGPU::isInlinableLiteral64(Literal.getZExtValue(),
2037                                        AsmParser->hasInv2PiInlineImm())) {
2038         Inst.addOperand(MCOperand::createImm(Literal.getZExtValue()));
2039         setImmKindConst();
2040         return;
2041       }
2042 
2043       // Non-inlineable
2044       if (AMDGPU::isSISrcFPOperand(InstDesc, OpNum)) { // Expected 64-bit fp operand
2045         // For fp operands we check if low 32 bits are zeros
2046         if (Literal.getLoBits(32) != 0) {
2047           const_cast<AMDGPUAsmParser *>(AsmParser)->Warning(Inst.getLoc(),
2048           "Can't encode literal as exact 64-bit floating-point operand. "
2049           "Low 32-bits will be set to zero");
2050         }
2051 
2052         Inst.addOperand(MCOperand::createImm(Literal.lshr(32).getZExtValue()));
2053         setImmKindLiteral();
2054         return;
2055       }
2056 
2057       // We don't allow fp literals in 64-bit integer instructions. It is
2058       // unclear how we should encode them. This case should be checked earlier
2059       // in predicate methods (isLiteralImm())
2060       llvm_unreachable("fp literal in 64-bit integer instruction.");
2061 
2062     case AMDGPU::OPERAND_REG_IMM_INT32:
2063     case AMDGPU::OPERAND_REG_IMM_FP32:
2064     case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED:
2065     case AMDGPU::OPERAND_REG_INLINE_C_INT32:
2066     case AMDGPU::OPERAND_REG_INLINE_C_FP32:
2067     case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
2068     case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
2069     case AMDGPU::OPERAND_REG_IMM_INT16:
2070     case AMDGPU::OPERAND_REG_IMM_FP16:
2071     case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED:
2072     case AMDGPU::OPERAND_REG_INLINE_C_INT16:
2073     case AMDGPU::OPERAND_REG_INLINE_C_FP16:
2074     case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
2075     case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
2076     case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
2077     case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
2078     case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
2079     case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
2080     case AMDGPU::OPERAND_REG_IMM_V2INT16:
2081     case AMDGPU::OPERAND_REG_IMM_V2FP16:
2082     case AMDGPU::OPERAND_REG_INLINE_C_V2FP32:
2083     case AMDGPU::OPERAND_REG_IMM_V2FP32:
2084     case AMDGPU::OPERAND_REG_INLINE_C_V2INT32:
2085     case AMDGPU::OPERAND_REG_IMM_V2INT32:
2086     case AMDGPU::OPERAND_KIMM32:
2087     case AMDGPU::OPERAND_KIMM16: {
2088       bool lost;
2089       APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
      // Convert the literal to the operand's floating-point semantics
2091       FPLiteral.convert(*getOpFltSemantics(OpTy),
2092                         APFloat::rmNearestTiesToEven, &lost);
      // We allow precision loss but not overflow or underflow. This should
      // have been checked earlier in isLiteralImm().
2095 
2096       uint64_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue();
2097       Inst.addOperand(MCOperand::createImm(ImmVal));
2098       setImmKindLiteral();
2099       return;
2100     }
2101     default:
2102       llvm_unreachable("invalid operand size");
2103     }
2104 
2105     return;
2106   }
2107 
2108   // We got int literal token.
2109   // Only sign extend inline immediates.
2110   switch (OpTy) {
2111   case AMDGPU::OPERAND_REG_IMM_INT32:
2112   case AMDGPU::OPERAND_REG_IMM_FP32:
2113   case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED:
2114   case AMDGPU::OPERAND_REG_INLINE_C_INT32:
2115   case AMDGPU::OPERAND_REG_INLINE_C_FP32:
2116   case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
2117   case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
2118   case AMDGPU::OPERAND_REG_IMM_V2INT16:
2119   case AMDGPU::OPERAND_REG_IMM_V2FP16:
2120   case AMDGPU::OPERAND_REG_IMM_V2FP32:
2121   case AMDGPU::OPERAND_REG_INLINE_C_V2FP32:
2122   case AMDGPU::OPERAND_REG_IMM_V2INT32:
2123   case AMDGPU::OPERAND_REG_INLINE_C_V2INT32:
2124     if (isSafeTruncation(Val, 32) &&
2125         AMDGPU::isInlinableLiteral32(static_cast<int32_t>(Val),
2126                                      AsmParser->hasInv2PiInlineImm())) {
2127       Inst.addOperand(MCOperand::createImm(Val));
2128       setImmKindConst();
2129       return;
2130     }
2131 
2132     Inst.addOperand(MCOperand::createImm(Val & 0xffffffff));
2133     setImmKindLiteral();
2134     return;
2135 
2136   case AMDGPU::OPERAND_REG_IMM_INT64:
2137   case AMDGPU::OPERAND_REG_IMM_FP64:
2138   case AMDGPU::OPERAND_REG_INLINE_C_INT64:
2139   case AMDGPU::OPERAND_REG_INLINE_C_FP64:
2140   case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
2141     if (AMDGPU::isInlinableLiteral64(Val, AsmParser->hasInv2PiInlineImm())) {
2142       Inst.addOperand(MCOperand::createImm(Val));
2143       setImmKindConst();
2144       return;
2145     }
2146 
2147     Inst.addOperand(MCOperand::createImm(Lo_32(Val)));
2148     setImmKindLiteral();
2149     return;
2150 
2151   case AMDGPU::OPERAND_REG_IMM_INT16:
2152   case AMDGPU::OPERAND_REG_IMM_FP16:
2153   case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED:
2154   case AMDGPU::OPERAND_REG_INLINE_C_INT16:
2155   case AMDGPU::OPERAND_REG_INLINE_C_FP16:
2156   case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
2157   case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
2158     if (isSafeTruncation(Val, 16) &&
2159         AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val),
2160                                      AsmParser->hasInv2PiInlineImm())) {
2161       Inst.addOperand(MCOperand::createImm(Val));
2162       setImmKindConst();
2163       return;
2164     }
2165 
2166     Inst.addOperand(MCOperand::createImm(Val & 0xffff));
2167     setImmKindLiteral();
2168     return;
2169 
2170   case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
2171   case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
2172   case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
2173   case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: {
2174     assert(isSafeTruncation(Val, 16));
2175     assert(AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val),
2176                                         AsmParser->hasInv2PiInlineImm()));
2177 
2178     Inst.addOperand(MCOperand::createImm(Val));
2179     return;
2180   }
2181   case AMDGPU::OPERAND_KIMM32:
2182     Inst.addOperand(MCOperand::createImm(Literal.getLoBits(32).getZExtValue()));
2183     setImmKindNone();
2184     return;
2185   case AMDGPU::OPERAND_KIMM16:
2186     Inst.addOperand(MCOperand::createImm(Literal.getLoBits(16).getZExtValue()));
2187     setImmKindNone();
2188     return;
2189   default:
2190     llvm_unreachable("invalid operand size");
2191   }
2192 }
2193 
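// Add a KIMM (fixed-width immediate) operand of the given bit width. Integer
// tokens are truncated to Bitwidth bits; FP tokens are first converted to the
// matching IEEE format, e.g. a 16-bit KIMM receives the half-precision bit
// pattern of the literal.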
2194 template <unsigned Bitwidth>
2195 void AMDGPUOperand::addKImmFPOperands(MCInst &Inst, unsigned N) const {
2196   APInt Literal(64, Imm.Val);
2197   setImmKindNone();
2198 
2199   if (!Imm.IsFPImm) {
2200     // We got int literal token.
2201     Inst.addOperand(MCOperand::createImm(Literal.getLoBits(Bitwidth).getZExtValue()));
2202     return;
2203   }
2204 
2205   bool Lost;
2206   APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
2207   FPLiteral.convert(*getFltSemantics(Bitwidth / 8),
2208                     APFloat::rmNearestTiesToEven, &Lost);
2209   Inst.addOperand(MCOperand::createImm(FPLiteral.bitcastToAPInt().getZExtValue()));
2210 }
2211 
2212 void AMDGPUOperand::addRegOperands(MCInst &Inst, unsigned N) const {
2213   Inst.addOperand(MCOperand::createReg(AMDGPU::getMCReg(getReg(), AsmParser->getSTI())));
2214 }
2215 
2216 static bool isInlineValue(unsigned Reg) {
2217   switch (Reg) {
2218   case AMDGPU::SRC_SHARED_BASE:
2219   case AMDGPU::SRC_SHARED_LIMIT:
2220   case AMDGPU::SRC_PRIVATE_BASE:
2221   case AMDGPU::SRC_PRIVATE_LIMIT:
2222   case AMDGPU::SRC_POPS_EXITING_WAVE_ID:
2223     return true;
2224   case AMDGPU::SRC_VCCZ:
2225   case AMDGPU::SRC_EXECZ:
2226   case AMDGPU::SRC_SCC:
2227     return true;
2228   case AMDGPU::SGPR_NULL:
2229     return true;
2230   default:
2231     return false;
2232   }
2233 }
2234 
2235 bool AMDGPUOperand::isInlineValue() const {
2236   return isRegKind() && ::isInlineValue(getReg());
2237 }
2238 
2239 //===----------------------------------------------------------------------===//
2240 // AsmParser
2241 //===----------------------------------------------------------------------===//
2242 
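// Map a register kind and a width in 32-bit dwords to the corresponding
// register class, e.g. (IS_VGPR, 4) yields VReg_128RegClassID as used by
// operands like v[0:3]; unsupported widths yield -1.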
2243 static int getRegClass(RegisterKind Is, unsigned RegWidth) {
2244   if (Is == IS_VGPR) {
2245     switch (RegWidth) {
2246       default: return -1;
2247       case 1: return AMDGPU::VGPR_32RegClassID;
2248       case 2: return AMDGPU::VReg_64RegClassID;
2249       case 3: return AMDGPU::VReg_96RegClassID;
2250       case 4: return AMDGPU::VReg_128RegClassID;
2251       case 5: return AMDGPU::VReg_160RegClassID;
2252       case 6: return AMDGPU::VReg_192RegClassID;
2253       case 7: return AMDGPU::VReg_224RegClassID;
2254       case 8: return AMDGPU::VReg_256RegClassID;
2255       case 16: return AMDGPU::VReg_512RegClassID;
2256       case 32: return AMDGPU::VReg_1024RegClassID;
2257     }
2258   } else if (Is == IS_TTMP) {
2259     switch (RegWidth) {
2260       default: return -1;
2261       case 1: return AMDGPU::TTMP_32RegClassID;
2262       case 2: return AMDGPU::TTMP_64RegClassID;
2263       case 4: return AMDGPU::TTMP_128RegClassID;
2264       case 8: return AMDGPU::TTMP_256RegClassID;
2265       case 16: return AMDGPU::TTMP_512RegClassID;
2266     }
2267   } else if (Is == IS_SGPR) {
2268     switch (RegWidth) {
2269       default: return -1;
2270       case 1: return AMDGPU::SGPR_32RegClassID;
2271       case 2: return AMDGPU::SGPR_64RegClassID;
2272       case 3: return AMDGPU::SGPR_96RegClassID;
2273       case 4: return AMDGPU::SGPR_128RegClassID;
2274       case 5: return AMDGPU::SGPR_160RegClassID;
2275       case 6: return AMDGPU::SGPR_192RegClassID;
2276       case 7: return AMDGPU::SGPR_224RegClassID;
2277       case 8: return AMDGPU::SGPR_256RegClassID;
2278       case 16: return AMDGPU::SGPR_512RegClassID;
2279     }
2280   } else if (Is == IS_AGPR) {
2281     switch (RegWidth) {
2282       default: return -1;
2283       case 1: return AMDGPU::AGPR_32RegClassID;
2284       case 2: return AMDGPU::AReg_64RegClassID;
2285       case 3: return AMDGPU::AReg_96RegClassID;
2286       case 4: return AMDGPU::AReg_128RegClassID;
2287       case 5: return AMDGPU::AReg_160RegClassID;
2288       case 6: return AMDGPU::AReg_192RegClassID;
2289       case 7: return AMDGPU::AReg_224RegClassID;
2290       case 8: return AMDGPU::AReg_256RegClassID;
2291       case 16: return AMDGPU::AReg_512RegClassID;
2292       case 32: return AMDGPU::AReg_1024RegClassID;
2293     }
2294   }
2295   return -1;
2296 }
2297 
2298 static unsigned getSpecialRegForName(StringRef RegName) {
2299   return StringSwitch<unsigned>(RegName)
2300     .Case("exec", AMDGPU::EXEC)
2301     .Case("vcc", AMDGPU::VCC)
2302     .Case("flat_scratch", AMDGPU::FLAT_SCR)
2303     .Case("xnack_mask", AMDGPU::XNACK_MASK)
2304     .Case("shared_base", AMDGPU::SRC_SHARED_BASE)
2305     .Case("src_shared_base", AMDGPU::SRC_SHARED_BASE)
2306     .Case("shared_limit", AMDGPU::SRC_SHARED_LIMIT)
2307     .Case("src_shared_limit", AMDGPU::SRC_SHARED_LIMIT)
2308     .Case("private_base", AMDGPU::SRC_PRIVATE_BASE)
2309     .Case("src_private_base", AMDGPU::SRC_PRIVATE_BASE)
2310     .Case("private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
2311     .Case("src_private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
2312     .Case("pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
2313     .Case("src_pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
2314     .Case("lds_direct", AMDGPU::LDS_DIRECT)
2315     .Case("src_lds_direct", AMDGPU::LDS_DIRECT)
2316     .Case("m0", AMDGPU::M0)
2317     .Case("vccz", AMDGPU::SRC_VCCZ)
2318     .Case("src_vccz", AMDGPU::SRC_VCCZ)
2319     .Case("execz", AMDGPU::SRC_EXECZ)
2320     .Case("src_execz", AMDGPU::SRC_EXECZ)
2321     .Case("scc", AMDGPU::SRC_SCC)
2322     .Case("src_scc", AMDGPU::SRC_SCC)
2323     .Case("tba", AMDGPU::TBA)
2324     .Case("tma", AMDGPU::TMA)
2325     .Case("flat_scratch_lo", AMDGPU::FLAT_SCR_LO)
2326     .Case("flat_scratch_hi", AMDGPU::FLAT_SCR_HI)
2327     .Case("xnack_mask_lo", AMDGPU::XNACK_MASK_LO)
2328     .Case("xnack_mask_hi", AMDGPU::XNACK_MASK_HI)
2329     .Case("vcc_lo", AMDGPU::VCC_LO)
2330     .Case("vcc_hi", AMDGPU::VCC_HI)
2331     .Case("exec_lo", AMDGPU::EXEC_LO)
2332     .Case("exec_hi", AMDGPU::EXEC_HI)
2333     .Case("tma_lo", AMDGPU::TMA_LO)
2334     .Case("tma_hi", AMDGPU::TMA_HI)
2335     .Case("tba_lo", AMDGPU::TBA_LO)
2336     .Case("tba_hi", AMDGPU::TBA_HI)
2337     .Case("pc", AMDGPU::PC_REG)
2338     .Case("null", AMDGPU::SGPR_NULL)
2339     .Default(AMDGPU::NoRegister);
2340 }
2341 
2342 bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
2343                                     SMLoc &EndLoc, bool RestoreOnFailure) {
2344   auto R = parseRegister();
2345   if (!R) return true;
2346   assert(R->isReg());
2347   RegNo = R->getReg();
2348   StartLoc = R->getStartLoc();
2349   EndLoc = R->getEndLoc();
2350   return false;
2351 }
2352 
2353 bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
2354                                     SMLoc &EndLoc) {
2355   return ParseRegister(RegNo, StartLoc, EndLoc, /*RestoreOnFailure=*/false);
2356 }
2357 
2358 OperandMatchResultTy AMDGPUAsmParser::tryParseRegister(unsigned &RegNo,
2359                                                        SMLoc &StartLoc,
2360                                                        SMLoc &EndLoc) {
2361   bool Result =
2362       ParseRegister(RegNo, StartLoc, EndLoc, /*RestoreOnFailure=*/true);
2363   bool PendingErrors = getParser().hasPendingError();
2364   getParser().clearPendingErrors();
2365   if (PendingErrors)
2366     return MatchOperand_ParseFail;
2367   if (Result)
2368     return MatchOperand_NoMatch;
2369   return MatchOperand_Success;
2370 }
2371 
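// Fold the next register of a bracketed list such as [s0,s1,s2,s3] into the
// accumulated range: regular registers must have consecutive indices, and
// lo/hi halves of special registers are paired up (e.g. vcc_lo followed by
// vcc_hi forms vcc).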
2372 bool AMDGPUAsmParser::AddNextRegisterToList(unsigned &Reg, unsigned &RegWidth,
2373                                             RegisterKind RegKind, unsigned Reg1,
2374                                             SMLoc Loc) {
2375   switch (RegKind) {
2376   case IS_SPECIAL:
2377     if (Reg == AMDGPU::EXEC_LO && Reg1 == AMDGPU::EXEC_HI) {
2378       Reg = AMDGPU::EXEC;
2379       RegWidth = 2;
2380       return true;
2381     }
2382     if (Reg == AMDGPU::FLAT_SCR_LO && Reg1 == AMDGPU::FLAT_SCR_HI) {
2383       Reg = AMDGPU::FLAT_SCR;
2384       RegWidth = 2;
2385       return true;
2386     }
2387     if (Reg == AMDGPU::XNACK_MASK_LO && Reg1 == AMDGPU::XNACK_MASK_HI) {
2388       Reg = AMDGPU::XNACK_MASK;
2389       RegWidth = 2;
2390       return true;
2391     }
2392     if (Reg == AMDGPU::VCC_LO && Reg1 == AMDGPU::VCC_HI) {
2393       Reg = AMDGPU::VCC;
2394       RegWidth = 2;
2395       return true;
2396     }
2397     if (Reg == AMDGPU::TBA_LO && Reg1 == AMDGPU::TBA_HI) {
2398       Reg = AMDGPU::TBA;
2399       RegWidth = 2;
2400       return true;
2401     }
2402     if (Reg == AMDGPU::TMA_LO && Reg1 == AMDGPU::TMA_HI) {
2403       Reg = AMDGPU::TMA;
2404       RegWidth = 2;
2405       return true;
2406     }
2407     Error(Loc, "register does not fit in the list");
2408     return false;
2409   case IS_VGPR:
2410   case IS_SGPR:
2411   case IS_AGPR:
2412   case IS_TTMP:
2413     if (Reg1 != Reg + RegWidth) {
2414       Error(Loc, "registers in a list must have consecutive indices");
2415       return false;
2416     }
2417     RegWidth++;
2418     return true;
2419   default:
2420     llvm_unreachable("unexpected register kind");
2421   }
2422 }
2423 
2424 struct RegInfo {
2425   StringLiteral Name;
2426   RegisterKind Kind;
2427 };
2428 
2429 static constexpr RegInfo RegularRegisters[] = {
2430   {{"v"},    IS_VGPR},
2431   {{"s"},    IS_SGPR},
2432   {{"ttmp"}, IS_TTMP},
2433   {{"acc"},  IS_AGPR},
2434   {{"a"},    IS_AGPR},
2435 };
2436 
2437 static bool isRegularReg(RegisterKind Kind) {
2438   return Kind == IS_VGPR ||
2439          Kind == IS_SGPR ||
2440          Kind == IS_TTMP ||
2441          Kind == IS_AGPR;
2442 }
2443 
2444 static const RegInfo* getRegularRegInfo(StringRef Str) {
2445   for (const RegInfo &Reg : RegularRegisters)
2446     if (Str.startswith(Reg.Name))
2447       return &Reg;
2448   return nullptr;
2449 }
2450 
2451 static bool getRegNum(StringRef Str, unsigned& Num) {
2452   return !Str.getAsInteger(10, Num);
2453 }
2454 
2455 bool
2456 AMDGPUAsmParser::isRegister(const AsmToken &Token,
2457                             const AsmToken &NextToken) const {
2458 
2459   // A list of consecutive registers: [s0,s1,s2,s3]
2460   if (Token.is(AsmToken::LBrac))
2461     return true;
2462 
2463   if (!Token.is(AsmToken::Identifier))
2464     return false;
2465 
2466   // A single register like s0 or a range of registers like s[0:1]
2467 
2468   StringRef Str = Token.getString();
2469   const RegInfo *Reg = getRegularRegInfo(Str);
2470   if (Reg) {
2471     StringRef RegName = Reg->Name;
2472     StringRef RegSuffix = Str.substr(RegName.size());
2473     if (!RegSuffix.empty()) {
2474       unsigned Num;
2475       // A single register with an index: rXX
2476       if (getRegNum(RegSuffix, Num))
2477         return true;
2478     } else {
2479       // A range of registers: r[XX:YY].
2480       if (NextToken.is(AsmToken::LBrac))
2481         return true;
2482     }
2483   }
2484 
2485   return getSpecialRegForName(Str) != AMDGPU::NoRegister;
2486 }
2487 
2488 bool
2489 AMDGPUAsmParser::isRegister()
2490 {
2491   return isRegister(getToken(), peekToken());
2492 }
2493 
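// Convert a register kind, first index and width into an MC register,
// enforcing the required alignment; e.g. s[1:2] is rejected because an SGPR
// pair must start at an even index (AlignSize == 2).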
2494 unsigned
2495 AMDGPUAsmParser::getRegularReg(RegisterKind RegKind,
2496                                unsigned RegNum,
2497                                unsigned RegWidth,
2498                                SMLoc Loc) {
2499 
2500   assert(isRegularReg(RegKind));
2501 
2502   unsigned AlignSize = 1;
2503   if (RegKind == IS_SGPR || RegKind == IS_TTMP) {
2504     // SGPR and TTMP registers must be aligned.
2505     // Max required alignment is 4 dwords.
2506     AlignSize = std::min(RegWidth, 4u);
2507   }
2508 
2509   if (RegNum % AlignSize != 0) {
2510     Error(Loc, "invalid register alignment");
2511     return AMDGPU::NoRegister;
2512   }
2513 
2514   unsigned RegIdx = RegNum / AlignSize;
2515   int RCID = getRegClass(RegKind, RegWidth);
2516   if (RCID == -1) {
2517     Error(Loc, "invalid or unsupported register size");
2518     return AMDGPU::NoRegister;
2519   }
2520 
2521   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
2522   const MCRegisterClass RC = TRI->getRegClass(RCID);
2523   if (RegIdx >= RC.getNumRegs()) {
2524     Error(Loc, "register index is out of range");
2525     return AMDGPU::NoRegister;
2526   }
2527 
2528   return RC.getRegister(RegIdx);
2529 }
2530 
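// Parse the bracketed index part of a register reference. For example,
// "[8:11]" yields Num = 8 and Width = 4, while a single index "[5]" yields
// Num = 5 and Width = 1.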
2531 bool
2532 AMDGPUAsmParser::ParseRegRange(unsigned& Num, unsigned& Width) {
2533   int64_t RegLo, RegHi;
2534   if (!skipToken(AsmToken::LBrac, "missing register index"))
2535     return false;
2536 
2537   SMLoc FirstIdxLoc = getLoc();
2538   SMLoc SecondIdxLoc;
2539 
2540   if (!parseExpr(RegLo))
2541     return false;
2542 
2543   if (trySkipToken(AsmToken::Colon)) {
2544     SecondIdxLoc = getLoc();
2545     if (!parseExpr(RegHi))
2546       return false;
2547   } else {
2548     RegHi = RegLo;
2549   }
2550 
2551   if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
2552     return false;
2553 
2554   if (!isUInt<32>(RegLo)) {
2555     Error(FirstIdxLoc, "invalid register index");
2556     return false;
2557   }
2558 
2559   if (!isUInt<32>(RegHi)) {
2560     Error(SecondIdxLoc, "invalid register index");
2561     return false;
2562   }
2563 
2564   if (RegLo > RegHi) {
2565     Error(FirstIdxLoc, "first register index should not exceed second index");
2566     return false;
2567   }
2568 
2569   Num = static_cast<unsigned>(RegLo);
2570   Width = (RegHi - RegLo) + 1;
2571   return true;
2572 }
2573 
2574 unsigned AMDGPUAsmParser::ParseSpecialReg(RegisterKind &RegKind,
2575                                           unsigned &RegNum, unsigned &RegWidth,
2576                                           SmallVectorImpl<AsmToken> &Tokens) {
2577   assert(isToken(AsmToken::Identifier));
2578   unsigned Reg = getSpecialRegForName(getTokenStr());
2579   if (Reg) {
2580     RegNum = 0;
2581     RegWidth = 1;
2582     RegKind = IS_SPECIAL;
2583     Tokens.push_back(getToken());
2584     lex(); // skip register name
2585   }
2586   return Reg;
2587 }
2588 
2589 unsigned AMDGPUAsmParser::ParseRegularReg(RegisterKind &RegKind,
2590                                           unsigned &RegNum, unsigned &RegWidth,
2591                                           SmallVectorImpl<AsmToken> &Tokens) {
2592   assert(isToken(AsmToken::Identifier));
2593   StringRef RegName = getTokenStr();
2594   auto Loc = getLoc();
2595 
2596   const RegInfo *RI = getRegularRegInfo(RegName);
2597   if (!RI) {
2598     Error(Loc, "invalid register name");
2599     return AMDGPU::NoRegister;
2600   }
2601 
2602   Tokens.push_back(getToken());
2603   lex(); // skip register name
2604 
2605   RegKind = RI->Kind;
2606   StringRef RegSuffix = RegName.substr(RI->Name.size());
2607   if (!RegSuffix.empty()) {
2608     // Single 32-bit register: vXX.
2609     if (!getRegNum(RegSuffix, RegNum)) {
2610       Error(Loc, "invalid register index");
2611       return AMDGPU::NoRegister;
2612     }
2613     RegWidth = 1;
2614   } else {
2615     // Range of registers: v[XX:YY]. ":YY" is optional.
2616     if (!ParseRegRange(RegNum, RegWidth))
2617       return AMDGPU::NoRegister;
2618   }
2619 
2620   return getRegularReg(RegKind, RegNum, RegWidth, Loc);
2621 }
2622 
2623 unsigned AMDGPUAsmParser::ParseRegList(RegisterKind &RegKind, unsigned &RegNum,
2624                                        unsigned &RegWidth,
2625                                        SmallVectorImpl<AsmToken> &Tokens) {
2626   unsigned Reg = AMDGPU::NoRegister;
2627   auto ListLoc = getLoc();
2628 
2629   if (!skipToken(AsmToken::LBrac,
2630                  "expected a register or a list of registers")) {
2631     return AMDGPU::NoRegister;
2632   }
2633 
2634   // List of consecutive registers, e.g.: [s0,s1,s2,s3]
2635 
2636   auto Loc = getLoc();
2637   if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth))
2638     return AMDGPU::NoRegister;
2639   if (RegWidth != 1) {
2640     Error(Loc, "expected a single 32-bit register");
2641     return AMDGPU::NoRegister;
2642   }
2643 
2644   for (; trySkipToken(AsmToken::Comma); ) {
2645     RegisterKind NextRegKind;
2646     unsigned NextReg, NextRegNum, NextRegWidth;
2647     Loc = getLoc();
2648 
2649     if (!ParseAMDGPURegister(NextRegKind, NextReg,
2650                              NextRegNum, NextRegWidth,
2651                              Tokens)) {
2652       return AMDGPU::NoRegister;
2653     }
2654     if (NextRegWidth != 1) {
2655       Error(Loc, "expected a single 32-bit register");
2656       return AMDGPU::NoRegister;
2657     }
2658     if (NextRegKind != RegKind) {
2659       Error(Loc, "registers in a list must be of the same kind");
2660       return AMDGPU::NoRegister;
2661     }
2662     if (!AddNextRegisterToList(Reg, RegWidth, RegKind, NextReg, Loc))
2663       return AMDGPU::NoRegister;
2664   }
2665 
2666   if (!skipToken(AsmToken::RBrac,
2667                  "expected a comma or a closing square bracket")) {
2668     return AMDGPU::NoRegister;
2669   }
2670 
2671   if (isRegularReg(RegKind))
2672     Reg = getRegularReg(RegKind, RegNum, RegWidth, ListLoc);
2673 
2674   return Reg;
2675 }
2676 
2677 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
2678                                           unsigned &RegNum, unsigned &RegWidth,
2679                                           SmallVectorImpl<AsmToken> &Tokens) {
2680   auto Loc = getLoc();
2681   Reg = AMDGPU::NoRegister;
2682 
2683   if (isToken(AsmToken::Identifier)) {
2684     Reg = ParseSpecialReg(RegKind, RegNum, RegWidth, Tokens);
2685     if (Reg == AMDGPU::NoRegister)
2686       Reg = ParseRegularReg(RegKind, RegNum, RegWidth, Tokens);
2687   } else {
2688     Reg = ParseRegList(RegKind, RegNum, RegWidth, Tokens);
2689   }
2690 
2691   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
2692   if (Reg == AMDGPU::NoRegister) {
2693     assert(Parser.hasPendingError());
2694     return false;
2695   }
2696 
2697   if (!subtargetHasRegister(*TRI, Reg)) {
2698     if (Reg == AMDGPU::SGPR_NULL) {
2699       Error(Loc, "'null' operand is not supported on this GPU");
2700     } else {
2701       Error(Loc, "register not available on this GPU");
2702     }
2703     return false;
2704   }
2705 
2706   return true;
2707 }
2708 
2709 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
2710                                           unsigned &RegNum, unsigned &RegWidth,
2711                                           bool RestoreOnFailure /*=false*/) {
2712   Reg = AMDGPU::NoRegister;
2713 
2714   SmallVector<AsmToken, 1> Tokens;
2715   if (ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, Tokens)) {
2716     if (RestoreOnFailure) {
2717       while (!Tokens.empty()) {
2718         getLexer().UnLex(Tokens.pop_back_val());
2719       }
2720     }
2721     return true;
2722   }
2723   return false;
2724 }
2725 
2726 Optional<StringRef>
2727 AMDGPUAsmParser::getGprCountSymbolName(RegisterKind RegKind) {
2728   switch (RegKind) {
2729   case IS_VGPR:
2730     return StringRef(".amdgcn.next_free_vgpr");
2731   case IS_SGPR:
2732     return StringRef(".amdgcn.next_free_sgpr");
2733   default:
2734     return None;
2735   }
2736 }
2737 
2738 void AMDGPUAsmParser::initializeGprCountSymbol(RegisterKind RegKind) {
2739   auto SymbolName = getGprCountSymbolName(RegKind);
2740   assert(SymbolName && "initializing invalid register kind");
2741   MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);
2742   Sym->setVariableValue(MCConstantExpr::create(0, getContext()));
2743 }
2744 
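// Bump the .amdgcn.next_free_{v,s}gpr tracking symbol so that it stays one
// past the highest register index referenced so far; e.g. a use of v[8:11]
// raises .amdgcn.next_free_vgpr to at least 12.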
2745 bool AMDGPUAsmParser::updateGprCountSymbols(RegisterKind RegKind,
2746                                             unsigned DwordRegIndex,
2747                                             unsigned RegWidth) {
2748   // Symbols are only defined for GCN targets
2749   if (AMDGPU::getIsaVersion(getSTI().getCPU()).Major < 6)
2750     return true;
2751 
2752   auto SymbolName = getGprCountSymbolName(RegKind);
2753   if (!SymbolName)
2754     return true;
2755   MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);
2756 
2757   int64_t NewMax = DwordRegIndex + RegWidth - 1;
2758   int64_t OldCount;
2759 
2760   if (!Sym->isVariable())
2761     return !Error(getLoc(),
2762                   ".amdgcn.next_free_{v,s}gpr symbols must be variable");
2763   if (!Sym->getVariableValue(false)->evaluateAsAbsolute(OldCount))
2764     return !Error(
2765         getLoc(),
2766         ".amdgcn.next_free_{v,s}gpr symbols must be absolute expressions");
2767 
2768   if (OldCount <= NewMax)
2769     Sym->setVariableValue(MCConstantExpr::create(NewMax + 1, getContext()));
2770 
2771   return true;
2772 }
2773 
2774 std::unique_ptr<AMDGPUOperand>
2775 AMDGPUAsmParser::parseRegister(bool RestoreOnFailure) {
2776   const auto &Tok = getToken();
2777   SMLoc StartLoc = Tok.getLoc();
2778   SMLoc EndLoc = Tok.getEndLoc();
2779   RegisterKind RegKind;
2780   unsigned Reg, RegNum, RegWidth;
2781 
2782   if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth)) {
2783     return nullptr;
2784   }
2785   if (isHsaAbiVersion3AndAbove(&getSTI())) {
2786     if (!updateGprCountSymbols(RegKind, RegNum, RegWidth))
2787       return nullptr;
2788   } else
2789     KernelScope.usesRegister(RegKind, RegNum, RegWidth);
2790   return AMDGPUOperand::CreateReg(this, Reg, StartLoc, EndLoc);
2791 }
2792 
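// Parse an immediate operand: either a (possibly negated) floating-point
// literal, which is stored as the bit pattern of an IEEE double, or an
// integer or symbolic expression.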
2793 OperandMatchResultTy
2794 AMDGPUAsmParser::parseImm(OperandVector &Operands, bool HasSP3AbsModifier) {
2795   // TODO: add syntactic sugar for 1/(2*PI)
2796 
2797   assert(!isRegister());
2798   assert(!isModifier());
2799 
2800   const auto& Tok = getToken();
2801   const auto& NextTok = peekToken();
2802   bool IsReal = Tok.is(AsmToken::Real);
2803   SMLoc S = getLoc();
2804   bool Negate = false;
2805 
2806   if (!IsReal && Tok.is(AsmToken::Minus) && NextTok.is(AsmToken::Real)) {
2807     lex();
2808     IsReal = true;
2809     Negate = true;
2810   }
2811 
2812   if (IsReal) {
2813     // Floating-point expressions are not supported.
2814     // Can only allow floating-point literals with an
2815     // optional sign.
2816 
2817     StringRef Num = getTokenStr();
2818     lex();
2819 
2820     APFloat RealVal(APFloat::IEEEdouble());
2821     auto roundMode = APFloat::rmNearestTiesToEven;
2822     if (errorToBool(RealVal.convertFromString(Num, roundMode).takeError())) {
2823       return MatchOperand_ParseFail;
2824     }
2825     if (Negate)
2826       RealVal.changeSign();
2827 
2828     Operands.push_back(
2829       AMDGPUOperand::CreateImm(this, RealVal.bitcastToAPInt().getZExtValue(), S,
2830                                AMDGPUOperand::ImmTyNone, true));
2831 
2832     return MatchOperand_Success;
2833 
2834   } else {
2835     int64_t IntVal;
2836     const MCExpr *Expr;
2837     SMLoc S = getLoc();
2838 
2839     if (HasSP3AbsModifier) {
2840       // This is a workaround for handling expressions
2841       // as arguments of SP3 'abs' modifier, for example:
2842       //     |1.0|
2843       //     |-1|
2844       //     |1+x|
2845       // This syntax is not compatible with syntax of standard
2846       // MC expressions (due to the trailing '|').
2847       SMLoc EndLoc;
2848       if (getParser().parsePrimaryExpr(Expr, EndLoc, nullptr))
2849         return MatchOperand_ParseFail;
2850     } else {
2851       if (Parser.parseExpression(Expr))
2852         return MatchOperand_ParseFail;
2853     }
2854 
2855     if (Expr->evaluateAsAbsolute(IntVal)) {
2856       Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S));
2857     } else {
2858       Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S));
2859     }
2860 
2861     return MatchOperand_Success;
2862   }
2863 
2864   return MatchOperand_NoMatch;
2865 }
2866 
2867 OperandMatchResultTy
2868 AMDGPUAsmParser::parseReg(OperandVector &Operands) {
2869   if (!isRegister())
2870     return MatchOperand_NoMatch;
2871 
2872   if (auto R = parseRegister()) {
2873     assert(R->isReg());
2874     Operands.push_back(std::move(R));
2875     return MatchOperand_Success;
2876   }
2877   return MatchOperand_ParseFail;
2878 }
2879 
2880 OperandMatchResultTy
2881 AMDGPUAsmParser::parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod) {
2882   auto res = parseReg(Operands);
2883   if (res != MatchOperand_NoMatch) {
2884     return res;
2885   } else if (isModifier()) {
2886     return MatchOperand_NoMatch;
2887   } else {
2888     return parseImm(Operands, HasSP3AbsMod);
2889   }
2890 }
2891 
2892 bool
2893 AMDGPUAsmParser::isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
2894   if (Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::LParen)) {
2895     const auto &str = Token.getString();
2896     return str == "abs" || str == "neg" || str == "sext";
2897   }
2898   return false;
2899 }
2900 
2901 bool
2902 AMDGPUAsmParser::isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const {
2903   return Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::Colon);
2904 }
2905 
2906 bool
2907 AMDGPUAsmParser::isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
2908   return isNamedOperandModifier(Token, NextToken) || Token.is(AsmToken::Pipe);
2909 }
2910 
2911 bool
2912 AMDGPUAsmParser::isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
2913   return isRegister(Token, NextToken) || isOperandModifier(Token, NextToken);
2914 }
2915 
2916 // Check if this is an operand modifier or an opcode modifier
// which may look like an expression but is not. We should
2918 // avoid parsing these modifiers as expressions. Currently
2919 // recognized sequences are:
2920 //   |...|
2921 //   abs(...)
2922 //   neg(...)
2923 //   sext(...)
2924 //   -reg
2925 //   -|...|
2926 //   -abs(...)
2927 //   name:...
2928 // Note that simple opcode modifiers like 'gds' may be parsed as
2929 // expressions; this is a special case. See getExpressionAsToken.
2930 //
2931 bool
2932 AMDGPUAsmParser::isModifier() {
2933 
2934   AsmToken Tok = getToken();
2935   AsmToken NextToken[2];
2936   peekTokens(NextToken);
2937 
2938   return isOperandModifier(Tok, NextToken[0]) ||
2939          (Tok.is(AsmToken::Minus) && isRegOrOperandModifier(NextToken[0], NextToken[1])) ||
2940          isOpcodeModifierWithVal(Tok, NextToken[0]);
2941 }
2942 
2943 // Check if the current token is an SP3 'neg' modifier.
2944 // Currently this modifier is allowed in the following context:
2945 //
2946 // 1. Before a register, e.g. "-v0", "-v[...]" or "-[v0,v1]".
2947 // 2. Before an 'abs' modifier: -abs(...)
2948 // 3. Before an SP3 'abs' modifier: -|...|
2949 //
// In all other cases "-" is handled as part
// of an expression that follows the sign.
//
// Note: When "-" is followed by an integer literal N,
// this is interpreted as integer negation rather
// than a floating-point NEG modifier applied to N.
// Besides being counter-intuitive, such use of the
// floating-point NEG modifier would have resulted in a
// different meaning of integer literals used with VOP1/2/C
// and VOP3, for example:
//    v_exp_f32_e32 v5, -1 // VOP1: src0 = 0xFFFFFFFF
//    v_exp_f32_e64 v5, -1 // VOP3: src0 = 0x80000001
// Negative fp literals with a preceding "-" are
// handled likewise for uniformity.
2964 //
2965 bool
2966 AMDGPUAsmParser::parseSP3NegModifier() {
2967 
2968   AsmToken NextToken[2];
2969   peekTokens(NextToken);
2970 
2971   if (isToken(AsmToken::Minus) &&
2972       (isRegister(NextToken[0], NextToken[1]) ||
2973        NextToken[0].is(AsmToken::Pipe) ||
2974        isId(NextToken[0], "abs"))) {
2975     lex();
2976     return true;
2977   }
2978 
2979   return false;
2980 }
2981 
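// Parse an operand with optional floating-point input modifiers. Both the
// named forms neg(...) and abs(...) and the SP3 spellings -... and |...| are
// accepted (they may be combined, e.g. "-|v0|"), while ambiguous constructs
// such as "--1" are rejected.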
2982 OperandMatchResultTy
2983 AMDGPUAsmParser::parseRegOrImmWithFPInputMods(OperandVector &Operands,
2984                                               bool AllowImm) {
2985   bool Neg, SP3Neg;
2986   bool Abs, SP3Abs;
2987   SMLoc Loc;
2988 
2989   // Disable ambiguous constructs like '--1' etc. Should use neg(-1) instead.
2990   if (isToken(AsmToken::Minus) && peekToken().is(AsmToken::Minus)) {
2991     Error(getLoc(), "invalid syntax, expected 'neg' modifier");
2992     return MatchOperand_ParseFail;
2993   }
2994 
2995   SP3Neg = parseSP3NegModifier();
2996 
2997   Loc = getLoc();
2998   Neg = trySkipId("neg");
2999   if (Neg && SP3Neg) {
3000     Error(Loc, "expected register or immediate");
3001     return MatchOperand_ParseFail;
3002   }
3003   if (Neg && !skipToken(AsmToken::LParen, "expected left paren after neg"))
3004     return MatchOperand_ParseFail;
3005 
3006   Abs = trySkipId("abs");
3007   if (Abs && !skipToken(AsmToken::LParen, "expected left paren after abs"))
3008     return MatchOperand_ParseFail;
3009 
3010   Loc = getLoc();
3011   SP3Abs = trySkipToken(AsmToken::Pipe);
3012   if (Abs && SP3Abs) {
3013     Error(Loc, "expected register or immediate");
3014     return MatchOperand_ParseFail;
3015   }
3016 
3017   OperandMatchResultTy Res;
3018   if (AllowImm) {
3019     Res = parseRegOrImm(Operands, SP3Abs);
3020   } else {
3021     Res = parseReg(Operands);
3022   }
3023   if (Res != MatchOperand_Success) {
3024     return (SP3Neg || Neg || SP3Abs || Abs)? MatchOperand_ParseFail : Res;
3025   }
3026 
3027   if (SP3Abs && !skipToken(AsmToken::Pipe, "expected vertical bar"))
3028     return MatchOperand_ParseFail;
3029   if (Abs && !skipToken(AsmToken::RParen, "expected closing parentheses"))
3030     return MatchOperand_ParseFail;
3031   if (Neg && !skipToken(AsmToken::RParen, "expected closing parentheses"))
3032     return MatchOperand_ParseFail;
3033 
3034   AMDGPUOperand::Modifiers Mods;
3035   Mods.Abs = Abs || SP3Abs;
3036   Mods.Neg = Neg || SP3Neg;
3037 
3038   if (Mods.hasFPModifiers()) {
3039     AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
3040     if (Op.isExpr()) {
3041       Error(Op.getStartLoc(), "expected an absolute expression");
3042       return MatchOperand_ParseFail;
3043     }
3044     Op.setModifiers(Mods);
3045   }
3046   return MatchOperand_Success;
3047 }
3048 
3049 OperandMatchResultTy
3050 AMDGPUAsmParser::parseRegOrImmWithIntInputMods(OperandVector &Operands,
3051                                                bool AllowImm) {
3052   bool Sext = trySkipId("sext");
3053   if (Sext && !skipToken(AsmToken::LParen, "expected left paren after sext"))
3054     return MatchOperand_ParseFail;
3055 
3056   OperandMatchResultTy Res;
3057   if (AllowImm) {
3058     Res = parseRegOrImm(Operands);
3059   } else {
3060     Res = parseReg(Operands);
3061   }
3062   if (Res != MatchOperand_Success) {
3063     return Sext? MatchOperand_ParseFail : Res;
3064   }
3065 
3066   if (Sext && !skipToken(AsmToken::RParen, "expected closing parentheses"))
3067     return MatchOperand_ParseFail;
3068 
3069   AMDGPUOperand::Modifiers Mods;
3070   Mods.Sext = Sext;
3071 
3072   if (Mods.hasIntModifiers()) {
3073     AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
3074     if (Op.isExpr()) {
3075       Error(Op.getStartLoc(), "expected an absolute expression");
3076       return MatchOperand_ParseFail;
3077     }
3078     Op.setModifiers(Mods);
3079   }
3080 
3081   return MatchOperand_Success;
3082 }
3083 
3084 OperandMatchResultTy
3085 AMDGPUAsmParser::parseRegWithFPInputMods(OperandVector &Operands) {
3086   return parseRegOrImmWithFPInputMods(Operands, false);
3087 }
3088 
3089 OperandMatchResultTy
3090 AMDGPUAsmParser::parseRegWithIntInputMods(OperandVector &Operands) {
3091   return parseRegOrImmWithIntInputMods(Operands, false);
3092 }
3093 
3094 OperandMatchResultTy AMDGPUAsmParser::parseVReg32OrOff(OperandVector &Operands) {
3095   auto Loc = getLoc();
3096   if (trySkipId("off")) {
3097     Operands.push_back(AMDGPUOperand::CreateImm(this, 0, Loc,
3098                                                 AMDGPUOperand::ImmTyOff, false));
3099     return MatchOperand_Success;
3100   }
3101 
3102   if (!isRegister())
3103     return MatchOperand_NoMatch;
3104 
3105   std::unique_ptr<AMDGPUOperand> Reg = parseRegister();
3106   if (Reg) {
3107     Operands.push_back(std::move(Reg));
3108     return MatchOperand_Success;
3109   }
3110 
3111   return MatchOperand_ParseFail;
3112 
3113 }
3114 
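// Reject matches that contradict a forced encoding (e32/e64/dpp/sdwa) and
// handle a few opcode-specific quirks, e.g. the SDWA forms of v_mac_f32/f16
// only accept dst_sel:DWORD.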
3115 unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) {
3116   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
3117 
3118   if ((getForcedEncodingSize() == 32 && (TSFlags & SIInstrFlags::VOP3)) ||
3119       (getForcedEncodingSize() == 64 && !(TSFlags & SIInstrFlags::VOP3)) ||
3120       (isForcedDPP() && !(TSFlags & SIInstrFlags::DPP)) ||
3121       (isForcedSDWA() && !(TSFlags & SIInstrFlags::SDWA)) )
3122     return Match_InvalidOperand;
3123 
3124   if ((TSFlags & SIInstrFlags::VOP3) &&
3125       (TSFlags & SIInstrFlags::VOPAsmPrefer32Bit) &&
3126       getForcedEncodingSize() != 64)
3127     return Match_PreferE32;
3128 
3129   if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
3130       Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) {
3131     // v_mac_f32/16 allow only dst_sel == DWORD;
3132     auto OpNum =
3133         AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::dst_sel);
3134     const auto &Op = Inst.getOperand(OpNum);
3135     if (!Op.isImm() || Op.getImm() != AMDGPU::SDWA::SdwaSel::DWORD) {
3136       return Match_InvalidOperand;
3137     }
3138   }
3139 
3140   return Match_Success;
3141 }
3142 
3143 static ArrayRef<unsigned> getAllVariants() {
3144   static const unsigned Variants[] = {
3145     AMDGPUAsmVariants::DEFAULT, AMDGPUAsmVariants::VOP3,
3146     AMDGPUAsmVariants::SDWA, AMDGPUAsmVariants::SDWA9, AMDGPUAsmVariants::DPP
3147   };
3148 
3149   return makeArrayRef(Variants);
3150 }
3151 
3152 // Which asm variants we should check.
3153 ArrayRef<unsigned> AMDGPUAsmParser::getMatchedVariants() const {
3154   if (getForcedEncodingSize() == 32) {
3155     static const unsigned Variants[] = {AMDGPUAsmVariants::DEFAULT};
3156     return makeArrayRef(Variants);
3157   }
3158 
3159   if (isForcedVOP3()) {
3160     static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3};
3161     return makeArrayRef(Variants);
3162   }
3163 
3164   if (isForcedSDWA()) {
3165     static const unsigned Variants[] = {AMDGPUAsmVariants::SDWA,
3166                                         AMDGPUAsmVariants::SDWA9};
3167     return makeArrayRef(Variants);
3168   }
3169 
3170   if (isForcedDPP()) {
3171     static const unsigned Variants[] = {AMDGPUAsmVariants::DPP};
3172     return makeArrayRef(Variants);
3173   }
3174 
3175   return getAllVariants();
3176 }
3177 
3178 StringRef AMDGPUAsmParser::getMatchedVariantName() const {
3179   if (getForcedEncodingSize() == 32)
3180     return "e32";
3181 
3182   if (isForcedVOP3())
3183     return "e64";
3184 
3185   if (isForcedSDWA())
3186     return "sdwa";
3187 
3188   if (isForcedDPP())
3189     return "dpp";
3190 
3191   return "";
3192 }
3193 
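// Return the first implicit SGPR read by a VOP instruction (FLAT_SCR, VCC,
// VCC_LO, VCC_HI or M0), or NoRegister if there is none. Such implicit reads
// count against the constant bus limit checked below.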
3194 unsigned AMDGPUAsmParser::findImplicitSGPRReadInVOP(const MCInst &Inst) const {
3195   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
3196   const unsigned Num = Desc.getNumImplicitUses();
3197   for (unsigned i = 0; i < Num; ++i) {
3198     unsigned Reg = Desc.ImplicitUses[i];
3199     switch (Reg) {
3200     case AMDGPU::FLAT_SCR:
3201     case AMDGPU::VCC:
3202     case AMDGPU::VCC_LO:
3203     case AMDGPU::VCC_HI:
3204     case AMDGPU::M0:
3205       return Reg;
3206     default:
3207       break;
3208     }
3209   }
3210   return AMDGPU::NoRegister;
3211 }
3212 
3213 // NB: This code is correct only when used to check constant
3214 // bus limitations because GFX7 supports no f16 inline constants.
3215 // Note that there are no cases when a GFX7 opcode violates
3216 // constant bus limitations due to the use of an f16 constant.
3217 bool AMDGPUAsmParser::isInlineConstant(const MCInst &Inst,
3218                                        unsigned OpIdx) const {
3219   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
3220 
3221   if (!AMDGPU::isSISrcOperand(Desc, OpIdx)) {
3222     return false;
3223   }
3224 
3225   const MCOperand &MO = Inst.getOperand(OpIdx);
3226 
3227   int64_t Val = MO.getImm();
3228   auto OpSize = AMDGPU::getOperandSize(Desc, OpIdx);
3229 
3230   switch (OpSize) { // expected operand size
3231   case 8:
3232     return AMDGPU::isInlinableLiteral64(Val, hasInv2PiInlineImm());
3233   case 4:
3234     return AMDGPU::isInlinableLiteral32(Val, hasInv2PiInlineImm());
3235   case 2: {
3236     const unsigned OperandType = Desc.OpInfo[OpIdx].OperandType;
3237     if (OperandType == AMDGPU::OPERAND_REG_IMM_INT16 ||
3238         OperandType == AMDGPU::OPERAND_REG_INLINE_C_INT16 ||
3239         OperandType == AMDGPU::OPERAND_REG_INLINE_AC_INT16)
3240       return AMDGPU::isInlinableIntLiteral(Val);
3241 
3242     if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2INT16 ||
3243         OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2INT16 ||
3244         OperandType == AMDGPU::OPERAND_REG_IMM_V2INT16)
3245       return AMDGPU::isInlinableIntLiteralV216(Val);
3246 
3247     if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2FP16 ||
3248         OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2FP16 ||
3249         OperandType == AMDGPU::OPERAND_REG_IMM_V2FP16)
3250       return AMDGPU::isInlinableLiteralV216(Val, hasInv2PiInlineImm());
3251 
3252     return AMDGPU::isInlinableLiteral16(Val, hasInv2PiInlineImm());
3253   }
3254   default:
3255     llvm_unreachable("invalid operand size");
3256   }
3257 }
3258 
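// Return how many distinct scalar values (SGPRs and literals) the source
// operands of this opcode may read. Targets before GFX10 allow one; on GFX10+
// most VALU opcodes allow two, but 64-bit shifts are still limited to one.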
3259 unsigned AMDGPUAsmParser::getConstantBusLimit(unsigned Opcode) const {
3260   if (!isGFX10Plus())
3261     return 1;
3262 
3263   switch (Opcode) {
3264   // 64-bit shift instructions can use only one scalar value input
3265   case AMDGPU::V_LSHLREV_B64_e64:
3266   case AMDGPU::V_LSHLREV_B64_gfx10:
3267   case AMDGPU::V_LSHRREV_B64_e64:
3268   case AMDGPU::V_LSHRREV_B64_gfx10:
3269   case AMDGPU::V_ASHRREV_I64_e64:
3270   case AMDGPU::V_ASHRREV_I64_gfx10:
3271   case AMDGPU::V_LSHL_B64_e64:
3272   case AMDGPU::V_LSHR_B64_e64:
3273   case AMDGPU::V_ASHR_I64_e64:
3274     return 1;
3275   default:
3276     return 2;
3277   }
3278 }
3279 
3280 bool AMDGPUAsmParser::usesConstantBus(const MCInst &Inst, unsigned OpIdx) {
3281   const MCOperand &MO = Inst.getOperand(OpIdx);
3282   if (MO.isImm()) {
3283     return !isInlineConstant(Inst, OpIdx);
3284   } else if (MO.isReg()) {
3285     auto Reg = MO.getReg();
3286     const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3287     auto PReg = mc2PseudoReg(Reg);
3288     return isSGPR(PReg, TRI) && PReg != SGPR_NULL;
3289   } else {
3290     return true;
3291   }
3292 }
3293 
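// Check that VALU sources do not read more scalar values than the constant
// bus allows. For example, on targets where the limit is one, an instruction
// like "v_add_f32 v0, s0, s1" (two different SGPRs) is rejected here.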
3294 bool
3295 AMDGPUAsmParser::validateConstantBusLimitations(const MCInst &Inst,
3296                                                 const OperandVector &Operands) {
3297   const unsigned Opcode = Inst.getOpcode();
3298   const MCInstrDesc &Desc = MII.get(Opcode);
3299   unsigned LastSGPR = AMDGPU::NoRegister;
3300   unsigned ConstantBusUseCount = 0;
3301   unsigned NumLiterals = 0;
3302   unsigned LiteralSize;
3303 
3304   if (Desc.TSFlags &
3305       (SIInstrFlags::VOPC |
3306        SIInstrFlags::VOP1 | SIInstrFlags::VOP2 |
3307        SIInstrFlags::VOP3 | SIInstrFlags::VOP3P |
3308        SIInstrFlags::SDWA)) {
3309     // Check special imm operands (used by madmk, etc.)
3310     if (AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm) != -1) {
3311       ++NumLiterals;
3312       LiteralSize = 4;
3313     }
3314 
3315     SmallDenseSet<unsigned> SGPRsUsed;
3316     unsigned SGPRUsed = findImplicitSGPRReadInVOP(Inst);
3317     if (SGPRUsed != AMDGPU::NoRegister) {
3318       SGPRsUsed.insert(SGPRUsed);
3319       ++ConstantBusUseCount;
3320     }
3321 
3322     const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
3323     const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
3324     const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
3325 
3326     const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx };
3327 
3328     for (int OpIdx : OpIndices) {
3329       if (OpIdx == -1) break;
3330 
3331       const MCOperand &MO = Inst.getOperand(OpIdx);
3332       if (usesConstantBus(Inst, OpIdx)) {
3333         if (MO.isReg()) {
3334           LastSGPR = mc2PseudoReg(MO.getReg());
3335           // Pairs of registers with partial intersections like these
3336           //   s0, s[0:1]
3337           //   flat_scratch_lo, flat_scratch
3338           //   flat_scratch_lo, flat_scratch_hi
3339           // are theoretically valid but they are disabled anyway.
3340           // Note that this code mimics SIInstrInfo::verifyInstruction
3341           if (!SGPRsUsed.count(LastSGPR)) {
3342             SGPRsUsed.insert(LastSGPR);
3343             ++ConstantBusUseCount;
3344           }
3345         } else { // Expression or a literal
3346 
3347           if (Desc.OpInfo[OpIdx].OperandType == MCOI::OPERAND_IMMEDIATE)
3348             continue; // special operand like VINTERP attr_chan
3349 
3350           // An instruction may use only one literal.
3351           // This has been validated in the previous step.
3352           // See validateVOPLiteral.
3353           // This literal may be used in more than one operand.
3354           // If all these operands are of the same size,
3355           // this literal counts as one scalar value.
3356           // Otherwise it counts as 2 scalar values.
3357           // See "GFX10 Shader Programming", section 3.6.2.3.
3358 
3359           unsigned Size = AMDGPU::getOperandSize(Desc, OpIdx);
3360           if (Size < 4) Size = 4;
3361 
3362           if (NumLiterals == 0) {
3363             NumLiterals = 1;
3364             LiteralSize = Size;
3365           } else if (LiteralSize != Size) {
3366             NumLiterals = 2;
3367           }
3368         }
3369       }
3370     }
3371   }
3372   ConstantBusUseCount += NumLiterals;
3373 
3374   if (ConstantBusUseCount <= getConstantBusLimit(Opcode))
3375     return true;
3376 
3377   SMLoc LitLoc = getLitLoc(Operands);
3378   SMLoc RegLoc = getRegLoc(LastSGPR, Operands);
3379   SMLoc Loc = (LitLoc.getPointer() < RegLoc.getPointer()) ? RegLoc : LitLoc;
3380   Error(Loc, "invalid operand (violates constant bus restrictions)");
3381   return false;
3382 }
3383 
3384 bool
3385 AMDGPUAsmParser::validateEarlyClobberLimitations(const MCInst &Inst,
3386                                                  const OperandVector &Operands) {
3387   const unsigned Opcode = Inst.getOpcode();
3388   const MCInstrDesc &Desc = MII.get(Opcode);
3389 
3390   const int DstIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vdst);
3391   if (DstIdx == -1 ||
3392       Desc.getOperandConstraint(DstIdx, MCOI::EARLY_CLOBBER) == -1) {
3393     return true;
3394   }
3395 
3396   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3397 
3398   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
3399   const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
3400   const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
3401 
3402   assert(DstIdx != -1);
3403   const MCOperand &Dst = Inst.getOperand(DstIdx);
3404   assert(Dst.isReg());
3405 
3406   const int SrcIndices[] = { Src0Idx, Src1Idx, Src2Idx };
3407 
3408   for (int SrcIdx : SrcIndices) {
3409     if (SrcIdx == -1) break;
3410     const MCOperand &Src = Inst.getOperand(SrcIdx);
3411     if (Src.isReg()) {
3412       if (TRI->regsOverlap(Dst.getReg(), Src.getReg())) {
3413         const unsigned SrcReg = mc2PseudoReg(Src.getReg());
3414         Error(getRegLoc(SrcReg, Operands),
3415           "destination must be different than all sources");
3416         return false;
3417       }
3418     }
3419   }
3420 
3421   return true;
3422 }
3423 
3424 bool AMDGPUAsmParser::validateIntClampSupported(const MCInst &Inst) {
3425 
3426   const unsigned Opc = Inst.getOpcode();
3427   const MCInstrDesc &Desc = MII.get(Opc);
3428 
3429   if ((Desc.TSFlags & SIInstrFlags::IntClamp) != 0 && !hasIntClamp()) {
3430     int ClampIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp);
3431     assert(ClampIdx != -1);
3432     return Inst.getOperand(ClampIdx).getImm() == 0;
3433   }
3434 
3435   return true;
3436 }
3437 
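// Check that the size of the image data operand (vdata) matches what dmask
// and tfe require: one register per dmask bit set (always 4 for gather4),
// plus one if tfe is enabled, halved (rounded up) when packed d16 is used.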
3438 bool AMDGPUAsmParser::validateMIMGDataSize(const MCInst &Inst) {
3439 
3440   const unsigned Opc = Inst.getOpcode();
3441   const MCInstrDesc &Desc = MII.get(Opc);
3442 
3443   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3444     return true;
3445 
3446   int VDataIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdata);
3447   int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
3448   int TFEIdx   = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::tfe);
3449 
3450   assert(VDataIdx != -1);
3451 
3452   if (DMaskIdx == -1 || TFEIdx == -1) // intersect_ray
3453     return true;
3454 
3455   unsigned VDataSize = AMDGPU::getRegOperandSize(getMRI(), Desc, VDataIdx);
3456   unsigned TFESize = (TFEIdx != -1 && Inst.getOperand(TFEIdx).getImm()) ? 1 : 0;
3457   unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
3458   if (DMask == 0)
3459     DMask = 1;
3460 
3461   unsigned DataSize =
3462     (Desc.TSFlags & SIInstrFlags::Gather4) ? 4 : countPopulation(DMask);
3463   if (hasPackedD16()) {
3464     int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
3465     if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm())
3466       DataSize = (DataSize + 1) / 2;
3467   }
3468 
3469   return (VDataSize / 4) == DataSize + TFESize;
3470 }
3471 
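// Check that the number of image address registers matches what dim and a16
// require (GFX10+ only). With NSA encodings each address component has its
// own operand; otherwise the size of the vaddr0 register tuple is checked,
// and some oversized legacy tuples are still accepted for compatibility.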
3472 bool AMDGPUAsmParser::validateMIMGAddrSize(const MCInst &Inst) {
3473   const unsigned Opc = Inst.getOpcode();
3474   const MCInstrDesc &Desc = MII.get(Opc);
3475 
3476   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0 || !isGFX10Plus())
3477     return true;
3478 
3479   const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc);
3480 
3481   const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
3482       AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode);
3483   int VAddr0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr0);
3484   int SrsrcIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::srsrc);
3485   int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
3486   int A16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::a16);
3487 
3488   assert(VAddr0Idx != -1);
3489   assert(SrsrcIdx != -1);
3490   assert(SrsrcIdx > VAddr0Idx);
3491 
3492   if (DimIdx == -1)
3493     return true; // intersect_ray
3494 
3495   unsigned Dim = Inst.getOperand(DimIdx).getImm();
3496   const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim);
3497   bool IsNSA = SrsrcIdx - VAddr0Idx > 1;
3498   unsigned ActualAddrSize =
3499       IsNSA ? SrsrcIdx - VAddr0Idx
3500             : AMDGPU::getRegOperandSize(getMRI(), Desc, VAddr0Idx) / 4;
3501   bool IsA16 = (A16Idx != -1 && Inst.getOperand(A16Idx).getImm());
3502 
3503   unsigned ExpectedAddrSize =
3504       AMDGPU::getAddrSizeMIMGOp(BaseOpcode, DimInfo, IsA16, hasG16());
3505 
3506   if (!IsNSA) {
3507     if (ExpectedAddrSize > 8)
3508       ExpectedAddrSize = 16;
3509 
3510     // Allow oversized 8 VGPR vaddr when only 5/6/7 VGPRs are required.
3511     // This provides backward compatibility for assembly created
3512     // before 160b/192b/224b types were directly supported.
3513     if (ActualAddrSize == 8 && (ExpectedAddrSize >= 5 && ExpectedAddrSize <= 7))
3514       return true;
3515   }
3516 
3517   return ActualAddrSize == ExpectedAddrSize;
3518 }
3519 
3520 bool AMDGPUAsmParser::validateMIMGAtomicDMask(const MCInst &Inst) {
3521 
3522   const unsigned Opc = Inst.getOpcode();
3523   const MCInstrDesc &Desc = MII.get(Opc);
3524 
3525   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3526     return true;
3527   if (!Desc.mayLoad() || !Desc.mayStore())
3528     return true; // Not atomic
3529 
3530   int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
3531   unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
3532 
3533   // This is an incomplete check because image_atomic_cmpswap
3534   // may only use 0x3 and 0xf while other atomic operations
3535   // may use 0x1 and 0x3. However these limitations are
3536   // verified when we check that dmask matches dst size.
3537   return DMask == 0x1 || DMask == 0x3 || DMask == 0xf;
3538 }
3539 
3540 bool AMDGPUAsmParser::validateMIMGGatherDMask(const MCInst &Inst) {
3541 
3542   const unsigned Opc = Inst.getOpcode();
3543   const MCInstrDesc &Desc = MII.get(Opc);
3544 
3545   if ((Desc.TSFlags & SIInstrFlags::Gather4) == 0)
3546     return true;
3547 
3548   int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
3549   unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
3550 
3551   // GATHER4 instructions use dmask in a different fashion compared to
3552   // other MIMG instructions. The only useful DMASK values are
3553   // 1=red, 2=green, 4=blue, 8=alpha. (e.g. 1 returns
3554   // (red,red,red,red) etc.) The ISA document doesn't mention
3555   // this.
3556   return DMask == 0x1 || DMask == 0x2 || DMask == 0x4 || DMask == 0x8;
3557 }
3558 
3559 bool AMDGPUAsmParser::validateMIMGMSAA(const MCInst &Inst) {
3560   const unsigned Opc = Inst.getOpcode();
3561   const MCInstrDesc &Desc = MII.get(Opc);
3562 
3563   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3564     return true;
3565 
3566   const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc);
3567   const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
3568       AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode);
3569 
3570   if (!BaseOpcode->MSAA)
3571     return true;
3572 
3573   int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
3574   assert(DimIdx != -1);
3575 
3576   unsigned Dim = Inst.getOperand(DimIdx).getImm();
3577   const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim);
3578 
3579   return DimInfo->MSAA;
3580 }
3581 
3582 static bool IsMovrelsSDWAOpcode(const unsigned Opcode)
3583 {
3584   switch (Opcode) {
3585   case AMDGPU::V_MOVRELS_B32_sdwa_gfx10:
3586   case AMDGPU::V_MOVRELSD_B32_sdwa_gfx10:
3587   case AMDGPU::V_MOVRELSD_2_B32_sdwa_gfx10:
3588     return true;
3589   default:
3590     return false;
3591   }
3592 }
3593 
3594 // movrels* opcodes should only allow VGPRs as src0.
3595 // This is specified in the .td description for vop1/vop3,
3596 // but sdwa is handled differently. See isSDWAOperand.
3597 bool AMDGPUAsmParser::validateMovrels(const MCInst &Inst,
3598                                       const OperandVector &Operands) {
3599 
3600   const unsigned Opc = Inst.getOpcode();
3601   const MCInstrDesc &Desc = MII.get(Opc);
3602 
3603   if ((Desc.TSFlags & SIInstrFlags::SDWA) == 0 || !IsMovrelsSDWAOpcode(Opc))
3604     return true;
3605 
3606   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
3607   assert(Src0Idx != -1);
3608 
3609   SMLoc ErrLoc;
3610   const MCOperand &Src0 = Inst.getOperand(Src0Idx);
3611   if (Src0.isReg()) {
3612     auto Reg = mc2PseudoReg(Src0.getReg());
3613     const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3614     if (!isSGPR(Reg, TRI))
3615       return true;
3616     ErrLoc = getRegLoc(Reg, Operands);
3617   } else {
3618     ErrLoc = getConstLoc(Operands);
3619   }
3620 
3621   Error(ErrLoc, "source operand must be a VGPR");
3622   return false;
3623 }
3624 
3625 bool AMDGPUAsmParser::validateMAIAccWrite(const MCInst &Inst,
3626                                           const OperandVector &Operands) {
3627 
3628   const unsigned Opc = Inst.getOpcode();
3629 
3630   if (Opc != AMDGPU::V_ACCVGPR_WRITE_B32_vi)
3631     return true;
3632 
3633   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
3634   assert(Src0Idx != -1);
3635 
3636   const MCOperand &Src0 = Inst.getOperand(Src0Idx);
3637   if (!Src0.isReg())
3638     return true;
3639 
3640   auto Reg = mc2PseudoReg(Src0.getReg());
3641   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3642   if (isSGPR(Reg, TRI)) {
3643     Error(getRegLoc(Reg, Operands),
3644           "source operand must be either a VGPR or an inline constant");
3645     return false;
3646   }
3647 
3648   return true;
3649 }
3650 
3651 bool AMDGPUAsmParser::validateMFMA(const MCInst &Inst,
3652                                    const OperandVector &Operands) {
3653   const unsigned Opc = Inst.getOpcode();
3654   const MCInstrDesc &Desc = MII.get(Opc);
3655 
3656   if ((Desc.TSFlags & SIInstrFlags::IsMAI) == 0)
3657     return true;
3658 
3659   const int Src2Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2);
3660   if (Src2Idx == -1)
3661     return true;
3662 
3663   const MCOperand &Src2 = Inst.getOperand(Src2Idx);
3664   if (!Src2.isReg())
3665     return true;
3666 
3667   MCRegister Src2Reg = Src2.getReg();
3668   MCRegister DstReg = Inst.getOperand(0).getReg();
3669   if (Src2Reg == DstReg)
3670     return true;
3671 
3672   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3673   if (TRI->getRegClass(Desc.OpInfo[0].RegClass).getSizeInBits() <= 128)
3674     return true;
3675 
3676   if (TRI->regsOverlap(Src2Reg, DstReg)) {
3677     Error(getRegLoc(mc2PseudoReg(Src2Reg), Operands),
3678           "source 2 operand must not partially overlap with dst");
3679     return false;
3680   }
3681 
3682   return true;
3683 }
3684 
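// v_div_scale_{f32,f64} (VOP3B) does not accept the |...| (abs) source
// modifier; reject the instruction if any source has ABS set.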
3685 bool AMDGPUAsmParser::validateDivScale(const MCInst &Inst) {
3686   switch (Inst.getOpcode()) {
3687   default:
3688     return true;
3689   case V_DIV_SCALE_F32_gfx6_gfx7:
3690   case V_DIV_SCALE_F32_vi:
3691   case V_DIV_SCALE_F32_gfx10:
3692   case V_DIV_SCALE_F64_gfx6_gfx7:
3693   case V_DIV_SCALE_F64_vi:
3694   case V_DIV_SCALE_F64_gfx10:
3695     break;
3696   }
3697 
3698   // TODO: Check that src0 = src1 or src2.
3699 
3700   for (auto Name : {AMDGPU::OpName::src0_modifiers,
3701                     AMDGPU::OpName::src1_modifiers,
3702                     AMDGPU::OpName::src2_modifiers}) {
3703     if (Inst.getOperand(AMDGPU::getNamedOperandIdx(Inst.getOpcode(), Name))
3704             .getImm() &
3705         SISrcMods::ABS) {
3706       return false;
3707     }
3708   }
3709 
3710   return true;
3711 }
3712 
3713 bool AMDGPUAsmParser::validateMIMGD16(const MCInst &Inst) {
3714 
3715   const unsigned Opc = Inst.getOpcode();
3716   const MCInstrDesc &Desc = MII.get(Opc);
3717 
3718   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3719     return true;
3720 
3721   int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
3722   if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm()) {
3723     if (isCI() || isSI())
3724       return false;
3725   }
3726 
3727   return true;
3728 }
3729 
3730 bool AMDGPUAsmParser::validateMIMGDim(const MCInst &Inst) {
3731   const unsigned Opc = Inst.getOpcode();
3732   const MCInstrDesc &Desc = MII.get(Opc);
3733 
3734   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3735     return true;
3736 
3737   int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
3738   if (DimIdx < 0)
3739     return true;
3740 
3741   long Imm = Inst.getOperand(DimIdx).getImm();
3742   if (Imm < 0 || Imm >= 8)
3743     return false;
3744 
3745   return true;
3746 }
3747 
3748 static bool IsRevOpcode(const unsigned Opcode)
3749 {
3750   switch (Opcode) {
3751   case AMDGPU::V_SUBREV_F32_e32:
3752   case AMDGPU::V_SUBREV_F32_e64:
3753   case AMDGPU::V_SUBREV_F32_e32_gfx10:
3754   case AMDGPU::V_SUBREV_F32_e32_gfx6_gfx7:
3755   case AMDGPU::V_SUBREV_F32_e32_vi:
3756   case AMDGPU::V_SUBREV_F32_e64_gfx10:
3757   case AMDGPU::V_SUBREV_F32_e64_gfx6_gfx7:
3758   case AMDGPU::V_SUBREV_F32_e64_vi:
3759 
3760   case AMDGPU::V_SUBREV_CO_U32_e32:
3761   case AMDGPU::V_SUBREV_CO_U32_e64:
3762   case AMDGPU::V_SUBREV_I32_e32_gfx6_gfx7:
3763   case AMDGPU::V_SUBREV_I32_e64_gfx6_gfx7:
3764 
3765   case AMDGPU::V_SUBBREV_U32_e32:
3766   case AMDGPU::V_SUBBREV_U32_e64:
3767   case AMDGPU::V_SUBBREV_U32_e32_gfx6_gfx7:
3768   case AMDGPU::V_SUBBREV_U32_e32_vi:
3769   case AMDGPU::V_SUBBREV_U32_e64_gfx6_gfx7:
3770   case AMDGPU::V_SUBBREV_U32_e64_vi:
3771 
3772   case AMDGPU::V_SUBREV_U32_e32:
3773   case AMDGPU::V_SUBREV_U32_e64:
3774   case AMDGPU::V_SUBREV_U32_e32_gfx9:
3775   case AMDGPU::V_SUBREV_U32_e32_vi:
3776   case AMDGPU::V_SUBREV_U32_e64_gfx9:
3777   case AMDGPU::V_SUBREV_U32_e64_vi:
3778 
3779   case AMDGPU::V_SUBREV_F16_e32:
3780   case AMDGPU::V_SUBREV_F16_e64:
3781   case AMDGPU::V_SUBREV_F16_e32_gfx10:
3782   case AMDGPU::V_SUBREV_F16_e32_vi:
3783   case AMDGPU::V_SUBREV_F16_e64_gfx10:
3784   case AMDGPU::V_SUBREV_F16_e64_vi:
3785 
3786   case AMDGPU::V_SUBREV_U16_e32:
3787   case AMDGPU::V_SUBREV_U16_e64:
3788   case AMDGPU::V_SUBREV_U16_e32_vi:
3789   case AMDGPU::V_SUBREV_U16_e64_vi:
3790 
3791   case AMDGPU::V_SUBREV_CO_U32_e32_gfx9:
3792   case AMDGPU::V_SUBREV_CO_U32_e64_gfx10:
3793   case AMDGPU::V_SUBREV_CO_U32_e64_gfx9:
3794 
3795   case AMDGPU::V_SUBBREV_CO_U32_e32_gfx9:
3796   case AMDGPU::V_SUBBREV_CO_U32_e64_gfx9:
3797 
3798   case AMDGPU::V_SUBREV_NC_U32_e32_gfx10:
3799   case AMDGPU::V_SUBREV_NC_U32_e64_gfx10:
3800 
3801   case AMDGPU::V_SUBREV_CO_CI_U32_e32_gfx10:
3802   case AMDGPU::V_SUBREV_CO_CI_U32_e64_gfx10:
3803 
3804   case AMDGPU::V_LSHRREV_B32_e32:
3805   case AMDGPU::V_LSHRREV_B32_e64:
3806   case AMDGPU::V_LSHRREV_B32_e32_gfx6_gfx7:
3807   case AMDGPU::V_LSHRREV_B32_e64_gfx6_gfx7:
3808   case AMDGPU::V_LSHRREV_B32_e32_vi:
3809   case AMDGPU::V_LSHRREV_B32_e64_vi:
3810   case AMDGPU::V_LSHRREV_B32_e32_gfx10:
3811   case AMDGPU::V_LSHRREV_B32_e64_gfx10:
3812 
3813   case AMDGPU::V_ASHRREV_I32_e32:
3814   case AMDGPU::V_ASHRREV_I32_e64:
3815   case AMDGPU::V_ASHRREV_I32_e32_gfx10:
3816   case AMDGPU::V_ASHRREV_I32_e32_gfx6_gfx7:
3817   case AMDGPU::V_ASHRREV_I32_e32_vi:
3818   case AMDGPU::V_ASHRREV_I32_e64_gfx10:
3819   case AMDGPU::V_ASHRREV_I32_e64_gfx6_gfx7:
3820   case AMDGPU::V_ASHRREV_I32_e64_vi:
3821 
3822   case AMDGPU::V_LSHLREV_B32_e32:
3823   case AMDGPU::V_LSHLREV_B32_e64:
3824   case AMDGPU::V_LSHLREV_B32_e32_gfx10:
3825   case AMDGPU::V_LSHLREV_B32_e32_gfx6_gfx7:
3826   case AMDGPU::V_LSHLREV_B32_e32_vi:
3827   case AMDGPU::V_LSHLREV_B32_e64_gfx10:
3828   case AMDGPU::V_LSHLREV_B32_e64_gfx6_gfx7:
3829   case AMDGPU::V_LSHLREV_B32_e64_vi:
3830 
3831   case AMDGPU::V_LSHLREV_B16_e32:
3832   case AMDGPU::V_LSHLREV_B16_e64:
3833   case AMDGPU::V_LSHLREV_B16_e32_vi:
3834   case AMDGPU::V_LSHLREV_B16_e64_vi:
3835   case AMDGPU::V_LSHLREV_B16_gfx10:
3836 
3837   case AMDGPU::V_LSHRREV_B16_e32:
3838   case AMDGPU::V_LSHRREV_B16_e64:
3839   case AMDGPU::V_LSHRREV_B16_e32_vi:
3840   case AMDGPU::V_LSHRREV_B16_e64_vi:
3841   case AMDGPU::V_LSHRREV_B16_gfx10:
3842 
3843   case AMDGPU::V_ASHRREV_I16_e32:
3844   case AMDGPU::V_ASHRREV_I16_e64:
3845   case AMDGPU::V_ASHRREV_I16_e32_vi:
3846   case AMDGPU::V_ASHRREV_I16_e64_vi:
3847   case AMDGPU::V_ASHRREV_I16_gfx10:
3848 
3849   case AMDGPU::V_LSHLREV_B64_e64:
3850   case AMDGPU::V_LSHLREV_B64_gfx10:
3851   case AMDGPU::V_LSHLREV_B64_vi:
3852 
3853   case AMDGPU::V_LSHRREV_B64_e64:
3854   case AMDGPU::V_LSHRREV_B64_gfx10:
3855   case AMDGPU::V_LSHRREV_B64_vi:
3856 
3857   case AMDGPU::V_ASHRREV_I64_e64:
3858   case AMDGPU::V_ASHRREV_I64_gfx10:
3859   case AMDGPU::V_ASHRREV_I64_vi:
3860 
3861   case AMDGPU::V_PK_LSHLREV_B16:
3862   case AMDGPU::V_PK_LSHLREV_B16_gfx10:
3863   case AMDGPU::V_PK_LSHLREV_B16_vi:
3864 
3865   case AMDGPU::V_PK_LSHRREV_B16:
3866   case AMDGPU::V_PK_LSHRREV_B16_gfx10:
3867   case AMDGPU::V_PK_LSHRREV_B16_vi:
3868   case AMDGPU::V_PK_ASHRREV_I16:
3869   case AMDGPU::V_PK_ASHRREV_I16_gfx10:
3870   case AMDGPU::V_PK_ASHRREV_I16_vi:
3871     return true;
3872   default:
3873     return false;
3874   }
3875 }
3876 
3877 Optional<StringRef> AMDGPUAsmParser::validateLdsDirect(const MCInst &Inst) {
3878 
3879   using namespace SIInstrFlags;
3880   const unsigned Opcode = Inst.getOpcode();
3881   const MCInstrDesc &Desc = MII.get(Opcode);
3882 
3883   // lds_direct register is defined so that it can be used
3884   // with 9-bit operands only. Ignore encodings which do not accept these.
3885   const auto Enc = VOP1 | VOP2 | VOP3 | VOPC | VOP3P | SIInstrFlags::SDWA;
3886   if ((Desc.TSFlags & Enc) == 0)
3887     return None;
3888 
3889   for (auto SrcName : {OpName::src0, OpName::src1, OpName::src2}) {
3890     auto SrcIdx = getNamedOperandIdx(Opcode, SrcName);
3891     if (SrcIdx == -1)
3892       break;
3893     const auto &Src = Inst.getOperand(SrcIdx);
3894     if (Src.isReg() && Src.getReg() == LDS_DIRECT) {
3895 
3896       if (isGFX90A())
3897         return StringRef("lds_direct is not supported on this GPU");
3898 
3899       if (IsRevOpcode(Opcode) || (Desc.TSFlags & SIInstrFlags::SDWA))
3900         return StringRef("lds_direct cannot be used with this instruction");
3901 
3902       if (SrcName != OpName::src0)
3903         return StringRef("lds_direct may be used as src0 only");
3904     }
3905   }
3906 
3907   return None;
3908 }
3909 
3910 SMLoc AMDGPUAsmParser::getFlatOffsetLoc(const OperandVector &Operands) const {
3911   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
3912     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
3913     if (Op.isFlatOffset())
3914       return Op.getStartLoc();
3915   }
3916   return getLoc();
3917 }
3918 
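// Validate the offset modifier of FLAT instructions: targets without flat
// offsets require it to be zero, global/scratch variants take a signed
// offset, and plain flat addressing takes an unsigned one.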
3919 bool AMDGPUAsmParser::validateFlatOffset(const MCInst &Inst,
3920                                          const OperandVector &Operands) {
3921   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
3922   if ((TSFlags & SIInstrFlags::FLAT) == 0)
3923     return true;
3924 
3925   auto Opcode = Inst.getOpcode();
3926   auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
3927   assert(OpNum != -1);
3928 
3929   const auto &Op = Inst.getOperand(OpNum);
3930   if (!hasFlatOffsets() && Op.getImm() != 0) {
3931     Error(getFlatOffsetLoc(Operands),
3932           "flat offset modifier is not supported on this GPU");
3933     return false;
3934   }
3935 
3936   // For FLAT segment the offset must be positive;
3937   // MSB is ignored and forced to zero.
3938   if (TSFlags & (SIInstrFlags::FlatGlobal | SIInstrFlags::FlatScratch)) {
3939     unsigned OffsetSize = AMDGPU::getNumFlatOffsetBits(getSTI(), true);
3940     if (!isIntN(OffsetSize, Op.getImm())) {
3941       Error(getFlatOffsetLoc(Operands),
3942             Twine("expected a ") + Twine(OffsetSize) + "-bit signed offset");
3943       return false;
3944     }
3945   } else {
3946     unsigned OffsetSize = AMDGPU::getNumFlatOffsetBits(getSTI(), false);
3947     if (!isUIntN(OffsetSize, Op.getImm())) {
3948       Error(getFlatOffsetLoc(Operands),
3949             Twine("expected a ") + Twine(OffsetSize) + "-bit unsigned offset");
3950       return false;
3951     }
3952   }
3953 
3954   return true;
3955 }
3956 
3957 SMLoc AMDGPUAsmParser::getSMEMOffsetLoc(const OperandVector &Operands) const {
3958   // Start with second operand because SMEM Offset cannot be dst or src0.
3959   for (unsigned i = 2, e = Operands.size(); i != e; ++i) {
3960     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
3961     if (Op.isSMEMOffset())
3962       return Op.getStartLoc();
3963   }
3964   return getLoc();
3965 }
3966 
3967 bool AMDGPUAsmParser::validateSMEMOffset(const MCInst &Inst,
3968                                          const OperandVector &Operands) {
3969   if (isCI() || isSI())
3970     return true;
3971 
3972   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
3973   if ((TSFlags & SIInstrFlags::SMRD) == 0)
3974     return true;
3975 
3976   auto Opcode = Inst.getOpcode();
3977   auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
3978   if (OpNum == -1)
3979     return true;
3980 
3981   const auto &Op = Inst.getOperand(OpNum);
3982   if (!Op.isImm())
3983     return true;
3984 
3985   uint64_t Offset = Op.getImm();
3986   bool IsBuffer = AMDGPU::getSMEMIsBuffer(Opcode);
3987   if (AMDGPU::isLegalSMRDEncodedUnsignedOffset(getSTI(), Offset) ||
3988       AMDGPU::isLegalSMRDEncodedSignedOffset(getSTI(), Offset, IsBuffer))
3989     return true;
3990 
3991   Error(getSMEMOffsetLoc(Operands),
3992         (isVI() || IsBuffer) ? "expected a 20-bit unsigned offset" :
3993                                "expected a 21-bit signed offset");
3994 
3995   return false;
3996 }
3997 
3998 bool AMDGPUAsmParser::validateSOPLiteral(const MCInst &Inst) const {
3999   unsigned Opcode = Inst.getOpcode();
4000   const MCInstrDesc &Desc = MII.get(Opcode);
4001   if (!(Desc.TSFlags & (SIInstrFlags::SOP2 | SIInstrFlags::SOPC)))
4002     return true;
4003 
4004   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
4005   const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
4006 
4007   const int OpIndices[] = { Src0Idx, Src1Idx };
4008 
4009   unsigned NumExprs = 0;
4010   unsigned NumLiterals = 0;
4011   uint32_t LiteralValue;
4012 
4013   for (int OpIdx : OpIndices) {
4014     if (OpIdx == -1) break;
4015 
4016     const MCOperand &MO = Inst.getOperand(OpIdx);
4017     // Exclude special imm operands (like the one used by s_set_gpr_idx_on)
4018     if (AMDGPU::isSISrcOperand(Desc, OpIdx)) {
4019       if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) {
4020         uint32_t Value = static_cast<uint32_t>(MO.getImm());
4021         if (NumLiterals == 0 || LiteralValue != Value) {
4022           LiteralValue = Value;
4023           ++NumLiterals;
4024         }
4025       } else if (MO.isExpr()) {
4026         ++NumExprs;
4027       }
4028     }
4029   }
4030 
4031   return NumLiterals + NumExprs <= 1;
4032 }
4033 
4034 bool AMDGPUAsmParser::validateOpSel(const MCInst &Inst) {
4035   const unsigned Opc = Inst.getOpcode();
4036   if (Opc == AMDGPU::V_PERMLANE16_B32_gfx10 ||
4037       Opc == AMDGPU::V_PERMLANEX16_B32_gfx10) {
4038     int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
4039     unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
4040 
4041     if (OpSel & ~3)
4042       return false;
4043   }
4044   return true;
4045 }
4046 
4047 bool AMDGPUAsmParser::validateDPP(const MCInst &Inst,
4048                                   const OperandVector &Operands) {
4049   const unsigned Opc = Inst.getOpcode();
4050   int DppCtrlIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dpp_ctrl);
4051   if (DppCtrlIdx < 0)
4052     return true;
4053   unsigned DppCtrl = Inst.getOperand(DppCtrlIdx).getImm();
4054 
4055   if (!AMDGPU::isLegal64BitDPPControl(DppCtrl)) {
4056     // DPP64 is supported for row_newbcast only.
4057     int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
4058     if (Src0Idx >= 0 &&
4059         getMRI()->getSubReg(Inst.getOperand(Src0Idx).getReg(), AMDGPU::sub1)) {
4060       SMLoc S = getImmLoc(AMDGPUOperand::ImmTyDppCtrl, Operands);
4061       Error(S, "64 bit dpp only supports row_newbcast");
4062       return false;
4063     }
4064   }
4065 
4066   return true;
4067 }
4068 
4069 // Check if VCC register matches wavefront size
4070 bool AMDGPUAsmParser::validateVccOperand(unsigned Reg) const {
4071   auto FB = getFeatureBits();
4072   return (FB[AMDGPU::FeatureWavefrontSize64] && Reg == AMDGPU::VCC) ||
4073     (FB[AMDGPU::FeatureWavefrontSize32] && Reg == AMDGPU::VCC_LO);
4074 }
4075 
4076 // Only one unique literal can be used. A VOP3 literal is only allowed on GFX10+.
4077 bool AMDGPUAsmParser::validateVOPLiteral(const MCInst &Inst,
4078                                          const OperandVector &Operands) {
4079   unsigned Opcode = Inst.getOpcode();
4080   const MCInstrDesc &Desc = MII.get(Opcode);
4081   const int ImmIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm);
4082   if (!(Desc.TSFlags & (SIInstrFlags::VOP3 | SIInstrFlags::VOP3P)) &&
4083       ImmIdx == -1)
4084     return true;
4085 
4086   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
4087   const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
4088   const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
4089 
4090   const int OpIndices[] = {Src0Idx, Src1Idx, Src2Idx, ImmIdx};
4091 
4092   unsigned NumExprs = 0;
4093   unsigned NumLiterals = 0;
4094   uint32_t LiteralValue;
4095 
4096   for (int OpIdx : OpIndices) {
4097     if (OpIdx == -1)
4098       continue;
4099 
4100     const MCOperand &MO = Inst.getOperand(OpIdx);
4101     if (!MO.isImm() && !MO.isExpr())
4102       continue;
4103     if (!AMDGPU::isSISrcOperand(Desc, OpIdx))
4104       continue;
4105 
4106     if (OpIdx == Src2Idx && (Desc.TSFlags & SIInstrFlags::IsMAI) &&
4107         getFeatureBits()[AMDGPU::FeatureMFMAInlineLiteralBug]) {
4108       Error(getConstLoc(Operands),
4109             "inline constants are not allowed for this operand");
4110       return false;
4111     }
4112 
4113     if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) {
4114       uint32_t Value = static_cast<uint32_t>(MO.getImm());
4115       if (NumLiterals == 0 || LiteralValue != Value) {
4116         LiteralValue = Value;
4117         ++NumLiterals;
4118       }
4119     } else if (MO.isExpr()) {
4120       ++NumExprs;
4121     }
4122   }
4123   NumLiterals += NumExprs;
4124 
4125   if (!NumLiterals)
4126     return true;
4127 
4128   if (ImmIdx == -1 && !getFeatureBits()[AMDGPU::FeatureVOP3Literal]) {
4129     Error(getLitLoc(Operands), "literal operands are not supported");
4130     return false;
4131   }
4132 
4133   if (NumLiterals > 1) {
4134     Error(getLitLoc(Operands), "only one literal operand is allowed");
4135     return false;
4136   }
4137 
4138   return true;
4139 }
4140 
4141 // Returns -1 if not a register, 0 if VGPR and 1 if AGPR.
4142 static int IsAGPROperand(const MCInst &Inst, uint16_t NameIdx,
4143                          const MCRegisterInfo *MRI) {
4144   int OpIdx = AMDGPU::getNamedOperandIdx(Inst.getOpcode(), NameIdx);
4145   if (OpIdx < 0)
4146     return -1;
4147 
4148   const MCOperand &Op = Inst.getOperand(OpIdx);
4149   if (!Op.isReg())
4150     return -1;
4151 
4152   unsigned Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0);
4153   auto Reg = Sub ? Sub : Op.getReg();
4154   const MCRegisterClass &AGPR32 = MRI->getRegClass(AMDGPU::AGPR_32RegClassID);
4155   return AGPR32.contains(Reg) ? 1 : 0;
4156 }
4157 
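// For FLAT/MUBUF/MTBUF/MIMG/DS memory instructions, check the register file
// of the data and destination operands: on gfx90a they must either both be
// VGPRs or both be AGPRs; on other targets AGPRs are not accepted at all.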
4158 bool AMDGPUAsmParser::validateAGPRLdSt(const MCInst &Inst) const {
4159   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4160   if ((TSFlags & (SIInstrFlags::FLAT | SIInstrFlags::MUBUF |
4161                   SIInstrFlags::MTBUF | SIInstrFlags::MIMG |
4162                   SIInstrFlags::DS)) == 0)
4163     return true;
4164 
4165   uint16_t DataNameIdx = (TSFlags & SIInstrFlags::DS) ? AMDGPU::OpName::data0
4166                                                       : AMDGPU::OpName::vdata;
4167 
4168   const MCRegisterInfo *MRI = getMRI();
4169   int DstAreg = IsAGPROperand(Inst, AMDGPU::OpName::vdst, MRI);
4170   int DataAreg = IsAGPROperand(Inst, DataNameIdx, MRI);
4171 
4172   if ((TSFlags & SIInstrFlags::DS) && DataAreg >= 0) {
4173     int Data2Areg = IsAGPROperand(Inst, AMDGPU::OpName::data1, MRI);
4174     if (Data2Areg >= 0 && Data2Areg != DataAreg)
4175       return false;
4176   }
4177 
4178   auto FB = getFeatureBits();
4179   if (FB[AMDGPU::FeatureGFX90AInsts]) {
4180     if (DataAreg < 0 || DstAreg < 0)
4181       return true;
4182     return DstAreg == DataAreg;
4183   }
4184 
4185   return DstAreg < 1 && DataAreg < 1;
4186 }
4187 
4188 bool AMDGPUAsmParser::validateVGPRAlign(const MCInst &Inst) const {
4189   auto FB = getFeatureBits();
4190   if (!FB[AMDGPU::FeatureGFX90AInsts])
4191     return true;
4192 
4193   const MCRegisterInfo *MRI = getMRI();
4194   const MCRegisterClass &VGPR32 = MRI->getRegClass(AMDGPU::VGPR_32RegClassID);
4195   const MCRegisterClass &AGPR32 = MRI->getRegClass(AMDGPU::AGPR_32RegClassID);
4196   for (unsigned I = 0, E = Inst.getNumOperands(); I != E; ++I) {
4197     const MCOperand &Op = Inst.getOperand(I);
4198     if (!Op.isReg())
4199       continue;
4200 
4201     unsigned Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0);
4202     if (!Sub)
4203       continue;
4204 
4205     if (VGPR32.contains(Sub) && ((Sub - AMDGPU::VGPR0) & 1))
4206       return false;
4207     if (AGPR32.contains(Sub) && ((Sub - AMDGPU::AGPR0) & 1))
4208       return false;
4209   }
4210 
4211   return true;
4212 }
4213 
4214 // gfx90a has an undocumented limitation:
4215 // DS_GWS opcodes must use even aligned registers.
4216 bool AMDGPUAsmParser::validateGWS(const MCInst &Inst,
4217                                   const OperandVector &Operands) {
4218   if (!getFeatureBits()[AMDGPU::FeatureGFX90AInsts])
4219     return true;
4220 
4221   int Opc = Inst.getOpcode();
4222   if (Opc != AMDGPU::DS_GWS_INIT_vi && Opc != AMDGPU::DS_GWS_BARRIER_vi &&
4223       Opc != AMDGPU::DS_GWS_SEMA_BR_vi)
4224     return true;
4225 
4226   const MCRegisterInfo *MRI = getMRI();
4227   const MCRegisterClass &VGPR32 = MRI->getRegClass(AMDGPU::VGPR_32RegClassID);
4228   int Data0Pos =
4229       AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::data0);
4230   assert(Data0Pos != -1);
4231   auto Reg = Inst.getOperand(Data0Pos).getReg();
4232   auto RegIdx = Reg - (VGPR32.contains(Reg) ? AMDGPU::VGPR0 : AMDGPU::AGPR0);
4233   if (RegIdx & 1) {
4234     SMLoc RegLoc = getRegLoc(Reg, Operands);
4235     Error(RegLoc, "vgpr must be even aligned");
4236     return false;
4237   }
4238 
4239   return true;
4240 }
4241 
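// Validate the cache policy (cpol) bits: SMRD accepts only glc/dlc, scc is
// not available on gfx90a, atomics that return a value must use glc (except
// MIMG), and non-returning atomics must not.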
4242 bool AMDGPUAsmParser::validateCoherencyBits(const MCInst &Inst,
4243                                             const OperandVector &Operands,
4244                                             const SMLoc &IDLoc) {
4245   int CPolPos = AMDGPU::getNamedOperandIdx(Inst.getOpcode(),
4246                                            AMDGPU::OpName::cpol);
4247   if (CPolPos == -1)
4248     return true;
4249 
4250   unsigned CPol = Inst.getOperand(CPolPos).getImm();
4251 
4252   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4253   if ((TSFlags & (SIInstrFlags::SMRD)) &&
4254       (CPol & ~(AMDGPU::CPol::GLC | AMDGPU::CPol::DLC))) {
4255     Error(IDLoc, "invalid cache policy for SMRD instruction");
4256     return false;
4257   }
4258 
4259   if (isGFX90A() && (CPol & CPol::SCC)) {
4260     SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
4261     StringRef CStr(S.getPointer());
4262     S = SMLoc::getFromPointer(&CStr.data()[CStr.find("scc")]);
4263     Error(S, "scc is not supported on this GPU");
4264     return false;
4265   }
4266 
4267   if (!(TSFlags & (SIInstrFlags::IsAtomicNoRet | SIInstrFlags::IsAtomicRet)))
4268     return true;
4269 
4270   if (TSFlags & SIInstrFlags::IsAtomicRet) {
4271     if (!(TSFlags & SIInstrFlags::MIMG) && !(CPol & CPol::GLC)) {
4272       Error(IDLoc, "instruction must use glc");
4273       return false;
4274     }
4275   } else {
4276     if (CPol & CPol::GLC) {
4277       SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
4278       StringRef CStr(S.getPointer());
4279       S = SMLoc::getFromPointer(&CStr.data()[CStr.find("glc")]);
4280       Error(S, "instruction must not use glc");
4281       return false;
4282     }
4283   }
4284 
4285   return true;
4286 }
4287 
4288 bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst,
4289                                           const SMLoc &IDLoc,
4290                                           const OperandVector &Operands) {
4291   if (auto ErrMsg = validateLdsDirect(Inst)) {
4292     Error(getRegLoc(LDS_DIRECT, Operands), *ErrMsg);
4293     return false;
4294   }
4295   if (!validateSOPLiteral(Inst)) {
4296     Error(getLitLoc(Operands),
4297       "only one literal operand is allowed");
4298     return false;
4299   }
4300   if (!validateVOPLiteral(Inst, Operands)) {
4301     return false;
4302   }
4303   if (!validateConstantBusLimitations(Inst, Operands)) {
4304     return false;
4305   }
4306   if (!validateEarlyClobberLimitations(Inst, Operands)) {
4307     return false;
4308   }
4309   if (!validateIntClampSupported(Inst)) {
4310     Error(getImmLoc(AMDGPUOperand::ImmTyClampSI, Operands),
4311       "integer clamping is not supported on this GPU");
4312     return false;
4313   }
4314   if (!validateOpSel(Inst)) {
4315     Error(getImmLoc(AMDGPUOperand::ImmTyOpSel, Operands),
4316       "invalid op_sel operand");
4317     return false;
4318   }
4319   if (!validateDPP(Inst, Operands)) {
4320     return false;
4321   }
4322   // For MUBUF/MTBUF, d16 is part of the opcode, so there is nothing to validate.
4323   if (!validateMIMGD16(Inst)) {
4324     Error(getImmLoc(AMDGPUOperand::ImmTyD16, Operands),
4325       "d16 modifier is not supported on this GPU");
4326     return false;
4327   }
4328   if (!validateMIMGDim(Inst)) {
4329     Error(IDLoc, "dim modifier is required on this GPU");
4330     return false;
4331   }
4332   if (!validateMIMGMSAA(Inst)) {
4333     Error(getImmLoc(AMDGPUOperand::ImmTyDim, Operands),
4334           "invalid dim; must be MSAA type");
4335     return false;
4336   }
4337   if (!validateMIMGDataSize(Inst)) {
4338     Error(IDLoc,
4339       "image data size does not match dmask and tfe");
4340     return false;
4341   }
4342   if (!validateMIMGAddrSize(Inst)) {
4343     Error(IDLoc,
4344       "image address size does not match dim and a16");
4345     return false;
4346   }
4347   if (!validateMIMGAtomicDMask(Inst)) {
4348     Error(getImmLoc(AMDGPUOperand::ImmTyDMask, Operands),
4349       "invalid atomic image dmask");
4350     return false;
4351   }
4352   if (!validateMIMGGatherDMask(Inst)) {
4353     Error(getImmLoc(AMDGPUOperand::ImmTyDMask, Operands),
4354       "invalid image_gather dmask: only one bit must be set");
4355     return false;
4356   }
4357   if (!validateMovrels(Inst, Operands)) {
4358     return false;
4359   }
4360   if (!validateFlatOffset(Inst, Operands)) {
4361     return false;
4362   }
4363   if (!validateSMEMOffset(Inst, Operands)) {
4364     return false;
4365   }
4366   if (!validateMAIAccWrite(Inst, Operands)) {
4367     return false;
4368   }
4369   if (!validateMFMA(Inst, Operands)) {
4370     return false;
4371   }
4372   if (!validateCoherencyBits(Inst, Operands, IDLoc)) {
4373     return false;
4374   }
4375 
4376   if (!validateAGPRLdSt(Inst)) {
4377     Error(IDLoc, getFeatureBits()[AMDGPU::FeatureGFX90AInsts]
4378     ? "invalid register class: data and dst should be all VGPR or AGPR"
4379     : "invalid register class: agpr loads and stores not supported on this GPU"
4380     );
4381     return false;
4382   }
4383   if (!validateVGPRAlign(Inst)) {
4384     Error(IDLoc,
4385       "invalid register class: vgpr tuples must be 64 bit aligned");
4386     return false;
4387   }
4388   if (!validateGWS(Inst, Operands)) {
4389     return false;
4390   }
4391 
4392   if (!validateDivScale(Inst)) {
4393     Error(IDLoc, "ABS not allowed in VOP3B instructions");
4394     return false;
4395   }
4399 
4400   return true;
4401 }
4402 
4403 static std::string AMDGPUMnemonicSpellCheck(StringRef S,
4404                                             const FeatureBitset &FBS,
4405                                             unsigned VariantID = 0);
4406 
4407 static bool AMDGPUCheckMnemonic(StringRef Mnemonic,
4408                                 const FeatureBitset &AvailableFeatures,
4409                                 unsigned VariantID);
4410 
4411 bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo,
4412                                        const FeatureBitset &FBS) {
4413   return isSupportedMnemo(Mnemo, FBS, getAllVariants());
4414 }
4415 
4416 bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo,
4417                                        const FeatureBitset &FBS,
4418                                        ArrayRef<unsigned> Variants) {
4419   for (auto Variant : Variants) {
4420     if (AMDGPUCheckMnemonic(Mnemo, FBS, Variant))
4421       return true;
4422   }
4423 
4424   return false;
4425 }
4426 
4427 bool AMDGPUAsmParser::checkUnsupportedInstruction(StringRef Mnemo,
4428                                                   const SMLoc &IDLoc) {
4429   FeatureBitset FBS = ComputeAvailableFeatures(getSTI().getFeatureBits());
4430 
4431   // Check if requested instruction variant is supported.
4432   if (isSupportedMnemo(Mnemo, FBS, getMatchedVariants()))
4433     return false;
4434 
4435   // This instruction is not supported.
4436   // Clear any other pending errors because they are no longer relevant.
4437   getParser().clearPendingErrors();
4438 
4439   // Requested instruction variant is not supported.
4440   // Check if any other variants are supported.
4441   StringRef VariantName = getMatchedVariantName();
4442   if (!VariantName.empty() && isSupportedMnemo(Mnemo, FBS)) {
4443     return Error(IDLoc,
4444                  Twine(VariantName,
4445                        " variant of this instruction is not supported"));
4446   }
4447 
4448   // Finally check if this instruction is supported on any other GPU.
4449   if (isSupportedMnemo(Mnemo, FeatureBitset().set())) {
4450     return Error(IDLoc, "instruction not supported on this GPU");
4451   }
4452 
4453   // Instruction not supported on any GPU. Probably a typo.
4454   std::string Suggestion = AMDGPUMnemonicSpellCheck(Mnemo, FBS);
4455   return Error(IDLoc, "invalid instruction" + Suggestion);
4456 }
4457 
4458 bool AMDGPUAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
4459                                               OperandVector &Operands,
4460                                               MCStreamer &Out,
4461                                               uint64_t &ErrorInfo,
4462                                               bool MatchingInlineAsm) {
4463   MCInst Inst;
4464   unsigned Result = Match_Success;
4465   for (auto Variant : getMatchedVariants()) {
4466     uint64_t EI;
4467     auto R = MatchInstructionImpl(Operands, Inst, EI, MatchingInlineAsm,
4468                                   Variant);
4469     // We order match statuses from least to most specific and use the most
4470     // specific status as the result:
4471     // Match_MnemonicFail < Match_InvalidOperand < Match_MissingFeature < Match_PreferE32
4472     if ((R == Match_Success) ||
4473         (R == Match_PreferE32) ||
4474         (R == Match_MissingFeature && Result != Match_PreferE32) ||
4475         (R == Match_InvalidOperand && Result != Match_MissingFeature
4476                                    && Result != Match_PreferE32) ||
4477         (R == Match_MnemonicFail   && Result != Match_InvalidOperand
4478                                    && Result != Match_MissingFeature
4479                                    && Result != Match_PreferE32)) {
4480       Result = R;
4481       ErrorInfo = EI;
4482     }
4483     if (R == Match_Success)
4484       break;
4485   }
4486 
4487   if (Result == Match_Success) {
4488     if (!validateInstruction(Inst, IDLoc, Operands)) {
4489       return true;
4490     }
4491     Inst.setLoc(IDLoc);
4492     Out.emitInstruction(Inst, getSTI());
4493     return false;
4494   }
4495 
4496   StringRef Mnemo = ((AMDGPUOperand &)*Operands[0]).getToken();
4497   if (checkUnsupportedInstruction(Mnemo, IDLoc)) {
4498     return true;
4499   }
4500 
4501   switch (Result) {
4502   default: break;
4503   case Match_MissingFeature:
4504     // It has been verified that the specified instruction
4505     // mnemonic is valid. A match was found but it requires
4506     // features which are not supported on this GPU.
4507     return Error(IDLoc, "operands are not valid for this GPU or mode");
4508 
4509   case Match_InvalidOperand: {
4510     SMLoc ErrorLoc = IDLoc;
4511     if (ErrorInfo != ~0ULL) {
4512       if (ErrorInfo >= Operands.size()) {
4513         return Error(IDLoc, "too few operands for instruction");
4514       }
4515       ErrorLoc = ((AMDGPUOperand &)*Operands[ErrorInfo]).getStartLoc();
4516       if (ErrorLoc == SMLoc())
4517         ErrorLoc = IDLoc;
4518     }
4519     return Error(ErrorLoc, "invalid operand for instruction");
4520   }
4521 
4522   case Match_PreferE32:
4523     return Error(IDLoc, "internal error: instruction without _e64 suffix "
4524                         "should be encoded as e32");
4525   case Match_MnemonicFail:
4526     llvm_unreachable("Invalid instructions should have been handled already");
4527   }
4528   llvm_unreachable("Implement any new match types added!");
4529 }
4530 
4531 bool AMDGPUAsmParser::ParseAsAbsoluteExpression(uint32_t &Ret) {
4532   int64_t Tmp = -1;
4533   if (!isToken(AsmToken::Integer) && !isToken(AsmToken::Identifier)) {
4534     return true;
4535   }
4536   if (getParser().parseAbsoluteExpression(Tmp)) {
4537     return true;
4538   }
4539   Ret = static_cast<uint32_t>(Tmp);
4540   return false;
4541 }
4542 
4543 bool AMDGPUAsmParser::ParseDirectiveMajorMinor(uint32_t &Major,
4544                                                uint32_t &Minor) {
4545   if (ParseAsAbsoluteExpression(Major))
4546     return TokError("invalid major version");
4547 
4548   if (!trySkipToken(AsmToken::Comma))
4549     return TokError("minor version number required, comma expected");
4550 
4551   if (ParseAsAbsoluteExpression(Minor))
4552     return TokError("invalid minor version");
4553 
4554   return false;
4555 }
4556 
4557 bool AMDGPUAsmParser::ParseDirectiveAMDGCNTarget() {
4558   if (getSTI().getTargetTriple().getArch() != Triple::amdgcn)
4559     return TokError("directive only supported for amdgcn architecture");
4560 
4561   std::string TargetIDDirective;
4562   SMLoc TargetStart = getTok().getLoc();
4563   if (getParser().parseEscapedString(TargetIDDirective))
4564     return true;
4565 
4566   SMRange TargetRange = SMRange(TargetStart, getTok().getLoc());
4567   if (getTargetStreamer().getTargetID()->toString() != TargetIDDirective)
4568     return getParser().Error(TargetRange.Start,
4569         (Twine(".amdgcn_target directive's target id ") +
4570          Twine(TargetIDDirective) +
4571          Twine(" does not match the specified target id ") +
4572          Twine(getTargetStreamer().getTargetID()->toString())).str());
4573 
4574   return false;
4575 }
4576 
4577 bool AMDGPUAsmParser::OutOfRangeError(SMRange Range) {
4578   return Error(Range.Start, "value out of range", Range);
4579 }
4580 
4581 bool AMDGPUAsmParser::calculateGPRBlocks(
4582     const FeatureBitset &Features, bool VCCUsed, bool FlatScrUsed,
4583     bool XNACKUsed, Optional<bool> EnableWavefrontSize32, unsigned NextFreeVGPR,
4584     SMRange VGPRRange, unsigned NextFreeSGPR, SMRange SGPRRange,
4585     unsigned &VGPRBlocks, unsigned &SGPRBlocks) {
4586   // TODO(scott.linder): These calculations are duplicated from
4587   // AMDGPUAsmPrinter::getSIProgramInfo and could be unified.
4588   IsaVersion Version = getIsaVersion(getSTI().getCPU());
4589 
4590   unsigned NumVGPRs = NextFreeVGPR;
4591   unsigned NumSGPRs = NextFreeSGPR;
4592 
4593   if (Version.Major >= 10)
4594     NumSGPRs = 0;
4595   else {
4596     unsigned MaxAddressableNumSGPRs =
4597         IsaInfo::getAddressableNumSGPRs(&getSTI());
4598 
4599     if (Version.Major >= 8 && !Features.test(FeatureSGPRInitBug) &&
4600         NumSGPRs > MaxAddressableNumSGPRs)
4601       return OutOfRangeError(SGPRRange);
4602 
4603     NumSGPRs +=
4604         IsaInfo::getNumExtraSGPRs(&getSTI(), VCCUsed, FlatScrUsed, XNACKUsed);
4605 
4606     if ((Version.Major <= 7 || Features.test(FeatureSGPRInitBug)) &&
4607         NumSGPRs > MaxAddressableNumSGPRs)
4608       return OutOfRangeError(SGPRRange);
4609 
4610     if (Features.test(FeatureSGPRInitBug))
4611       NumSGPRs = IsaInfo::FIXED_NUM_SGPRS_FOR_INIT_BUG;
4612   }
4613 
4614   VGPRBlocks =
4615       IsaInfo::getNumVGPRBlocks(&getSTI(), NumVGPRs, EnableWavefrontSize32);
4616   SGPRBlocks = IsaInfo::getNumSGPRBlocks(&getSTI(), NumSGPRs);
4617 
4618   return false;
4619 }
4620 
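// Parse a ".amdhsa_kernel <name>" directive block terminated by
// ".end_amdhsa_kernel". Each directive inside sets one field of the kernel
// descriptor and may appear at most once, e.g. (illustrative sketch; the
// kernel name and field values are made up):
//   .amdhsa_kernel my_kernel
//     .amdhsa_group_segment_fixed_size 1024
//     .amdhsa_user_sgpr_dispatch_ptr 1
//   .end_amdhsa_kernel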
4621 bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
4622   if (getSTI().getTargetTriple().getArch() != Triple::amdgcn)
4623     return TokError("directive only supported for amdgcn architecture");
4624 
4625   if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA)
4626     return TokError("directive only supported for amdhsa OS");
4627 
4628   StringRef KernelName;
4629   if (getParser().parseIdentifier(KernelName))
4630     return true;
4631 
4632   kernel_descriptor_t KD = getDefaultAmdhsaKernelDescriptor(&getSTI());
4633 
4634   StringSet<> Seen;
4635 
4636   IsaVersion IVersion = getIsaVersion(getSTI().getCPU());
4637 
4638   SMRange VGPRRange;
4639   uint64_t NextFreeVGPR = 0;
4640   uint64_t AccumOffset = 0;
4641   SMRange SGPRRange;
4642   uint64_t NextFreeSGPR = 0;
4643 
4644   // Count the number of user SGPRs implied from the enabled feature bits.
4645   unsigned ImpliedUserSGPRCount = 0;
4646 
4647   // Track if the asm explicitly contains the directive for the user SGPR
4648   // count.
4649   Optional<unsigned> ExplicitUserSGPRCount;
4650   bool ReserveVCC = true;
4651   bool ReserveFlatScr = true;
4652   Optional<bool> EnableWavefrontSize32;
4653 
4654   while (true) {
4655     while (trySkipToken(AsmToken::EndOfStatement));
4656 
4657     StringRef ID;
4658     SMRange IDRange = getTok().getLocRange();
4659     if (!parseId(ID, "expected .amdhsa_ directive or .end_amdhsa_kernel"))
4660       return true;
4661 
4662     if (ID == ".end_amdhsa_kernel")
4663       break;
4664 
4665     if (Seen.find(ID) != Seen.end())
4666       return TokError(".amdhsa_ directives cannot be repeated");
4667     Seen.insert(ID);
4668 
4669     SMLoc ValStart = getLoc();
4670     int64_t IVal;
4671     if (getParser().parseAbsoluteExpression(IVal))
4672       return true;
4673     SMLoc ValEnd = getLoc();
4674     SMRange ValRange = SMRange(ValStart, ValEnd);
4675 
4676     if (IVal < 0)
4677       return OutOfRangeError(ValRange);
4678 
4679     uint64_t Val = IVal;
4680 
4681 #define PARSE_BITS_ENTRY(FIELD, ENTRY, VALUE, RANGE)                           \
4682   if (!isUInt<ENTRY##_WIDTH>(VALUE))                                           \
4683     return OutOfRangeError(RANGE);                                             \
4684   AMDHSA_BITS_SET(FIELD, ENTRY, VALUE);
4685 
4686     if (ID == ".amdhsa_group_segment_fixed_size") {
4687       if (!isUInt<sizeof(KD.group_segment_fixed_size) * CHAR_BIT>(Val))
4688         return OutOfRangeError(ValRange);
4689       KD.group_segment_fixed_size = Val;
4690     } else if (ID == ".amdhsa_private_segment_fixed_size") {
4691       if (!isUInt<sizeof(KD.private_segment_fixed_size) * CHAR_BIT>(Val))
4692         return OutOfRangeError(ValRange);
4693       KD.private_segment_fixed_size = Val;
4694     } else if (ID == ".amdhsa_kernarg_size") {
4695       if (!isUInt<sizeof(KD.kernarg_size) * CHAR_BIT>(Val))
4696         return OutOfRangeError(ValRange);
4697       KD.kernarg_size = Val;
4698     } else if (ID == ".amdhsa_user_sgpr_count") {
4699       ExplicitUserSGPRCount = Val;
4700     } else if (ID == ".amdhsa_user_sgpr_private_segment_buffer") {
4701       if (hasArchitectedFlatScratch())
4702         return Error(IDRange.Start,
4703                      "directive is not supported with architected flat scratch",
4704                      IDRange);
4705       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4706                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER,
4707                        Val, ValRange);
4708       if (Val)
4709         ImpliedUserSGPRCount += 4;
4710     } else if (ID == ".amdhsa_user_sgpr_dispatch_ptr") {
4711       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4712                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR, Val,
4713                        ValRange);
4714       if (Val)
4715         ImpliedUserSGPRCount += 2;
4716     } else if (ID == ".amdhsa_user_sgpr_queue_ptr") {
4717       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4718                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR, Val,
4719                        ValRange);
4720       if (Val)
4721         ImpliedUserSGPRCount += 2;
4722     } else if (ID == ".amdhsa_user_sgpr_kernarg_segment_ptr") {
4723       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4724                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR,
4725                        Val, ValRange);
4726       if (Val)
4727         ImpliedUserSGPRCount += 2;
4728     } else if (ID == ".amdhsa_user_sgpr_dispatch_id") {
4729       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4730                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID, Val,
4731                        ValRange);
4732       if (Val)
4733         ImpliedUserSGPRCount += 2;
4734     } else if (ID == ".amdhsa_user_sgpr_flat_scratch_init") {
4735       if (hasArchitectedFlatScratch())
4736         return Error(IDRange.Start,
4737                      "directive is not supported with architected flat scratch",
4738                      IDRange);
4739       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4740                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT, Val,
4741                        ValRange);
4742       if (Val)
4743         ImpliedUserSGPRCount += 2;
4744     } else if (ID == ".amdhsa_user_sgpr_private_segment_size") {
4745       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4746                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE,
4747                        Val, ValRange);
4748       if (Val)
4749         ImpliedUserSGPRCount += 1;
4750     } else if (ID == ".amdhsa_wavefront_size32") {
4751       if (IVersion.Major < 10)
4752         return Error(IDRange.Start, "directive requires gfx10+", IDRange);
4753       EnableWavefrontSize32 = Val;
4754       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4755                        KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32,
4756                        Val, ValRange);
4757     } else if (ID == ".amdhsa_system_sgpr_private_segment_wavefront_offset") {
4758       if (hasArchitectedFlatScratch())
4759         return Error(IDRange.Start,
4760                      "directive is not supported with architected flat scratch",
4761                      IDRange);
4762       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4763                        COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, Val, ValRange);
4764     } else if (ID == ".amdhsa_enable_private_segment") {
4765       if (!hasArchitectedFlatScratch())
4766         return Error(
4767             IDRange.Start,
4768             "directive is not supported without architected flat scratch",
4769             IDRange);
4770       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4771                        COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, Val, ValRange);
4772     } else if (ID == ".amdhsa_system_sgpr_workgroup_id_x") {
4773       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4774                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, Val,
4775                        ValRange);
4776     } else if (ID == ".amdhsa_system_sgpr_workgroup_id_y") {
4777       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4778                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y, Val,
4779                        ValRange);
4780     } else if (ID == ".amdhsa_system_sgpr_workgroup_id_z") {
4781       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4782                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z, Val,
4783                        ValRange);
4784     } else if (ID == ".amdhsa_system_sgpr_workgroup_info") {
4785       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4786                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO, Val,
4787                        ValRange);
4788     } else if (ID == ".amdhsa_system_vgpr_workitem_id") {
4789       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4790                        COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID, Val,
4791                        ValRange);
4792     } else if (ID == ".amdhsa_next_free_vgpr") {
4793       VGPRRange = ValRange;
4794       NextFreeVGPR = Val;
4795     } else if (ID == ".amdhsa_next_free_sgpr") {
4796       SGPRRange = ValRange;
4797       NextFreeSGPR = Val;
4798     } else if (ID == ".amdhsa_accum_offset") {
4799       if (!isGFX90A())
4800         return Error(IDRange.Start, "directive requires gfx90a+", IDRange);
4801       AccumOffset = Val;
4802     } else if (ID == ".amdhsa_reserve_vcc") {
4803       if (!isUInt<1>(Val))
4804         return OutOfRangeError(ValRange);
4805       ReserveVCC = Val;
4806     } else if (ID == ".amdhsa_reserve_flat_scratch") {
4807       if (IVersion.Major < 7)
4808         return Error(IDRange.Start, "directive requires gfx7+", IDRange);
4809       if (hasArchitectedFlatScratch())
4810         return Error(IDRange.Start,
4811                      "directive is not supported with architected flat scratch",
4812                      IDRange);
4813       if (!isUInt<1>(Val))
4814         return OutOfRangeError(ValRange);
4815       ReserveFlatScr = Val;
4816     } else if (ID == ".amdhsa_reserve_xnack_mask") {
4817       if (IVersion.Major < 8)
4818         return Error(IDRange.Start, "directive requires gfx8+", IDRange);
4819       if (!isUInt<1>(Val))
4820         return OutOfRangeError(ValRange);
4821       if (Val != getTargetStreamer().getTargetID()->isXnackOnOrAny())
4822         return getParser().Error(IDRange.Start, ".amdhsa_reserve_xnack_mask does not match target id",
4823                                  IDRange);
4824     } else if (ID == ".amdhsa_float_round_mode_32") {
4825       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
4826                        COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32, Val, ValRange);
4827     } else if (ID == ".amdhsa_float_round_mode_16_64") {
4828       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
4829                        COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64, Val, ValRange);
4830     } else if (ID == ".amdhsa_float_denorm_mode_32") {
4831       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
4832                        COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32, Val, ValRange);
4833     } else if (ID == ".amdhsa_float_denorm_mode_16_64") {
4834       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
4835                        COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64, Val,
4836                        ValRange);
4837     } else if (ID == ".amdhsa_dx10_clamp") {
4838       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
4839                        COMPUTE_PGM_RSRC1_ENABLE_DX10_CLAMP, Val, ValRange);
4840     } else if (ID == ".amdhsa_ieee_mode") {
4841       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_ENABLE_IEEE_MODE,
4842                        Val, ValRange);
4843     } else if (ID == ".amdhsa_fp16_overflow") {
4844       if (IVersion.Major < 9)
4845         return Error(IDRange.Start, "directive requires gfx9+", IDRange);
4846       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FP16_OVFL, Val,
4847                        ValRange);
4848     } else if (ID == ".amdhsa_tg_split") {
4849       if (!isGFX90A())
4850         return Error(IDRange.Start, "directive requires gfx90a+", IDRange);
4851       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc3, COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT, Val,
4852                        ValRange);
4853     } else if (ID == ".amdhsa_workgroup_processor_mode") {
4854       if (IVersion.Major < 10)
4855         return Error(IDRange.Start, "directive requires gfx10+", IDRange);
4856       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_WGP_MODE, Val,
4857                        ValRange);
4858     } else if (ID == ".amdhsa_memory_ordered") {
4859       if (IVersion.Major < 10)
4860         return Error(IDRange.Start, "directive requires gfx10+", IDRange);
4861       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_MEM_ORDERED, Val,
4862                        ValRange);
4863     } else if (ID == ".amdhsa_forward_progress") {
4864       if (IVersion.Major < 10)
4865         return Error(IDRange.Start, "directive requires gfx10+", IDRange);
4866       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FWD_PROGRESS, Val,
4867                        ValRange);
4868     } else if (ID == ".amdhsa_exception_fp_ieee_invalid_op") {
4869       PARSE_BITS_ENTRY(
4870           KD.compute_pgm_rsrc2,
4871           COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION, Val,
4872           ValRange);
4873     } else if (ID == ".amdhsa_exception_fp_denorm_src") {
4874       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4875                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE,
4876                        Val, ValRange);
4877     } else if (ID == ".amdhsa_exception_fp_ieee_div_zero") {
4878       PARSE_BITS_ENTRY(
4879           KD.compute_pgm_rsrc2,
4880           COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO, Val,
4881           ValRange);
4882     } else if (ID == ".amdhsa_exception_fp_ieee_overflow") {
4883       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4884                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW,
4885                        Val, ValRange);
4886     } else if (ID == ".amdhsa_exception_fp_ieee_underflow") {
4887       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4888                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW,
4889                        Val, ValRange);
4890     } else if (ID == ".amdhsa_exception_fp_ieee_inexact") {
4891       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4892                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT,
4893                        Val, ValRange);
4894     } else if (ID == ".amdhsa_exception_int_div_zero") {
4895       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4896                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO,
4897                        Val, ValRange);
4898     } else {
4899       return Error(IDRange.Start, "unknown .amdhsa_kernel directive", IDRange);
4900     }
4901 
4902 #undef PARSE_BITS_ENTRY
4903   }
4904 
4905   if (Seen.find(".amdhsa_next_free_vgpr") == Seen.end())
4906     return TokError(".amdhsa_next_free_vgpr directive is required");
4907 
4908   if (Seen.find(".amdhsa_next_free_sgpr") == Seen.end())
4909     return TokError(".amdhsa_next_free_sgpr directive is required");
4910 
4911   unsigned VGPRBlocks;
4912   unsigned SGPRBlocks;
4913   if (calculateGPRBlocks(getFeatureBits(), ReserveVCC, ReserveFlatScr,
4914                          getTargetStreamer().getTargetID()->isXnackOnOrAny(),
4915                          EnableWavefrontSize32, NextFreeVGPR,
4916                          VGPRRange, NextFreeSGPR, SGPRRange, VGPRBlocks,
4917                          SGPRBlocks))
4918     return true;
4919 
4920   if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_WIDTH>(
4921           VGPRBlocks))
4922     return OutOfRangeError(VGPRRange);
4923   AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
4924                   COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT, VGPRBlocks);
4925 
4926   if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_WIDTH>(
4927           SGPRBlocks))
4928     return OutOfRangeError(SGPRRange);
4929   AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
4930                   COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT,
4931                   SGPRBlocks);
4932 
4933   if (ExplicitUserSGPRCount && ImpliedUserSGPRCount > *ExplicitUserSGPRCount)
4934     return TokError(".amdhsa_user_sgpr_count smaller than implied by "
4935                     "enabled user SGPRs");
4936 
4937   unsigned UserSGPRCount =
4938       ExplicitUserSGPRCount ? *ExplicitUserSGPRCount : ImpliedUserSGPRCount;
4939 
4940   if (!isUInt<COMPUTE_PGM_RSRC2_USER_SGPR_COUNT_WIDTH>(UserSGPRCount))
4941     return TokError("too many user SGPRs enabled");
4942   AMDHSA_BITS_SET(KD.compute_pgm_rsrc2, COMPUTE_PGM_RSRC2_USER_SGPR_COUNT,
4943                   UserSGPRCount);
4944 
4945   if (isGFX90A()) {
4946     if (Seen.find(".amdhsa_accum_offset") == Seen.end())
4947       return TokError(".amdhsa_accum_offset directive is required");
4948     if (AccumOffset < 4 || AccumOffset > 256 || (AccumOffset & 3))
4949       return TokError("accum_offset should be in range [4..256] in "
4950                       "increments of 4");
4951     if (AccumOffset > alignTo(std::max((uint64_t)1, NextFreeVGPR), 4))
4952       return TokError("accum_offset exceeds total VGPR allocation");
4953     AMDHSA_BITS_SET(KD.compute_pgm_rsrc3, COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET,
4954                     (AccumOffset / 4 - 1));
4955   }
4956 
4957   getTargetStreamer().EmitAmdhsaKernelDescriptor(
4958       getSTI(), KernelName, KD, NextFreeVGPR, NextFreeSGPR, ReserveVCC,
4959       ReserveFlatScr);
4960   return false;
4961 }
4962 
4963 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectVersion() {
4964   uint32_t Major;
4965   uint32_t Minor;
4966 
4967   if (ParseDirectiveMajorMinor(Major, Minor))
4968     return true;
4969 
4970   getTargetStreamer().EmitDirectiveHSACodeObjectVersion(Major, Minor);
4971   return false;
4972 }
4973 
4974 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectISA() {
4975   uint32_t Major;
4976   uint32_t Minor;
4977   uint32_t Stepping;
4978   StringRef VendorName;
4979   StringRef ArchName;
4980 
4981   // If this directive has no arguments, then use the ISA version for the
4982   // targeted GPU.
4983   if (isToken(AsmToken::EndOfStatement)) {
4984     AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
4985     getTargetStreamer().EmitDirectiveHSACodeObjectISAV2(ISA.Major, ISA.Minor,
4986                                                         ISA.Stepping,
4987                                                         "AMD", "AMDGPU");
4988     return false;
4989   }
4990 
4991   if (ParseDirectiveMajorMinor(Major, Minor))
4992     return true;
4993 
4994   if (!trySkipToken(AsmToken::Comma))
4995     return TokError("stepping version number required, comma expected");
4996 
4997   if (ParseAsAbsoluteExpression(Stepping))
4998     return TokError("invalid stepping version");
4999 
5000   if (!trySkipToken(AsmToken::Comma))
5001     return TokError("vendor name required, comma expected");
5002 
5003   if (!parseString(VendorName, "invalid vendor name"))
5004     return true;
5005 
5006   if (!trySkipToken(AsmToken::Comma))
5007     return TokError("arch name required, comma expected");
5008 
5009   if (!parseString(ArchName, "invalid arch name"))
5010     return true;
5011 
5012   getTargetStreamer().EmitDirectiveHSACodeObjectISAV2(Major, Minor, Stepping,
5013                                                       VendorName, ArchName);
5014   return false;
5015 }
5016 
5017 bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID,
5018                                                amd_kernel_code_t &Header) {
5019   // max_scratch_backing_memory_byte_size is deprecated. Ignore it while parsing
5020   // assembly for backwards compatibility.
5021   if (ID == "max_scratch_backing_memory_byte_size") {
5022     Parser.eatToEndOfStatement();
5023     return false;
5024   }
5025 
5026   SmallString<40> ErrStr;
5027   raw_svector_ostream Err(ErrStr);
5028   if (!parseAmdKernelCodeField(ID, getParser(), Header, Err)) {
5029     return TokError(Err.str());
5030   }
5031   Lex();
5032 
5033   if (ID == "enable_wavefront_size32") {
5034     if (Header.code_properties & AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32) {
5035       if (!isGFX10Plus())
5036         return TokError("enable_wavefront_size32=1 is only allowed on GFX10+");
5037       if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32])
5038         return TokError("enable_wavefront_size32=1 requires +WavefrontSize32");
5039     } else {
5040       if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64])
5041         return TokError("enable_wavefront_size32=0 requires +WavefrontSize64");
5042     }
5043   }
5044 
5045   if (ID == "wavefront_size") {
5046     if (Header.wavefront_size == 5) {
5047       if (!isGFX10Plus())
5048         return TokError("wavefront_size=5 is only allowed on GFX10+");
5049       if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32])
5050         return TokError("wavefront_size=5 requires +WavefrontSize32");
5051     } else if (Header.wavefront_size == 6) {
5052       if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64])
5053         return TokError("wavefront_size=6 requires +WavefrontSize64");
5054     }
5055   }
5056 
5057   if (ID == "enable_wgp_mode") {
5058     if (G_00B848_WGP_MODE(Header.compute_pgm_resource_registers) &&
5059         !isGFX10Plus())
5060       return TokError("enable_wgp_mode=1 is only allowed on GFX10+");
5061   }
5062 
5063   if (ID == "enable_mem_ordered") {
5064     if (G_00B848_MEM_ORDERED(Header.compute_pgm_resource_registers) &&
5065         !isGFX10Plus())
5066       return TokError("enable_mem_ordered=1 is only allowed on GFX10+");
5067   }
5068 
5069   if (ID == "enable_fwd_progress") {
5070     if (G_00B848_FWD_PROGRESS(Header.compute_pgm_resource_registers) &&
5071         !isGFX10Plus())
5072       return TokError("enable_fwd_progress=1 is only allowed on GFX10+");
5073   }
5074 
5075   return false;
5076 }
5077 
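// Parse an .amd_kernel_code_t block of "key = value" fields terminated by
// .end_amd_kernel_code_t. Illustrative example (field names abridged; see
// AMDKernelCodeT.h for the full set):
//
//   .amd_kernel_code_t
//     wavefront_size = 6
//     enable_sgpr_kernarg_segment_ptr = 1
//   .end_amd_kernel_code_t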
5078 bool AMDGPUAsmParser::ParseDirectiveAMDKernelCodeT() {
5079   amd_kernel_code_t Header;
5080   AMDGPU::initDefaultAMDKernelCodeT(Header, &getSTI());
5081 
5082   while (true) {
5083     // Lex EndOfStatement.  This is in a while loop, because lexing a comment
5084     // will set the current token to EndOfStatement.
5085     while(trySkipToken(AsmToken::EndOfStatement));
5086 
5087     StringRef ID;
5088     if (!parseId(ID, "expected value identifier or .end_amd_kernel_code_t"))
5089       return true;
5090 
5091     if (ID == ".end_amd_kernel_code_t")
5092       break;
5093 
5094     if (ParseAMDKernelCodeTValue(ID, Header))
5095       return true;
5096   }
5097 
5098   getTargetStreamer().EmitAMDKernelCodeT(Header);
5099 
5100   return false;
5101 }
5102 
5103 bool AMDGPUAsmParser::ParseDirectiveAMDGPUHsaKernel() {
5104   StringRef KernelName;
5105   if (!parseId(KernelName, "expected symbol name"))
5106     return true;
5107 
5108   getTargetStreamer().EmitAMDGPUSymbolType(KernelName,
5109                                            ELF::STT_AMDGPU_HSA_KERNEL);
5110 
5111   KernelScope.initialize(getContext());
5112   return false;
5113 }
5114 
5115 bool AMDGPUAsmParser::ParseDirectiveISAVersion() {
5116   if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) {
5117     return Error(getLoc(),
5118                  ".amd_amdgpu_isa directive is not available on non-amdgcn "
5119                  "architectures");
5120   }
5121 
5122   auto TargetIDDirective = getLexer().getTok().getStringContents();
5123   if (getTargetStreamer().getTargetID()->toString() != TargetIDDirective)
5124     return Error(getParser().getTok().getLoc(), "target id must match options");
5125 
5126   getTargetStreamer().EmitISAVersion();
5127   Lex();
5128 
5129   return false;
5130 }
5131 
5132 bool AMDGPUAsmParser::ParseDirectiveHSAMetadata() {
5133   const char *AssemblerDirectiveBegin;
5134   const char *AssemblerDirectiveEnd;
5135   std::tie(AssemblerDirectiveBegin, AssemblerDirectiveEnd) =
5136       isHsaAbiVersion3AndAbove(&getSTI())
5137           ? std::make_tuple(HSAMD::V3::AssemblerDirectiveBegin,
5138                             HSAMD::V3::AssemblerDirectiveEnd)
5139           : std::make_tuple(HSAMD::AssemblerDirectiveBegin,
5140                             HSAMD::AssemblerDirectiveEnd);
5141 
5142   if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA) {
5143     return Error(getLoc(),
5144                  (Twine(AssemblerDirectiveBegin) + Twine(" directive is "
5145                  "not available on non-amdhsa OSes")).str());
5146   }
5147 
5148   std::string HSAMetadataString;
5149   if (ParseToEndDirective(AssemblerDirectiveBegin, AssemblerDirectiveEnd,
5150                           HSAMetadataString))
5151     return true;
5152 
5153   if (isHsaAbiVersion3AndAbove(&getSTI())) {
5154     if (!getTargetStreamer().EmitHSAMetadataV3(HSAMetadataString))
5155       return Error(getLoc(), "invalid HSA metadata");
5156   } else {
5157     if (!getTargetStreamer().EmitHSAMetadataV2(HSAMetadataString))
5158       return Error(getLoc(), "invalid HSA metadata");
5159   }
5160 
5161   return false;
5162 }
5163 
5164 /// Common code to parse out a block of text (typically YAML) between start and
5165 /// end directives.
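/// The begin directive is expected to have already been consumed by the
/// caller; statements are accumulated verbatim into CollectString until
/// AssemblerDirectiveEnd is reached.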
5166 bool AMDGPUAsmParser::ParseToEndDirective(const char *AssemblerDirectiveBegin,
5167                                           const char *AssemblerDirectiveEnd,
5168                                           std::string &CollectString) {
5169 
5170   raw_string_ostream CollectStream(CollectString);
5171 
5172   getLexer().setSkipSpace(false);
5173 
5174   bool FoundEnd = false;
5175   while (!isToken(AsmToken::Eof)) {
5176     while (isToken(AsmToken::Space)) {
5177       CollectStream << getTokenStr();
5178       Lex();
5179     }
5180 
5181     if (trySkipId(AssemblerDirectiveEnd)) {
5182       FoundEnd = true;
5183       break;
5184     }
5185 
5186     CollectStream << Parser.parseStringToEndOfStatement()
5187                   << getContext().getAsmInfo()->getSeparatorString();
5188 
5189     Parser.eatToEndOfStatement();
5190   }
5191 
5192   getLexer().setSkipSpace(true);
5193 
5194   if (isToken(AsmToken::Eof) && !FoundEnd) {
5195     return TokError(Twine("expected directive ") +
5196                     Twine(AssemblerDirectiveEnd) + Twine(" not found"));
5197   }
5198 
5199   CollectStream.flush();
5200   return false;
5201 }
5202 
5203 /// Parse the assembler directive for new MsgPack-format PAL metadata.
5204 bool AMDGPUAsmParser::ParseDirectivePALMetadataBegin() {
5205   std::string String;
5206   if (ParseToEndDirective(AMDGPU::PALMD::AssemblerDirectiveBegin,
5207                           AMDGPU::PALMD::AssemblerDirectiveEnd, String))
5208     return true;
5209 
5210   auto PALMetadata = getTargetStreamer().getPALMetadata();
5211   if (!PALMetadata->setFromString(String))
5212     return Error(getLoc(), "invalid PAL metadata");
5213   return false;
5214 }
5215 
5216 /// Parse the assembler directive for old linear-format PAL metadata.
5217 bool AMDGPUAsmParser::ParseDirectivePALMetadata() {
5218   if (getSTI().getTargetTriple().getOS() != Triple::AMDPAL) {
5219     return Error(getLoc(),
5220                  (Twine(PALMD::AssemblerDirective) + Twine(" directive is "
5221                  "not available on non-amdpal OSes")).str());
5222   }
5223 
5224   auto PALMetadata = getTargetStreamer().getPALMetadata();
5225   PALMetadata->setLegacy();
5226   for (;;) {
5227     uint32_t Key, Value;
5228     if (ParseAsAbsoluteExpression(Key)) {
5229       return TokError(Twine("invalid value in ") +
5230                       Twine(PALMD::AssemblerDirective));
5231     }
5232     if (!trySkipToken(AsmToken::Comma)) {
5233       return TokError(Twine("expected an even number of values in ") +
5234                       Twine(PALMD::AssemblerDirective));
5235     }
5236     if (ParseAsAbsoluteExpression(Value)) {
5237       return TokError(Twine("invalid value in ") +
5238                       Twine(PALMD::AssemblerDirective));
5239     }
5240     PALMetadata->setRegister(Key, Value);
5241     if (!trySkipToken(AsmToken::Comma))
5242       break;
5243   }
5244   return false;
5245 }
5246 
5247 /// ParseDirectiveAMDGPULDS
5248 ///  ::= .amdgpu_lds identifier ',' size_expression [',' align_expression]
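///  e.g. (illustrative): .amdgpu_lds my_lds_var, 512, 16
///  The size is in bytes; the alignment defaults to 4 and must be a power of
///  two below 2^31.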
5249 bool AMDGPUAsmParser::ParseDirectiveAMDGPULDS() {
5250   if (getParser().checkForValidSection())
5251     return true;
5252 
5253   StringRef Name;
5254   SMLoc NameLoc = getLoc();
5255   if (getParser().parseIdentifier(Name))
5256     return TokError("expected identifier in directive");
5257 
5258   MCSymbol *Symbol = getContext().getOrCreateSymbol(Name);
5259   if (parseToken(AsmToken::Comma, "expected ','"))
5260     return true;
5261 
5262   unsigned LocalMemorySize = AMDGPU::IsaInfo::getLocalMemorySize(&getSTI());
5263 
5264   int64_t Size;
5265   SMLoc SizeLoc = getLoc();
5266   if (getParser().parseAbsoluteExpression(Size))
5267     return true;
5268   if (Size < 0)
5269     return Error(SizeLoc, "size must be non-negative");
5270   if (Size > LocalMemorySize)
5271     return Error(SizeLoc, "size is too large");
5272 
5273   int64_t Alignment = 4;
5274   if (trySkipToken(AsmToken::Comma)) {
5275     SMLoc AlignLoc = getLoc();
5276     if (getParser().parseAbsoluteExpression(Alignment))
5277       return true;
5278     if (Alignment < 0 || !isPowerOf2_64(Alignment))
5279       return Error(AlignLoc, "alignment must be a power of two");
5280 
5281     // Alignment larger than the size of LDS is possible in theory, as long
5282     // as the linker manages to place the symbol at address 0, but we do want
5283     // to make sure the alignment fits nicely into a 32-bit integer.
5284     if (Alignment >= 1u << 31)
5285       return Error(AlignLoc, "alignment is too large");
5286   }
5287 
5288   if (parseToken(AsmToken::EndOfStatement,
5289                  "unexpected token in '.amdgpu_lds' directive"))
5290     return true;
5291 
5292   Symbol->redefineIfPossible();
5293   if (!Symbol->isUndefined())
5294     return Error(NameLoc, "invalid symbol redefinition");
5295 
5296   getTargetStreamer().emitAMDGPULDS(Symbol, Size, Align(Alignment));
5297   return false;
5298 }
5299 
5300 bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) {
5301   StringRef IDVal = DirectiveID.getString();
5302 
5303   if (isHsaAbiVersion3AndAbove(&getSTI())) {
5304     if (IDVal == ".amdhsa_kernel")
5305       return ParseDirectiveAMDHSAKernel();
5306 
5307     // TODO: Restructure/combine with PAL metadata directive.
5308     if (IDVal == AMDGPU::HSAMD::V3::AssemblerDirectiveBegin)
5309       return ParseDirectiveHSAMetadata();
5310   } else {
5311     if (IDVal == ".hsa_code_object_version")
5312       return ParseDirectiveHSACodeObjectVersion();
5313 
5314     if (IDVal == ".hsa_code_object_isa")
5315       return ParseDirectiveHSACodeObjectISA();
5316 
5317     if (IDVal == ".amd_kernel_code_t")
5318       return ParseDirectiveAMDKernelCodeT();
5319 
5320     if (IDVal == ".amdgpu_hsa_kernel")
5321       return ParseDirectiveAMDGPUHsaKernel();
5322 
5323     if (IDVal == ".amd_amdgpu_isa")
5324       return ParseDirectiveISAVersion();
5325 
5326     if (IDVal == AMDGPU::HSAMD::AssemblerDirectiveBegin)
5327       return ParseDirectiveHSAMetadata();
5328   }
5329 
5330   if (IDVal == ".amdgcn_target")
5331     return ParseDirectiveAMDGCNTarget();
5332 
5333   if (IDVal == ".amdgpu_lds")
5334     return ParseDirectiveAMDGPULDS();
5335 
5336   if (IDVal == PALMD::AssemblerDirectiveBegin)
5337     return ParseDirectivePALMetadataBegin();
5338 
5339   if (IDVal == PALMD::AssemblerDirective)
5340     return ParseDirectivePALMetadata();
5341 
5342   return true;
5343 }
5344 
5345 bool AMDGPUAsmParser::subtargetHasRegister(const MCRegisterInfo &MRI,
5346                                            unsigned RegNo) {
5347 
5348   if (MRI.regsOverlap(AMDGPU::TTMP12_TTMP13_TTMP14_TTMP15, RegNo))
5349     return isGFX9Plus();
5350 
5351   // GFX10 has 2 more SGPRs 104 and 105.
5352   if (MRI.regsOverlap(AMDGPU::SGPR104_SGPR105, RegNo))
5353     return hasSGPR104_SGPR105();
5354 
5355   switch (RegNo) {
5356   case AMDGPU::SRC_SHARED_BASE:
5357   case AMDGPU::SRC_SHARED_LIMIT:
5358   case AMDGPU::SRC_PRIVATE_BASE:
5359   case AMDGPU::SRC_PRIVATE_LIMIT:
5360   case AMDGPU::SRC_POPS_EXITING_WAVE_ID:
5361     return isGFX9Plus();
5362   case AMDGPU::TBA:
5363   case AMDGPU::TBA_LO:
5364   case AMDGPU::TBA_HI:
5365   case AMDGPU::TMA:
5366   case AMDGPU::TMA_LO:
5367   case AMDGPU::TMA_HI:
5368     return !isGFX9Plus();
5369   case AMDGPU::XNACK_MASK:
5370   case AMDGPU::XNACK_MASK_LO:
5371   case AMDGPU::XNACK_MASK_HI:
5372     return (isVI() || isGFX9()) && getTargetStreamer().getTargetID()->isXnackSupported();
5373   case AMDGPU::SGPR_NULL:
5374     return isGFX10Plus();
5375   default:
5376     break;
5377   }
5378 
5379   if (isCI())
5380     return true;
5381 
5382   if (isSI() || isGFX10Plus()) {
5383     // No flat_scr on SI.
5384     // On GFX10 flat scratch is not a valid register operand and can only be
5385     // accessed with s_setreg/s_getreg.
5386     switch (RegNo) {
5387     case AMDGPU::FLAT_SCR:
5388     case AMDGPU::FLAT_SCR_LO:
5389     case AMDGPU::FLAT_SCR_HI:
5390       return false;
5391     default:
5392       return true;
5393     }
5394   }
5395 
5396   // VI only has 102 SGPRs, so make sure we aren't trying to use the 2 more that
5397   // SI/CI have.
5398   if (MRI.regsOverlap(AMDGPU::SGPR102_SGPR103, RegNo))
5399     return hasSGPR102_SGPR103();
5400 
5401   return true;
5402 }
5403 
5404 OperandMatchResultTy
5405 AMDGPUAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic,
5406                               OperandMode Mode) {
5407   // Try to parse with a custom parser
5408   OperandMatchResultTy ResTy = MatchOperandParserImpl(Operands, Mnemonic);
5409 
5410   // If we successfully parsed the operand or if there was an error parsing,
5411   // we are done.
5412   //
5413   // If we are parsing after we reach EndOfStatement then this means we
5414   // are appending default values to the Operands list.  This is only done
5415   // by custom parsers, so we shouldn't continue on to the generic parsing.
5416   if (ResTy == MatchOperand_Success || ResTy == MatchOperand_ParseFail ||
5417       isToken(AsmToken::EndOfStatement))
5418     return ResTy;
5419 
5420   SMLoc RBraceLoc;
5421   SMLoc LBraceLoc = getLoc();
5422   if (Mode == OperandMode_NSA && trySkipToken(AsmToken::LBrac)) {
5423     unsigned Prefix = Operands.size();
5424 
5425     for (;;) {
5426       auto Loc = getLoc();
5427       ResTy = parseReg(Operands);
5428       if (ResTy == MatchOperand_NoMatch)
5429         Error(Loc, "expected a register");
5430       if (ResTy != MatchOperand_Success)
5431         return MatchOperand_ParseFail;
5432 
5433       RBraceLoc = getLoc();
5434       if (trySkipToken(AsmToken::RBrac))
5435         break;
5436 
5437       if (!skipToken(AsmToken::Comma,
5438                      "expected a comma or a closing square bracket")) {
5439         return MatchOperand_ParseFail;
5440       }
5441     }
5442 
5443     if (Operands.size() - Prefix > 1) {
5444       Operands.insert(Operands.begin() + Prefix,
5445                       AMDGPUOperand::CreateToken(this, "[", LBraceLoc));
5446       Operands.push_back(AMDGPUOperand::CreateToken(this, "]", RBraceLoc));
5447     }
5448 
5449     return MatchOperand_Success;
5450   }
5451 
5452   return parseRegOrImm(Operands);
5453 }
5454 
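// Strip a recognized encoding suffix (_e32, _e64, _dpp or _sdwa) from the
// mnemonic and record the corresponding forced encoding; e.g. "v_add_f32_e64"
// is matched as "v_add_f32" with a forced 64-bit encoding.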
5455 StringRef AMDGPUAsmParser::parseMnemonicSuffix(StringRef Name) {
5456   // Clear any forced encodings from the previous instruction.
5457   setForcedEncodingSize(0);
5458   setForcedDPP(false);
5459   setForcedSDWA(false);
5460 
5461   if (Name.endswith("_e64")) {
5462     setForcedEncodingSize(64);
5463     return Name.substr(0, Name.size() - 4);
5464   } else if (Name.endswith("_e32")) {
5465     setForcedEncodingSize(32);
5466     return Name.substr(0, Name.size() - 4);
5467   } else if (Name.endswith("_dpp")) {
5468     setForcedDPP(true);
5469     return Name.substr(0, Name.size() - 4);
5470   } else if (Name.endswith("_sdwa")) {
5471     setForcedSDWA(true);
5472     return Name.substr(0, Name.size() - 5);
5473   }
5474   return Name;
5475 }
5476 
5477 bool AMDGPUAsmParser::ParseInstruction(ParseInstructionInfo &Info,
5478                                        StringRef Name,
5479                                        SMLoc NameLoc, OperandVector &Operands) {
5480   // Add the instruction mnemonic
5481   Name = parseMnemonicSuffix(Name);
5482   Operands.push_back(AMDGPUOperand::CreateToken(this, Name, NameLoc));
5483 
5484   bool IsMIMG = Name.startswith("image_");
5485 
5486   while (!trySkipToken(AsmToken::EndOfStatement)) {
5487     OperandMode Mode = OperandMode_Default;
5488     if (IsMIMG && isGFX10Plus() && Operands.size() == 2)
5489       Mode = OperandMode_NSA;
5490     CPolSeen = 0;
5491     OperandMatchResultTy Res = parseOperand(Operands, Name, Mode);
5492 
5493     if (Res != MatchOperand_Success) {
5494       checkUnsupportedInstruction(Name, NameLoc);
5495       if (!Parser.hasPendingError()) {
5496         // FIXME: use real operand location rather than the current location.
5497         StringRef Msg =
5498           (Res == MatchOperand_ParseFail) ? "failed parsing operand." :
5499                                             "not a valid operand.";
5500         Error(getLoc(), Msg);
5501       }
5502       while (!trySkipToken(AsmToken::EndOfStatement)) {
5503         lex();
5504       }
5505       return true;
5506     }
5507 
5508     // Eat the comma or space if there is one.
5509     trySkipToken(AsmToken::Comma);
5510   }
5511 
5512   return false;
5513 }
5514 
5515 //===----------------------------------------------------------------------===//
5516 // Utility functions
5517 //===----------------------------------------------------------------------===//
5518 
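// Parse an integer that follows a "Prefix:" introducer, e.g. an operand
// written as "offset:16" (illustrative; the accepted prefixes are chosen by
// the callers).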
5519 OperandMatchResultTy
5520 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, int64_t &IntVal) {
5521 
5522   if (!trySkipId(Prefix, AsmToken::Colon))
5523     return MatchOperand_NoMatch;
5524 
5525   return parseExpr(IntVal) ? MatchOperand_Success : MatchOperand_ParseFail;
5526 }
5527 
5528 OperandMatchResultTy
5529 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
5530                                     AMDGPUOperand::ImmTy ImmTy,
5531                                     bool (*ConvertResult)(int64_t&)) {
5532   SMLoc S = getLoc();
5533   int64_t Value = 0;
5534 
5535   OperandMatchResultTy Res = parseIntWithPrefix(Prefix, Value);
5536   if (Res != MatchOperand_Success)
5537     return Res;
5538 
5539   if (ConvertResult && !ConvertResult(Value)) {
5540     Error(S, "invalid " + StringRef(Prefix) + " value.");
5541   }
5542 
5543   Operands.push_back(AMDGPUOperand::CreateImm(this, Value, S, ImmTy));
5544   return MatchOperand_Success;
5545 }
5546 
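// Parse a bit-array operand of the form "Prefix:[a,b,...]" with up to four
// elements, each of which must be 0 or 1, e.g. "op_sel:[0,1]" (illustrative).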
5547 OperandMatchResultTy
5548 AMDGPUAsmParser::parseOperandArrayWithPrefix(const char *Prefix,
5549                                              OperandVector &Operands,
5550                                              AMDGPUOperand::ImmTy ImmTy,
5551                                              bool (*ConvertResult)(int64_t&)) {
5552   SMLoc S = getLoc();
5553   if (!trySkipId(Prefix, AsmToken::Colon))
5554     return MatchOperand_NoMatch;
5555 
5556   if (!skipToken(AsmToken::LBrac, "expected a left square bracket"))
5557     return MatchOperand_ParseFail;
5558 
5559   unsigned Val = 0;
5560   const unsigned MaxSize = 4;
5561 
5562   // FIXME: How to verify the number of elements matches the number of src
5563   // operands?
5564   for (int I = 0; ; ++I) {
5565     int64_t Op;
5566     SMLoc Loc = getLoc();
5567     if (!parseExpr(Op))
5568       return MatchOperand_ParseFail;
5569 
5570     if (Op != 0 && Op != 1) {
5571       Error(Loc, "invalid " + StringRef(Prefix) + " value.");
5572       return MatchOperand_ParseFail;
5573     }
5574 
5575     Val |= (Op << I);
5576 
5577     if (trySkipToken(AsmToken::RBrac))
5578       break;
5579 
5580     if (I + 1 == MaxSize) {
5581       Error(getLoc(), "expected a closing square bracket");
5582       return MatchOperand_ParseFail;
5583     }
5584 
5585     if (!skipToken(AsmToken::Comma, "expected a comma"))
5586       return MatchOperand_ParseFail;
5587   }
5588 
5589   Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S, ImmTy));
5590   return MatchOperand_Success;
5591 }
5592 
5593 OperandMatchResultTy
5594 AMDGPUAsmParser::parseNamedBit(StringRef Name, OperandVector &Operands,
5595                                AMDGPUOperand::ImmTy ImmTy) {
5596   int64_t Bit;
5597   SMLoc S = getLoc();
5598 
5599   if (trySkipId(Name)) {
5600     Bit = 1;
5601   } else if (trySkipId("no", Name)) {
5602     Bit = 0;
5603   } else {
5604     return MatchOperand_NoMatch;
5605   }
5606 
5607   if (Name == "r128" && !hasMIMG_R128()) {
5608     Error(S, "r128 modifier is not supported on this GPU");
5609     return MatchOperand_ParseFail;
5610   }
5611   if (Name == "a16" && !isGFX9() && !hasGFX10A16()) {
5612     Error(S, "a16 modifier is not supported on this GPU");
5613     return MatchOperand_ParseFail;
5614   }
5615 
5616   if (isGFX9() && ImmTy == AMDGPUOperand::ImmTyA16)
5617     ImmTy = AMDGPUOperand::ImmTyR128A16;
5618 
5619   Operands.push_back(AMDGPUOperand::CreateImm(this, Bit, S, ImmTy));
5620   return MatchOperand_Success;
5621 }
5622 
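// Parse a single cache-policy modifier: one of glc, slc, dlc or scc, or its
// negated "no" form (noglc, noslc, ...). dlc requires GFX10+, scc requires
// GFX90A, and repeating a modifier on one instruction is rejected via
// CPolSeen.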
5623 OperandMatchResultTy
5624 AMDGPUAsmParser::parseCPol(OperandVector &Operands) {
5625   unsigned CPolOn = 0;
5626   unsigned CPolOff = 0;
5627   SMLoc S = getLoc();
5628 
5629   if (trySkipId("glc"))
5630     CPolOn = AMDGPU::CPol::GLC;
5631   else if (trySkipId("noglc"))
5632     CPolOff = AMDGPU::CPol::GLC;
5633   else if (trySkipId("slc"))
5634     CPolOn = AMDGPU::CPol::SLC;
5635   else if (trySkipId("noslc"))
5636     CPolOff = AMDGPU::CPol::SLC;
5637   else if (trySkipId("dlc"))
5638     CPolOn = AMDGPU::CPol::DLC;
5639   else if (trySkipId("nodlc"))
5640     CPolOff = AMDGPU::CPol::DLC;
5641   else if (trySkipId("scc"))
5642     CPolOn = AMDGPU::CPol::SCC;
5643   else if (trySkipId("noscc"))
5644     CPolOff = AMDGPU::CPol::SCC;
5645   else
5646     return MatchOperand_NoMatch;
5647 
5648   if (!isGFX10Plus() && ((CPolOn | CPolOff) & AMDGPU::CPol::DLC)) {
5649     Error(S, "dlc modifier is not supported on this GPU");
5650     return MatchOperand_ParseFail;
5651   }
5652 
5653   if (!isGFX90A() && ((CPolOn | CPolOff) & AMDGPU::CPol::SCC)) {
5654     Error(S, "scc modifier is not supported on this GPU");
5655     return MatchOperand_ParseFail;
5656   }
5657 
5658   if (CPolSeen & (CPolOn | CPolOff)) {
5659     Error(S, "duplicate cache policy modifier");
5660     return MatchOperand_ParseFail;
5661   }
5662 
5663   CPolSeen |= (CPolOn | CPolOff);
5664 
5665   for (unsigned I = 1; I != Operands.size(); ++I) {
5666     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
5667     if (Op.isCPol()) {
5668       Op.setImm((Op.getImm() | CPolOn) & ~CPolOff);
5669       return MatchOperand_Success;
5670     }
5671   }
5672 
5673   Operands.push_back(AMDGPUOperand::CreateImm(this, CPolOn, S,
5674                                               AMDGPUOperand::ImmTyCPol));
5675 
5676   return MatchOperand_Success;
5677 }
5678 
5679 static void addOptionalImmOperand(
5680   MCInst& Inst, const OperandVector& Operands,
5681   AMDGPUAsmParser::OptionalImmIndexMap& OptionalIdx,
5682   AMDGPUOperand::ImmTy ImmT,
5683   int64_t Default = 0) {
5684   auto i = OptionalIdx.find(ImmT);
5685   if (i != OptionalIdx.end()) {
5686     unsigned Idx = i->second;
5687     ((AMDGPUOperand &)*Operands[Idx]).addImmOperands(Inst, 1);
5688   } else {
5689     Inst.addOperand(MCOperand::createImm(Default));
5690   }
5691 }
5692 
5693 OperandMatchResultTy
5694 AMDGPUAsmParser::parseStringWithPrefix(StringRef Prefix,
5695                                        StringRef &Value,
5696                                        SMLoc &StringLoc) {
5697   if (!trySkipId(Prefix, AsmToken::Colon))
5698     return MatchOperand_NoMatch;
5699 
5700   StringLoc = getLoc();
5701   return parseId(Value, "expected an identifier") ? MatchOperand_Success
5702                                                   : MatchOperand_ParseFail;
5703 }
5704 
5705 //===----------------------------------------------------------------------===//
5706 // MTBUF format
5707 //===----------------------------------------------------------------------===//
5708 
5709 bool AMDGPUAsmParser::tryParseFmt(const char *Pref,
5710                                   int64_t MaxVal,
5711                                   int64_t &Fmt) {
5712   int64_t Val;
5713   SMLoc Loc = getLoc();
5714 
5715   auto Res = parseIntWithPrefix(Pref, Val);
5716   if (Res == MatchOperand_ParseFail)
5717     return false;
5718   if (Res == MatchOperand_NoMatch)
5719     return true;
5720 
5721   if (Val < 0 || Val > MaxVal) {
5722     Error(Loc, Twine("out of range ", StringRef(Pref)));
5723     return false;
5724   }
5725 
5726   Fmt = Val;
5727   return true;
5728 }
5729 
5730 // dfmt and nfmt (in a tbuffer instruction) are parsed as one to allow their
5731 // values to live in a joint format operand in the MCInst encoding.
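// Legacy syntax (illustrative): "dfmt:1, nfmt:7", with either field optional
// and the two accepted in either order; unspecified fields keep their default
// encodings.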
5732 OperandMatchResultTy
5733 AMDGPUAsmParser::parseDfmtNfmt(int64_t &Format) {
5734   using namespace llvm::AMDGPU::MTBUFFormat;
5735 
5736   int64_t Dfmt = DFMT_UNDEF;
5737   int64_t Nfmt = NFMT_UNDEF;
5738 
5739   // dfmt and nfmt can appear in either order, and each is optional.
5740   for (int I = 0; I < 2; ++I) {
5741     if (Dfmt == DFMT_UNDEF && !tryParseFmt("dfmt", DFMT_MAX, Dfmt))
5742       return MatchOperand_ParseFail;
5743 
5744     if (Nfmt == NFMT_UNDEF && !tryParseFmt("nfmt", NFMT_MAX, Nfmt)) {
5745       return MatchOperand_ParseFail;
5746     }
5747     // Skip optional comma between dfmt/nfmt
5748     // but guard against 2 commas following each other.
5749     if ((Dfmt == DFMT_UNDEF) != (Nfmt == NFMT_UNDEF) &&
5750         !peekToken().is(AsmToken::Comma)) {
5751       trySkipToken(AsmToken::Comma);
5752     }
5753   }
5754 
5755   if (Dfmt == DFMT_UNDEF && Nfmt == NFMT_UNDEF)
5756     return MatchOperand_NoMatch;
5757 
5758   Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt;
5759   Nfmt = (Nfmt == NFMT_UNDEF) ? NFMT_DEFAULT : Nfmt;
5760 
5761   Format = encodeDfmtNfmt(Dfmt, Nfmt);
5762   return MatchOperand_Success;
5763 }
5764 
5765 OperandMatchResultTy
5766 AMDGPUAsmParser::parseUfmt(int64_t &Format) {
5767   using namespace llvm::AMDGPU::MTBUFFormat;
5768 
5769   int64_t Fmt = UFMT_UNDEF;
5770 
5771   if (!tryParseFmt("format", UFMT_MAX, Fmt))
5772     return MatchOperand_ParseFail;
5773 
5774   if (Fmt == UFMT_UNDEF)
5775     return MatchOperand_NoMatch;
5776 
5777   Format = Fmt;
5778   return MatchOperand_Success;
5779 }
5780 
5781 bool AMDGPUAsmParser::matchDfmtNfmt(int64_t &Dfmt,
5782                                     int64_t &Nfmt,
5783                                     StringRef FormatStr,
5784                                     SMLoc Loc) {
5785   using namespace llvm::AMDGPU::MTBUFFormat;
5786   int64_t Format;
5787 
5788   Format = getDfmt(FormatStr);
5789   if (Format != DFMT_UNDEF) {
5790     Dfmt = Format;
5791     return true;
5792   }
5793 
5794   Format = getNfmt(FormatStr, getSTI());
5795   if (Format != NFMT_UNDEF) {
5796     Nfmt = Format;
5797     return true;
5798   }
5799 
5800   Error(Loc, "unsupported format");
5801   return false;
5802 }
5803 
5804 OperandMatchResultTy
5805 AMDGPUAsmParser::parseSymbolicSplitFormat(StringRef FormatStr,
5806                                           SMLoc FormatLoc,
5807                                           int64_t &Format) {
5808   using namespace llvm::AMDGPU::MTBUFFormat;
5809 
5810   int64_t Dfmt = DFMT_UNDEF;
5811   int64_t Nfmt = NFMT_UNDEF;
5812   if (!matchDfmtNfmt(Dfmt, Nfmt, FormatStr, FormatLoc))
5813     return MatchOperand_ParseFail;
5814 
5815   if (trySkipToken(AsmToken::Comma)) {
5816     StringRef Str;
5817     SMLoc Loc = getLoc();
5818     if (!parseId(Str, "expected a format string") ||
5819         !matchDfmtNfmt(Dfmt, Nfmt, Str, Loc)) {
5820       return MatchOperand_ParseFail;
5821     }
5822     if (Dfmt == DFMT_UNDEF) {
5823       Error(Loc, "duplicate numeric format");
5824       return MatchOperand_ParseFail;
5825     } else if (Nfmt == NFMT_UNDEF) {
5826       Error(Loc, "duplicate data format");
5827       return MatchOperand_ParseFail;
5828     }
5829   }
5830 
5831   Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt;
5832   Nfmt = (Nfmt == NFMT_UNDEF) ? NFMT_DEFAULT : Nfmt;
5833 
5834   if (isGFX10Plus()) {
5835     auto Ufmt = convertDfmtNfmt2Ufmt(Dfmt, Nfmt);
5836     if (Ufmt == UFMT_UNDEF) {
5837       Error(FormatLoc, "unsupported format");
5838       return MatchOperand_ParseFail;
5839     }
5840     Format = Ufmt;
5841   } else {
5842     Format = encodeDfmtNfmt(Dfmt, Nfmt);
5843   }
5844 
5845   return MatchOperand_Success;
5846 }
5847 
5848 OperandMatchResultTy
5849 AMDGPUAsmParser::parseSymbolicUnifiedFormat(StringRef FormatStr,
5850                                             SMLoc Loc,
5851                                             int64_t &Format) {
5852   using namespace llvm::AMDGPU::MTBUFFormat;
5853 
5854   auto Id = getUnifiedFormat(FormatStr);
5855   if (Id == UFMT_UNDEF)
5856     return MatchOperand_NoMatch;
5857 
5858   if (!isGFX10Plus()) {
5859     Error(Loc, "unified format is not supported on this GPU");
5860     return MatchOperand_ParseFail;
5861   }
5862 
5863   Format = Id;
5864   return MatchOperand_Success;
5865 }
5866 
5867 OperandMatchResultTy
5868 AMDGPUAsmParser::parseNumericFormat(int64_t &Format) {
5869   using namespace llvm::AMDGPU::MTBUFFormat;
5870   SMLoc Loc = getLoc();
5871 
5872   if (!parseExpr(Format))
5873     return MatchOperand_ParseFail;
5874   if (!isValidFormatEncoding(Format, getSTI())) {
5875     Error(Loc, "out of range format");
5876     return MatchOperand_ParseFail;
5877   }
5878 
5879   return MatchOperand_Success;
5880 }
5881 
5882 OperandMatchResultTy
5883 AMDGPUAsmParser::parseSymbolicOrNumericFormat(int64_t &Format) {
5884   using namespace llvm::AMDGPU::MTBUFFormat;
5885 
5886   if (!trySkipId("format", AsmToken::Colon))
5887     return MatchOperand_NoMatch;
5888 
5889   if (trySkipToken(AsmToken::LBrac)) {
5890     StringRef FormatStr;
5891     SMLoc Loc = getLoc();
5892     if (!parseId(FormatStr, "expected a format string"))
5893       return MatchOperand_ParseFail;
5894 
5895     auto Res = parseSymbolicUnifiedFormat(FormatStr, Loc, Format);
5896     if (Res == MatchOperand_NoMatch)
5897       Res = parseSymbolicSplitFormat(FormatStr, Loc, Format);
5898     if (Res != MatchOperand_Success)
5899       return Res;
5900 
5901     if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
5902       return MatchOperand_ParseFail;
5903 
5904     return MatchOperand_Success;
5905   }
5906 
5907   return parseNumericFormat(Format);
5908 }
5909 
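// Parse the MTBUF format operand. The format may precede or follow the
// soffset operand; if it is omitted entirely, a target-specific default
// encoding is used.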
5910 OperandMatchResultTy
5911 AMDGPUAsmParser::parseFORMAT(OperandVector &Operands) {
5912   using namespace llvm::AMDGPU::MTBUFFormat;
5913 
5914   int64_t Format = getDefaultFormatEncoding(getSTI());
5915   OperandMatchResultTy Res;
5916   SMLoc Loc = getLoc();
5917 
5918   // Parse legacy format syntax.
5919   Res = isGFX10Plus() ? parseUfmt(Format) : parseDfmtNfmt(Format);
5920   if (Res == MatchOperand_ParseFail)
5921     return Res;
5922 
5923   bool FormatFound = (Res == MatchOperand_Success);
5924 
5925   Operands.push_back(
5926     AMDGPUOperand::CreateImm(this, Format, Loc, AMDGPUOperand::ImmTyFORMAT));
5927 
5928   if (FormatFound)
5929     trySkipToken(AsmToken::Comma);
5930 
5931   if (isToken(AsmToken::EndOfStatement)) {
5932     // We are expecting an soffset operand,
5933     // but let matcher handle the error.
5934     return MatchOperand_Success;
5935   }
5936 
5937   // Parse soffset.
5938   Res = parseRegOrImm(Operands);
5939   if (Res != MatchOperand_Success)
5940     return Res;
5941 
5942   trySkipToken(AsmToken::Comma);
5943 
5944   if (!FormatFound) {
5945     Res = parseSymbolicOrNumericFormat(Format);
5946     if (Res == MatchOperand_ParseFail)
5947       return Res;
5948     if (Res == MatchOperand_Success) {
5949       auto Size = Operands.size();
5950       AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands[Size - 2]);
5951       assert(Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyFORMAT);
5952       Op.setImm(Format);
5953     }
5954     return MatchOperand_Success;
5955   }
5956 
5957   if (isId("format") && peekToken().is(AsmToken::Colon)) {
5958     Error(getLoc(), "duplicate format");
5959     return MatchOperand_ParseFail;
5960   }
5961   return MatchOperand_Success;
5962 }
5963 
5964 //===----------------------------------------------------------------------===//
5965 // ds
5966 //===----------------------------------------------------------------------===//
5967 
5968 void AMDGPUAsmParser::cvtDSOffset01(MCInst &Inst,
5969                                     const OperandVector &Operands) {
5970   OptionalImmIndexMap OptionalIdx;
5971 
5972   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
5973     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
5974 
5975     // Add the register arguments
5976     if (Op.isReg()) {
5977       Op.addRegOperands(Inst, 1);
5978       continue;
5979     }
5980 
5981     // Handle optional arguments
5982     OptionalIdx[Op.getImmTy()] = i;
5983   }
5984 
5985   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset0);
5986   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset1);
5987   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS);
5988 
5989   Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0
5990 }
5991 
5992 void AMDGPUAsmParser::cvtDSImpl(MCInst &Inst, const OperandVector &Operands,
5993                                 bool IsGdsHardcoded) {
5994   OptionalImmIndexMap OptionalIdx;
5995 
5996   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
5997     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
5998 
5999     // Add the register arguments
6000     if (Op.isReg()) {
6001       Op.addRegOperands(Inst, 1);
6002       continue;
6003     }
6004 
6005     if (Op.isToken() && Op.getToken() == "gds") {
6006       IsGdsHardcoded = true;
6007       continue;
6008     }
6009 
6010     // Handle optional arguments
6011     OptionalIdx[Op.getImmTy()] = i;
6012   }
6013 
6014   AMDGPUOperand::ImmTy OffsetType =
6015     (Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_gfx10 ||
6016      Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_gfx6_gfx7 ||
6017      Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_vi) ? AMDGPUOperand::ImmTySwizzle :
6018                                                       AMDGPUOperand::ImmTyOffset;
6019 
6020   addOptionalImmOperand(Inst, Operands, OptionalIdx, OffsetType);
6021 
6022   if (!IsGdsHardcoded) {
6023     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS);
6024   }
6025   Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0
6026 }
6027 
6028 void AMDGPUAsmParser::cvtExp(MCInst &Inst, const OperandVector &Operands) {
6029   OptionalImmIndexMap OptionalIdx;
6030 
6031   unsigned OperandIdx[4];
6032   unsigned EnMask = 0;
6033   int SrcIdx = 0;
6034 
6035   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
6036     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
6037 
6038     // Add the register arguments
6039     if (Op.isReg()) {
6040       assert(SrcIdx < 4);
6041       OperandIdx[SrcIdx] = Inst.size();
6042       Op.addRegOperands(Inst, 1);
6043       ++SrcIdx;
6044       continue;
6045     }
6046 
6047     if (Op.isOff()) {
6048       assert(SrcIdx < 4);
6049       OperandIdx[SrcIdx] = Inst.size();
6050       Inst.addOperand(MCOperand::createReg(AMDGPU::NoRegister));
6051       ++SrcIdx;
6052       continue;
6053     }
6054 
6055     if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyExpTgt) {
6056       Op.addImmOperands(Inst, 1);
6057       continue;
6058     }
6059 
6060     if (Op.isToken() && Op.getToken() == "done")
6061       continue;
6062 
6063     // Handle optional arguments
6064     OptionalIdx[Op.getImmTy()] = i;
6065   }
6066 
6067   assert(SrcIdx == 4);
6068 
6069   bool Compr = false;
6070   if (OptionalIdx.find(AMDGPUOperand::ImmTyExpCompr) != OptionalIdx.end()) {
6071     Compr = true;
6072     Inst.getOperand(OperandIdx[1]) = Inst.getOperand(OperandIdx[2]);
6073     Inst.getOperand(OperandIdx[2]).setReg(AMDGPU::NoRegister);
6074     Inst.getOperand(OperandIdx[3]).setReg(AMDGPU::NoRegister);
6075   }
6076 
6077   for (auto i = 0; i < SrcIdx; ++i) {
6078     if (Inst.getOperand(OperandIdx[i]).getReg() != AMDGPU::NoRegister) {
6079       EnMask |= Compr? (0x3 << i * 2) : (0x1 << i);
6080     }
6081   }
6082 
6083   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpVM);
6084   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpCompr);
6085 
6086   Inst.addOperand(MCOperand::createImm(EnMask));
6087 }
6088 
6089 //===----------------------------------------------------------------------===//
6090 // s_waitcnt
6091 //===----------------------------------------------------------------------===//
6092 
6093 static bool
6094 encodeCnt(
6095   const AMDGPU::IsaVersion ISA,
6096   int64_t &IntVal,
6097   int64_t CntVal,
6098   bool Saturate,
6099   unsigned (*encode)(const IsaVersion &Version, unsigned, unsigned),
6100   unsigned (*decode)(const IsaVersion &Version, unsigned))
6101 {
6102   bool Failed = false;
6103 
6104   IntVal = encode(ISA, IntVal, CntVal);
6105   if (CntVal != decode(ISA, IntVal)) {
6106     if (Saturate) {
6107       IntVal = encode(ISA, IntVal, -1);
6108     } else {
6109       Failed = true;
6110     }
6111   }
6112   return Failed;
6113 }
6114 
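// Parse one "name(count)" term of an s_waitcnt operand and fold it into
// IntVal, e.g. (illustrative) "vmcnt(0)" in "s_waitcnt vmcnt(0) & lgkmcnt(0)".
// The *_sat counter names clamp an out-of-range count rather than reporting
// an error.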
6115 bool AMDGPUAsmParser::parseCnt(int64_t &IntVal) {
6116 
6117   SMLoc CntLoc = getLoc();
6118   StringRef CntName = getTokenStr();
6119 
6120   if (!skipToken(AsmToken::Identifier, "expected a counter name") ||
6121       !skipToken(AsmToken::LParen, "expected a left parenthesis"))
6122     return false;
6123 
6124   int64_t CntVal;
6125   SMLoc ValLoc = getLoc();
6126   if (!parseExpr(CntVal))
6127     return false;
6128 
6129   AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
6130 
6131   bool Failed = true;
6132   bool Sat = CntName.endswith("_sat");
6133 
6134   if (CntName == "vmcnt" || CntName == "vmcnt_sat") {
6135     Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeVmcnt, decodeVmcnt);
6136   } else if (CntName == "expcnt" || CntName == "expcnt_sat") {
6137     Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeExpcnt, decodeExpcnt);
6138   } else if (CntName == "lgkmcnt" || CntName == "lgkmcnt_sat") {
6139     Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeLgkmcnt, decodeLgkmcnt);
6140   } else {
6141     Error(CntLoc, "invalid counter name " + CntName);
6142     return false;
6143   }
6144 
6145   if (Failed) {
6146     Error(ValLoc, "too large value for " + CntName);
6147     return false;
6148   }
6149 
6150   if (!skipToken(AsmToken::RParen, "expected a closing parenthesis"))
6151     return false;
6152 
6153   if (trySkipToken(AsmToken::Amp) || trySkipToken(AsmToken::Comma)) {
6154     if (isToken(AsmToken::EndOfStatement)) {
6155       Error(getLoc(), "expected a counter name");
6156       return false;
6157     }
6158   }
6159 
6160   return true;
6161 }
6162 
6163 OperandMatchResultTy
6164 AMDGPUAsmParser::parseSWaitCntOps(OperandVector &Operands) {
6165   AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
6166   int64_t Waitcnt = getWaitcntBitMask(ISA);
6167   SMLoc S = getLoc();
6168 
6169   if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) {
6170     while (!isToken(AsmToken::EndOfStatement)) {
6171       if (!parseCnt(Waitcnt))
6172         return MatchOperand_ParseFail;
6173     }
6174   } else {
6175     if (!parseExpr(Waitcnt))
6176       return MatchOperand_ParseFail;
6177   }
6178 
6179   Operands.push_back(AMDGPUOperand::CreateImm(this, Waitcnt, S));
6180   return MatchOperand_Success;
6181 }
6182 
6183 bool
6184 AMDGPUOperand::isSWaitCnt() const {
6185   return isImm();
6186 }
6187 
6188 //===----------------------------------------------------------------------===//
6189 // hwreg
6190 //===----------------------------------------------------------------------===//
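// Illustrative examples of the hwreg operand syntax parsed below (register
// names and their availability vary by GPU; shown for reference only):
//   s_getreg_b32 s2, hwreg(HW_REG_MODE)
//   s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s2
//   s_getreg_b32 s2, hwreg(6, 0, 32)       (numeric id, bit offset, width)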
6191 
6192 bool
6193 AMDGPUAsmParser::parseHwregBody(OperandInfoTy &HwReg,
6194                                 OperandInfoTy &Offset,
6195                                 OperandInfoTy &Width) {
6196   using namespace llvm::AMDGPU::Hwreg;
6197 
6198   // The register may be specified by name or using a numeric code
6199   HwReg.Loc = getLoc();
6200   if (isToken(AsmToken::Identifier) &&
6201       (HwReg.Id = getHwregId(getTokenStr(), getSTI())) >= 0) {
6202     HwReg.IsSymbolic = true;
6203     lex(); // skip register name
6204   } else if (!parseExpr(HwReg.Id, "a register name")) {
6205     return false;
6206   }
6207 
6208   if (trySkipToken(AsmToken::RParen))
6209     return true;
6210 
6211   // parse optional params
6212   if (!skipToken(AsmToken::Comma, "expected a comma or a closing parenthesis"))
6213     return false;
6214 
6215   Offset.Loc = getLoc();
6216   if (!parseExpr(Offset.Id))
6217     return false;
6218 
6219   if (!skipToken(AsmToken::Comma, "expected a comma"))
6220     return false;
6221 
6222   Width.Loc = getLoc();
6223   return parseExpr(Width.Id) &&
6224          skipToken(AsmToken::RParen, "expected a closing parenthesis");
6225 }
6226 
6227 bool
6228 AMDGPUAsmParser::validateHwreg(const OperandInfoTy &HwReg,
6229                                const OperandInfoTy &Offset,
6230                                const OperandInfoTy &Width) {
6231 
6232   using namespace llvm::AMDGPU::Hwreg;
6233 
6234   if (HwReg.IsSymbolic && !isValidHwreg(HwReg.Id, getSTI())) {
6235     Error(HwReg.Loc,
6236           "specified hardware register is not supported on this GPU");
6237     return false;
6238   }
6239   if (!isValidHwreg(HwReg.Id)) {
6240     Error(HwReg.Loc,
6241           "invalid code of hardware register: only 6-bit values are legal");
6242     return false;
6243   }
6244   if (!isValidHwregOffset(Offset.Id)) {
6245     Error(Offset.Loc, "invalid bit offset: only 5-bit values are legal");
6246     return false;
6247   }
6248   if (!isValidHwregWidth(Width.Id)) {
6249     Error(Width.Loc,
6250           "invalid bitfield width: only values from 1 to 32 are legal");
6251     return false;
6252   }
6253   return true;
6254 }
6255 
6256 OperandMatchResultTy
6257 AMDGPUAsmParser::parseHwreg(OperandVector &Operands) {
6258   using namespace llvm::AMDGPU::Hwreg;
6259 
6260   int64_t ImmVal = 0;
6261   SMLoc Loc = getLoc();
6262 
6263   if (trySkipId("hwreg", AsmToken::LParen)) {
6264     OperandInfoTy HwReg(ID_UNKNOWN_);
6265     OperandInfoTy Offset(OFFSET_DEFAULT_);
6266     OperandInfoTy Width(WIDTH_DEFAULT_);
6267     if (parseHwregBody(HwReg, Offset, Width) &&
6268         validateHwreg(HwReg, Offset, Width)) {
6269       ImmVal = encodeHwreg(HwReg.Id, Offset.Id, Width.Id);
6270     } else {
6271       return MatchOperand_ParseFail;
6272     }
6273   } else if (parseExpr(ImmVal, "a hwreg macro")) {
6274     if (ImmVal < 0 || !isUInt<16>(ImmVal)) {
6275       Error(Loc, "invalid immediate: only 16-bit values are legal");
6276       return MatchOperand_ParseFail;
6277     }
6278   } else {
6279     return MatchOperand_ParseFail;
6280   }
6281 
6282   Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTyHwreg));
6283   return MatchOperand_Success;
6284 }
6285 
6286 bool AMDGPUOperand::isHwreg() const {
6287   return isImmTy(ImmTyHwreg);
6288 }
6289 
6290 //===----------------------------------------------------------------------===//
6291 // sendmsg
6292 //===----------------------------------------------------------------------===//
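// Illustrative examples of the sendmsg operand syntax parsed below (message
// and operation names depend on the GPU; shown for reference only):
//   s_sendmsg sendmsg(MSG_INTERRUPT)
//   s_sendmsg sendmsg(MSG_GS, GS_OP_EMIT, 0)
//   s_sendmsg 0x1                          (raw 16-bit immediate)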
6293 
6294 bool
6295 AMDGPUAsmParser::parseSendMsgBody(OperandInfoTy &Msg,
6296                                   OperandInfoTy &Op,
6297                                   OperandInfoTy &Stream) {
6298   using namespace llvm::AMDGPU::SendMsg;
6299 
6300   Msg.Loc = getLoc();
6301   if (isToken(AsmToken::Identifier) && (Msg.Id = getMsgId(getTokenStr())) >= 0) {
6302     Msg.IsSymbolic = true;
6303     lex(); // skip message name
6304   } else if (!parseExpr(Msg.Id, "a message name")) {
6305     return false;
6306   }
6307 
6308   if (trySkipToken(AsmToken::Comma)) {
6309     Op.IsDefined = true;
6310     Op.Loc = getLoc();
6311     if (isToken(AsmToken::Identifier) &&
6312         (Op.Id = getMsgOpId(Msg.Id, getTokenStr())) >= 0) {
6313       lex(); // skip operation name
6314     } else if (!parseExpr(Op.Id, "an operation name")) {
6315       return false;
6316     }
6317 
6318     if (trySkipToken(AsmToken::Comma)) {
6319       Stream.IsDefined = true;
6320       Stream.Loc = getLoc();
6321       if (!parseExpr(Stream.Id))
6322         return false;
6323     }
6324   }
6325 
6326   return skipToken(AsmToken::RParen, "expected a closing parenthesis");
6327 }
6328 
6329 bool
6330 AMDGPUAsmParser::validateSendMsg(const OperandInfoTy &Msg,
6331                                  const OperandInfoTy &Op,
6332                                  const OperandInfoTy &Stream) {
6333   using namespace llvm::AMDGPU::SendMsg;
6334 
6335   // Validation strictness depends on whether the message is specified
6336   // in a symbolic or in a numeric form. In the latter case
6337   // we only check that the value can be encoded.
6338   bool Strict = Msg.IsSymbolic;
6339 
6340   if (!isValidMsgId(Msg.Id, getSTI(), Strict)) {
6341     Error(Msg.Loc, "invalid message id");
6342     return false;
6343   }
6344   if (Strict && (msgRequiresOp(Msg.Id) != Op.IsDefined)) {
6345     if (Op.IsDefined) {
6346       Error(Op.Loc, "message does not support operations");
6347     } else {
6348       Error(Msg.Loc, "missing message operation");
6349     }
6350     return false;
6351   }
6352   if (!isValidMsgOp(Msg.Id, Op.Id, getSTI(), Strict)) {
6353     Error(Op.Loc, "invalid operation id");
6354     return false;
6355   }
6356   if (Strict && !msgSupportsStream(Msg.Id, Op.Id) && Stream.IsDefined) {
6357     Error(Stream.Loc, "message operation does not support streams");
6358     return false;
6359   }
6360   if (!isValidMsgStream(Msg.Id, Op.Id, Stream.Id, getSTI(), Strict)) {
6361     Error(Stream.Loc, "invalid message stream id");
6362     return false;
6363   }
6364   return true;
6365 }
6366 
6367 OperandMatchResultTy
6368 AMDGPUAsmParser::parseSendMsgOp(OperandVector &Operands) {
6369   using namespace llvm::AMDGPU::SendMsg;
6370 
6371   int64_t ImmVal = 0;
6372   SMLoc Loc = getLoc();
6373 
6374   if (trySkipId("sendmsg", AsmToken::LParen)) {
6375     OperandInfoTy Msg(ID_UNKNOWN_);
6376     OperandInfoTy Op(OP_NONE_);
6377     OperandInfoTy Stream(STREAM_ID_NONE_);
6378     if (parseSendMsgBody(Msg, Op, Stream) &&
6379         validateSendMsg(Msg, Op, Stream)) {
6380       ImmVal = encodeMsg(Msg.Id, Op.Id, Stream.Id);
6381     } else {
6382       return MatchOperand_ParseFail;
6383     }
6384   } else if (parseExpr(ImmVal, "a sendmsg macro")) {
6385     if (ImmVal < 0 || !isUInt<16>(ImmVal)) {
6386       Error(Loc, "invalid immediate: only 16-bit values are legal");
6387       return MatchOperand_ParseFail;
6388     }
6389   } else {
6390     return MatchOperand_ParseFail;
6391   }
6392 
6393   Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTySendMsg));
6394   return MatchOperand_Success;
6395 }
6396 
6397 bool AMDGPUOperand::isSendMsg() const {
6398   return isImmTy(ImmTySendMsg);
6399 }
6400 
6401 //===----------------------------------------------------------------------===//
6402 // v_interp
6403 //===----------------------------------------------------------------------===//
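// Illustrative examples of the interpolation operands parsed below
// (slot is one of p10/p20/p0, attribute is attr<N>.<chan>; reference only):
//   v_interp_p1_f32 v0, v1, attr0.x
//   v_interp_mov_f32 v0, p10, attr2.y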
6404 
6405 OperandMatchResultTy AMDGPUAsmParser::parseInterpSlot(OperandVector &Operands) {
6406   StringRef Str;
6407   SMLoc S = getLoc();
6408 
6409   if (!parseId(Str))
6410     return MatchOperand_NoMatch;
6411 
6412   int Slot = StringSwitch<int>(Str)
6413     .Case("p10", 0)
6414     .Case("p20", 1)
6415     .Case("p0", 2)
6416     .Default(-1);
6417 
6418   if (Slot == -1) {
6419     Error(S, "invalid interpolation slot");
6420     return MatchOperand_ParseFail;
6421   }
6422 
6423   Operands.push_back(AMDGPUOperand::CreateImm(this, Slot, S,
6424                                               AMDGPUOperand::ImmTyInterpSlot));
6425   return MatchOperand_Success;
6426 }
6427 
6428 OperandMatchResultTy AMDGPUAsmParser::parseInterpAttr(OperandVector &Operands) {
6429   StringRef Str;
6430   SMLoc S = getLoc();
6431 
6432   if (!parseId(Str))
6433     return MatchOperand_NoMatch;
6434 
6435   if (!Str.startswith("attr")) {
6436     Error(S, "invalid interpolation attribute");
6437     return MatchOperand_ParseFail;
6438   }
6439 
6440   StringRef Chan = Str.take_back(2);
6441   int AttrChan = StringSwitch<int>(Chan)
6442     .Case(".x", 0)
6443     .Case(".y", 1)
6444     .Case(".z", 2)
6445     .Case(".w", 3)
6446     .Default(-1);
6447   if (AttrChan == -1) {
6448     Error(S, "invalid or missing interpolation attribute channel");
6449     return MatchOperand_ParseFail;
6450   }
6451 
6452   Str = Str.drop_back(2).drop_front(4);
6453 
6454   uint8_t Attr;
6455   if (Str.getAsInteger(10, Attr)) {
6456     Error(S, "invalid or missing interpolation attribute number");
6457     return MatchOperand_ParseFail;
6458   }
6459 
6460   if (Attr > 63) {
6461     Error(S, "out of bounds interpolation attribute number");
6462     return MatchOperand_ParseFail;
6463   }
6464 
6465   SMLoc SChan = SMLoc::getFromPointer(Chan.data());
6466 
6467   Operands.push_back(AMDGPUOperand::CreateImm(this, Attr, S,
6468                                               AMDGPUOperand::ImmTyInterpAttr));
6469   Operands.push_back(AMDGPUOperand::CreateImm(this, AttrChan, SChan,
6470                                               AMDGPUOperand::ImmTyAttrChan));
6471   return MatchOperand_Success;
6472 }
6473 
6474 //===----------------------------------------------------------------------===//
6475 // exp
6476 //===----------------------------------------------------------------------===//
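// Illustrative examples of exp target operands parsed below (the set of
// valid targets depends on the GPU; shown for reference only):
//   exp mrt0 v0, v1, v2, v3 done vm
//   exp pos0 v4, v5, v6, v7 done
//   exp param0 v0, off, off, off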
6477 
6478 OperandMatchResultTy AMDGPUAsmParser::parseExpTgt(OperandVector &Operands) {
6479   using namespace llvm::AMDGPU::Exp;
6480 
6481   StringRef Str;
6482   SMLoc S = getLoc();
6483 
6484   if (!parseId(Str))
6485     return MatchOperand_NoMatch;
6486 
6487   unsigned Id = getTgtId(Str);
6488   if (Id == ET_INVALID || !isSupportedTgtId(Id, getSTI())) {
6489     Error(S, (Id == ET_INVALID) ?
6490                 "invalid exp target" :
6491                 "exp target is not supported on this GPU");
6492     return MatchOperand_ParseFail;
6493   }
6494 
6495   Operands.push_back(AMDGPUOperand::CreateImm(this, Id, S,
6496                                               AMDGPUOperand::ImmTyExpTgt));
6497   return MatchOperand_Success;
6498 }
6499 
6500 //===----------------------------------------------------------------------===//
6501 // parser helpers
6502 //===----------------------------------------------------------------------===//
6503 
6504 bool
6505 AMDGPUAsmParser::isId(const AsmToken &Token, const StringRef Id) const {
6506   return Token.is(AsmToken::Identifier) && Token.getString() == Id;
6507 }
6508 
6509 bool
6510 AMDGPUAsmParser::isId(const StringRef Id) const {
6511   return isId(getToken(), Id);
6512 }
6513 
6514 bool
6515 AMDGPUAsmParser::isToken(const AsmToken::TokenKind Kind) const {
6516   return getTokenKind() == Kind;
6517 }
6518 
6519 bool
6520 AMDGPUAsmParser::trySkipId(const StringRef Id) {
6521   if (isId(Id)) {
6522     lex();
6523     return true;
6524   }
6525   return false;
6526 }
6527 
6528 bool
6529 AMDGPUAsmParser::trySkipId(const StringRef Pref, const StringRef Id) {
6530   if (isToken(AsmToken::Identifier)) {
6531     StringRef Tok = getTokenStr();
6532     if (Tok.startswith(Pref) && Tok.drop_front(Pref.size()) == Id) {
6533       lex();
6534       return true;
6535     }
6536   }
6537   return false;
6538 }
6539 
6540 bool
6541 AMDGPUAsmParser::trySkipId(const StringRef Id, const AsmToken::TokenKind Kind) {
6542   if (isId(Id) && peekToken().is(Kind)) {
6543     lex();
6544     lex();
6545     return true;
6546   }
6547   return false;
6548 }
6549 
6550 bool
6551 AMDGPUAsmParser::trySkipToken(const AsmToken::TokenKind Kind) {
6552   if (isToken(Kind)) {
6553     lex();
6554     return true;
6555   }
6556   return false;
6557 }
6558 
6559 bool
6560 AMDGPUAsmParser::skipToken(const AsmToken::TokenKind Kind,
6561                            const StringRef ErrMsg) {
6562   if (!trySkipToken(Kind)) {
6563     Error(getLoc(), ErrMsg);
6564     return false;
6565   }
6566   return true;
6567 }
6568 
6569 bool
6570 AMDGPUAsmParser::parseExpr(int64_t &Imm, StringRef Expected) {
6571   SMLoc S = getLoc();
6572 
6573   const MCExpr *Expr;
6574   if (Parser.parseExpression(Expr))
6575     return false;
6576 
6577   if (Expr->evaluateAsAbsolute(Imm))
6578     return true;
6579 
6580   if (Expected.empty()) {
6581     Error(S, "expected absolute expression");
6582   } else {
6583     Error(S, Twine("expected ", Expected) +
6584              Twine(" or an absolute expression"));
6585   }
6586   return false;
6587 }
6588 
6589 bool
6590 AMDGPUAsmParser::parseExpr(OperandVector &Operands) {
6591   SMLoc S = getLoc();
6592 
6593   const MCExpr *Expr;
6594   if (Parser.parseExpression(Expr))
6595     return false;
6596 
6597   int64_t IntVal;
6598   if (Expr->evaluateAsAbsolute(IntVal)) {
6599     Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S));
6600   } else {
6601     Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S));
6602   }
6603   return true;
6604 }
6605 
6606 bool
6607 AMDGPUAsmParser::parseString(StringRef &Val, const StringRef ErrMsg) {
6608   if (isToken(AsmToken::String)) {
6609     Val = getToken().getStringContents();
6610     lex();
6611     return true;
6612   } else {
6613     Error(getLoc(), ErrMsg);
6614     return false;
6615   }
6616 }
6617 
6618 bool
6619 AMDGPUAsmParser::parseId(StringRef &Val, const StringRef ErrMsg) {
6620   if (isToken(AsmToken::Identifier)) {
6621     Val = getTokenStr();
6622     lex();
6623     return true;
6624   } else {
6625     if (!ErrMsg.empty())
6626       Error(getLoc(), ErrMsg);
6627     return false;
6628   }
6629 }
6630 
6631 AsmToken
6632 AMDGPUAsmParser::getToken() const {
6633   return Parser.getTok();
6634 }
6635 
6636 AsmToken
6637 AMDGPUAsmParser::peekToken() {
6638   return isToken(AsmToken::EndOfStatement) ? getToken() : getLexer().peekTok();
6639 }
6640 
6641 void
6642 AMDGPUAsmParser::peekTokens(MutableArrayRef<AsmToken> Tokens) {
6643   auto TokCount = getLexer().peekTokens(Tokens);
6644 
6645   for (auto Idx = TokCount; Idx < Tokens.size(); ++Idx)
6646     Tokens[Idx] = AsmToken(AsmToken::Error, "");
6647 }
6648 
6649 AsmToken::TokenKind
6650 AMDGPUAsmParser::getTokenKind() const {
6651   return getLexer().getKind();
6652 }
6653 
6654 SMLoc
6655 AMDGPUAsmParser::getLoc() const {
6656   return getToken().getLoc();
6657 }
6658 
6659 StringRef
6660 AMDGPUAsmParser::getTokenStr() const {
6661   return getToken().getString();
6662 }
6663 
6664 void
6665 AMDGPUAsmParser::lex() {
6666   Parser.Lex();
6667 }
6668 
6669 SMLoc
6670 AMDGPUAsmParser::getOperandLoc(std::function<bool(const AMDGPUOperand&)> Test,
6671                                const OperandVector &Operands) const {
6672   for (unsigned i = Operands.size() - 1; i > 0; --i) {
6673     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
6674     if (Test(Op))
6675       return Op.getStartLoc();
6676   }
6677   return ((AMDGPUOperand &)*Operands[0]).getStartLoc();
6678 }
6679 
6680 SMLoc
6681 AMDGPUAsmParser::getImmLoc(AMDGPUOperand::ImmTy Type,
6682                            const OperandVector &Operands) const {
6683   auto Test = [=](const AMDGPUOperand& Op) { return Op.isImmTy(Type); };
6684   return getOperandLoc(Test, Operands);
6685 }
6686 
6687 SMLoc
6688 AMDGPUAsmParser::getRegLoc(unsigned Reg,
6689                            const OperandVector &Operands) const {
6690   auto Test = [=](const AMDGPUOperand& Op) {
6691     return Op.isRegKind() && Op.getReg() == Reg;
6692   };
6693   return getOperandLoc(Test, Operands);
6694 }
6695 
6696 SMLoc
6697 AMDGPUAsmParser::getLitLoc(const OperandVector &Operands) const {
6698   auto Test = [](const AMDGPUOperand& Op) {
6699     return Op.IsImmKindLiteral() || Op.isExpr();
6700   };
6701   return getOperandLoc(Test, Operands);
6702 }
6703 
6704 SMLoc
6705 AMDGPUAsmParser::getConstLoc(const OperandVector &Operands) const {
6706   auto Test = [](const AMDGPUOperand& Op) {
6707     return Op.isImmKindConst();
6708   };
6709   return getOperandLoc(Test, Operands);
6710 }
6711 
6712 //===----------------------------------------------------------------------===//
6713 // swizzle
6714 //===----------------------------------------------------------------------===//
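// Illustrative examples of the swizzle offset syntax parsed below (a sketch,
// not an exhaustive list; the raw 16-bit form is also accepted):
//   ds_swizzle_b32 v0, v1 offset:swizzle(QUAD_PERM, 0, 1, 2, 3)
//   ds_swizzle_b32 v0, v1 offset:swizzle(BITMASK_PERM, "01pip")
//   ds_swizzle_b32 v0, v1 offset:swizzle(BROADCAST, 4, 1)
//   ds_swizzle_b32 v0, v1 offset:swizzle(SWAP, 2)
//   ds_swizzle_b32 v0, v1 offset:swizzle(REVERSE, 8)
//   ds_swizzle_b32 v0, v1 offset:0x8000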
6715 
6716 LLVM_READNONE
6717 static unsigned
6718 encodeBitmaskPerm(const unsigned AndMask,
6719                   const unsigned OrMask,
6720                   const unsigned XorMask) {
6721   using namespace llvm::AMDGPU::Swizzle;
6722 
6723   return BITMASK_PERM_ENC |
6724          (AndMask << BITMASK_AND_SHIFT) |
6725          (OrMask  << BITMASK_OR_SHIFT)  |
6726          (XorMask << BITMASK_XOR_SHIFT);
6727 }
6728 
6729 bool
6730 AMDGPUAsmParser::parseSwizzleOperand(int64_t &Op,
6731                                      const unsigned MinVal,
6732                                      const unsigned MaxVal,
6733                                      const StringRef ErrMsg,
6734                                      SMLoc &Loc) {
6735   if (!skipToken(AsmToken::Comma, "expected a comma")) {
6736     return false;
6737   }
6738   Loc = getLoc();
6739   if (!parseExpr(Op)) {
6740     return false;
6741   }
6742   if (Op < MinVal || Op > MaxVal) {
6743     Error(Loc, ErrMsg);
6744     return false;
6745   }
6746 
6747   return true;
6748 }
6749 
6750 bool
6751 AMDGPUAsmParser::parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
6752                                       const unsigned MinVal,
6753                                       const unsigned MaxVal,
6754                                       const StringRef ErrMsg) {
6755   SMLoc Loc;
6756   for (unsigned i = 0; i < OpNum; ++i) {
6757     if (!parseSwizzleOperand(Op[i], MinVal, MaxVal, ErrMsg, Loc))
6758       return false;
6759   }
6760 
6761   return true;
6762 }
6763 
6764 bool
6765 AMDGPUAsmParser::parseSwizzleQuadPerm(int64_t &Imm) {
6766   using namespace llvm::AMDGPU::Swizzle;
6767 
6768   int64_t Lane[LANE_NUM];
6769   if (parseSwizzleOperands(LANE_NUM, Lane, 0, LANE_MAX,
6770                            "expected a 2-bit lane id")) {
6771     Imm = QUAD_PERM_ENC;
6772     for (unsigned I = 0; I < LANE_NUM; ++I) {
6773       Imm |= Lane[I] << (LANE_SHIFT * I);
6774     }
6775     return true;
6776   }
6777   return false;
6778 }
6779 
6780 bool
6781 AMDGPUAsmParser::parseSwizzleBroadcast(int64_t &Imm) {
6782   using namespace llvm::AMDGPU::Swizzle;
6783 
6784   SMLoc Loc;
6785   int64_t GroupSize;
6786   int64_t LaneIdx;
6787 
6788   if (!parseSwizzleOperand(GroupSize,
6789                            2, 32,
6790                            "group size must be in the interval [2,32]",
6791                            Loc)) {
6792     return false;
6793   }
6794   if (!isPowerOf2_64(GroupSize)) {
6795     Error(Loc, "group size must be a power of two");
6796     return false;
6797   }
6798   if (parseSwizzleOperand(LaneIdx,
6799                           0, GroupSize - 1,
6800                           "lane id must be in the interval [0,group size - 1]",
6801                           Loc)) {
6802     Imm = encodeBitmaskPerm(BITMASK_MAX - GroupSize + 1, LaneIdx, 0);
6803     return true;
6804   }
6805   return false;
6806 }
6807 
6808 bool
6809 AMDGPUAsmParser::parseSwizzleReverse(int64_t &Imm) {
6810   using namespace llvm::AMDGPU::Swizzle;
6811 
6812   SMLoc Loc;
6813   int64_t GroupSize;
6814 
6815   if (!parseSwizzleOperand(GroupSize,
6816                            2, 32,
6817                            "group size must be in the interval [2,32]",
6818                            Loc)) {
6819     return false;
6820   }
6821   if (!isPowerOf2_64(GroupSize)) {
6822     Error(Loc, "group size must be a power of two");
6823     return false;
6824   }
6825 
6826   Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize - 1);
6827   return true;
6828 }
6829 
6830 bool
6831 AMDGPUAsmParser::parseSwizzleSwap(int64_t &Imm) {
6832   using namespace llvm::AMDGPU::Swizzle;
6833 
6834   SMLoc Loc;
6835   int64_t GroupSize;
6836 
6837   if (!parseSwizzleOperand(GroupSize,
6838                            1, 16,
6839                            "group size must be in the interval [1,16]",
6840                            Loc)) {
6841     return false;
6842   }
6843   if (!isPowerOf2_64(GroupSize)) {
6844     Error(Loc, "group size must be a power of two");
6845     return false;
6846   }
6847 
6848   Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize);
6849   return true;
6850 }
6851 
6852 bool
6853 AMDGPUAsmParser::parseSwizzleBitmaskPerm(int64_t &Imm) {
6854   using namespace llvm::AMDGPU::Swizzle;
6855 
6856   if (!skipToken(AsmToken::Comma, "expected a comma")) {
6857     return false;
6858   }
6859 
6860   StringRef Ctl;
6861   SMLoc StrLoc = getLoc();
6862   if (!parseString(Ctl)) {
6863     return false;
6864   }
6865   if (Ctl.size() != BITMASK_WIDTH) {
6866     Error(StrLoc, "expected a 5-character mask");
6867     return false;
6868   }
6869 
6870   unsigned AndMask = 0;
6871   unsigned OrMask = 0;
6872   unsigned XorMask = 0;
6873 
6874   for (size_t i = 0; i < Ctl.size(); ++i) {
6875     unsigned Mask = 1 << (BITMASK_WIDTH - 1 - i);
6876     switch(Ctl[i]) {
6877     default:
6878       Error(StrLoc, "invalid mask");
6879       return false;
6880     case '0':
6881       break;
6882     case '1':
6883       OrMask |= Mask;
6884       break;
6885     case 'p':
6886       AndMask |= Mask;
6887       break;
6888     case 'i':
6889       AndMask |= Mask;
6890       XorMask |= Mask;
6891       break;
6892     }
6893   }
6894 
6895   Imm = encodeBitmaskPerm(AndMask, OrMask, XorMask);
6896   return true;
6897 }
6898 
6899 bool
6900 AMDGPUAsmParser::parseSwizzleOffset(int64_t &Imm) {
6901 
6902   SMLoc OffsetLoc = getLoc();
6903 
6904   if (!parseExpr(Imm, "a swizzle macro")) {
6905     return false;
6906   }
6907   if (!isUInt<16>(Imm)) {
6908     Error(OffsetLoc, "expected a 16-bit offset");
6909     return false;
6910   }
6911   return true;
6912 }
6913 
6914 bool
6915 AMDGPUAsmParser::parseSwizzleMacro(int64_t &Imm) {
6916   using namespace llvm::AMDGPU::Swizzle;
6917 
6918   if (skipToken(AsmToken::LParen, "expected a left parenthesis")) {
6919 
6920     SMLoc ModeLoc = getLoc();
6921     bool Ok = false;
6922 
6923     if (trySkipId(IdSymbolic[ID_QUAD_PERM])) {
6924       Ok = parseSwizzleQuadPerm(Imm);
6925     } else if (trySkipId(IdSymbolic[ID_BITMASK_PERM])) {
6926       Ok = parseSwizzleBitmaskPerm(Imm);
6927     } else if (trySkipId(IdSymbolic[ID_BROADCAST])) {
6928       Ok = parseSwizzleBroadcast(Imm);
6929     } else if (trySkipId(IdSymbolic[ID_SWAP])) {
6930       Ok = parseSwizzleSwap(Imm);
6931     } else if (trySkipId(IdSymbolic[ID_REVERSE])) {
6932       Ok = parseSwizzleReverse(Imm);
6933     } else {
6934       Error(ModeLoc, "expected a swizzle mode");
6935     }
6936 
6937     return Ok && skipToken(AsmToken::RParen, "expected a closing parenthesis");
6938   }
6939 
6940   return false;
6941 }
6942 
6943 OperandMatchResultTy
6944 AMDGPUAsmParser::parseSwizzleOp(OperandVector &Operands) {
6945   SMLoc S = getLoc();
6946   int64_t Imm = 0;
6947 
6948   if (trySkipId("offset")) {
6949 
6950     bool Ok = false;
6951     if (skipToken(AsmToken::Colon, "expected a colon")) {
6952       if (trySkipId("swizzle")) {
6953         Ok = parseSwizzleMacro(Imm);
6954       } else {
6955         Ok = parseSwizzleOffset(Imm);
6956       }
6957     }
6958 
6959     Operands.push_back(AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTySwizzle));
6960 
6961     return Ok ? MatchOperand_Success : MatchOperand_ParseFail;
6962   } else {
6963     // Swizzle "offset" operand is optional.
6964     // If it is omitted, try parsing other optional operands.
6965     return parseOptionalOpr(Operands);
6966   }
6967 }
6968 
6969 bool
6970 AMDGPUOperand::isSwizzle() const {
6971   return isImmTy(ImmTySwizzle);
6972 }
6973 
6974 //===----------------------------------------------------------------------===//
6975 // VGPR Index Mode
6976 //===----------------------------------------------------------------------===//
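// Illustrative examples of the gpr_idx operand syntax parsed below
// (modes may be combined; a raw 4-bit immediate is also accepted):
//   s_set_gpr_idx_on s0, gpr_idx(SRC0)
//   s_set_gpr_idx_on s0, gpr_idx(SRC0, SRC1, DST)
//   s_set_gpr_idx_on s0, 3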
6977 
6978 int64_t AMDGPUAsmParser::parseGPRIdxMacro() {
6979 
6980   using namespace llvm::AMDGPU::VGPRIndexMode;
6981 
6982   if (trySkipToken(AsmToken::RParen)) {
6983     return OFF;
6984   }
6985 
6986   int64_t Imm = 0;
6987 
6988   while (true) {
6989     unsigned Mode = 0;
6990     SMLoc S = getLoc();
6991 
6992     for (unsigned ModeId = ID_MIN; ModeId <= ID_MAX; ++ModeId) {
6993       if (trySkipId(IdSymbolic[ModeId])) {
6994         Mode = 1 << ModeId;
6995         break;
6996       }
6997     }
6998 
6999     if (Mode == 0) {
7000       Error(S, (Imm == 0)?
7001                "expected a VGPR index mode or a closing parenthesis" :
7002                "expected a VGPR index mode");
7003       return UNDEF;
7004     }
7005 
7006     if (Imm & Mode) {
7007       Error(S, "duplicate VGPR index mode");
7008       return UNDEF;
7009     }
7010     Imm |= Mode;
7011 
7012     if (trySkipToken(AsmToken::RParen))
7013       break;
7014     if (!skipToken(AsmToken::Comma,
7015                    "expected a comma or a closing parenthesis"))
7016       return UNDEF;
7017   }
7018 
7019   return Imm;
7020 }
7021 
7022 OperandMatchResultTy
7023 AMDGPUAsmParser::parseGPRIdxMode(OperandVector &Operands) {
7024 
7025   using namespace llvm::AMDGPU::VGPRIndexMode;
7026 
7027   int64_t Imm = 0;
7028   SMLoc S = getLoc();
7029 
7030   if (trySkipId("gpr_idx", AsmToken::LParen)) {
7031     Imm = parseGPRIdxMacro();
7032     if (Imm == UNDEF)
7033       return MatchOperand_ParseFail;
7034   } else {
7035     if (getParser().parseAbsoluteExpression(Imm))
7036       return MatchOperand_ParseFail;
7037     if (Imm < 0 || !isUInt<4>(Imm)) {
7038       Error(S, "invalid immediate: only 4-bit values are legal");
7039       return MatchOperand_ParseFail;
7040     }
7041   }
7042 
7043   Operands.push_back(
7044       AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyGprIdxMode));
7045   return MatchOperand_Success;
7046 }
7047 
7048 bool AMDGPUOperand::isGPRIdxMode() const {
7049   return isImmTy(ImmTyGprIdxMode);
7050 }
7051 
7052 //===----------------------------------------------------------------------===//
7053 // sopp branch targets
7054 //===----------------------------------------------------------------------===//
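// Branch targets are either labels or absolute expressions that fit into a
// signed 16-bit offset, e.g. (shown for reference only):
//   s_branch loop_header
//   s_cbranch_scc0 12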
7055 
7056 OperandMatchResultTy
7057 AMDGPUAsmParser::parseSOppBrTarget(OperandVector &Operands) {
7058 
7059   // Make sure we are not parsing something
7060   // that looks like a label or an expression but is not.
7061   // This will improve error messages.
7062   if (isRegister() || isModifier())
7063     return MatchOperand_NoMatch;
7064 
7065   if (!parseExpr(Operands))
7066     return MatchOperand_ParseFail;
7067 
7068   AMDGPUOperand &Opr = ((AMDGPUOperand &)*Operands[Operands.size() - 1]);
7069   assert(Opr.isImm() || Opr.isExpr());
7070   SMLoc Loc = Opr.getStartLoc();
7071 
7072   // Currently we do not support arbitrary expressions as branch targets.
7073   // Only labels and absolute expressions are accepted.
7074   if (Opr.isExpr() && !Opr.isSymbolRefExpr()) {
7075     Error(Loc, "expected an absolute expression or a label");
7076   } else if (Opr.isImm() && !Opr.isS16Imm()) {
7077     Error(Loc, "expected a 16-bit signed jump offset");
7078   }
7079 
7080   return MatchOperand_Success;
7081 }
7082 
7083 //===----------------------------------------------------------------------===//
7084 // Boolean holding registers
7085 //===----------------------------------------------------------------------===//
7086 
7087 OperandMatchResultTy
7088 AMDGPUAsmParser::parseBoolReg(OperandVector &Operands) {
7089   return parseReg(Operands);
7090 }
7091 
7092 //===----------------------------------------------------------------------===//
7093 // mubuf
7094 //===----------------------------------------------------------------------===//
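// Illustrative examples of MUBUF syntax handled by the converters below
// (modifier availability depends on the GPU; shown for reference only):
//   buffer_load_dword v0, v1, s[4:7], s1 offen offset:16 glc
//   buffer_atomic_add v0, v1, s[4:7], s1 idxen glc   (glc selects the 'return' variant)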
7095 
7096 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultCPol() const {
7097   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyCPol);
7098 }
7099 
7100 void AMDGPUAsmParser::cvtMubufImpl(MCInst &Inst,
7101                                    const OperandVector &Operands,
7102                                    bool IsAtomic,
7103                                    bool IsLds) {
7104   bool IsLdsOpcode = IsLds;
7105   bool HasLdsModifier = false;
7106   OptionalImmIndexMap OptionalIdx;
7107   unsigned FirstOperandIdx = 1;
7108   bool IsAtomicReturn = false;
7109 
7110   if (IsAtomic) {
7111     for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) {
7112       AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
7113       if (!Op.isCPol())
7114         continue;
7115       IsAtomicReturn = Op.getImm() & AMDGPU::CPol::GLC;
7116       break;
7117     }
7118 
7119     if (!IsAtomicReturn) {
7120       int NewOpc = AMDGPU::getAtomicNoRetOp(Inst.getOpcode());
7121       if (NewOpc != -1)
7122         Inst.setOpcode(NewOpc);
7123     }
7124 
7125     IsAtomicReturn =  MII.get(Inst.getOpcode()).TSFlags &
7126                       SIInstrFlags::IsAtomicRet;
7127   }
7128 
7129   for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) {
7130     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
7131 
7132     // Add the register arguments
7133     if (Op.isReg()) {
7134       Op.addRegOperands(Inst, 1);
7135       // Insert a tied src for atomic return dst.
7136       // This cannot be postponed as subsequent calls to
7137       // addImmOperands rely on correct number of MC operands.
7138       if (IsAtomicReturn && i == FirstOperandIdx)
7139         Op.addRegOperands(Inst, 1);
7140       continue;
7141     }
7142 
7143     // Handle the case where soffset is an immediate
7144     if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
7145       Op.addImmOperands(Inst, 1);
7146       continue;
7147     }
7148 
7149     HasLdsModifier |= Op.isLDS();
7150 
7151     // Handle tokens like 'offen' which are sometimes hard-coded into the
7152     // asm string.  There are no MCInst operands for these.
7153     if (Op.isToken()) {
7154       continue;
7155     }
7156     assert(Op.isImm());
7157 
7158     // Handle optional arguments
7159     OptionalIdx[Op.getImmTy()] = i;
7160   }
7161 
7162   // This is a workaround for an llvm quirk which may result in an
7163   // incorrect instruction selection. Lds and non-lds versions of
7164   // MUBUF instructions are identical except that lds versions
7165   // have a mandatory 'lds' modifier. However, this modifier follows
7166   // the optional modifiers, and the llvm asm matcher regards the 'lds'
7167   // modifier as an optional one. As a result, an lds version
7168   // of an opcode may be selected even if it has no 'lds' modifier.
7169   if (IsLdsOpcode && !HasLdsModifier) {
7170     int NoLdsOpcode = AMDGPU::getMUBUFNoLdsInst(Inst.getOpcode());
7171     if (NoLdsOpcode != -1) { // Got lds version - correct it.
7172       Inst.setOpcode(NoLdsOpcode);
7173       IsLdsOpcode = false;
7174     }
7175   }
7176 
7177   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset);
7178   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol, 0);
7179 
7180   if (!IsLdsOpcode) { // tfe is not legal with lds opcodes
7181     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
7182   }
7183   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySWZ);
7184 }
7185 
7186 void AMDGPUAsmParser::cvtMtbuf(MCInst &Inst, const OperandVector &Operands) {
7187   OptionalImmIndexMap OptionalIdx;
7188 
7189   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
7190     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
7191 
7192     // Add the register arguments
7193     if (Op.isReg()) {
7194       Op.addRegOperands(Inst, 1);
7195       continue;
7196     }
7197 
7198     // Handle the case where soffset is an immediate
7199     if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
7200       Op.addImmOperands(Inst, 1);
7201       continue;
7202     }
7203 
7204     // Handle tokens like 'offen' which are sometimes hard-coded into the
7205     // asm string.  There are no MCInst operands for these.
7206     if (Op.isToken()) {
7207       continue;
7208     }
7209     assert(Op.isImm());
7210 
7211     // Handle optional arguments
7212     OptionalIdx[Op.getImmTy()] = i;
7213   }
7214 
7215   addOptionalImmOperand(Inst, Operands, OptionalIdx,
7216                         AMDGPUOperand::ImmTyOffset);
7217   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyFORMAT);
7218   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol, 0);
7219   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
7220   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySWZ);
7221 }
7222 
7223 //===----------------------------------------------------------------------===//
7224 // mimg
7225 //===----------------------------------------------------------------------===//
7226 
7227 void AMDGPUAsmParser::cvtMIMG(MCInst &Inst, const OperandVector &Operands,
7228                               bool IsAtomic) {
7229   unsigned I = 1;
7230   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
7231   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
7232     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
7233   }
7234 
7235   if (IsAtomic) {
7236     // Add src, same as dst
7237     assert(Desc.getNumDefs() == 1);
7238     ((AMDGPUOperand &)*Operands[I - 1]).addRegOperands(Inst, 1);
7239   }
7240 
7241   OptionalImmIndexMap OptionalIdx;
7242 
7243   for (unsigned E = Operands.size(); I != E; ++I) {
7244     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
7245 
7246     // Add the register arguments
7247     if (Op.isReg()) {
7248       Op.addRegOperands(Inst, 1);
7249     } else if (Op.isImmModifier()) {
7250       OptionalIdx[Op.getImmTy()] = I;
7251     } else if (!Op.isToken()) {
7252       llvm_unreachable("unexpected operand type");
7253     }
7254   }
7255 
7256   bool IsGFX10Plus = isGFX10Plus();
7257 
7258   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDMask);
7259   if (IsGFX10Plus)
7260     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDim, -1);
7261   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyUNorm);
7262   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol);
7263   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyR128A16);
7264   if (IsGFX10Plus)
7265     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyA16);
7266   if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::tfe) != -1)
7267     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
7268   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyLWE);
7269   if (!IsGFX10Plus)
7270     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDA);
7271   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyD16);
7272 }
7273 
7274 void AMDGPUAsmParser::cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands) {
7275   cvtMIMG(Inst, Operands, true);
7276 }
7277 
7278 void AMDGPUAsmParser::cvtSMEMAtomic(MCInst &Inst, const OperandVector &Operands) {
7279   OptionalImmIndexMap OptionalIdx;
7280   bool IsAtomicReturn = false;
7281 
7282   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
7283     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
7284     if (!Op.isCPol())
7285       continue;
7286     IsAtomicReturn = Op.getImm() & AMDGPU::CPol::GLC;
7287     break;
7288   }
7289 
7290   if (!IsAtomicReturn) {
7291     int NewOpc = AMDGPU::getAtomicNoRetOp(Inst.getOpcode());
7292     if (NewOpc != -1)
7293       Inst.setOpcode(NewOpc);
7294   }
7295 
7296   IsAtomicReturn =  MII.get(Inst.getOpcode()).TSFlags &
7297                     SIInstrFlags::IsAtomicRet;
7298 
7299   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
7300     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
7301 
7302     // Add the register arguments
7303     if (Op.isReg()) {
7304       Op.addRegOperands(Inst, 1);
7305       if (IsAtomicReturn && i == 1)
7306         Op.addRegOperands(Inst, 1);
7307       continue;
7308     }
7309 
7310     // Handle the case where soffset is an immediate
7311     if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
7312       Op.addImmOperands(Inst, 1);
7313       continue;
7314     }
7315 
7316     // Handle tokens like 'offen' which are sometimes hard-coded into the
7317     // asm string.  There are no MCInst operands for these.
7318     if (Op.isToken()) {
7319       continue;
7320     }
7321     assert(Op.isImm());
7322 
7323     // Handle optional arguments
7324     OptionalIdx[Op.getImmTy()] = i;
7325   }
7326 
7327   if ((int)Inst.getNumOperands() <=
7328       AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::offset))
7329     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset);
7330   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol, 0);
7331 }
7332 
7333 void AMDGPUAsmParser::cvtIntersectRay(MCInst &Inst,
7334                                       const OperandVector &Operands) {
7335   for (unsigned I = 1; I < Operands.size(); ++I) {
7336     auto &Operand = (AMDGPUOperand &)*Operands[I];
7337     if (Operand.isReg())
7338       Operand.addRegOperands(Inst, 1);
7339   }
7340 
7341   Inst.addOperand(MCOperand::createImm(1)); // a16
7342 }
7343 
7344 //===----------------------------------------------------------------------===//
7345 // smrd
7346 //===----------------------------------------------------------------------===//
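// SMRD/SMEM offsets come in several widths: an 8-bit immediate form, a
// 32-bit literal form (CI), and wider forms whose ranges are checked later
// by the validator. E.g. (shown for reference only):
//   s_load_dword s4, s[2:3], 0x8
//   s_load_dword s4, s[2:3], 0x1fff0       (literal offset wider than 8 bits)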
7347 
7348 bool AMDGPUOperand::isSMRDOffset8() const {
7349   return isImm() && isUInt<8>(getImm());
7350 }
7351 
7352 bool AMDGPUOperand::isSMEMOffset() const {
7353   return isImm(); // Offset range is checked later by validator.
7354 }
7355 
7356 bool AMDGPUOperand::isSMRDLiteralOffset() const {
7357   // 32-bit literals are only supported on CI, and we only want to use them
7358   // when the offset is wider than 8 bits.
7359   return isImm() && !isUInt<8>(getImm()) && isUInt<32>(getImm());
7360 }
7361 
7362 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDOffset8() const {
7363   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
7364 }
7365 
7366 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMEMOffset() const {
7367   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
7368 }
7369 
7370 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDLiteralOffset() const {
7371   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
7372 }
7373 
7374 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultFlatOffset() const {
7375   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
7376 }
7377 
7378 //===----------------------------------------------------------------------===//
7379 // vop3
7380 //===----------------------------------------------------------------------===//
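// The output-modifier helpers below map the assembler forms mul:1/2/4 and
// div:1/2 onto the OMOD encoding (mul:2 -> 1, mul:4 -> 2, div:2 -> 3, and
// mul:1/div:1 -> 0, i.e. no modifier). Illustrative uses (reference only):
//   v_mul_f32_e64 v0, v1, v2 mul:2
//   v_add_f32_e64 v0, v1, v2 clamp div:2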
7381 
7382 static bool ConvertOmodMul(int64_t &Mul) {
7383   if (Mul != 1 && Mul != 2 && Mul != 4)
7384     return false;
7385 
7386   Mul >>= 1;
7387   return true;
7388 }
7389 
7390 static bool ConvertOmodDiv(int64_t &Div) {
7391   if (Div == 1) {
7392     Div = 0;
7393     return true;
7394   }
7395 
7396   if (Div == 2) {
7397     Div = 3;
7398     return true;
7399   }
7400 
7401   return false;
7402 }
7403 
7404 // Both bound_ctrl:0 and bound_ctrl:1 are encoded as 1.
7405 // This is intentional and ensures compatibility with sp3.
7406 // See bug 35397 for details.
7407 static bool ConvertBoundCtrl(int64_t &BoundCtrl) {
7408   if (BoundCtrl == 0 || BoundCtrl == 1) {
7409     BoundCtrl = 1;
7410     return true;
7411   }
7412   return false;
7413 }
7414 
7415 // Note: the order in this table matches the order of operands in AsmString.
7416 static const OptionalOperand AMDGPUOptionalOperandTable[] = {
7417   {"offen",   AMDGPUOperand::ImmTyOffen, true, nullptr},
7418   {"idxen",   AMDGPUOperand::ImmTyIdxen, true, nullptr},
7419   {"addr64",  AMDGPUOperand::ImmTyAddr64, true, nullptr},
7420   {"offset0", AMDGPUOperand::ImmTyOffset0, false, nullptr},
7421   {"offset1", AMDGPUOperand::ImmTyOffset1, false, nullptr},
7422   {"gds",     AMDGPUOperand::ImmTyGDS, true, nullptr},
7423   {"lds",     AMDGPUOperand::ImmTyLDS, true, nullptr},
7424   {"offset",  AMDGPUOperand::ImmTyOffset, false, nullptr},
7425   {"inst_offset", AMDGPUOperand::ImmTyInstOffset, false, nullptr},
7426   {"",        AMDGPUOperand::ImmTyCPol, false, nullptr},
7427   {"swz",     AMDGPUOperand::ImmTySWZ, true, nullptr},
7428   {"tfe",     AMDGPUOperand::ImmTyTFE, true, nullptr},
7429   {"d16",     AMDGPUOperand::ImmTyD16, true, nullptr},
7430   {"high",    AMDGPUOperand::ImmTyHigh, true, nullptr},
7431   {"clamp",   AMDGPUOperand::ImmTyClampSI, true, nullptr},
7432   {"omod",    AMDGPUOperand::ImmTyOModSI, false, ConvertOmodMul},
7433   {"unorm",   AMDGPUOperand::ImmTyUNorm, true, nullptr},
7434   {"da",      AMDGPUOperand::ImmTyDA,    true, nullptr},
7435   {"r128",    AMDGPUOperand::ImmTyR128A16,  true, nullptr},
7436   {"a16",     AMDGPUOperand::ImmTyA16,  true, nullptr},
7437   {"lwe",     AMDGPUOperand::ImmTyLWE,   true, nullptr},
7438   {"d16",     AMDGPUOperand::ImmTyD16,   true, nullptr},
7439   {"dmask",   AMDGPUOperand::ImmTyDMask, false, nullptr},
7440   {"dim",     AMDGPUOperand::ImmTyDim,   false, nullptr},
7441   {"row_mask",   AMDGPUOperand::ImmTyDppRowMask, false, nullptr},
7442   {"bank_mask",  AMDGPUOperand::ImmTyDppBankMask, false, nullptr},
7443   {"bound_ctrl", AMDGPUOperand::ImmTyDppBoundCtrl, false, ConvertBoundCtrl},
7444   {"fi",         AMDGPUOperand::ImmTyDppFi, false, nullptr},
7445   {"dst_sel",    AMDGPUOperand::ImmTySdwaDstSel, false, nullptr},
7446   {"src0_sel",   AMDGPUOperand::ImmTySdwaSrc0Sel, false, nullptr},
7447   {"src1_sel",   AMDGPUOperand::ImmTySdwaSrc1Sel, false, nullptr},
7448   {"dst_unused", AMDGPUOperand::ImmTySdwaDstUnused, false, nullptr},
7449   {"compr", AMDGPUOperand::ImmTyExpCompr, true, nullptr },
7450   {"vm", AMDGPUOperand::ImmTyExpVM, true, nullptr},
7451   {"op_sel", AMDGPUOperand::ImmTyOpSel, false, nullptr},
7452   {"op_sel_hi", AMDGPUOperand::ImmTyOpSelHi, false, nullptr},
7453   {"neg_lo", AMDGPUOperand::ImmTyNegLo, false, nullptr},
7454   {"neg_hi", AMDGPUOperand::ImmTyNegHi, false, nullptr},
7455   {"blgp", AMDGPUOperand::ImmTyBLGP, false, nullptr},
7456   {"cbsz", AMDGPUOperand::ImmTyCBSZ, false, nullptr},
7457   {"abid", AMDGPUOperand::ImmTyABID, false, nullptr}
7458 };
7459 
7460 void AMDGPUAsmParser::onBeginOfFile() {
7461   if (!getParser().getStreamer().getTargetStreamer() ||
7462       getSTI().getTargetTriple().getArch() == Triple::r600)
7463     return;
7464 
7465   if (!getTargetStreamer().getTargetID())
7466     getTargetStreamer().initializeTargetID(getSTI(), getSTI().getFeatureString());
7467 
7468   if (isHsaAbiVersion3AndAbove(&getSTI()))
7469     getTargetStreamer().EmitDirectiveAMDGCNTarget();
7470 }
7471 
7472 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOperand(OperandVector &Operands) {
7473 
7474   OperandMatchResultTy res = parseOptionalOpr(Operands);
7475 
7476   // This is a hack to enable hardcoded mandatory operands which follow
7477   // optional operands.
7478   //
7479   // The current design assumes that all operands after the first optional
7480   // operand are also optional. However, the implementation of some
7481   // instructions violates this rule (e.g. flat/global atomics hardcode 'glc').
7482   //
7483   // To alleviate this problem, we have to (implicitly) parse extra operands
7484   // to make sure the autogenerated parser of custom operands never hits a
7485   // hardcoded mandatory operand.
7486 
7487   for (unsigned i = 0; i < MAX_OPR_LOOKAHEAD; ++i) {
7488     if (res != MatchOperand_Success ||
7489         isToken(AsmToken::EndOfStatement))
7490       break;
7491 
7492     trySkipToken(AsmToken::Comma);
7493     res = parseOptionalOpr(Operands);
7494   }
7495 
7496   return res;
7497 }
7498 
7499 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOpr(OperandVector &Operands) {
7500   OperandMatchResultTy res;
7501   for (const OptionalOperand &Op : AMDGPUOptionalOperandTable) {
7502     // try to parse any optional operand here
7503     if (Op.IsBit) {
7504       res = parseNamedBit(Op.Name, Operands, Op.Type);
7505     } else if (Op.Type == AMDGPUOperand::ImmTyOModSI) {
7506       res = parseOModOperand(Operands);
7507     } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstSel ||
7508                Op.Type == AMDGPUOperand::ImmTySdwaSrc0Sel ||
7509                Op.Type == AMDGPUOperand::ImmTySdwaSrc1Sel) {
7510       res = parseSDWASel(Operands, Op.Name, Op.Type);
7511     } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstUnused) {
7512       res = parseSDWADstUnused(Operands);
7513     } else if (Op.Type == AMDGPUOperand::ImmTyOpSel ||
7514                Op.Type == AMDGPUOperand::ImmTyOpSelHi ||
7515                Op.Type == AMDGPUOperand::ImmTyNegLo ||
7516                Op.Type == AMDGPUOperand::ImmTyNegHi) {
7517       res = parseOperandArrayWithPrefix(Op.Name, Operands, Op.Type,
7518                                         Op.ConvertResult);
7519     } else if (Op.Type == AMDGPUOperand::ImmTyDim) {
7520       res = parseDim(Operands);
7521     } else if (Op.Type == AMDGPUOperand::ImmTyCPol) {
7522       res = parseCPol(Operands);
7523     } else {
7524       res = parseIntWithPrefix(Op.Name, Operands, Op.Type, Op.ConvertResult);
7525     }
7526     if (res != MatchOperand_NoMatch) {
7527       return res;
7528     }
7529   }
7530   return MatchOperand_NoMatch;
7531 }
7532 
7533 OperandMatchResultTy AMDGPUAsmParser::parseOModOperand(OperandVector &Operands) {
7534   StringRef Name = getTokenStr();
7535   if (Name == "mul") {
7536     return parseIntWithPrefix("mul", Operands,
7537                               AMDGPUOperand::ImmTyOModSI, ConvertOmodMul);
7538   }
7539 
7540   if (Name == "div") {
7541     return parseIntWithPrefix("div", Operands,
7542                               AMDGPUOperand::ImmTyOModSI, ConvertOmodDiv);
7543   }
7544 
7545   return MatchOperand_NoMatch;
7546 }
7547 
7548 void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands) {
7549   cvtVOP3P(Inst, Operands);
7550 
7551   int Opc = Inst.getOpcode();
7552 
7553   int SrcNum;
7554   const int Ops[] = { AMDGPU::OpName::src0,
7555                       AMDGPU::OpName::src1,
7556                       AMDGPU::OpName::src2 };
7557   for (SrcNum = 0;
7558        SrcNum < 3 && AMDGPU::getNamedOperandIdx(Opc, Ops[SrcNum]) != -1;
7559        ++SrcNum);
7560   assert(SrcNum > 0);
7561 
7562   int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
7563   unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
7564 
7565   if ((OpSel & (1 << SrcNum)) != 0) {
7566     int ModIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers);
7567     uint32_t ModVal = Inst.getOperand(ModIdx).getImm();
7568     Inst.getOperand(ModIdx).setImm(ModVal | SISrcMods::DST_OP_SEL);
7569   }
7570 }
7571 
7572 static bool isRegOrImmWithInputMods(const MCInstrDesc &Desc, unsigned OpNum) {
7573       // 1. This operand is an input modifiers operand
7574   return Desc.OpInfo[OpNum].OperandType == AMDGPU::OPERAND_INPUT_MODS
7575       // 2. This is not the last operand
7576       && Desc.NumOperands > (OpNum + 1)
7577       // 3. The next operand is a register class
7578       && Desc.OpInfo[OpNum + 1].RegClass != -1
7579       // 4. The next register is not tied to any other operand
7580       && Desc.getOperandConstraint(OpNum + 1, MCOI::OperandConstraint::TIED_TO) == -1;
7581 }
7582 
7583 void AMDGPUAsmParser::cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands)
7584 {
7585   OptionalImmIndexMap OptionalIdx;
7586   unsigned Opc = Inst.getOpcode();
7587 
7588   unsigned I = 1;
7589   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
7590   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
7591     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
7592   }
7593 
7594   for (unsigned E = Operands.size(); I != E; ++I) {
7595     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
7596     if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
7597       Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
7598     } else if (Op.isInterpSlot() ||
7599                Op.isInterpAttr() ||
7600                Op.isAttrChan()) {
7601       Inst.addOperand(MCOperand::createImm(Op.getImm()));
7602     } else if (Op.isImmModifier()) {
7603       OptionalIdx[Op.getImmTy()] = I;
7604     } else {
7605       llvm_unreachable("unhandled operand type");
7606     }
7607   }
7608 
7609   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::high) != -1) {
7610     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyHigh);
7611   }
7612 
7613   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) {
7614     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI);
7615   }
7616 
7617   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) {
7618     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI);
7619   }
7620 }
7621 
7622 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands,
7623                               OptionalImmIndexMap &OptionalIdx) {
7624   unsigned Opc = Inst.getOpcode();
7625 
7626   unsigned I = 1;
7627   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
7628   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
7629     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
7630   }
7631 
7632   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers) != -1) {
7633     // This instruction has src modifiers
7634     for (unsigned E = Operands.size(); I != E; ++I) {
7635       AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
7636       if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
7637         Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
7638       } else if (Op.isImmModifier()) {
7639         OptionalIdx[Op.getImmTy()] = I;
7640       } else if (Op.isRegOrImm()) {
7641         Op.addRegOrImmOperands(Inst, 1);
7642       } else {
7643         llvm_unreachable("unhandled operand type");
7644       }
7645     }
7646   } else {
7647     // No src modifiers
7648     for (unsigned E = Operands.size(); I != E; ++I) {
7649       AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
7650       if (Op.isMod()) {
7651         OptionalIdx[Op.getImmTy()] = I;
7652       } else {
7653         Op.addRegOrImmOperands(Inst, 1);
7654       }
7655     }
7656   }
7657 
7658   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) {
7659     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI);
7660   }
7661 
7662   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) {
7663     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI);
7664   }
7665 
7666   // Special case v_mac_{f16, f32} and v_fmac_{f16, f32} (gfx906/gfx10+):
7667   // they have a src2 register operand that is tied to the dst operand.
7668   // We don't allow modifiers for this operand in the assembler, so
7669   // src2_modifiers should be 0.
7670   if (Opc == AMDGPU::V_MAC_F32_e64_gfx6_gfx7 ||
7671       Opc == AMDGPU::V_MAC_F32_e64_gfx10 ||
7672       Opc == AMDGPU::V_MAC_F32_e64_vi ||
7673       Opc == AMDGPU::V_MAC_LEGACY_F32_e64_gfx6_gfx7 ||
7674       Opc == AMDGPU::V_MAC_LEGACY_F32_e64_gfx10 ||
7675       Opc == AMDGPU::V_MAC_F16_e64_vi ||
7676       Opc == AMDGPU::V_FMAC_F64_e64_gfx90a ||
7677       Opc == AMDGPU::V_FMAC_F32_e64_gfx10 ||
7678       Opc == AMDGPU::V_FMAC_F32_e64_vi ||
7679       Opc == AMDGPU::V_FMAC_LEGACY_F32_e64_gfx10 ||
7680       Opc == AMDGPU::V_FMAC_F16_e64_gfx10) {
7681     auto it = Inst.begin();
7682     std::advance(it, AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2_modifiers));
7683     it = Inst.insert(it, MCOperand::createImm(0)); // no modifiers for src2
7684     ++it;
7685     // Copy the operand to ensure it's not invalidated when Inst grows.
7686     Inst.insert(it, MCOperand(Inst.getOperand(0))); // src2 = dst
7687   }
7688 }
7689 
7690 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands) {
7691   OptionalImmIndexMap OptionalIdx;
7692   cvtVOP3(Inst, Operands, OptionalIdx);
7693 }
7694 
7695 void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands,
7696                                OptionalImmIndexMap &OptIdx) {
7697   const int Opc = Inst.getOpcode();
7698   const MCInstrDesc &Desc = MII.get(Opc);
7699 
7700   const bool IsPacked = (Desc.TSFlags & SIInstrFlags::IsPacked) != 0;
7701 
7702   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst_in) != -1) {
7703     assert(!IsPacked);
7704     Inst.addOperand(Inst.getOperand(0));
7705   }
7706 
7707   // FIXME: This is messy. Parse the modifiers as if it were a normal VOP3
7708   // instruction, and then figure out where to actually put the modifiers.
7709 
7710   int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
7711   if (OpSelIdx != -1) {
7712     addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSel);
7713   }
7714 
7715   int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi);
7716   if (OpSelHiIdx != -1) {
7717     int DefaultVal = IsPacked ? -1 : 0;
7718     addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSelHi,
7719                           DefaultVal);
7720   }
7721 
7722   int NegLoIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_lo);
7723   if (NegLoIdx != -1) {
7724     addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegLo);
7725     addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegHi);
7726   }
7727 
7728   const int Ops[] = { AMDGPU::OpName::src0,
7729                       AMDGPU::OpName::src1,
7730                       AMDGPU::OpName::src2 };
7731   const int ModOps[] = { AMDGPU::OpName::src0_modifiers,
7732                          AMDGPU::OpName::src1_modifiers,
7733                          AMDGPU::OpName::src2_modifiers };
7734 
7735   unsigned OpSel = 0;
7736   unsigned OpSelHi = 0;
7737   unsigned NegLo = 0;
7738   unsigned NegHi = 0;
7739 
7740   if (OpSelIdx != -1)
7741     OpSel = Inst.getOperand(OpSelIdx).getImm();
7742 
7743   if (OpSelHiIdx != -1)
7744     OpSelHi = Inst.getOperand(OpSelHiIdx).getImm();
7745 
7746   if (NegLoIdx != -1) {
7747     int NegHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_hi);
7748     NegLo = Inst.getOperand(NegLoIdx).getImm();
7749     NegHi = Inst.getOperand(NegHiIdx).getImm();
7750   }
7751 
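  // Fold the collected op_sel/op_sel_hi/neg_lo/neg_hi bits into the
  // corresponding srcN_modifiers operand for each source.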
7752   for (int J = 0; J < 3; ++J) {
7753     int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]);
7754     if (OpIdx == -1)
7755       break;
7756 
7757     uint32_t ModVal = 0;
7758 
7759     if ((OpSel & (1 << J)) != 0)
7760       ModVal |= SISrcMods::OP_SEL_0;
7761 
7762     if ((OpSelHi & (1 << J)) != 0)
7763       ModVal |= SISrcMods::OP_SEL_1;
7764 
7765     if ((NegLo & (1 << J)) != 0)
7766       ModVal |= SISrcMods::NEG;
7767 
7768     if ((NegHi & (1 << J)) != 0)
7769       ModVal |= SISrcMods::NEG_HI;
7770 
7771     int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]);
7772 
7773     Inst.getOperand(ModIdx).setImm(Inst.getOperand(ModIdx).getImm() | ModVal);
7774   }
7775 }
7776 
7777 void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands) {
7778   OptionalImmIndexMap OptIdx;
7779   cvtVOP3(Inst, Operands, OptIdx);
7780   cvtVOP3P(Inst, Operands, OptIdx);
7781 }
7782 
7783 //===----------------------------------------------------------------------===//
7784 // dpp
7785 //===----------------------------------------------------------------------===//
7786 
7787 bool AMDGPUOperand::isDPP8() const {
7788   return isImmTy(ImmTyDPP8);
7789 }
7790 
7791 bool AMDGPUOperand::isDPPCtrl() const {
7792   using namespace AMDGPU::DPP;
7793 
7794   bool result = isImm() && getImmTy() == ImmTyDppCtrl && isUInt<9>(getImm());
7795   if (result) {
7796     int64_t Imm = getImm();
7797     return (Imm >= DppCtrl::QUAD_PERM_FIRST && Imm <= DppCtrl::QUAD_PERM_LAST) ||
7798            (Imm >= DppCtrl::ROW_SHL_FIRST && Imm <= DppCtrl::ROW_SHL_LAST) ||
7799            (Imm >= DppCtrl::ROW_SHR_FIRST && Imm <= DppCtrl::ROW_SHR_LAST) ||
7800            (Imm >= DppCtrl::ROW_ROR_FIRST && Imm <= DppCtrl::ROW_ROR_LAST) ||
7801            (Imm == DppCtrl::WAVE_SHL1) ||
7802            (Imm == DppCtrl::WAVE_ROL1) ||
7803            (Imm == DppCtrl::WAVE_SHR1) ||
7804            (Imm == DppCtrl::WAVE_ROR1) ||
7805            (Imm == DppCtrl::ROW_MIRROR) ||
7806            (Imm == DppCtrl::ROW_HALF_MIRROR) ||
7807            (Imm == DppCtrl::BCAST15) ||
7808            (Imm == DppCtrl::BCAST31) ||
7809            (Imm >= DppCtrl::ROW_SHARE_FIRST && Imm <= DppCtrl::ROW_SHARE_LAST) ||
7810            (Imm >= DppCtrl::ROW_XMASK_FIRST && Imm <= DppCtrl::ROW_XMASK_LAST);
7811   }
7812   return false;
7813 }
7814 
7815 //===----------------------------------------------------------------------===//
7816 // mAI
7817 //===----------------------------------------------------------------------===//
7818 
7819 bool AMDGPUOperand::isBLGP() const {
7820   return isImm() && getImmTy() == ImmTyBLGP && isUInt<3>(getImm());
7821 }
7822 
7823 bool AMDGPUOperand::isCBSZ() const {
7824   return isImm() && getImmTy() == ImmTyCBSZ && isUInt<3>(getImm());
7825 }
7826 
7827 bool AMDGPUOperand::isABID() const {
7828   return isImm() && getImmTy() == ImmTyABID && isUInt<4>(getImm());
7829 }
7830 
7831 bool AMDGPUOperand::isS16Imm() const {
7832   return isImm() && (isInt<16>(getImm()) || isUInt<16>(getImm()));
7833 }
7834 
7835 bool AMDGPUOperand::isU16Imm() const {
7836   return isImm() && isUInt<16>(getImm());
7837 }
7838 
7839 //===----------------------------------------------------------------------===//
7840 // dim
7841 //===----------------------------------------------------------------------===//
7842 
7843 bool AMDGPUAsmParser::parseDimId(unsigned &Encoding) {
7844   // We want to allow "dim:1D" etc.,
7845   // but the initial 1 is tokenized as an integer.
7846   std::string Token;
7847   if (isToken(AsmToken::Integer)) {
7848     SMLoc Loc = getToken().getEndLoc();
7849     Token = std::string(getTokenStr());
7850     lex();
7851     if (getLoc() != Loc)
7852       return false;
7853   }
7854 
7855   StringRef Suffix;
7856   if (!parseId(Suffix))
7857     return false;
7858   Token += Suffix;
7859 
7860   StringRef DimId = Token;
7861   if (DimId.startswith("SQ_RSRC_IMG_"))
7862     DimId = DimId.drop_front(12);
7863 
7864   const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByAsmSuffix(DimId);
7865   if (!DimInfo)
7866     return false;
7867 
7868   Encoding = DimInfo->Encoding;
7869   return true;
7870 }
7871 
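// Parse an optional "dim:" operand (GFX10+ MIMG image dimension).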
7872 OperandMatchResultTy AMDGPUAsmParser::parseDim(OperandVector &Operands) {
7873   if (!isGFX10Plus())
7874     return MatchOperand_NoMatch;
7875 
7876   SMLoc S = getLoc();
7877 
7878   if (!trySkipId("dim", AsmToken::Colon))
7879     return MatchOperand_NoMatch;
7880 
7881   unsigned Encoding;
7882   SMLoc Loc = getLoc();
7883   if (!parseDimId(Encoding)) {
7884     Error(Loc, "invalid dim value");
7885     return MatchOperand_ParseFail;
7886   }
7887 
7888   Operands.push_back(AMDGPUOperand::CreateImm(this, Encoding, S,
7889                                               AMDGPUOperand::ImmTyDim));
7890   return MatchOperand_Success;
7891 }
7892 
7893 //===----------------------------------------------------------------------===//
7894 // dpp
7895 //===----------------------------------------------------------------------===//
7896 
7897 OperandMatchResultTy AMDGPUAsmParser::parseDPP8(OperandVector &Operands) {
7898   SMLoc S = getLoc();
7899 
7900   if (!isGFX10Plus() || !trySkipId("dpp8", AsmToken::Colon))
7901     return MatchOperand_NoMatch;
7902 
7903   // dpp8:[%d,%d,%d,%d,%d,%d,%d,%d]
7904 
7905   int64_t Sels[8];
7906 
7907   if (!skipToken(AsmToken::LBrac, "expected an opening square bracket"))
7908     return MatchOperand_ParseFail;
7909 
7910   for (size_t i = 0; i < 8; ++i) {
7911     if (i > 0 && !skipToken(AsmToken::Comma, "expected a comma"))
7912       return MatchOperand_ParseFail;
7913 
7914     SMLoc Loc = getLoc();
7915     if (getParser().parseAbsoluteExpression(Sels[i]))
7916       return MatchOperand_ParseFail;
    if (Sels[i] < 0 || Sels[i] > 7) {
7918       Error(Loc, "expected a 3-bit value");
7919       return MatchOperand_ParseFail;
7920     }
7921   }
7922 
7923   if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
7924     return MatchOperand_ParseFail;
7925 
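  // Pack the eight 3-bit lane selectors into a single dpp8 immediate.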
7926   unsigned DPP8 = 0;
7927   for (size_t i = 0; i < 8; ++i)
7928     DPP8 |= (Sels[i] << (i * 3));
7929 
7930   Operands.push_back(AMDGPUOperand::CreateImm(this, DPP8, S, AMDGPUOperand::ImmTyDPP8));
7931   return MatchOperand_Success;
7932 }
7933 
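// Check whether the given dpp_ctrl mnemonic is supported on the current
// subtarget.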
7934 bool
7935 AMDGPUAsmParser::isSupportedDPPCtrl(StringRef Ctrl,
7936                                     const OperandVector &Operands) {
7937   if (Ctrl == "row_newbcast")
7938     return isGFX90A();
7939 
7940   if (Ctrl == "row_share" ||
7941       Ctrl == "row_xmask")
7942     return isGFX10Plus();
7943 
7944   if (Ctrl == "wave_shl" ||
7945       Ctrl == "wave_shr" ||
7946       Ctrl == "wave_rol" ||
7947       Ctrl == "wave_ror" ||
7948       Ctrl == "row_bcast")
7949     return isVI() || isGFX9();
7950 
7951   return Ctrl == "row_mirror" ||
7952          Ctrl == "row_half_mirror" ||
7953          Ctrl == "quad_perm" ||
7954          Ctrl == "row_shl" ||
7955          Ctrl == "row_shr" ||
7956          Ctrl == "row_ror";
7957 }
7958 
7959 int64_t
7960 AMDGPUAsmParser::parseDPPCtrlPerm() {
7961   // quad_perm:[%d,%d,%d,%d]
7962 
7963   if (!skipToken(AsmToken::LBrac, "expected an opening square bracket"))
7964     return -1;
7965 
7966   int64_t Val = 0;
7967   for (int i = 0; i < 4; ++i) {
7968     if (i > 0 && !skipToken(AsmToken::Comma, "expected a comma"))
7969       return -1;
7970 
7971     int64_t Temp;
7972     SMLoc Loc = getLoc();
7973     if (getParser().parseAbsoluteExpression(Temp))
7974       return -1;
7975     if (Temp < 0 || Temp > 3) {
7976       Error(Loc, "expected a 2-bit value");
7977       return -1;
7978     }
7979 
    Val += (Temp << (i * 2));
7981   }
7982 
7983   if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
7984     return -1;
7985 
7986   return Val;
7987 }
7988 
7989 int64_t
7990 AMDGPUAsmParser::parseDPPCtrlSel(StringRef Ctrl) {
7991   using namespace AMDGPU::DPP;
7992 
7993   // sel:%d
7994 
7995   int64_t Val;
7996   SMLoc Loc = getLoc();
7997 
7998   if (getParser().parseAbsoluteExpression(Val))
7999     return -1;
8000 
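  // For each dpp_ctrl prefix, the base encoding and the inclusive range of
  // values accepted after the colon.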
8001   struct DppCtrlCheck {
8002     int64_t Ctrl;
8003     int Lo;
8004     int Hi;
8005   };
8006 
8007   DppCtrlCheck Check = StringSwitch<DppCtrlCheck>(Ctrl)
8008     .Case("wave_shl",  {DppCtrl::WAVE_SHL1,       1,  1})
8009     .Case("wave_rol",  {DppCtrl::WAVE_ROL1,       1,  1})
8010     .Case("wave_shr",  {DppCtrl::WAVE_SHR1,       1,  1})
8011     .Case("wave_ror",  {DppCtrl::WAVE_ROR1,       1,  1})
8012     .Case("row_shl",   {DppCtrl::ROW_SHL0,        1, 15})
8013     .Case("row_shr",   {DppCtrl::ROW_SHR0,        1, 15})
8014     .Case("row_ror",   {DppCtrl::ROW_ROR0,        1, 15})
8015     .Case("row_share", {DppCtrl::ROW_SHARE_FIRST, 0, 15})
8016     .Case("row_xmask", {DppCtrl::ROW_XMASK_FIRST, 0, 15})
8017     .Case("row_newbcast", {DppCtrl::ROW_NEWBCAST_FIRST, 0, 15})
8018     .Default({-1, 0, 0});
8019 
8020   bool Valid;
8021   if (Check.Ctrl == -1) {
8022     Valid = (Ctrl == "row_bcast" && (Val == 15 || Val == 31));
    Val = (Val == 15) ? DppCtrl::BCAST15 : DppCtrl::BCAST31;
8024   } else {
8025     Valid = Check.Lo <= Val && Val <= Check.Hi;
8026     Val = (Check.Lo == Check.Hi) ? Check.Ctrl : (Check.Ctrl | Val);
8027   }
8028 
8029   if (!Valid) {
8030     Error(Loc, Twine("invalid ", Ctrl) + Twine(" value"));
8031     return -1;
8032   }
8033 
8034   return Val;
8035 }
8036 
8037 OperandMatchResultTy
8038 AMDGPUAsmParser::parseDPPCtrl(OperandVector &Operands) {
8039   using namespace AMDGPU::DPP;
8040 
8041   if (!isToken(AsmToken::Identifier) ||
8042       !isSupportedDPPCtrl(getTokenStr(), Operands))
8043     return MatchOperand_NoMatch;
8044 
8045   SMLoc S = getLoc();
8046   int64_t Val = -1;
8047   StringRef Ctrl;
8048 
8049   parseId(Ctrl);
8050 
8051   if (Ctrl == "row_mirror") {
8052     Val = DppCtrl::ROW_MIRROR;
8053   } else if (Ctrl == "row_half_mirror") {
8054     Val = DppCtrl::ROW_HALF_MIRROR;
8055   } else {
8056     if (skipToken(AsmToken::Colon, "expected a colon")) {
8057       if (Ctrl == "quad_perm") {
8058         Val = parseDPPCtrlPerm();
8059       } else {
8060         Val = parseDPPCtrlSel(Ctrl);
8061       }
8062     }
8063   }
8064 
8065   if (Val == -1)
8066     return MatchOperand_ParseFail;
8067 
8068   Operands.push_back(
8069     AMDGPUOperand::CreateImm(this, Val, S, AMDGPUOperand::ImmTyDppCtrl));
8070   return MatchOperand_Success;
8071 }
8072 
8073 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultRowMask() const {
8074   return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppRowMask);
8075 }
8076 
8077 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultEndpgmImmOperands() const {
8078   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyEndpgm);
8079 }
8080 
8081 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBankMask() const {
8082   return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppBankMask);
8083 }
8084 
8085 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBoundCtrl() const {
8086   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppBoundCtrl);
8087 }
8088 
8089 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultFI() const {
8090   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppFi);
8091 }
8092 
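// Convert parsed DPP (or DPP8 when IsDPP8 is set) operands into an MCInst and
// append defaults for any optional operands that were omitted.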
8093 void AMDGPUAsmParser::cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8) {
8094   OptionalImmIndexMap OptionalIdx;
8095 
8096   unsigned Opc = Inst.getOpcode();
8097   bool HasModifiers =
8098       AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers) != -1;
8099   unsigned I = 1;
8100   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
8101   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
8102     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
8103   }
8104 
8105   int Fi = 0;
8106   for (unsigned E = Operands.size(); I != E; ++I) {
8107     auto TiedTo = Desc.getOperandConstraint(Inst.getNumOperands(),
8108                                             MCOI::TIED_TO);
8109     if (TiedTo != -1) {
8110       assert((unsigned)TiedTo < Inst.getNumOperands());
      // Handle the tied 'old' or src2 operand for MAC instructions.
8112       Inst.addOperand(Inst.getOperand(TiedTo));
8113     }
8114     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
8115     // Add the register arguments
8116     if (Op.isReg() && validateVccOperand(Op.getReg())) {
      // VOP2b (v_add_u32, v_sub_u32 ...) DPP uses the "vcc" token.
      // Skip it.
8119       continue;
8120     }
8121 
8122     if (IsDPP8) {
8123       if (Op.isDPP8()) {
8124         Op.addImmOperands(Inst, 1);
8125       } else if (HasModifiers &&
8126                  isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
8127         Op.addRegWithFPInputModsOperands(Inst, 2);
8128       } else if (Op.isFI()) {
8129         Fi = Op.getImm();
8130       } else if (Op.isReg()) {
8131         Op.addRegOperands(Inst, 1);
8132       } else {
8133         llvm_unreachable("Invalid operand type");
8134       }
8135     } else {
8136       if (HasModifiers &&
8137           isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
8138         Op.addRegWithFPInputModsOperands(Inst, 2);
8139       } else if (Op.isReg()) {
8140         Op.addRegOperands(Inst, 1);
8141       } else if (Op.isDPPCtrl()) {
8142         Op.addImmOperands(Inst, 1);
8143       } else if (Op.isImm()) {
8144         // Handle optional arguments
8145         OptionalIdx[Op.getImmTy()] = I;
8146       } else {
8147         llvm_unreachable("Invalid operand type");
8148       }
8149     }
8150   }
8151 
8152   if (IsDPP8) {
8153     using namespace llvm::AMDGPU::DPP;
    Inst.addOperand(MCOperand::createImm(Fi ? DPP8_FI_1 : DPP8_FI_0));
8155   } else {
8156     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppRowMask, 0xf);
8157     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBankMask, 0xf);
8158     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBoundCtrl);
8159     if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::fi) != -1) {
8160       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppFi);
8161     }
8162   }
8163 }
8164 
8165 //===----------------------------------------------------------------------===//
8166 // sdwa
8167 //===----------------------------------------------------------------------===//
8168 
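// Parse an SDWA selector such as "dst_sel:BYTE_0" or "src0_sel:WORD_1", where
// Prefix names the selector being parsed.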
8169 OperandMatchResultTy
8170 AMDGPUAsmParser::parseSDWASel(OperandVector &Operands, StringRef Prefix,
8171                               AMDGPUOperand::ImmTy Type) {
8172   using namespace llvm::AMDGPU::SDWA;
8173 
8174   SMLoc S = getLoc();
8175   StringRef Value;
8176   OperandMatchResultTy res;
8177 
8178   SMLoc StringLoc;
8179   res = parseStringWithPrefix(Prefix, Value, StringLoc);
8180   if (res != MatchOperand_Success) {
8181     return res;
8182   }
8183 
8184   int64_t Int;
8185   Int = StringSwitch<int64_t>(Value)
8186         .Case("BYTE_0", SdwaSel::BYTE_0)
8187         .Case("BYTE_1", SdwaSel::BYTE_1)
8188         .Case("BYTE_2", SdwaSel::BYTE_2)
8189         .Case("BYTE_3", SdwaSel::BYTE_3)
8190         .Case("WORD_0", SdwaSel::WORD_0)
8191         .Case("WORD_1", SdwaSel::WORD_1)
8192         .Case("DWORD", SdwaSel::DWORD)
8193         .Default(0xffffffff);
8194 
8195   if (Int == 0xffffffff) {
8196     Error(StringLoc, "invalid " + Twine(Prefix) + " value");
8197     return MatchOperand_ParseFail;
8198   }
8199 
8200   Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, Type));
8201   return MatchOperand_Success;
8202 }
8203 
8204 OperandMatchResultTy
8205 AMDGPUAsmParser::parseSDWADstUnused(OperandVector &Operands) {
8206   using namespace llvm::AMDGPU::SDWA;
8207 
8208   SMLoc S = getLoc();
8209   StringRef Value;
8210   OperandMatchResultTy res;
8211 
8212   SMLoc StringLoc;
8213   res = parseStringWithPrefix("dst_unused", Value, StringLoc);
8214   if (res != MatchOperand_Success) {
8215     return res;
8216   }
8217 
8218   int64_t Int;
8219   Int = StringSwitch<int64_t>(Value)
8220         .Case("UNUSED_PAD", DstUnused::UNUSED_PAD)
8221         .Case("UNUSED_SEXT", DstUnused::UNUSED_SEXT)
8222         .Case("UNUSED_PRESERVE", DstUnused::UNUSED_PRESERVE)
8223         .Default(0xffffffff);
8224 
8225   if (Int == 0xffffffff) {
8226     Error(StringLoc, "invalid dst_unused value");
8227     return MatchOperand_ParseFail;
8228   }
8229 
8230   Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, AMDGPUOperand::ImmTySdwaDstUnused));
8231   return MatchOperand_Success;
8232 }
8233 
8234 void AMDGPUAsmParser::cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands) {
8235   cvtSDWA(Inst, Operands, SIInstrFlags::VOP1);
8236 }
8237 
8238 void AMDGPUAsmParser::cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands) {
8239   cvtSDWA(Inst, Operands, SIInstrFlags::VOP2);
8240 }
8241 
8242 void AMDGPUAsmParser::cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands) {
8243   cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, true, true);
8244 }
8245 
8246 void AMDGPUAsmParser::cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands) {
8247   cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, false, true);
8248 }
8249 
8250 void AMDGPUAsmParser::cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands) {
8251   cvtSDWA(Inst, Operands, SIInstrFlags::VOPC, isVI());
8252 }
8253 
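// Common SDWA conversion: add the explicit operands, then append defaults for
// any optional modifiers and selectors based on the basic instruction type
// (VOP1, VOP2 or VOPC).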
8254 void AMDGPUAsmParser::cvtSDWA(MCInst &Inst, const OperandVector &Operands,
8255                               uint64_t BasicInstType,
8256                               bool SkipDstVcc,
8257                               bool SkipSrcVcc) {
8258   using namespace llvm::AMDGPU::SDWA;
8259 
8260   OptionalImmIndexMap OptionalIdx;
8261   bool SkipVcc = SkipDstVcc || SkipSrcVcc;
8262   bool SkippedVcc = false;
8263 
8264   unsigned I = 1;
8265   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
8266   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
8267     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
8268   }
8269 
8270   for (unsigned E = Operands.size(); I != E; ++I) {
8271     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
8272     if (SkipVcc && !SkippedVcc && Op.isReg() &&
8273         (Op.getReg() == AMDGPU::VCC || Op.getReg() == AMDGPU::VCC_LO)) {
      // VOP2b (v_add_u32, v_sub_u32 ...) SDWA uses the "vcc" token as dst.
      // Skip it if it is the 2nd operand (e.g. v_add_i32_sdwa v1, vcc, v2, v3)
      // or the 4th operand (v_addc_u32_sdwa v1, vcc, v2, v3, vcc).
      // Skip VCC only if we didn't skip it on the previous iteration.
      // Note that src0 and src1 occupy 2 slots each because of modifiers.
8279       if (BasicInstType == SIInstrFlags::VOP2 &&
8280           ((SkipDstVcc && Inst.getNumOperands() == 1) ||
8281            (SkipSrcVcc && Inst.getNumOperands() == 5))) {
8282         SkippedVcc = true;
8283         continue;
8284       } else if (BasicInstType == SIInstrFlags::VOPC &&
8285                  Inst.getNumOperands() == 0) {
8286         SkippedVcc = true;
8287         continue;
8288       }
8289     }
8290     if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
8291       Op.addRegOrImmWithInputModsOperands(Inst, 2);
8292     } else if (Op.isImm()) {
8293       // Handle optional arguments
8294       OptionalIdx[Op.getImmTy()] = I;
8295     } else {
8296       llvm_unreachable("Invalid operand type");
8297     }
8298     SkippedVcc = false;
8299   }
8300 
8301   if (Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx10 &&
8302       Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx9 &&
8303       Inst.getOpcode() != AMDGPU::V_NOP_sdwa_vi) {
    // v_nop_sdwa_vi/gfx9/gfx10 have no optional SDWA arguments
8305     switch (BasicInstType) {
8306     case SIInstrFlags::VOP1:
8307       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
8308       if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) {
8309         addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
8310       }
8311       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD);
8312       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE);
8313       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
8314       break;
8315 
8316     case SIInstrFlags::VOP2:
8317       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
8318       if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) {
8319         addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
8320       }
8321       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD);
8322       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE);
8323       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
8324       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD);
8325       break;
8326 
8327     case SIInstrFlags::VOPC:
8328       if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::clamp) != -1)
8329         addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
8330       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
8331       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD);
8332       break;
8333 
8334     default:
8335       llvm_unreachable("Invalid instruction type. Only VOP1, VOP2 and VOPC allowed");
8336     }
8337   }
8338 
  // Special case v_mac_{f16, f32}:
  // these opcodes have a src2 register operand that is tied to the dst operand.
8341   if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
8342       Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi)  {
8343     auto it = Inst.begin();
8344     std::advance(
8345       it, AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::src2));
8346     Inst.insert(it, Inst.getOperand(0)); // src2 = dst
8347   }
8348 }
8349 
8350 //===----------------------------------------------------------------------===//
8351 // mAI
8352 //===----------------------------------------------------------------------===//
8353 
8354 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBLGP() const {
8355   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyBLGP);
8356 }
8357 
8358 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultCBSZ() const {
8359   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyCBSZ);
8360 }
8361 
8362 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultABID() const {
8363   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyABID);
8364 }
8365 
8366 /// Force static initialization.
8367 extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUAsmParser() {
8368   RegisterMCAsmParser<AMDGPUAsmParser> A(getTheAMDGPUTarget());
8369   RegisterMCAsmParser<AMDGPUAsmParser> B(getTheGCNTarget());
8370 }
8371 
8372 #define GET_REGISTER_MATCHER
8373 #define GET_MATCHER_IMPLEMENTATION
8374 #define GET_MNEMONIC_SPELL_CHECKER
8375 #define GET_MNEMONIC_CHECKER
8376 #include "AMDGPUGenAsmMatcher.inc"
8377 
// This function should be defined after the auto-generated include so that the
// MatchClassKind enum is available.
8380 unsigned AMDGPUAsmParser::validateTargetOperandClass(MCParsedAsmOperand &Op,
8381                                                      unsigned Kind) {
  // Tokens like "glc" would be parsed as immediate operands in ParseOperand().
  // But MatchInstructionImpl() expects a token and fails to validate the
  // operand. This method checks whether we were given an immediate operand
  // when the matcher expects the corresponding token.
8386   AMDGPUOperand &Operand = (AMDGPUOperand&)Op;
8387   switch (Kind) {
8388   case MCK_addr64:
8389     return Operand.isAddr64() ? Match_Success : Match_InvalidOperand;
8390   case MCK_gds:
8391     return Operand.isGDS() ? Match_Success : Match_InvalidOperand;
8392   case MCK_lds:
8393     return Operand.isLDS() ? Match_Success : Match_InvalidOperand;
8394   case MCK_idxen:
8395     return Operand.isIdxen() ? Match_Success : Match_InvalidOperand;
8396   case MCK_offen:
8397     return Operand.isOffen() ? Match_Success : Match_InvalidOperand;
8398   case MCK_SSrcB32:
    // When operands have expression values, they will return true for isToken,
    // because it is not possible to distinguish between a token and an
    // expression at parse time. MatchInstructionImpl() will always try to
    // match an operand as a token when isToken returns true, and when the
    // name of the expression is not a valid token the match will fail,
    // so we need to handle it here.
8405     return Operand.isSSrcB32() ? Match_Success : Match_InvalidOperand;
8406   case MCK_SSrcF32:
8407     return Operand.isSSrcF32() ? Match_Success : Match_InvalidOperand;
8408   case MCK_SoppBrTarget:
8409     return Operand.isSoppBrTarget() ? Match_Success : Match_InvalidOperand;
8410   case MCK_VReg32OrOff:
8411     return Operand.isVReg32OrOff() ? Match_Success : Match_InvalidOperand;
8412   case MCK_InterpSlot:
8413     return Operand.isInterpSlot() ? Match_Success : Match_InvalidOperand;
8414   case MCK_Attr:
8415     return Operand.isInterpAttr() ? Match_Success : Match_InvalidOperand;
8416   case MCK_AttrChan:
8417     return Operand.isAttrChan() ? Match_Success : Match_InvalidOperand;
8418   case MCK_ImmSMEMOffset:
8419     return Operand.isSMEMOffset() ? Match_Success : Match_InvalidOperand;
8420   case MCK_SReg_64:
8421   case MCK_SReg_64_XEXEC:
    // Null is defined as a 32-bit register, but
    // it should also be usable with 64-bit operands.
    // The following code enables it for SReg_64 operands
    // used as source and destination. Remaining source
    // operands are handled in isInlinableImm.
8427     return Operand.isNull() ? Match_Success : Match_InvalidOperand;
8428   default:
8429     return Match_InvalidOperand;
8430   }
8431 }
8432 
8433 //===----------------------------------------------------------------------===//
8434 // endpgm
8435 //===----------------------------------------------------------------------===//
8436 
8437 OperandMatchResultTy AMDGPUAsmParser::parseEndpgmOp(OperandVector &Operands) {
8438   SMLoc S = getLoc();
8439   int64_t Imm = 0;
8440 
8441   if (!parseExpr(Imm)) {
    // The operand is optional; if not present, default to 0.
8443     Imm = 0;
8444   }
8445 
8446   if (!isUInt<16>(Imm)) {
8447     Error(S, "expected a 16-bit value");
8448     return MatchOperand_ParseFail;
8449   }
8450 
8451   Operands.push_back(
8452       AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyEndpgm));
8453   return MatchOperand_Success;
8454 }
8455 
8456 bool AMDGPUOperand::isEndpgm() const { return isImmTy(ImmTyEndpgm); }
8457