1 //===- AMDGPUAsmParser.cpp - Parse SI asm to MCInst instructions ----------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "AMDKernelCodeT.h"
10 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
11 #include "MCTargetDesc/AMDGPUTargetStreamer.h"
12 #include "SIDefines.h"
13 #include "SIInstrInfo.h"
14 #include "SIRegisterInfo.h"
15 #include "TargetInfo/AMDGPUTargetInfo.h"
16 #include "Utils/AMDGPUAsmUtils.h"
17 #include "Utils/AMDGPUBaseInfo.h"
18 #include "Utils/AMDKernelCodeTUtils.h"
19 #include "llvm/ADT/APFloat.h"
20 #include "llvm/ADT/SmallBitVector.h"
21 #include "llvm/ADT/StringSet.h"
22 #include "llvm/ADT/Twine.h"
23 #include "llvm/BinaryFormat/ELF.h"
24 #include "llvm/MC/MCAsmInfo.h"
25 #include "llvm/MC/MCContext.h"
26 #include "llvm/MC/MCExpr.h"
27 #include "llvm/MC/MCInst.h"
28 #include "llvm/MC/MCParser/MCAsmLexer.h"
29 #include "llvm/MC/MCParser/MCAsmParser.h"
30 #include "llvm/MC/MCParser/MCParsedAsmOperand.h"
31 #include "llvm/MC/MCParser/MCTargetAsmParser.h"
32 #include "llvm/MC/MCSymbol.h"
33 #include "llvm/MC/TargetRegistry.h"
34 #include "llvm/Support/AMDGPUMetadata.h"
35 #include "llvm/Support/AMDHSAKernelDescriptor.h"
36 #include "llvm/Support/Casting.h"
37 #include "llvm/Support/MachineValueType.h"
38 #include "llvm/Support/TargetParser.h"
39 
40 using namespace llvm;
41 using namespace llvm::AMDGPU;
42 using namespace llvm::amdhsa;
43 
44 namespace {
45 
46 class AMDGPUAsmParser;
47 
48 enum RegisterKind { IS_UNKNOWN, IS_VGPR, IS_SGPR, IS_AGPR, IS_TTMP, IS_SPECIAL };
49 
50 //===----------------------------------------------------------------------===//
51 // Operand
52 //===----------------------------------------------------------------------===//
53 
54 class AMDGPUOperand : public MCParsedAsmOperand {
55   enum KindTy {
56     Token,
57     Immediate,
58     Register,
59     Expression
60   } Kind;
61 
62   SMLoc StartLoc, EndLoc;
63   const AMDGPUAsmParser *AsmParser;
64 
65 public:
66   AMDGPUOperand(KindTy Kind_, const AMDGPUAsmParser *AsmParser_)
67       : Kind(Kind_), AsmParser(AsmParser_) {}
68 
69   using Ptr = std::unique_ptr<AMDGPUOperand>;
70 
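  // Source modifiers parsed on an operand. Abs and Neg are floating-point
  // modifiers and Sext is an integer modifier; the two groups are mutually
  // exclusive on a single operand. getModifiersOperand() packs them into the
  // SISrcMods bits used by the encoding, e.g. "-|v0|" yields
  // SISrcMods::NEG | SISrcMods::ABS.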
71   struct Modifiers {
72     bool Abs = false;
73     bool Neg = false;
74     bool Sext = false;
75 
76     bool hasFPModifiers() const { return Abs || Neg; }
77     bool hasIntModifiers() const { return Sext; }
78     bool hasModifiers() const { return hasFPModifiers() || hasIntModifiers(); }
79 
80     int64_t getFPModifiersOperand() const {
81       int64_t Operand = 0;
82       Operand |= Abs ? SISrcMods::ABS : 0u;
83       Operand |= Neg ? SISrcMods::NEG : 0u;
84       return Operand;
85     }
86 
87     int64_t getIntModifiersOperand() const {
88       int64_t Operand = 0;
89       Operand |= Sext ? SISrcMods::SEXT : 0u;
90       return Operand;
91     }
92 
93     int64_t getModifiersOperand() const {
94       assert(!(hasFPModifiers() && hasIntModifiers())
95            && "fp and int modifiers should not be used simultaneously");
96       if (hasFPModifiers()) {
97         return getFPModifiersOperand();
98       } else if (hasIntModifiers()) {
99         return getIntModifiersOperand();
100       } else {
101         return 0;
102       }
103     }
104 
105     friend raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods);
106   };
107 
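  // Classifies immediate-like operands: ImmTyNone is a plain literal value,
  // while the remaining values tag named instruction modifiers and fields
  // (DS/MUBUF offsets, DPP/SDWA controls, MIMG flags, etc.) so they can be
  // routed to the correct operand slot when building the MCInst.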
108   enum ImmTy {
109     ImmTyNone,
110     ImmTyGDS,
111     ImmTyLDS,
112     ImmTyOffen,
113     ImmTyIdxen,
114     ImmTyAddr64,
115     ImmTyOffset,
116     ImmTyInstOffset,
117     ImmTyOffset0,
118     ImmTyOffset1,
119     ImmTyCPol,
120     ImmTySWZ,
121     ImmTyTFE,
122     ImmTyD16,
123     ImmTyClampSI,
124     ImmTyOModSI,
125     ImmTyDPP8,
126     ImmTyDppCtrl,
127     ImmTyDppRowMask,
128     ImmTyDppBankMask,
129     ImmTyDppBoundCtrl,
130     ImmTyDppFi,
131     ImmTySdwaDstSel,
132     ImmTySdwaSrc0Sel,
133     ImmTySdwaSrc1Sel,
134     ImmTySdwaDstUnused,
135     ImmTyDMask,
136     ImmTyDim,
137     ImmTyUNorm,
138     ImmTyDA,
139     ImmTyR128A16,
140     ImmTyA16,
141     ImmTyLWE,
142     ImmTyExpTgt,
143     ImmTyExpCompr,
144     ImmTyExpVM,
145     ImmTyFORMAT,
146     ImmTyHwreg,
147     ImmTyOff,
148     ImmTySendMsg,
149     ImmTyInterpSlot,
150     ImmTyInterpAttr,
151     ImmTyAttrChan,
152     ImmTyOpSel,
153     ImmTyOpSelHi,
154     ImmTyNegLo,
155     ImmTyNegHi,
156     ImmTySwizzle,
157     ImmTyGprIdxMode,
158     ImmTyHigh,
159     ImmTyBLGP,
160     ImmTyCBSZ,
161     ImmTyABID,
162     ImmTyEndpgm,
163   };
164 
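  // Records how an immediate was (or will be) encoded: as a 32-bit literal in
  // the literal constant slot or as an inline constant in the source operand
  // field. Kept mutable so it can be updated while operands are added to an
  // MCInst.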
165   enum ImmKindTy {
166     ImmKindTyNone,
167     ImmKindTyLiteral,
168     ImmKindTyConst,
169   };
170 
171 private:
172   struct TokOp {
173     const char *Data;
174     unsigned Length;
175   };
176 
177   struct ImmOp {
178     int64_t Val;
179     ImmTy Type;
180     bool IsFPImm;
181     mutable ImmKindTy Kind;
182     Modifiers Mods;
183   };
184 
185   struct RegOp {
186     unsigned RegNo;
187     Modifiers Mods;
188   };
189 
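  // Discriminated by Kind: exactly one union member is valid for a given
  // operand.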
190   union {
191     TokOp Tok;
192     ImmOp Imm;
193     RegOp Reg;
194     const MCExpr *Expr;
195   };
196 
197 public:
198   bool isToken() const override {
199     if (Kind == Token)
200       return true;
201 
202     // When parsing operands, we can't always tell if something was meant to be
203     // a token, like 'gds', or an expression that references a global variable.
204     // In this case, we assume the string is an expression, and if we need to
    // interpret it as a token, then we treat the symbol name as the token.
206     return isSymbolRefExpr();
207   }
208 
209   bool isSymbolRefExpr() const {
210     return isExpr() && Expr && isa<MCSymbolRefExpr>(Expr);
211   }
212 
213   bool isImm() const override {
214     return Kind == Immediate;
215   }
216 
217   void setImmKindNone() const {
218     assert(isImm());
219     Imm.Kind = ImmKindTyNone;
220   }
221 
222   void setImmKindLiteral() const {
223     assert(isImm());
224     Imm.Kind = ImmKindTyLiteral;
225   }
226 
227   void setImmKindConst() const {
228     assert(isImm());
229     Imm.Kind = ImmKindTyConst;
230   }
231 
232   bool IsImmKindLiteral() const {
233     return isImm() && Imm.Kind == ImmKindTyLiteral;
234   }
235 
236   bool isImmKindConst() const {
237     return isImm() && Imm.Kind == ImmKindTyConst;
238   }
239 
240   bool isInlinableImm(MVT type) const;
241   bool isLiteralImm(MVT type) const;
242 
243   bool isRegKind() const {
244     return Kind == Register;
245   }
246 
247   bool isReg() const override {
248     return isRegKind() && !hasModifiers();
249   }
250 
251   bool isRegOrInline(unsigned RCID, MVT type) const {
252     return isRegClass(RCID) || isInlinableImm(type);
253   }
254 
255   bool isRegOrImmWithInputMods(unsigned RCID, MVT type) const {
256     return isRegOrInline(RCID, type) || isLiteralImm(type);
257   }
258 
259   bool isRegOrImmWithInt16InputMods() const {
260     return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i16);
261   }
262 
263   bool isRegOrImmWithInt32InputMods() const {
264     return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i32);
265   }
266 
267   bool isRegOrImmWithInt64InputMods() const {
268     return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::i64);
269   }
270 
271   bool isRegOrImmWithFP16InputMods() const {
272     return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f16);
273   }
274 
275   bool isRegOrImmWithFP32InputMods() const {
276     return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f32);
277   }
278 
279   bool isRegOrImmWithFP64InputMods() const {
280     return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::f64);
281   }
282 
283   bool isVReg() const {
284     return isRegClass(AMDGPU::VGPR_32RegClassID) ||
285            isRegClass(AMDGPU::VReg_64RegClassID) ||
286            isRegClass(AMDGPU::VReg_96RegClassID) ||
287            isRegClass(AMDGPU::VReg_128RegClassID) ||
288            isRegClass(AMDGPU::VReg_160RegClassID) ||
289            isRegClass(AMDGPU::VReg_192RegClassID) ||
290            isRegClass(AMDGPU::VReg_256RegClassID) ||
291            isRegClass(AMDGPU::VReg_512RegClassID) ||
292            isRegClass(AMDGPU::VReg_1024RegClassID);
293   }
294 
295   bool isVReg32() const {
296     return isRegClass(AMDGPU::VGPR_32RegClassID);
297   }
298 
299   bool isVReg32OrOff() const {
300     return isOff() || isVReg32();
301   }
302 
303   bool isNull() const {
304     return isRegKind() && getReg() == AMDGPU::SGPR_NULL;
305   }
306 
307   bool isVRegWithInputMods() const;
308 
309   bool isSDWAOperand(MVT type) const;
310   bool isSDWAFP16Operand() const;
311   bool isSDWAFP32Operand() const;
312   bool isSDWAInt16Operand() const;
313   bool isSDWAInt32Operand() const;
314 
315   bool isImmTy(ImmTy ImmT) const {
316     return isImm() && Imm.Type == ImmT;
317   }
318 
319   bool isImmModifier() const {
320     return isImm() && Imm.Type != ImmTyNone;
321   }
322 
323   bool isClampSI() const { return isImmTy(ImmTyClampSI); }
324   bool isOModSI() const { return isImmTy(ImmTyOModSI); }
325   bool isDMask() const { return isImmTy(ImmTyDMask); }
326   bool isDim() const { return isImmTy(ImmTyDim); }
327   bool isUNorm() const { return isImmTy(ImmTyUNorm); }
328   bool isDA() const { return isImmTy(ImmTyDA); }
329   bool isR128A16() const { return isImmTy(ImmTyR128A16); }
330   bool isGFX10A16() const { return isImmTy(ImmTyA16); }
331   bool isLWE() const { return isImmTy(ImmTyLWE); }
332   bool isOff() const { return isImmTy(ImmTyOff); }
333   bool isExpTgt() const { return isImmTy(ImmTyExpTgt); }
334   bool isExpVM() const { return isImmTy(ImmTyExpVM); }
335   bool isExpCompr() const { return isImmTy(ImmTyExpCompr); }
336   bool isOffen() const { return isImmTy(ImmTyOffen); }
337   bool isIdxen() const { return isImmTy(ImmTyIdxen); }
338   bool isAddr64() const { return isImmTy(ImmTyAddr64); }
339   bool isOffset() const { return isImmTy(ImmTyOffset) && isUInt<16>(getImm()); }
340   bool isOffset0() const { return isImmTy(ImmTyOffset0) && isUInt<8>(getImm()); }
341   bool isOffset1() const { return isImmTy(ImmTyOffset1) && isUInt<8>(getImm()); }
342 
343   bool isFlatOffset() const { return isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset); }
344   bool isGDS() const { return isImmTy(ImmTyGDS); }
345   bool isLDS() const { return isImmTy(ImmTyLDS); }
346   bool isCPol() const { return isImmTy(ImmTyCPol); }
347   bool isSWZ() const { return isImmTy(ImmTySWZ); }
348   bool isTFE() const { return isImmTy(ImmTyTFE); }
349   bool isD16() const { return isImmTy(ImmTyD16); }
350   bool isFORMAT() const { return isImmTy(ImmTyFORMAT) && isUInt<7>(getImm()); }
351   bool isBankMask() const { return isImmTy(ImmTyDppBankMask); }
352   bool isRowMask() const { return isImmTy(ImmTyDppRowMask); }
353   bool isBoundCtrl() const { return isImmTy(ImmTyDppBoundCtrl); }
354   bool isFI() const { return isImmTy(ImmTyDppFi); }
355   bool isSDWADstSel() const { return isImmTy(ImmTySdwaDstSel); }
356   bool isSDWASrc0Sel() const { return isImmTy(ImmTySdwaSrc0Sel); }
357   bool isSDWASrc1Sel() const { return isImmTy(ImmTySdwaSrc1Sel); }
358   bool isSDWADstUnused() const { return isImmTy(ImmTySdwaDstUnused); }
359   bool isInterpSlot() const { return isImmTy(ImmTyInterpSlot); }
360   bool isInterpAttr() const { return isImmTy(ImmTyInterpAttr); }
361   bool isAttrChan() const { return isImmTy(ImmTyAttrChan); }
362   bool isOpSel() const { return isImmTy(ImmTyOpSel); }
363   bool isOpSelHi() const { return isImmTy(ImmTyOpSelHi); }
364   bool isNegLo() const { return isImmTy(ImmTyNegLo); }
365   bool isNegHi() const { return isImmTy(ImmTyNegHi); }
366   bool isHigh() const { return isImmTy(ImmTyHigh); }
367 
368   bool isMod() const {
369     return isClampSI() || isOModSI();
370   }
371 
372   bool isRegOrImm() const {
373     return isReg() || isImm();
374   }
375 
376   bool isRegClass(unsigned RCID) const;
377 
378   bool isInlineValue() const;
379 
380   bool isRegOrInlineNoMods(unsigned RCID, MVT type) const {
381     return isRegOrInline(RCID, type) && !hasModifiers();
382   }
383 
384   bool isSCSrcB16() const {
385     return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i16);
386   }
387 
388   bool isSCSrcV2B16() const {
389     return isSCSrcB16();
390   }
391 
392   bool isSCSrcB32() const {
393     return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i32);
394   }
395 
396   bool isSCSrcB64() const {
397     return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::i64);
398   }
399 
400   bool isBoolReg() const;
401 
402   bool isSCSrcF16() const {
403     return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f16);
404   }
405 
406   bool isSCSrcV2F16() const {
407     return isSCSrcF16();
408   }
409 
410   bool isSCSrcF32() const {
411     return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f32);
412   }
413 
414   bool isSCSrcF64() const {
415     return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::f64);
416   }
417 
418   bool isSSrcB32() const {
419     return isSCSrcB32() || isLiteralImm(MVT::i32) || isExpr();
420   }
421 
422   bool isSSrcB16() const {
423     return isSCSrcB16() || isLiteralImm(MVT::i16);
424   }
425 
426   bool isSSrcV2B16() const {
427     llvm_unreachable("cannot happen");
428     return isSSrcB16();
429   }
430 
431   bool isSSrcB64() const {
432     // TODO: Find out how SALU supports extension of 32-bit literals to 64 bits.
    // See isVSrcB64().
434     return isSCSrcB64() || isLiteralImm(MVT::i64);
435   }
436 
437   bool isSSrcF32() const {
438     return isSCSrcB32() || isLiteralImm(MVT::f32) || isExpr();
439   }
440 
441   bool isSSrcF64() const {
442     return isSCSrcB64() || isLiteralImm(MVT::f64);
443   }
444 
445   bool isSSrcF16() const {
446     return isSCSrcB16() || isLiteralImm(MVT::f16);
447   }
448 
449   bool isSSrcV2F16() const {
450     llvm_unreachable("cannot happen");
451     return isSSrcF16();
452   }
453 
454   bool isSSrcV2FP32() const {
455     llvm_unreachable("cannot happen");
456     return isSSrcF32();
457   }
458 
459   bool isSCSrcV2FP32() const {
460     llvm_unreachable("cannot happen");
461     return isSCSrcF32();
462   }
463 
464   bool isSSrcV2INT32() const {
465     llvm_unreachable("cannot happen");
466     return isSSrcB32();
467   }
468 
469   bool isSCSrcV2INT32() const {
470     llvm_unreachable("cannot happen");
471     return isSCSrcB32();
472   }
473 
474   bool isSSrcOrLdsB32() const {
475     return isRegOrInlineNoMods(AMDGPU::SRegOrLds_32RegClassID, MVT::i32) ||
476            isLiteralImm(MVT::i32) || isExpr();
477   }
478 
479   bool isVCSrcB32() const {
480     return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i32);
481   }
482 
483   bool isVCSrcB64() const {
484     return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::i64);
485   }
486 
487   bool isVCSrcB16() const {
488     return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i16);
489   }
490 
491   bool isVCSrcV2B16() const {
492     return isVCSrcB16();
493   }
494 
495   bool isVCSrcF32() const {
496     return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f32);
497   }
498 
499   bool isVCSrcF64() const {
500     return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::f64);
501   }
502 
503   bool isVCSrcF16() const {
504     return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f16);
505   }
506 
507   bool isVCSrcV2F16() const {
508     return isVCSrcF16();
509   }
510 
511   bool isVSrcB32() const {
512     return isVCSrcF32() || isLiteralImm(MVT::i32) || isExpr();
513   }
514 
515   bool isVSrcB64() const {
516     return isVCSrcF64() || isLiteralImm(MVT::i64);
517   }
518 
519   bool isVSrcB16() const {
520     return isVCSrcB16() || isLiteralImm(MVT::i16);
521   }
522 
523   bool isVSrcV2B16() const {
524     return isVSrcB16() || isLiteralImm(MVT::v2i16);
525   }
526 
527   bool isVCSrcV2FP32() const {
528     return isVCSrcF64();
529   }
530 
531   bool isVSrcV2FP32() const {
532     return isVSrcF64() || isLiteralImm(MVT::v2f32);
533   }
534 
535   bool isVCSrcV2INT32() const {
536     return isVCSrcB64();
537   }
538 
539   bool isVSrcV2INT32() const {
540     return isVSrcB64() || isLiteralImm(MVT::v2i32);
541   }
542 
543   bool isVSrcF32() const {
544     return isVCSrcF32() || isLiteralImm(MVT::f32) || isExpr();
545   }
546 
547   bool isVSrcF64() const {
548     return isVCSrcF64() || isLiteralImm(MVT::f64);
549   }
550 
551   bool isVSrcF16() const {
552     return isVCSrcF16() || isLiteralImm(MVT::f16);
553   }
554 
555   bool isVSrcV2F16() const {
556     return isVSrcF16() || isLiteralImm(MVT::v2f16);
557   }
558 
559   bool isVISrcB32() const {
560     return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i32);
561   }
562 
563   bool isVISrcB16() const {
564     return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i16);
565   }
566 
567   bool isVISrcV2B16() const {
568     return isVISrcB16();
569   }
570 
571   bool isVISrcF32() const {
572     return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f32);
573   }
574 
575   bool isVISrcF16() const {
576     return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f16);
577   }
578 
579   bool isVISrcV2F16() const {
580     return isVISrcF16() || isVISrcB32();
581   }
582 
583   bool isVISrc_64B64() const {
584     return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i64);
585   }
586 
587   bool isVISrc_64F64() const {
588     return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f64);
589   }
590 
591   bool isVISrc_64V2FP32() const {
592     return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f32);
593   }
594 
595   bool isVISrc_64V2INT32() const {
596     return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i32);
597   }
598 
599   bool isVISrc_256B64() const {
600     return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i64);
601   }
602 
603   bool isVISrc_256F64() const {
604     return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f64);
605   }
606 
607   bool isVISrc_128B16() const {
608     return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i16);
609   }
610 
611   bool isVISrc_128V2B16() const {
612     return isVISrc_128B16();
613   }
614 
615   bool isVISrc_128B32() const {
616     return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i32);
617   }
618 
619   bool isVISrc_128F32() const {
620     return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f32);
621   }
622 
623   bool isVISrc_256V2FP32() const {
624     return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f32);
625   }
626 
627   bool isVISrc_256V2INT32() const {
628     return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i32);
629   }
630 
631   bool isVISrc_512B32() const {
632     return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i32);
633   }
634 
635   bool isVISrc_512B16() const {
636     return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i16);
637   }
638 
639   bool isVISrc_512V2B16() const {
640     return isVISrc_512B16();
641   }
642 
643   bool isVISrc_512F32() const {
644     return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f32);
645   }
646 
647   bool isVISrc_512F16() const {
648     return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f16);
649   }
650 
651   bool isVISrc_512V2F16() const {
652     return isVISrc_512F16() || isVISrc_512B32();
653   }
654 
655   bool isVISrc_1024B32() const {
656     return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i32);
657   }
658 
659   bool isVISrc_1024B16() const {
660     return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i16);
661   }
662 
663   bool isVISrc_1024V2B16() const {
664     return isVISrc_1024B16();
665   }
666 
667   bool isVISrc_1024F32() const {
668     return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f32);
669   }
670 
671   bool isVISrc_1024F16() const {
672     return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f16);
673   }
674 
675   bool isVISrc_1024V2F16() const {
676     return isVISrc_1024F16() || isVISrc_1024B32();
677   }
678 
679   bool isAISrcB32() const {
680     return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i32);
681   }
682 
683   bool isAISrcB16() const {
684     return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i16);
685   }
686 
687   bool isAISrcV2B16() const {
688     return isAISrcB16();
689   }
690 
691   bool isAISrcF32() const {
692     return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f32);
693   }
694 
695   bool isAISrcF16() const {
696     return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f16);
697   }
698 
699   bool isAISrcV2F16() const {
700     return isAISrcF16() || isAISrcB32();
701   }
702 
703   bool isAISrc_64B64() const {
704     return isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::i64);
705   }
706 
707   bool isAISrc_64F64() const {
708     return isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::f64);
709   }
710 
711   bool isAISrc_128B32() const {
712     return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i32);
713   }
714 
715   bool isAISrc_128B16() const {
716     return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i16);
717   }
718 
719   bool isAISrc_128V2B16() const {
720     return isAISrc_128B16();
721   }
722 
723   bool isAISrc_128F32() const {
724     return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f32);
725   }
726 
727   bool isAISrc_128F16() const {
728     return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f16);
729   }
730 
731   bool isAISrc_128V2F16() const {
732     return isAISrc_128F16() || isAISrc_128B32();
733   }
734 
735   bool isVISrc_128F16() const {
736     return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f16);
737   }
738 
739   bool isVISrc_128V2F16() const {
740     return isVISrc_128F16() || isVISrc_128B32();
741   }
742 
743   bool isAISrc_256B64() const {
744     return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::i64);
745   }
746 
747   bool isAISrc_256F64() const {
748     return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::f64);
749   }
750 
751   bool isAISrc_512B32() const {
752     return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i32);
753   }
754 
755   bool isAISrc_512B16() const {
756     return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i16);
757   }
758 
759   bool isAISrc_512V2B16() const {
760     return isAISrc_512B16();
761   }
762 
763   bool isAISrc_512F32() const {
764     return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f32);
765   }
766 
767   bool isAISrc_512F16() const {
768     return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f16);
769   }
770 
771   bool isAISrc_512V2F16() const {
772     return isAISrc_512F16() || isAISrc_512B32();
773   }
774 
775   bool isAISrc_1024B32() const {
776     return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i32);
777   }
778 
779   bool isAISrc_1024B16() const {
780     return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i16);
781   }
782 
783   bool isAISrc_1024V2B16() const {
784     return isAISrc_1024B16();
785   }
786 
787   bool isAISrc_1024F32() const {
788     return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f32);
789   }
790 
791   bool isAISrc_1024F16() const {
792     return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f16);
793   }
794 
795   bool isAISrc_1024V2F16() const {
796     return isAISrc_1024F16() || isAISrc_1024B32();
797   }
798 
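  // KImm operands are literal constants embedded directly in the instruction
  // word (e.g. the "k" operand of v_madmk_f32 / v_madak_f32).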
799   bool isKImmFP32() const {
800     return isLiteralImm(MVT::f32);
801   }
802 
803   bool isKImmFP16() const {
804     return isLiteralImm(MVT::f16);
805   }
806 
807   bool isMem() const override {
808     return false;
809   }
810 
811   bool isExpr() const {
812     return Kind == Expression;
813   }
814 
815   bool isSoppBrTarget() const {
816     return isExpr() || isImm();
817   }
818 
819   bool isSWaitCnt() const;
820   bool isHwreg() const;
821   bool isSendMsg() const;
822   bool isSwizzle() const;
823   bool isSMRDOffset8() const;
824   bool isSMEMOffset() const;
825   bool isSMRDLiteralOffset() const;
826   bool isDPP8() const;
827   bool isDPPCtrl() const;
828   bool isBLGP() const;
829   bool isCBSZ() const;
830   bool isABID() const;
831   bool isGPRIdxMode() const;
832   bool isS16Imm() const;
833   bool isU16Imm() const;
834   bool isEndpgm() const;
835 
836   StringRef getExpressionAsToken() const {
837     assert(isExpr());
838     const MCSymbolRefExpr *S = cast<MCSymbolRefExpr>(Expr);
839     return S->getSymbol().getName();
840   }
841 
842   StringRef getToken() const {
843     assert(isToken());
844 
845     if (Kind == Expression)
846       return getExpressionAsToken();
847 
848     return StringRef(Tok.Data, Tok.Length);
849   }
850 
851   int64_t getImm() const {
852     assert(isImm());
853     return Imm.Val;
854   }
855 
856   void setImm(int64_t Val) {
857     assert(isImm());
858     Imm.Val = Val;
859   }
860 
861   ImmTy getImmTy() const {
862     assert(isImm());
863     return Imm.Type;
864   }
865 
866   unsigned getReg() const override {
867     assert(isRegKind());
868     return Reg.RegNo;
869   }
870 
871   SMLoc getStartLoc() const override {
872     return StartLoc;
873   }
874 
875   SMLoc getEndLoc() const override {
876     return EndLoc;
877   }
878 
879   SMRange getLocRange() const {
880     return SMRange(StartLoc, EndLoc);
881   }
882 
883   Modifiers getModifiers() const {
884     assert(isRegKind() || isImmTy(ImmTyNone));
885     return isRegKind() ? Reg.Mods : Imm.Mods;
886   }
887 
888   void setModifiers(Modifiers Mods) {
889     assert(isRegKind() || isImmTy(ImmTyNone));
890     if (isRegKind())
891       Reg.Mods = Mods;
892     else
893       Imm.Mods = Mods;
894   }
895 
896   bool hasModifiers() const {
897     return getModifiers().hasModifiers();
898   }
899 
900   bool hasFPModifiers() const {
901     return getModifiers().hasFPModifiers();
902   }
903 
904   bool hasIntModifiers() const {
905     return getModifiers().hasIntModifiers();
906   }
907 
908   uint64_t applyInputFPModifiers(uint64_t Val, unsigned Size) const;
909 
910   void addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers = true) const;
911 
912   void addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const;
913 
914   template <unsigned Bitwidth>
915   void addKImmFPOperands(MCInst &Inst, unsigned N) const;
916 
917   void addKImmFP16Operands(MCInst &Inst, unsigned N) const {
918     addKImmFPOperands<16>(Inst, N);
919   }
920 
921   void addKImmFP32Operands(MCInst &Inst, unsigned N) const {
922     addKImmFPOperands<32>(Inst, N);
923   }
924 
925   void addRegOperands(MCInst &Inst, unsigned N) const;
926 
927   void addBoolRegOperands(MCInst &Inst, unsigned N) const {
928     addRegOperands(Inst, N);
929   }
930 
931   void addRegOrImmOperands(MCInst &Inst, unsigned N) const {
932     if (isRegKind())
933       addRegOperands(Inst, N);
934     else if (isExpr())
935       Inst.addOperand(MCOperand::createExpr(Expr));
936     else
937       addImmOperands(Inst, N);
938   }
939 
940   void addRegOrImmWithInputModsOperands(MCInst &Inst, unsigned N) const {
941     Modifiers Mods = getModifiers();
942     Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
943     if (isRegKind()) {
944       addRegOperands(Inst, N);
945     } else {
946       addImmOperands(Inst, N, false);
947     }
948   }
949 
950   void addRegOrImmWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
951     assert(!hasIntModifiers());
952     addRegOrImmWithInputModsOperands(Inst, N);
953   }
954 
955   void addRegOrImmWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
956     assert(!hasFPModifiers());
957     addRegOrImmWithInputModsOperands(Inst, N);
958   }
959 
960   void addRegWithInputModsOperands(MCInst &Inst, unsigned N) const {
961     Modifiers Mods = getModifiers();
962     Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
963     assert(isRegKind());
964     addRegOperands(Inst, N);
965   }
966 
967   void addRegWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
968     assert(!hasIntModifiers());
969     addRegWithInputModsOperands(Inst, N);
970   }
971 
972   void addRegWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
973     assert(!hasFPModifiers());
974     addRegWithInputModsOperands(Inst, N);
975   }
976 
977   void addSoppBrTargetOperands(MCInst &Inst, unsigned N) const {
978     if (isImm())
979       addImmOperands(Inst, N);
980     else {
981       assert(isExpr());
982       Inst.addOperand(MCOperand::createExpr(Expr));
983     }
984   }
985 
986   static void printImmTy(raw_ostream& OS, ImmTy Type) {
987     switch (Type) {
988     case ImmTyNone: OS << "None"; break;
989     case ImmTyGDS: OS << "GDS"; break;
990     case ImmTyLDS: OS << "LDS"; break;
991     case ImmTyOffen: OS << "Offen"; break;
992     case ImmTyIdxen: OS << "Idxen"; break;
993     case ImmTyAddr64: OS << "Addr64"; break;
994     case ImmTyOffset: OS << "Offset"; break;
995     case ImmTyInstOffset: OS << "InstOffset"; break;
996     case ImmTyOffset0: OS << "Offset0"; break;
997     case ImmTyOffset1: OS << "Offset1"; break;
998     case ImmTyCPol: OS << "CPol"; break;
999     case ImmTySWZ: OS << "SWZ"; break;
1000     case ImmTyTFE: OS << "TFE"; break;
1001     case ImmTyD16: OS << "D16"; break;
1002     case ImmTyFORMAT: OS << "FORMAT"; break;
1003     case ImmTyClampSI: OS << "ClampSI"; break;
1004     case ImmTyOModSI: OS << "OModSI"; break;
1005     case ImmTyDPP8: OS << "DPP8"; break;
1006     case ImmTyDppCtrl: OS << "DppCtrl"; break;
1007     case ImmTyDppRowMask: OS << "DppRowMask"; break;
1008     case ImmTyDppBankMask: OS << "DppBankMask"; break;
1009     case ImmTyDppBoundCtrl: OS << "DppBoundCtrl"; break;
1010     case ImmTyDppFi: OS << "FI"; break;
1011     case ImmTySdwaDstSel: OS << "SdwaDstSel"; break;
1012     case ImmTySdwaSrc0Sel: OS << "SdwaSrc0Sel"; break;
1013     case ImmTySdwaSrc1Sel: OS << "SdwaSrc1Sel"; break;
1014     case ImmTySdwaDstUnused: OS << "SdwaDstUnused"; break;
1015     case ImmTyDMask: OS << "DMask"; break;
1016     case ImmTyDim: OS << "Dim"; break;
1017     case ImmTyUNorm: OS << "UNorm"; break;
1018     case ImmTyDA: OS << "DA"; break;
1019     case ImmTyR128A16: OS << "R128A16"; break;
1020     case ImmTyA16: OS << "A16"; break;
1021     case ImmTyLWE: OS << "LWE"; break;
1022     case ImmTyOff: OS << "Off"; break;
1023     case ImmTyExpTgt: OS << "ExpTgt"; break;
1024     case ImmTyExpCompr: OS << "ExpCompr"; break;
1025     case ImmTyExpVM: OS << "ExpVM"; break;
1026     case ImmTyHwreg: OS << "Hwreg"; break;
1027     case ImmTySendMsg: OS << "SendMsg"; break;
1028     case ImmTyInterpSlot: OS << "InterpSlot"; break;
1029     case ImmTyInterpAttr: OS << "InterpAttr"; break;
1030     case ImmTyAttrChan: OS << "AttrChan"; break;
1031     case ImmTyOpSel: OS << "OpSel"; break;
1032     case ImmTyOpSelHi: OS << "OpSelHi"; break;
1033     case ImmTyNegLo: OS << "NegLo"; break;
1034     case ImmTyNegHi: OS << "NegHi"; break;
1035     case ImmTySwizzle: OS << "Swizzle"; break;
1036     case ImmTyGprIdxMode: OS << "GprIdxMode"; break;
1037     case ImmTyHigh: OS << "High"; break;
1038     case ImmTyBLGP: OS << "BLGP"; break;
1039     case ImmTyCBSZ: OS << "CBSZ"; break;
1040     case ImmTyABID: OS << "ABID"; break;
1041     case ImmTyEndpgm: OS << "Endpgm"; break;
1042     }
1043   }
1044 
1045   void print(raw_ostream &OS) const override {
1046     switch (Kind) {
1047     case Register:
1048       OS << "<register " << getReg() << " mods: " << Reg.Mods << '>';
1049       break;
1050     case Immediate:
1051       OS << '<' << getImm();
1052       if (getImmTy() != ImmTyNone) {
1053         OS << " type: "; printImmTy(OS, getImmTy());
1054       }
1055       OS << " mods: " << Imm.Mods << '>';
1056       break;
1057     case Token:
1058       OS << '\'' << getToken() << '\'';
1059       break;
1060     case Expression:
1061       OS << "<expr " << *Expr << '>';
1062       break;
1063     }
1064   }
1065 
1066   static AMDGPUOperand::Ptr CreateImm(const AMDGPUAsmParser *AsmParser,
1067                                       int64_t Val, SMLoc Loc,
1068                                       ImmTy Type = ImmTyNone,
1069                                       bool IsFPImm = false) {
1070     auto Op = std::make_unique<AMDGPUOperand>(Immediate, AsmParser);
1071     Op->Imm.Val = Val;
1072     Op->Imm.IsFPImm = IsFPImm;
1073     Op->Imm.Kind = ImmKindTyNone;
1074     Op->Imm.Type = Type;
1075     Op->Imm.Mods = Modifiers();
1076     Op->StartLoc = Loc;
1077     Op->EndLoc = Loc;
1078     return Op;
1079   }
1080 
1081   static AMDGPUOperand::Ptr CreateToken(const AMDGPUAsmParser *AsmParser,
1082                                         StringRef Str, SMLoc Loc,
1083                                         bool HasExplicitEncodingSize = true) {
1084     auto Res = std::make_unique<AMDGPUOperand>(Token, AsmParser);
1085     Res->Tok.Data = Str.data();
1086     Res->Tok.Length = Str.size();
1087     Res->StartLoc = Loc;
1088     Res->EndLoc = Loc;
1089     return Res;
1090   }
1091 
1092   static AMDGPUOperand::Ptr CreateReg(const AMDGPUAsmParser *AsmParser,
1093                                       unsigned RegNo, SMLoc S,
1094                                       SMLoc E) {
1095     auto Op = std::make_unique<AMDGPUOperand>(Register, AsmParser);
1096     Op->Reg.RegNo = RegNo;
1097     Op->Reg.Mods = Modifiers();
1098     Op->StartLoc = S;
1099     Op->EndLoc = E;
1100     return Op;
1101   }
1102 
1103   static AMDGPUOperand::Ptr CreateExpr(const AMDGPUAsmParser *AsmParser,
1104                                        const class MCExpr *Expr, SMLoc S) {
1105     auto Op = std::make_unique<AMDGPUOperand>(Expression, AsmParser);
1106     Op->Expr = Expr;
1107     Op->StartLoc = S;
1108     Op->EndLoc = S;
1109     return Op;
1110   }
1111 };
1112 
1113 raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods) {
  OS << "abs: " << Mods.Abs << " neg: " << Mods.Neg << " sext: " << Mods.Sext;
1115   return OS;
1116 }
1117 
1118 //===----------------------------------------------------------------------===//
1119 // AsmParser
1120 //===----------------------------------------------------------------------===//
1121 
// Holds information related to the current kernel, e.g. the number of SGPRs used.
// A kernel scope begins at the .amdgpu_hsa_kernel directive and ends at the next
// .amdgpu_hsa_kernel directive or at EOF.
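// The tracked maxima are published as the MC symbols .kernel.sgpr_count,
// .kernel.vgpr_count and .kernel.agpr_count so they can be referenced later
// in the assembly.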
1125 class KernelScopeInfo {
1126   int SgprIndexUnusedMin = -1;
1127   int VgprIndexUnusedMin = -1;
1128   int AgprIndexUnusedMin = -1;
1129   MCContext *Ctx = nullptr;
1130   MCSubtargetInfo const *MSTI = nullptr;
1131 
1132   void usesSgprAt(int i) {
1133     if (i >= SgprIndexUnusedMin) {
1134       SgprIndexUnusedMin = ++i;
1135       if (Ctx) {
1136         MCSymbol* const Sym =
1137           Ctx->getOrCreateSymbol(Twine(".kernel.sgpr_count"));
1138         Sym->setVariableValue(MCConstantExpr::create(SgprIndexUnusedMin, *Ctx));
1139       }
1140     }
1141   }
1142 
1143   void usesVgprAt(int i) {
1144     if (i >= VgprIndexUnusedMin) {
1145       VgprIndexUnusedMin = ++i;
1146       if (Ctx) {
1147         MCSymbol* const Sym =
1148           Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count"));
1149         int totalVGPR = getTotalNumVGPRs(isGFX90A(*MSTI), AgprIndexUnusedMin,
1150                                          VgprIndexUnusedMin);
1151         Sym->setVariableValue(MCConstantExpr::create(totalVGPR, *Ctx));
1152       }
1153     }
1154   }
1155 
1156   void usesAgprAt(int i) {
    // Without MAI instructions the instruction will be rejected in
    // AMDGPUAsmParser::MatchAndEmitInstruction, so there is no need to track
    // AGPR usage here.
1158     if (!hasMAIInsts(*MSTI))
1159       return;
1160 
1161     if (i >= AgprIndexUnusedMin) {
1162       AgprIndexUnusedMin = ++i;
1163       if (Ctx) {
1164         MCSymbol* const Sym =
1165           Ctx->getOrCreateSymbol(Twine(".kernel.agpr_count"));
1166         Sym->setVariableValue(MCConstantExpr::create(AgprIndexUnusedMin, *Ctx));
1167 
1168         // Also update vgpr_count (dependent on agpr_count for gfx908/gfx90a)
1169         MCSymbol* const vSym =
1170           Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count"));
1171         int totalVGPR = getTotalNumVGPRs(isGFX90A(*MSTI), AgprIndexUnusedMin,
1172                                          VgprIndexUnusedMin);
1173         vSym->setVariableValue(MCConstantExpr::create(totalVGPR, *Ctx));
1174       }
1175     }
1176   }
1177 
1178 public:
1179   KernelScopeInfo() = default;
1180 
1181   void initialize(MCContext &Context) {
1182     Ctx = &Context;
1183     MSTI = Ctx->getSubtargetInfo();
1184 
1185     usesSgprAt(SgprIndexUnusedMin = -1);
1186     usesVgprAt(VgprIndexUnusedMin = -1);
1187     if (hasMAIInsts(*MSTI)) {
1188       usesAgprAt(AgprIndexUnusedMin = -1);
1189     }
1190   }
1191 
1192   void usesRegister(RegisterKind RegKind, unsigned DwordRegIndex, unsigned RegWidth) {
1193     switch (RegKind) {
1194       case IS_SGPR: usesSgprAt(DwordRegIndex + RegWidth - 1); break;
1195       case IS_AGPR: usesAgprAt(DwordRegIndex + RegWidth - 1); break;
1196       case IS_VGPR: usesVgprAt(DwordRegIndex + RegWidth - 1); break;
1197       default: break;
1198     }
1199   }
1200 };
1201 
1202 class AMDGPUAsmParser : public MCTargetAsmParser {
1203   MCAsmParser &Parser;
1204 
  // Maximum number of extra operands that may be parsed after the first
  // optional operand. This lookahead may be necessary to skip hardcoded
  // mandatory operands.
1207   static const unsigned MAX_OPR_LOOKAHEAD = 8;
1208 
1209   unsigned ForcedEncodingSize = 0;
1210   bool ForcedDPP = false;
1211   bool ForcedSDWA = false;
1212   KernelScopeInfo KernelScope;
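  // Tracks which cache-policy modifier bits (glc/slc/dlc/scc) have already
  // been parsed for the current instruction.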
1213   unsigned CPolSeen;
1214 
1215   /// @name Auto-generated Match Functions
1216   /// {
1217 
1218 #define GET_ASSEMBLER_HEADER
1219 #include "AMDGPUGenAsmMatcher.inc"
1220 
1221   /// }
1222 
1223 private:
1224   bool ParseAsAbsoluteExpression(uint32_t &Ret);
1225   bool OutOfRangeError(SMRange Range);
  /// Calculate the VGPR/SGPR blocks required for the given target, reserved
  /// registers, and user-specified NextFreeXGPR values.
1228   ///
1229   /// \param Features [in] Target features, used for bug corrections.
1230   /// \param VCCUsed [in] Whether VCC special SGPR is reserved.
1231   /// \param FlatScrUsed [in] Whether FLAT_SCRATCH special SGPR is reserved.
1232   /// \param XNACKUsed [in] Whether XNACK_MASK special SGPR is reserved.
1233   /// \param EnableWavefrontSize32 [in] Value of ENABLE_WAVEFRONT_SIZE32 kernel
1234   /// descriptor field, if valid.
1235   /// \param NextFreeVGPR [in] Max VGPR number referenced, plus one.
1236   /// \param VGPRRange [in] Token range, used for VGPR diagnostics.
1237   /// \param NextFreeSGPR [in] Max SGPR number referenced, plus one.
1238   /// \param SGPRRange [in] Token range, used for SGPR diagnostics.
1239   /// \param VGPRBlocks [out] Result VGPR block count.
1240   /// \param SGPRBlocks [out] Result SGPR block count.
1241   bool calculateGPRBlocks(const FeatureBitset &Features, bool VCCUsed,
1242                           bool FlatScrUsed, bool XNACKUsed,
1243                           Optional<bool> EnableWavefrontSize32, unsigned NextFreeVGPR,
1244                           SMRange VGPRRange, unsigned NextFreeSGPR,
1245                           SMRange SGPRRange, unsigned &VGPRBlocks,
1246                           unsigned &SGPRBlocks);
1247   bool ParseDirectiveAMDGCNTarget();
1248   bool ParseDirectiveAMDHSAKernel();
1249   bool ParseDirectiveMajorMinor(uint32_t &Major, uint32_t &Minor);
1250   bool ParseDirectiveHSACodeObjectVersion();
1251   bool ParseDirectiveHSACodeObjectISA();
1252   bool ParseAMDKernelCodeTValue(StringRef ID, amd_kernel_code_t &Header);
1253   bool ParseDirectiveAMDKernelCodeT();
1254   // TODO: Possibly make subtargetHasRegister const.
1255   bool subtargetHasRegister(const MCRegisterInfo &MRI, unsigned RegNo);
1256   bool ParseDirectiveAMDGPUHsaKernel();
1257 
1258   bool ParseDirectiveISAVersion();
1259   bool ParseDirectiveHSAMetadata();
1260   bool ParseDirectivePALMetadataBegin();
1261   bool ParseDirectivePALMetadata();
1262   bool ParseDirectiveAMDGPULDS();
1263 
1264   /// Common code to parse out a block of text (typically YAML) between start and
1265   /// end directives.
1266   bool ParseToEndDirective(const char *AssemblerDirectiveBegin,
1267                            const char *AssemblerDirectiveEnd,
1268                            std::string &CollectString);
1269 
1270   bool AddNextRegisterToList(unsigned& Reg, unsigned& RegWidth,
1271                              RegisterKind RegKind, unsigned Reg1, SMLoc Loc);
1272   bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
1273                            unsigned &RegNum, unsigned &RegWidth,
1274                            bool RestoreOnFailure = false);
1275   bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
1276                            unsigned &RegNum, unsigned &RegWidth,
1277                            SmallVectorImpl<AsmToken> &Tokens);
1278   unsigned ParseRegularReg(RegisterKind &RegKind, unsigned &RegNum,
1279                            unsigned &RegWidth,
1280                            SmallVectorImpl<AsmToken> &Tokens);
1281   unsigned ParseSpecialReg(RegisterKind &RegKind, unsigned &RegNum,
1282                            unsigned &RegWidth,
1283                            SmallVectorImpl<AsmToken> &Tokens);
1284   unsigned ParseRegList(RegisterKind &RegKind, unsigned &RegNum,
1285                         unsigned &RegWidth, SmallVectorImpl<AsmToken> &Tokens);
1286   bool ParseRegRange(unsigned& Num, unsigned& Width);
1287   unsigned getRegularReg(RegisterKind RegKind,
1288                          unsigned RegNum,
1289                          unsigned RegWidth,
1290                          SMLoc Loc);
1291 
1292   bool isRegister();
1293   bool isRegister(const AsmToken &Token, const AsmToken &NextToken) const;
1294   Optional<StringRef> getGprCountSymbolName(RegisterKind RegKind);
1295   void initializeGprCountSymbol(RegisterKind RegKind);
1296   bool updateGprCountSymbols(RegisterKind RegKind, unsigned DwordRegIndex,
1297                              unsigned RegWidth);
1298   void cvtMubufImpl(MCInst &Inst, const OperandVector &Operands,
1299                     bool IsAtomic, bool IsLds = false);
1300   void cvtDSImpl(MCInst &Inst, const OperandVector &Operands,
1301                  bool IsGdsHardcoded);
1302 
1303 public:
1304   enum AMDGPUMatchResultTy {
1305     Match_PreferE32 = FIRST_TARGET_MATCH_RESULT_TY
1306   };
1307   enum OperandMode {
1308     OperandMode_Default,
1309     OperandMode_NSA,
1310   };
1311 
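  // Maps the type of each parsed optional immediate operand to its index in
  // the operand list, so the cvt* converters can emit optional operands in
  // the order the encoding expects.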
1312   using OptionalImmIndexMap = std::map<AMDGPUOperand::ImmTy, unsigned>;
1313 
1314   AMDGPUAsmParser(const MCSubtargetInfo &STI, MCAsmParser &_Parser,
1315                const MCInstrInfo &MII,
1316                const MCTargetOptions &Options)
1317       : MCTargetAsmParser(Options, STI, MII), Parser(_Parser) {
1318     MCAsmParserExtension::Initialize(Parser);
1319 
1320     if (getFeatureBits().none()) {
1321       // Set default features.
1322       copySTI().ToggleFeature("southern-islands");
1323     }
1324 
1325     setAvailableFeatures(ComputeAvailableFeatures(getFeatureBits()));
1326 
1327     {
      // TODO: make these pre-defined variables read-only.
      // Currently there is no suitable machinery in core llvm-mc for this.
      // MCSymbol::isRedefinable is intended for another purpose, and
      // AsmParser::parseDirectiveSet() cannot be specialized for a specific target.
1332       AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
1333       MCContext &Ctx = getContext();
1334       if (ISA.Major >= 6 && isHsaAbiVersion3AndAbove(&getSTI())) {
1335         MCSymbol *Sym =
1336             Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_number"));
1337         Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
1338         Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_minor"));
1339         Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
1340         Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_stepping"));
1341         Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
1342       } else {
1343         MCSymbol *Sym =
1344             Ctx.getOrCreateSymbol(Twine(".option.machine_version_major"));
1345         Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
1346         Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_minor"));
1347         Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
1348         Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_stepping"));
1349         Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
1350       }
1351       if (ISA.Major >= 6 && isHsaAbiVersion3AndAbove(&getSTI())) {
1352         initializeGprCountSymbol(IS_VGPR);
1353         initializeGprCountSymbol(IS_SGPR);
1354       } else
1355         KernelScope.initialize(getContext());
1356     }
1357   }
1358 
1359   bool hasMIMG_R128() const {
1360     return AMDGPU::hasMIMG_R128(getSTI());
1361   }
1362 
1363   bool hasPackedD16() const {
1364     return AMDGPU::hasPackedD16(getSTI());
1365   }
1366 
1367   bool hasGFX10A16() const {
1368     return AMDGPU::hasGFX10A16(getSTI());
1369   }
1370 
1371   bool hasG16() const { return AMDGPU::hasG16(getSTI()); }
1372 
1373   bool isSI() const {
1374     return AMDGPU::isSI(getSTI());
1375   }
1376 
1377   bool isCI() const {
1378     return AMDGPU::isCI(getSTI());
1379   }
1380 
1381   bool isVI() const {
1382     return AMDGPU::isVI(getSTI());
1383   }
1384 
1385   bool isGFX9() const {
1386     return AMDGPU::isGFX9(getSTI());
1387   }
1388 
1389   bool isGFX90A() const {
1390     return AMDGPU::isGFX90A(getSTI());
1391   }
1392 
1393   bool isGFX9Plus() const {
1394     return AMDGPU::isGFX9Plus(getSTI());
1395   }
1396 
1397   bool isGFX10() const {
1398     return AMDGPU::isGFX10(getSTI());
1399   }
1400 
1401   bool isGFX10Plus() const { return AMDGPU::isGFX10Plus(getSTI()); }
1402 
1403   bool isGFX10_BEncoding() const {
1404     return AMDGPU::isGFX10_BEncoding(getSTI());
1405   }
1406 
1407   bool hasInv2PiInlineImm() const {
1408     return getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm];
1409   }
1410 
1411   bool hasFlatOffsets() const {
1412     return getFeatureBits()[AMDGPU::FeatureFlatInstOffsets];
1413   }
1414 
1415   bool hasArchitectedFlatScratch() const {
1416     return getFeatureBits()[AMDGPU::FeatureArchitectedFlatScratch];
1417   }
1418 
1419   bool hasSGPR102_SGPR103() const {
1420     return !isVI() && !isGFX9();
1421   }
1422 
1423   bool hasSGPR104_SGPR105() const { return isGFX10Plus(); }
1424 
1425   bool hasIntClamp() const {
1426     return getFeatureBits()[AMDGPU::FeatureIntClamp];
1427   }
1428 
1429   AMDGPUTargetStreamer &getTargetStreamer() {
1430     MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer();
1431     return static_cast<AMDGPUTargetStreamer &>(TS);
1432   }
1433 
1434   const MCRegisterInfo *getMRI() const {
1435     // We need this const_cast because for some reason getContext() is not const
1436     // in MCAsmParser.
1437     return const_cast<AMDGPUAsmParser*>(this)->getContext().getRegisterInfo();
1438   }
1439 
1440   const MCInstrInfo *getMII() const {
1441     return &MII;
1442   }
1443 
1444   const FeatureBitset &getFeatureBits() const {
1445     return getSTI().getFeatureBits();
1446   }
1447 
1448   void setForcedEncodingSize(unsigned Size) { ForcedEncodingSize = Size; }
1449   void setForcedDPP(bool ForceDPP_) { ForcedDPP = ForceDPP_; }
1450   void setForcedSDWA(bool ForceSDWA_) { ForcedSDWA = ForceSDWA_; }
1451 
1452   unsigned getForcedEncodingSize() const { return ForcedEncodingSize; }
1453   bool isForcedVOP3() const { return ForcedEncodingSize == 64; }
1454   bool isForcedDPP() const { return ForcedDPP; }
1455   bool isForcedSDWA() const { return ForcedSDWA; }
1456   ArrayRef<unsigned> getMatchedVariants() const;
1457   StringRef getMatchedVariantName() const;
1458 
1459   std::unique_ptr<AMDGPUOperand> parseRegister(bool RestoreOnFailure = false);
1460   bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc,
1461                      bool RestoreOnFailure);
1462   bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override;
1463   OperandMatchResultTy tryParseRegister(unsigned &RegNo, SMLoc &StartLoc,
1464                                         SMLoc &EndLoc) override;
1465   unsigned checkTargetMatchPredicate(MCInst &Inst) override;
1466   unsigned validateTargetOperandClass(MCParsedAsmOperand &Op,
1467                                       unsigned Kind) override;
1468   bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
1469                                OperandVector &Operands, MCStreamer &Out,
1470                                uint64_t &ErrorInfo,
1471                                bool MatchingInlineAsm) override;
1472   bool ParseDirective(AsmToken DirectiveID) override;
1473   OperandMatchResultTy parseOperand(OperandVector &Operands, StringRef Mnemonic,
1474                                     OperandMode Mode = OperandMode_Default);
1475   StringRef parseMnemonicSuffix(StringRef Name);
1476   bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
1477                         SMLoc NameLoc, OperandVector &Operands) override;
1478   //bool ProcessInstruction(MCInst &Inst);
1479 
1480   OperandMatchResultTy parseIntWithPrefix(const char *Prefix, int64_t &Int);
1481 
1482   OperandMatchResultTy
1483   parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
1484                      AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
1485                      bool (*ConvertResult)(int64_t &) = nullptr);
1486 
1487   OperandMatchResultTy
1488   parseOperandArrayWithPrefix(const char *Prefix,
1489                               OperandVector &Operands,
1490                               AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
1491                               bool (*ConvertResult)(int64_t&) = nullptr);
1492 
1493   OperandMatchResultTy
1494   parseNamedBit(StringRef Name, OperandVector &Operands,
1495                 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone);
1496   OperandMatchResultTy parseCPol(OperandVector &Operands);
1497   OperandMatchResultTy parseStringWithPrefix(StringRef Prefix,
1498                                              StringRef &Value,
1499                                              SMLoc &StringLoc);
1500 
1501   bool isModifier();
1502   bool isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1503   bool isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1504   bool isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1505   bool isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const;
1506   bool parseSP3NegModifier();
1507   OperandMatchResultTy parseImm(OperandVector &Operands, bool HasSP3AbsModifier = false);
1508   OperandMatchResultTy parseReg(OperandVector &Operands);
1509   OperandMatchResultTy parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod = false);
1510   OperandMatchResultTy parseRegOrImmWithFPInputMods(OperandVector &Operands, bool AllowImm = true);
1511   OperandMatchResultTy parseRegOrImmWithIntInputMods(OperandVector &Operands, bool AllowImm = true);
1512   OperandMatchResultTy parseRegWithFPInputMods(OperandVector &Operands);
1513   OperandMatchResultTy parseRegWithIntInputMods(OperandVector &Operands);
1514   OperandMatchResultTy parseVReg32OrOff(OperandVector &Operands);
1515   OperandMatchResultTy parseDfmtNfmt(int64_t &Format);
1516   OperandMatchResultTy parseUfmt(int64_t &Format);
1517   OperandMatchResultTy parseSymbolicSplitFormat(StringRef FormatStr, SMLoc Loc, int64_t &Format);
1518   OperandMatchResultTy parseSymbolicUnifiedFormat(StringRef FormatStr, SMLoc Loc, int64_t &Format);
1519   OperandMatchResultTy parseFORMAT(OperandVector &Operands);
1520   OperandMatchResultTy parseSymbolicOrNumericFormat(int64_t &Format);
1521   OperandMatchResultTy parseNumericFormat(int64_t &Format);
1522   bool tryParseFmt(const char *Pref, int64_t MaxVal, int64_t &Val);
1523   bool matchDfmtNfmt(int64_t &Dfmt, int64_t &Nfmt, StringRef FormatStr, SMLoc Loc);
1524 
1525   void cvtDSOffset01(MCInst &Inst, const OperandVector &Operands);
1526   void cvtDS(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, false); }
1527   void cvtDSGds(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, true); }
1528   void cvtExp(MCInst &Inst, const OperandVector &Operands);
1529 
1530   bool parseCnt(int64_t &IntVal);
1531   OperandMatchResultTy parseSWaitCntOps(OperandVector &Operands);
1532   OperandMatchResultTy parseHwreg(OperandVector &Operands);
1533 
1534 private:
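  // Describes one field of a composite operand such as hwreg(...) or
  // sendmsg(...): its source location, numeric id, and whether it was
  // specified symbolically and/or explicitly defined.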
1535   struct OperandInfoTy {
1536     SMLoc Loc;
1537     int64_t Id;
1538     bool IsSymbolic = false;
1539     bool IsDefined = false;
1540 
1541     OperandInfoTy(int64_t Id_) : Id(Id_) {}
1542   };
1543 
1544   bool parseSendMsgBody(OperandInfoTy &Msg, OperandInfoTy &Op, OperandInfoTy &Stream);
1545   bool validateSendMsg(const OperandInfoTy &Msg,
1546                        const OperandInfoTy &Op,
1547                        const OperandInfoTy &Stream);
1548 
1549   bool parseHwregBody(OperandInfoTy &HwReg,
1550                       OperandInfoTy &Offset,
1551                       OperandInfoTy &Width);
1552   bool validateHwreg(const OperandInfoTy &HwReg,
1553                      const OperandInfoTy &Offset,
1554                      const OperandInfoTy &Width);
1555 
1556   SMLoc getFlatOffsetLoc(const OperandVector &Operands) const;
1557   SMLoc getSMEMOffsetLoc(const OperandVector &Operands) const;
1558 
1559   SMLoc getOperandLoc(std::function<bool(const AMDGPUOperand&)> Test,
1560                       const OperandVector &Operands) const;
1561   SMLoc getImmLoc(AMDGPUOperand::ImmTy Type, const OperandVector &Operands) const;
1562   SMLoc getRegLoc(unsigned Reg, const OperandVector &Operands) const;
1563   SMLoc getLitLoc(const OperandVector &Operands) const;
1564   SMLoc getConstLoc(const OperandVector &Operands) const;
1565 
1566   bool validateInstruction(const MCInst &Inst, const SMLoc &IDLoc, const OperandVector &Operands);
1567   bool validateFlatOffset(const MCInst &Inst, const OperandVector &Operands);
1568   bool validateSMEMOffset(const MCInst &Inst, const OperandVector &Operands);
1569   bool validateSOPLiteral(const MCInst &Inst) const;
1570   bool validateConstantBusLimitations(const MCInst &Inst, const OperandVector &Operands);
1571   bool validateEarlyClobberLimitations(const MCInst &Inst, const OperandVector &Operands);
1572   bool validateIntClampSupported(const MCInst &Inst);
1573   bool validateMIMGAtomicDMask(const MCInst &Inst);
1574   bool validateMIMGGatherDMask(const MCInst &Inst);
1575   bool validateMovrels(const MCInst &Inst, const OperandVector &Operands);
1576   bool validateMIMGDataSize(const MCInst &Inst);
1577   bool validateMIMGAddrSize(const MCInst &Inst);
1578   bool validateMIMGD16(const MCInst &Inst);
1579   bool validateMIMGDim(const MCInst &Inst);
1580   bool validateMIMGMSAA(const MCInst &Inst);
1581   bool validateOpSel(const MCInst &Inst);
1582   bool validateDPP(const MCInst &Inst, const OperandVector &Operands);
1583   bool validateVccOperand(unsigned Reg) const;
1584   bool validateVOPLiteral(const MCInst &Inst, const OperandVector &Operands);
1585   bool validateMAIAccWrite(const MCInst &Inst, const OperandVector &Operands);
1586   bool validateMFMA(const MCInst &Inst, const OperandVector &Operands);
1587   bool validateAGPRLdSt(const MCInst &Inst) const;
1588   bool validateVGPRAlign(const MCInst &Inst) const;
1589   bool validateGWS(const MCInst &Inst, const OperandVector &Operands);
1590   bool validateDivScale(const MCInst &Inst);
1591   bool validateCoherencyBits(const MCInst &Inst, const OperandVector &Operands,
1592                              const SMLoc &IDLoc);
1593   Optional<StringRef> validateLdsDirect(const MCInst &Inst);
1594   unsigned getConstantBusLimit(unsigned Opcode) const;
1595   bool usesConstantBus(const MCInst &Inst, unsigned OpIdx);
1596   bool isInlineConstant(const MCInst &Inst, unsigned OpIdx) const;
1597   unsigned findImplicitSGPRReadInVOP(const MCInst &Inst) const;
1598 
1599   bool isSupportedMnemo(StringRef Mnemo,
1600                         const FeatureBitset &FBS);
1601   bool isSupportedMnemo(StringRef Mnemo,
1602                         const FeatureBitset &FBS,
1603                         ArrayRef<unsigned> Variants);
1604   bool checkUnsupportedInstruction(StringRef Name, const SMLoc &IDLoc);
1605 
1606   bool isId(const StringRef Id) const;
1607   bool isId(const AsmToken &Token, const StringRef Id) const;
1608   bool isToken(const AsmToken::TokenKind Kind) const;
1609   bool trySkipId(const StringRef Id);
1610   bool trySkipId(const StringRef Pref, const StringRef Id);
1611   bool trySkipId(const StringRef Id, const AsmToken::TokenKind Kind);
1612   bool trySkipToken(const AsmToken::TokenKind Kind);
1613   bool skipToken(const AsmToken::TokenKind Kind, const StringRef ErrMsg);
1614   bool parseString(StringRef &Val, const StringRef ErrMsg = "expected a string");
1615   bool parseId(StringRef &Val, const StringRef ErrMsg = "");
1616 
1617   void peekTokens(MutableArrayRef<AsmToken> Tokens);
1618   AsmToken::TokenKind getTokenKind() const;
1619   bool parseExpr(int64_t &Imm, StringRef Expected = "");
1620   bool parseExpr(OperandVector &Operands);
1621   StringRef getTokenStr() const;
1622   AsmToken peekToken();
1623   AsmToken getToken() const;
1624   SMLoc getLoc() const;
1625   void lex();
1626 
1627 public:
1628   void onBeginOfFile() override;
1629 
1630   OperandMatchResultTy parseOptionalOperand(OperandVector &Operands);
1631   OperandMatchResultTy parseOptionalOpr(OperandVector &Operands);
1632 
1633   OperandMatchResultTy parseExpTgt(OperandVector &Operands);
1634   OperandMatchResultTy parseSendMsgOp(OperandVector &Operands);
1635   OperandMatchResultTy parseInterpSlot(OperandVector &Operands);
1636   OperandMatchResultTy parseInterpAttr(OperandVector &Operands);
1637   OperandMatchResultTy parseSOppBrTarget(OperandVector &Operands);
1638   OperandMatchResultTy parseBoolReg(OperandVector &Operands);
1639 
1640   bool parseSwizzleOperand(int64_t &Op,
1641                            const unsigned MinVal,
1642                            const unsigned MaxVal,
1643                            const StringRef ErrMsg,
1644                            SMLoc &Loc);
1645   bool parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
1646                             const unsigned MinVal,
1647                             const unsigned MaxVal,
1648                             const StringRef ErrMsg);
1649   OperandMatchResultTy parseSwizzleOp(OperandVector &Operands);
1650   bool parseSwizzleOffset(int64_t &Imm);
1651   bool parseSwizzleMacro(int64_t &Imm);
1652   bool parseSwizzleQuadPerm(int64_t &Imm);
1653   bool parseSwizzleBitmaskPerm(int64_t &Imm);
1654   bool parseSwizzleBroadcast(int64_t &Imm);
1655   bool parseSwizzleSwap(int64_t &Imm);
1656   bool parseSwizzleReverse(int64_t &Imm);
1657 
1658   OperandMatchResultTy parseGPRIdxMode(OperandVector &Operands);
1659   int64_t parseGPRIdxMacro();
1660 
1661   void cvtMubuf(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false); }
1662   void cvtMubufAtomic(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true); }
1663   void cvtMubufLds(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, true); }
1664   void cvtMtbuf(MCInst &Inst, const OperandVector &Operands);
1665 
1666   AMDGPUOperand::Ptr defaultCPol() const;
1667 
1668   AMDGPUOperand::Ptr defaultSMRDOffset8() const;
1669   AMDGPUOperand::Ptr defaultSMEMOffset() const;
1670   AMDGPUOperand::Ptr defaultSMRDLiteralOffset() const;
1671   AMDGPUOperand::Ptr defaultFlatOffset() const;
1672 
1673   OperandMatchResultTy parseOModOperand(OperandVector &Operands);
1674 
1675   void cvtVOP3(MCInst &Inst, const OperandVector &Operands,
1676                OptionalImmIndexMap &OptionalIdx);
1677   void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands);
1678   void cvtVOP3(MCInst &Inst, const OperandVector &Operands);
1679   void cvtVOP3P(MCInst &Inst, const OperandVector &Operands);
1680   void cvtVOP3P(MCInst &Inst, const OperandVector &Operands,
1681                 OptionalImmIndexMap &OptionalIdx);
1682 
1683   void cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands);
1684 
1685   void cvtMIMG(MCInst &Inst, const OperandVector &Operands,
1686                bool IsAtomic = false);
1687   void cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands);
1688   void cvtIntersectRay(MCInst &Inst, const OperandVector &Operands);
1689 
1690   void cvtSMEMAtomic(MCInst &Inst, const OperandVector &Operands);
1691 
1692   bool parseDimId(unsigned &Encoding);
1693   OperandMatchResultTy parseDim(OperandVector &Operands);
1694   OperandMatchResultTy parseDPP8(OperandVector &Operands);
1695   OperandMatchResultTy parseDPPCtrl(OperandVector &Operands);
1696   bool isSupportedDPPCtrl(StringRef Ctrl, const OperandVector &Operands);
1697   int64_t parseDPPCtrlSel(StringRef Ctrl);
1698   int64_t parseDPPCtrlPerm();
1699   AMDGPUOperand::Ptr defaultRowMask() const;
1700   AMDGPUOperand::Ptr defaultBankMask() const;
1701   AMDGPUOperand::Ptr defaultBoundCtrl() const;
1702   AMDGPUOperand::Ptr defaultFI() const;
1703   void cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8 = false);
1704   void cvtDPP8(MCInst &Inst, const OperandVector &Operands) { cvtDPP(Inst, Operands, true); }
1705 
1706   OperandMatchResultTy parseSDWASel(OperandVector &Operands, StringRef Prefix,
1707                                     AMDGPUOperand::ImmTy Type);
1708   OperandMatchResultTy parseSDWADstUnused(OperandVector &Operands);
1709   void cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands);
1710   void cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands);
1711   void cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands);
1712   void cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands);
1713   void cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands);
1714   void cvtSDWA(MCInst &Inst, const OperandVector &Operands,
1715                uint64_t BasicInstType,
1716                bool SkipDstVcc = false,
1717                bool SkipSrcVcc = false);
1718 
1719   AMDGPUOperand::Ptr defaultBLGP() const;
1720   AMDGPUOperand::Ptr defaultCBSZ() const;
1721   AMDGPUOperand::Ptr defaultABID() const;
1722 
1723   OperandMatchResultTy parseEndpgmOp(OperandVector &Operands);
1724   AMDGPUOperand::Ptr defaultEndpgmImmOperands() const;
1725 };
1726 
1727 struct OptionalOperand {
1728   const char *Name;
1729   AMDGPUOperand::ImmTy Type;
1730   bool IsBit;
1731   bool (*ConvertResult)(int64_t&);
1732 };
1733 
1734 } // end anonymous namespace
1735 
// May be called with an integer type of equivalent bit width.
1737 static const fltSemantics *getFltSemantics(unsigned Size) {
1738   switch (Size) {
1739   case 4:
1740     return &APFloat::IEEEsingle();
1741   case 8:
1742     return &APFloat::IEEEdouble();
1743   case 2:
1744     return &APFloat::IEEEhalf();
1745   default:
1746     llvm_unreachable("unsupported fp type");
1747   }
1748 }
1749 
1750 static const fltSemantics *getFltSemantics(MVT VT) {
1751   return getFltSemantics(VT.getSizeInBits() / 8);
1752 }
1753 
1754 static const fltSemantics *getOpFltSemantics(uint8_t OperandType) {
1755   switch (OperandType) {
1756   case AMDGPU::OPERAND_REG_IMM_INT32:
1757   case AMDGPU::OPERAND_REG_IMM_FP32:
1758   case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED:
1759   case AMDGPU::OPERAND_REG_INLINE_C_INT32:
1760   case AMDGPU::OPERAND_REG_INLINE_C_FP32:
1761   case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
1762   case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
1763   case AMDGPU::OPERAND_REG_INLINE_C_V2FP32:
1764   case AMDGPU::OPERAND_REG_IMM_V2FP32:
1765   case AMDGPU::OPERAND_REG_INLINE_C_V2INT32:
1766   case AMDGPU::OPERAND_REG_IMM_V2INT32:
1767   case AMDGPU::OPERAND_KIMM32:
1768     return &APFloat::IEEEsingle();
1769   case AMDGPU::OPERAND_REG_IMM_INT64:
1770   case AMDGPU::OPERAND_REG_IMM_FP64:
1771   case AMDGPU::OPERAND_REG_INLINE_C_INT64:
1772   case AMDGPU::OPERAND_REG_INLINE_C_FP64:
1773   case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
1774     return &APFloat::IEEEdouble();
1775   case AMDGPU::OPERAND_REG_IMM_INT16:
1776   case AMDGPU::OPERAND_REG_IMM_FP16:
1777   case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED:
1778   case AMDGPU::OPERAND_REG_INLINE_C_INT16:
1779   case AMDGPU::OPERAND_REG_INLINE_C_FP16:
1780   case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
1781   case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
1782   case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
1783   case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
1784   case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
1785   case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
1786   case AMDGPU::OPERAND_REG_IMM_V2INT16:
1787   case AMDGPU::OPERAND_REG_IMM_V2FP16:
1788   case AMDGPU::OPERAND_KIMM16:
1789     return &APFloat::IEEEhalf();
1790   default:
1791     llvm_unreachable("unsupported fp type");
1792   }
1793 }
1794 
1795 //===----------------------------------------------------------------------===//
1796 // Operand
1797 //===----------------------------------------------------------------------===//
1798 
1799 static bool canLosslesslyConvertToFPType(APFloat &FPLiteral, MVT VT) {
1800   bool Lost;
1801 
  // Convert the literal to the floating-point semantics of the target type.
1803   APFloat::opStatus Status = FPLiteral.convert(*getFltSemantics(VT),
1804                                                APFloat::rmNearestTiesToEven,
1805                                                &Lost);
  // Precision loss is allowed, but overflow and underflow are not.
1807   if (Status != APFloat::opOK &&
1808       Lost &&
1809       ((Status & APFloat::opOverflow)  != 0 ||
1810        (Status & APFloat::opUnderflow) != 0)) {
1811     return false;
1812   }
1813 
1814   return true;
1815 }
1816 
1817 static bool isSafeTruncation(int64_t Val, unsigned Size) {
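  // E.g. with Size == 16, both 0xFFFF (fits as unsigned) and -1 (fits as
  // signed) are safe truncations, while 0x10000 fits as neither.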
1818   return isUIntN(Size, Val) || isIntN(Size, Val);
1819 }
1820 
1821 static bool isInlineableLiteralOp16(int64_t Val, MVT VT, bool HasInv2Pi) {
1822   if (VT.getScalarType() == MVT::i16) {
    // FP inline immediates are currently broken for i16 operands, so only
    // integer inline constants are accepted here.
1824     return isInlinableIntLiteral(Val);
1825   }
1826 
1827   // f16/v2f16 operands work correctly for all values.
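  // E.g. 64 is inlinable for both i16 and f16 operands, while the f16 bit
  // pattern 0x3800 (0.5) is inlinable only for f16/v2f16 operands.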
1828   return AMDGPU::isInlinableLiteral16(Val, HasInv2Pi);
1829 }
1830 
1831 bool AMDGPUOperand::isInlinableImm(MVT type) const {
1832 
  // This is a hack that allows named inline values like
  // shared_base to be used with both 32-bit and 64-bit operands,
  // even though these values are defined as
  // 32-bit operands only.
1837   if (isInlineValue()) {
1838     return true;
1839   }
1840 
1841   if (!isImmTy(ImmTyNone)) {
1842     // Only plain immediates are inlinable (e.g. "clamp" attribute is not)
1843     return false;
1844   }
  // TODO: We should avoid using host floats here. It would be better to
  // check the float bit values, which is what a few other places do.
  // We have seen bot failures before due to unusual NaN handling on MIPS hosts.
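  // For reference, per the ISA's inline constant rules, 32-bit inline
  // constants are the integers -16..64 and the FP values +-0.5, +-1.0, +-2.0,
  // +-4.0 (plus 1/(2*pi) when the subtarget supports it).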
1848 
1849   APInt Literal(64, Imm.Val);
1850 
1851   if (Imm.IsFPImm) { // We got fp literal token
1852     if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
1853       return AMDGPU::isInlinableLiteral64(Imm.Val,
1854                                           AsmParser->hasInv2PiInlineImm());
1855     }
1856 
1857     APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
1858     if (!canLosslesslyConvertToFPType(FPLiteral, type))
1859       return false;
1860 
1861     if (type.getScalarSizeInBits() == 16) {
1862       return isInlineableLiteralOp16(
1863         static_cast<int16_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
1864         type, AsmParser->hasInv2PiInlineImm());
1865     }
1866 
1867     // Check if single precision literal is inlinable
1868     return AMDGPU::isInlinableLiteral32(
1869       static_cast<int32_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
1870       AsmParser->hasInv2PiInlineImm());
1871   }
1872 
1873   // We got int literal token.
1874   if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
1875     return AMDGPU::isInlinableLiteral64(Imm.Val,
1876                                         AsmParser->hasInv2PiInlineImm());
1877   }
1878 
1879   if (!isSafeTruncation(Imm.Val, type.getScalarSizeInBits())) {
1880     return false;
1881   }
1882 
1883   if (type.getScalarSizeInBits() == 16) {
1884     return isInlineableLiteralOp16(
1885       static_cast<int16_t>(Literal.getLoBits(16).getSExtValue()),
1886       type, AsmParser->hasInv2PiInlineImm());
1887   }
1888 
1889   return AMDGPU::isInlinableLiteral32(
1890     static_cast<int32_t>(Literal.getLoBits(32).getZExtValue()),
1891     AsmParser->hasInv2PiInlineImm());
1892 }
1893 
1894 bool AMDGPUOperand::isLiteralImm(MVT type) const {
  // Check whether this immediate can be encoded as a literal operand.
1896   if (!isImmTy(ImmTyNone)) {
1897     return false;
1898   }
1899 
1900   if (!Imm.IsFPImm) {
1901     // We got int literal token.
1902 
1903     if (type == MVT::f64 && hasFPModifiers()) {
      // FP modifiers cannot be applied to int literals while preserving the
      // same semantics for VOP1/2/C and VOP3 because of integer truncation.
      // To avoid ambiguity, reject these cases.
1907       return false;
1908     }
1909 
1910     unsigned Size = type.getSizeInBits();
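    // 64-bit instructions still encode only a 32-bit literal, so check
    // truncation to 32 bits.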
1911     if (Size == 64)
1912       Size = 32;
1913 
1914     // FIXME: 64-bit operands can zero extend, sign extend, or pad zeroes for FP
1915     // types.
1916     return isSafeTruncation(Imm.Val, Size);
1917   }
1918 
1919   // We got fp literal token
1920   if (type == MVT::f64) { // Expected 64-bit fp operand
    // The low 32 bits of the literal will be set to zero, but such
    // literals are still accepted.
1922     return true;
1923   }
1924 
1925   if (type == MVT::i64) { // Expected 64-bit int operand
1926     // We don't allow fp literals in 64-bit integer instructions. It is
1927     // unclear how we should encode them.
1928     return false;
1929   }
1930 
  // We allow fp literals with packed 16-bit (and v2f32) operands assuming that
  // the specified literal goes into the lower half and the upper half is zero.
  // We also require that the literal can be losslessly converted to the
  // expected element type.
1934   MVT ExpectedType = (type == MVT::v2f16)? MVT::f16 :
1935                      (type == MVT::v2i16)? MVT::i16 :
1936                      (type == MVT::v2f32)? MVT::f32 : type;
1937 
1938   APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
1939   return canLosslesslyConvertToFPType(FPLiteral, ExpectedType);
1940 }
1941 
1942 bool AMDGPUOperand::isRegClass(unsigned RCID) const {
1943   return isRegKind() && AsmParser->getMRI()->getRegClass(RCID).contains(getReg());
1944 }
1945 
1946 bool AMDGPUOperand::isVRegWithInputMods() const {
1947   return isRegClass(AMDGPU::VGPR_32RegClassID) ||
1948          // GFX90A allows DPP on 64-bit operands.
1949          (isRegClass(AMDGPU::VReg_64RegClassID) &&
1950           AsmParser->getFeatureBits()[AMDGPU::Feature64BitDPP]);
1951 }
1952 
1953 bool AMDGPUOperand::isSDWAOperand(MVT type) const {
1954   if (AsmParser->isVI())
1955     return isVReg32();
1956   else if (AsmParser->isGFX9Plus())
1957     return isRegClass(AMDGPU::VS_32RegClassID) || isInlinableImm(type);
1958   else
1959     return false;
1960 }
1961 
1962 bool AMDGPUOperand::isSDWAFP16Operand() const {
1963   return isSDWAOperand(MVT::f16);
1964 }
1965 
1966 bool AMDGPUOperand::isSDWAFP32Operand() const {
1967   return isSDWAOperand(MVT::f32);
1968 }
1969 
1970 bool AMDGPUOperand::isSDWAInt16Operand() const {
1971   return isSDWAOperand(MVT::i16);
1972 }
1973 
1974 bool AMDGPUOperand::isSDWAInt32Operand() const {
1975   return isSDWAOperand(MVT::i32);
1976 }
1977 
1978 bool AMDGPUOperand::isBoolReg() const {
1979   auto FB = AsmParser->getFeatureBits();
1980   return isReg() && ((FB[AMDGPU::FeatureWavefrontSize64] && isSCSrcB64()) ||
1981                      (FB[AMDGPU::FeatureWavefrontSize32] && isSCSrcB32()));
1982 }
1983 
1984 uint64_t AMDGPUOperand::applyInputFPModifiers(uint64_t Val, unsigned Size) const
1985 {
1986   assert(isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
1987   assert(Size == 2 || Size == 4 || Size == 8);
1988 
1989   const uint64_t FpSignMask = (1ULL << (Size * 8 - 1));
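  // E.g. for Size == 4 the sign mask is bit 31, so abs maps 0xBF800000
  // (-1.0f) to 0x3F800000 (1.0f) and neg flips the sign bit back.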
1990 
1991   if (Imm.Mods.Abs) {
1992     Val &= ~FpSignMask;
1993   }
1994   if (Imm.Mods.Neg) {
1995     Val ^= FpSignMask;
1996   }
1997 
1998   return Val;
1999 }
2000 
2001 void AMDGPUOperand::addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers) const {
2002   if (AMDGPU::isSISrcOperand(AsmParser->getMII()->get(Inst.getOpcode()),
2003                              Inst.getNumOperands())) {
    addLiteralImmOperand(Inst, Imm.Val,
                         ApplyModifiers &&
                         isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
2007   } else {
2008     assert(!isImmTy(ImmTyNone) || !hasModifiers());
2009     Inst.addOperand(MCOperand::createImm(Imm.Val));
2010     setImmKindNone();
2011   }
2012 }
2013 
2014 void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const {
2015   const auto& InstDesc = AsmParser->getMII()->get(Inst.getOpcode());
2016   auto OpNum = Inst.getNumOperands();
2017   // Check that this operand accepts literals
2018   assert(AMDGPU::isSISrcOperand(InstDesc, OpNum));
2019 
2020   if (ApplyModifiers) {
2021     assert(AMDGPU::isSISrcFPOperand(InstDesc, OpNum));
2022     const unsigned Size = Imm.IsFPImm ? sizeof(double) : getOperandSize(InstDesc, OpNum);
2023     Val = applyInputFPModifiers(Val, Size);
2024   }
2025 
2026   APInt Literal(64, Val);
2027   uint8_t OpTy = InstDesc.OpInfo[OpNum].OperandType;
2028 
2029   if (Imm.IsFPImm) { // We got fp literal token
2030     switch (OpTy) {
2031     case AMDGPU::OPERAND_REG_IMM_INT64:
2032     case AMDGPU::OPERAND_REG_IMM_FP64:
2033     case AMDGPU::OPERAND_REG_INLINE_C_INT64:
2034     case AMDGPU::OPERAND_REG_INLINE_C_FP64:
2035     case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
2036       if (AMDGPU::isInlinableLiteral64(Literal.getZExtValue(),
2037                                        AsmParser->hasInv2PiInlineImm())) {
2038         Inst.addOperand(MCOperand::createImm(Literal.getZExtValue()));
2039         setImmKindConst();
2040         return;
2041       }
2042 
2043       // Non-inlineable
2044       if (AMDGPU::isSISrcFPOperand(InstDesc, OpNum)) { // Expected 64-bit fp operand
2045         // For fp operands we check if low 32 bits are zeros
2046         if (Literal.getLoBits(32) != 0) {
2047           const_cast<AMDGPUAsmParser *>(AsmParser)->Warning(Inst.getLoc(),
2048           "Can't encode literal as exact 64-bit floating-point operand. "
2049           "Low 32-bits will be set to zero");
2050         }
2051 
2052         Inst.addOperand(MCOperand::createImm(Literal.lshr(32).getZExtValue()));
2053         setImmKindLiteral();
2054         return;
2055       }
2056 
2057       // We don't allow fp literals in 64-bit integer instructions. It is
2058       // unclear how we should encode them. This case should be checked earlier
2059       // in predicate methods (isLiteralImm())
2060       llvm_unreachable("fp literal in 64-bit integer instruction.");
2061 
2062     case AMDGPU::OPERAND_REG_IMM_INT32:
2063     case AMDGPU::OPERAND_REG_IMM_FP32:
2064     case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED:
2065     case AMDGPU::OPERAND_REG_INLINE_C_INT32:
2066     case AMDGPU::OPERAND_REG_INLINE_C_FP32:
2067     case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
2068     case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
2069     case AMDGPU::OPERAND_REG_IMM_INT16:
2070     case AMDGPU::OPERAND_REG_IMM_FP16:
2071     case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED:
2072     case AMDGPU::OPERAND_REG_INLINE_C_INT16:
2073     case AMDGPU::OPERAND_REG_INLINE_C_FP16:
2074     case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
2075     case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
2076     case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
2077     case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
2078     case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
2079     case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
2080     case AMDGPU::OPERAND_REG_IMM_V2INT16:
2081     case AMDGPU::OPERAND_REG_IMM_V2FP16:
2082     case AMDGPU::OPERAND_REG_INLINE_C_V2FP32:
2083     case AMDGPU::OPERAND_REG_IMM_V2FP32:
2084     case AMDGPU::OPERAND_REG_INLINE_C_V2INT32:
2085     case AMDGPU::OPERAND_REG_IMM_V2INT32:
2086     case AMDGPU::OPERAND_KIMM32:
2087     case AMDGPU::OPERAND_KIMM16: {
2088       bool lost;
2089       APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
      // Convert the literal to the operand's floating-point semantics.
2091       FPLiteral.convert(*getOpFltSemantics(OpTy),
2092                         APFloat::rmNearestTiesToEven, &lost);
      // Precision loss is allowed, but overflow and underflow are not. This
      // should have been checked earlier by isLiteralImm().
2095 
2096       uint64_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue();
2097       Inst.addOperand(MCOperand::createImm(ImmVal));
2098       setImmKindLiteral();
2099       return;
2100     }
2101     default:
2102       llvm_unreachable("invalid operand size");
2103     }
2104 
2105     return;
2106   }
2107 
2108   // We got int literal token.
2109   // Only sign extend inline immediates.
2110   switch (OpTy) {
2111   case AMDGPU::OPERAND_REG_IMM_INT32:
2112   case AMDGPU::OPERAND_REG_IMM_FP32:
2113   case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED:
2114   case AMDGPU::OPERAND_REG_INLINE_C_INT32:
2115   case AMDGPU::OPERAND_REG_INLINE_C_FP32:
2116   case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
2117   case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
2118   case AMDGPU::OPERAND_REG_IMM_V2INT16:
2119   case AMDGPU::OPERAND_REG_IMM_V2FP16:
2120   case AMDGPU::OPERAND_REG_IMM_V2FP32:
2121   case AMDGPU::OPERAND_REG_INLINE_C_V2FP32:
2122   case AMDGPU::OPERAND_REG_IMM_V2INT32:
2123   case AMDGPU::OPERAND_REG_INLINE_C_V2INT32:
2124     if (isSafeTruncation(Val, 32) &&
2125         AMDGPU::isInlinableLiteral32(static_cast<int32_t>(Val),
2126                                      AsmParser->hasInv2PiInlineImm())) {
2127       Inst.addOperand(MCOperand::createImm(Val));
2128       setImmKindConst();
2129       return;
2130     }
2131 
2132     Inst.addOperand(MCOperand::createImm(Val & 0xffffffff));
2133     setImmKindLiteral();
2134     return;
2135 
2136   case AMDGPU::OPERAND_REG_IMM_INT64:
2137   case AMDGPU::OPERAND_REG_IMM_FP64:
2138   case AMDGPU::OPERAND_REG_INLINE_C_INT64:
2139   case AMDGPU::OPERAND_REG_INLINE_C_FP64:
2140   case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
2141     if (AMDGPU::isInlinableLiteral64(Val, AsmParser->hasInv2PiInlineImm())) {
2142       Inst.addOperand(MCOperand::createImm(Val));
2143       setImmKindConst();
2144       return;
2145     }
2146 
2147     Inst.addOperand(MCOperand::createImm(Lo_32(Val)));
2148     setImmKindLiteral();
2149     return;
2150 
2151   case AMDGPU::OPERAND_REG_IMM_INT16:
2152   case AMDGPU::OPERAND_REG_IMM_FP16:
2153   case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED:
2154   case AMDGPU::OPERAND_REG_INLINE_C_INT16:
2155   case AMDGPU::OPERAND_REG_INLINE_C_FP16:
2156   case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
2157   case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
2158     if (isSafeTruncation(Val, 16) &&
2159         AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val),
2160                                      AsmParser->hasInv2PiInlineImm())) {
2161       Inst.addOperand(MCOperand::createImm(Val));
2162       setImmKindConst();
2163       return;
2164     }
2165 
2166     Inst.addOperand(MCOperand::createImm(Val & 0xffff));
2167     setImmKindLiteral();
2168     return;
2169 
2170   case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
2171   case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
2172   case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
2173   case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: {
2174     assert(isSafeTruncation(Val, 16));
2175     assert(AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val),
2176                                         AsmParser->hasInv2PiInlineImm()));
2177 
2178     Inst.addOperand(MCOperand::createImm(Val));
2179     return;
2180   }
2181   case AMDGPU::OPERAND_KIMM32:
2182     Inst.addOperand(MCOperand::createImm(Literal.getLoBits(32).getZExtValue()));
2183     setImmKindNone();
2184     return;
2185   case AMDGPU::OPERAND_KIMM16:
2186     Inst.addOperand(MCOperand::createImm(Literal.getLoBits(16).getZExtValue()));
2187     setImmKindNone();
2188     return;
2189   default:
2190     llvm_unreachable("invalid operand size");
2191   }
2192 }
2193 
2194 template <unsigned Bitwidth>
2195 void AMDGPUOperand::addKImmFPOperands(MCInst &Inst, unsigned N) const {
2196   APInt Literal(64, Imm.Val);
2197   setImmKindNone();
2198 
2199   if (!Imm.IsFPImm) {
2200     // We got int literal token.
2201     Inst.addOperand(MCOperand::createImm(Literal.getLoBits(Bitwidth).getZExtValue()));
2202     return;
2203   }
2204 
2205   bool Lost;
2206   APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
2207   FPLiteral.convert(*getFltSemantics(Bitwidth / 8),
2208                     APFloat::rmNearestTiesToEven, &Lost);
2209   Inst.addOperand(MCOperand::createImm(FPLiteral.bitcastToAPInt().getZExtValue()));
2210 }
2211 
2212 void AMDGPUOperand::addRegOperands(MCInst &Inst, unsigned N) const {
2213   Inst.addOperand(MCOperand::createReg(AMDGPU::getMCReg(getReg(), AsmParser->getSTI())));
2214 }
2215 
2216 static bool isInlineValue(unsigned Reg) {
2217   switch (Reg) {
2218   case AMDGPU::SRC_SHARED_BASE:
2219   case AMDGPU::SRC_SHARED_LIMIT:
2220   case AMDGPU::SRC_PRIVATE_BASE:
2221   case AMDGPU::SRC_PRIVATE_LIMIT:
2222   case AMDGPU::SRC_POPS_EXITING_WAVE_ID:
2223     return true;
2224   case AMDGPU::SRC_VCCZ:
2225   case AMDGPU::SRC_EXECZ:
2226   case AMDGPU::SRC_SCC:
2227     return true;
2228   case AMDGPU::SGPR_NULL:
2229     return true;
2230   default:
2231     return false;
2232   }
2233 }
2234 
2235 bool AMDGPUOperand::isInlineValue() const {
2236   return isRegKind() && ::isInlineValue(getReg());
2237 }
2238 
2239 //===----------------------------------------------------------------------===//
2240 // AsmParser
2241 //===----------------------------------------------------------------------===//
2242 
2243 static int getRegClass(RegisterKind Is, unsigned RegWidth) {
2244   if (Is == IS_VGPR) {
2245     switch (RegWidth) {
2246       default: return -1;
2247       case 1: return AMDGPU::VGPR_32RegClassID;
2248       case 2: return AMDGPU::VReg_64RegClassID;
2249       case 3: return AMDGPU::VReg_96RegClassID;
2250       case 4: return AMDGPU::VReg_128RegClassID;
2251       case 5: return AMDGPU::VReg_160RegClassID;
2252       case 6: return AMDGPU::VReg_192RegClassID;
2253       case 7: return AMDGPU::VReg_224RegClassID;
2254       case 8: return AMDGPU::VReg_256RegClassID;
2255       case 16: return AMDGPU::VReg_512RegClassID;
2256       case 32: return AMDGPU::VReg_1024RegClassID;
2257     }
2258   } else if (Is == IS_TTMP) {
2259     switch (RegWidth) {
2260       default: return -1;
2261       case 1: return AMDGPU::TTMP_32RegClassID;
2262       case 2: return AMDGPU::TTMP_64RegClassID;
2263       case 4: return AMDGPU::TTMP_128RegClassID;
2264       case 8: return AMDGPU::TTMP_256RegClassID;
2265       case 16: return AMDGPU::TTMP_512RegClassID;
2266     }
2267   } else if (Is == IS_SGPR) {
2268     switch (RegWidth) {
2269       default: return -1;
2270       case 1: return AMDGPU::SGPR_32RegClassID;
2271       case 2: return AMDGPU::SGPR_64RegClassID;
2272       case 3: return AMDGPU::SGPR_96RegClassID;
2273       case 4: return AMDGPU::SGPR_128RegClassID;
2274       case 5: return AMDGPU::SGPR_160RegClassID;
2275       case 6: return AMDGPU::SGPR_192RegClassID;
2276       case 7: return AMDGPU::SGPR_224RegClassID;
2277       case 8: return AMDGPU::SGPR_256RegClassID;
2278       case 16: return AMDGPU::SGPR_512RegClassID;
2279     }
2280   } else if (Is == IS_AGPR) {
2281     switch (RegWidth) {
2282       default: return -1;
2283       case 1: return AMDGPU::AGPR_32RegClassID;
2284       case 2: return AMDGPU::AReg_64RegClassID;
2285       case 3: return AMDGPU::AReg_96RegClassID;
2286       case 4: return AMDGPU::AReg_128RegClassID;
2287       case 5: return AMDGPU::AReg_160RegClassID;
2288       case 6: return AMDGPU::AReg_192RegClassID;
2289       case 7: return AMDGPU::AReg_224RegClassID;
2290       case 8: return AMDGPU::AReg_256RegClassID;
2291       case 16: return AMDGPU::AReg_512RegClassID;
2292       case 32: return AMDGPU::AReg_1024RegClassID;
2293     }
2294   }
2295   return -1;
2296 }
2297 
2298 static unsigned getSpecialRegForName(StringRef RegName) {
2299   return StringSwitch<unsigned>(RegName)
2300     .Case("exec", AMDGPU::EXEC)
2301     .Case("vcc", AMDGPU::VCC)
2302     .Case("flat_scratch", AMDGPU::FLAT_SCR)
2303     .Case("xnack_mask", AMDGPU::XNACK_MASK)
2304     .Case("shared_base", AMDGPU::SRC_SHARED_BASE)
2305     .Case("src_shared_base", AMDGPU::SRC_SHARED_BASE)
2306     .Case("shared_limit", AMDGPU::SRC_SHARED_LIMIT)
2307     .Case("src_shared_limit", AMDGPU::SRC_SHARED_LIMIT)
2308     .Case("private_base", AMDGPU::SRC_PRIVATE_BASE)
2309     .Case("src_private_base", AMDGPU::SRC_PRIVATE_BASE)
2310     .Case("private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
2311     .Case("src_private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
2312     .Case("pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
2313     .Case("src_pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
2314     .Case("lds_direct", AMDGPU::LDS_DIRECT)
2315     .Case("src_lds_direct", AMDGPU::LDS_DIRECT)
2316     .Case("m0", AMDGPU::M0)
2317     .Case("vccz", AMDGPU::SRC_VCCZ)
2318     .Case("src_vccz", AMDGPU::SRC_VCCZ)
2319     .Case("execz", AMDGPU::SRC_EXECZ)
2320     .Case("src_execz", AMDGPU::SRC_EXECZ)
2321     .Case("scc", AMDGPU::SRC_SCC)
2322     .Case("src_scc", AMDGPU::SRC_SCC)
2323     .Case("tba", AMDGPU::TBA)
2324     .Case("tma", AMDGPU::TMA)
2325     .Case("flat_scratch_lo", AMDGPU::FLAT_SCR_LO)
2326     .Case("flat_scratch_hi", AMDGPU::FLAT_SCR_HI)
2327     .Case("xnack_mask_lo", AMDGPU::XNACK_MASK_LO)
2328     .Case("xnack_mask_hi", AMDGPU::XNACK_MASK_HI)
2329     .Case("vcc_lo", AMDGPU::VCC_LO)
2330     .Case("vcc_hi", AMDGPU::VCC_HI)
2331     .Case("exec_lo", AMDGPU::EXEC_LO)
2332     .Case("exec_hi", AMDGPU::EXEC_HI)
2333     .Case("tma_lo", AMDGPU::TMA_LO)
2334     .Case("tma_hi", AMDGPU::TMA_HI)
2335     .Case("tba_lo", AMDGPU::TBA_LO)
2336     .Case("tba_hi", AMDGPU::TBA_HI)
2337     .Case("pc", AMDGPU::PC_REG)
2338     .Case("null", AMDGPU::SGPR_NULL)
2339     .Default(AMDGPU::NoRegister);
2340 }
2341 
2342 bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
2343                                     SMLoc &EndLoc, bool RestoreOnFailure) {
2344   auto R = parseRegister();
2345   if (!R) return true;
2346   assert(R->isReg());
2347   RegNo = R->getReg();
2348   StartLoc = R->getStartLoc();
2349   EndLoc = R->getEndLoc();
2350   return false;
2351 }
2352 
2353 bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
2354                                     SMLoc &EndLoc) {
2355   return ParseRegister(RegNo, StartLoc, EndLoc, /*RestoreOnFailure=*/false);
2356 }
2357 
2358 OperandMatchResultTy AMDGPUAsmParser::tryParseRegister(unsigned &RegNo,
2359                                                        SMLoc &StartLoc,
2360                                                        SMLoc &EndLoc) {
2361   bool Result =
2362       ParseRegister(RegNo, StartLoc, EndLoc, /*RestoreOnFailure=*/true);
2363   bool PendingErrors = getParser().hasPendingError();
2364   getParser().clearPendingErrors();
2365   if (PendingErrors)
2366     return MatchOperand_ParseFail;
2367   if (Result)
2368     return MatchOperand_NoMatch;
2369   return MatchOperand_Success;
2370 }
2371 
2372 bool AMDGPUAsmParser::AddNextRegisterToList(unsigned &Reg, unsigned &RegWidth,
2373                                             RegisterKind RegKind, unsigned Reg1,
2374                                             SMLoc Loc) {
2375   switch (RegKind) {
2376   case IS_SPECIAL:
2377     if (Reg == AMDGPU::EXEC_LO && Reg1 == AMDGPU::EXEC_HI) {
2378       Reg = AMDGPU::EXEC;
2379       RegWidth = 2;
2380       return true;
2381     }
2382     if (Reg == AMDGPU::FLAT_SCR_LO && Reg1 == AMDGPU::FLAT_SCR_HI) {
2383       Reg = AMDGPU::FLAT_SCR;
2384       RegWidth = 2;
2385       return true;
2386     }
2387     if (Reg == AMDGPU::XNACK_MASK_LO && Reg1 == AMDGPU::XNACK_MASK_HI) {
2388       Reg = AMDGPU::XNACK_MASK;
2389       RegWidth = 2;
2390       return true;
2391     }
2392     if (Reg == AMDGPU::VCC_LO && Reg1 == AMDGPU::VCC_HI) {
2393       Reg = AMDGPU::VCC;
2394       RegWidth = 2;
2395       return true;
2396     }
2397     if (Reg == AMDGPU::TBA_LO && Reg1 == AMDGPU::TBA_HI) {
2398       Reg = AMDGPU::TBA;
2399       RegWidth = 2;
2400       return true;
2401     }
2402     if (Reg == AMDGPU::TMA_LO && Reg1 == AMDGPU::TMA_HI) {
2403       Reg = AMDGPU::TMA;
2404       RegWidth = 2;
2405       return true;
2406     }
2407     Error(Loc, "register does not fit in the list");
2408     return false;
2409   case IS_VGPR:
2410   case IS_SGPR:
2411   case IS_AGPR:
2412   case IS_TTMP:
2413     if (Reg1 != Reg + RegWidth) {
2414       Error(Loc, "registers in a list must have consecutive indices");
2415       return false;
2416     }
2417     RegWidth++;
2418     return true;
2419   default:
2420     llvm_unreachable("unexpected register kind");
2421   }
2422 }
2423 
2424 struct RegInfo {
2425   StringLiteral Name;
2426   RegisterKind Kind;
2427 };
2428 
2429 static constexpr RegInfo RegularRegisters[] = {
2430   {{"v"},    IS_VGPR},
2431   {{"s"},    IS_SGPR},
2432   {{"ttmp"}, IS_TTMP},
2433   {{"acc"},  IS_AGPR},
2434   {{"a"},    IS_AGPR},
2435 };
2436 
2437 static bool isRegularReg(RegisterKind Kind) {
2438   return Kind == IS_VGPR ||
2439          Kind == IS_SGPR ||
2440          Kind == IS_TTMP ||
2441          Kind == IS_AGPR;
2442 }
2443 
2444 static const RegInfo* getRegularRegInfo(StringRef Str) {
2445   for (const RegInfo &Reg : RegularRegisters)
2446     if (Str.startswith(Reg.Name))
2447       return &Reg;
2448   return nullptr;
2449 }
2450 
2451 static bool getRegNum(StringRef Str, unsigned& Num) {
2452   return !Str.getAsInteger(10, Num);
2453 }
2454 
2455 bool
2456 AMDGPUAsmParser::isRegister(const AsmToken &Token,
2457                             const AsmToken &NextToken) const {
2458 
2459   // A list of consecutive registers: [s0,s1,s2,s3]
2460   if (Token.is(AsmToken::LBrac))
2461     return true;
2462 
2463   if (!Token.is(AsmToken::Identifier))
2464     return false;
2465 
2466   // A single register like s0 or a range of registers like s[0:1]
2467 
2468   StringRef Str = Token.getString();
2469   const RegInfo *Reg = getRegularRegInfo(Str);
2470   if (Reg) {
2471     StringRef RegName = Reg->Name;
2472     StringRef RegSuffix = Str.substr(RegName.size());
2473     if (!RegSuffix.empty()) {
2474       unsigned Num;
2475       // A single register with an index: rXX
2476       if (getRegNum(RegSuffix, Num))
2477         return true;
2478     } else {
2479       // A range of registers: r[XX:YY].
2480       if (NextToken.is(AsmToken::LBrac))
2481         return true;
2482     }
2483   }
2484 
2485   return getSpecialRegForName(Str) != AMDGPU::NoRegister;
2486 }
2487 
2488 bool
2489 AMDGPUAsmParser::isRegister()
2490 {
2491   return isRegister(getToken(), peekToken());
2492 }
2493 
2494 unsigned
2495 AMDGPUAsmParser::getRegularReg(RegisterKind RegKind,
2496                                unsigned RegNum,
2497                                unsigned RegWidth,
2498                                SMLoc Loc) {
2499 
2500   assert(isRegularReg(RegKind));
2501 
2502   unsigned AlignSize = 1;
2503   if (RegKind == IS_SGPR || RegKind == IS_TTMP) {
2504     // SGPR and TTMP registers must be aligned.
2505     // Max required alignment is 4 dwords.
2506     AlignSize = std::min(RegWidth, 4u);
2507   }
2508 
2509   if (RegNum % AlignSize != 0) {
2510     Error(Loc, "invalid register alignment");
2511     return AMDGPU::NoRegister;
2512   }
2513 
2514   unsigned RegIdx = RegNum / AlignSize;
2515   int RCID = getRegClass(RegKind, RegWidth);
2516   if (RCID == -1) {
2517     Error(Loc, "invalid or unsupported register size");
2518     return AMDGPU::NoRegister;
2519   }
2520 
2521   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
2522   const MCRegisterClass RC = TRI->getRegClass(RCID);
2523   if (RegIdx >= RC.getNumRegs()) {
2524     Error(Loc, "register index is out of range");
2525     return AMDGPU::NoRegister;
2526   }
2527 
2528   return RC.getRegister(RegIdx);
2529 }
2530 
2531 bool
2532 AMDGPUAsmParser::ParseRegRange(unsigned& Num, unsigned& Width) {
2533   int64_t RegLo, RegHi;
2534   if (!skipToken(AsmToken::LBrac, "missing register index"))
2535     return false;
2536 
2537   SMLoc FirstIdxLoc = getLoc();
2538   SMLoc SecondIdxLoc;
2539 
2540   if (!parseExpr(RegLo))
2541     return false;
2542 
2543   if (trySkipToken(AsmToken::Colon)) {
2544     SecondIdxLoc = getLoc();
2545     if (!parseExpr(RegHi))
2546       return false;
2547   } else {
2548     RegHi = RegLo;
2549   }
2550 
2551   if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
2552     return false;
2553 
2554   if (!isUInt<32>(RegLo)) {
2555     Error(FirstIdxLoc, "invalid register index");
2556     return false;
2557   }
2558 
2559   if (!isUInt<32>(RegHi)) {
2560     Error(SecondIdxLoc, "invalid register index");
2561     return false;
2562   }
2563 
2564   if (RegLo > RegHi) {
2565     Error(FirstIdxLoc, "first register index should not exceed second index");
2566     return false;
2567   }
2568 
2569   Num = static_cast<unsigned>(RegLo);
2570   Width = (RegHi - RegLo) + 1;
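  // E.g. v[8:11] yields Num = 8 and Width = 4, while a single index such as
  // v[5] yields Num = 5 and Width = 1.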
2571   return true;
2572 }
2573 
2574 unsigned AMDGPUAsmParser::ParseSpecialReg(RegisterKind &RegKind,
2575                                           unsigned &RegNum, unsigned &RegWidth,
2576                                           SmallVectorImpl<AsmToken> &Tokens) {
2577   assert(isToken(AsmToken::Identifier));
2578   unsigned Reg = getSpecialRegForName(getTokenStr());
2579   if (Reg) {
2580     RegNum = 0;
2581     RegWidth = 1;
2582     RegKind = IS_SPECIAL;
2583     Tokens.push_back(getToken());
2584     lex(); // skip register name
2585   }
2586   return Reg;
2587 }
2588 
2589 unsigned AMDGPUAsmParser::ParseRegularReg(RegisterKind &RegKind,
2590                                           unsigned &RegNum, unsigned &RegWidth,
2591                                           SmallVectorImpl<AsmToken> &Tokens) {
2592   assert(isToken(AsmToken::Identifier));
2593   StringRef RegName = getTokenStr();
2594   auto Loc = getLoc();
2595 
2596   const RegInfo *RI = getRegularRegInfo(RegName);
2597   if (!RI) {
2598     Error(Loc, "invalid register name");
2599     return AMDGPU::NoRegister;
2600   }
2601 
2602   Tokens.push_back(getToken());
2603   lex(); // skip register name
2604 
2605   RegKind = RI->Kind;
2606   StringRef RegSuffix = RegName.substr(RI->Name.size());
2607   if (!RegSuffix.empty()) {
2608     // Single 32-bit register: vXX.
2609     if (!getRegNum(RegSuffix, RegNum)) {
2610       Error(Loc, "invalid register index");
2611       return AMDGPU::NoRegister;
2612     }
2613     RegWidth = 1;
2614   } else {
2615     // Range of registers: v[XX:YY]. ":YY" is optional.
2616     if (!ParseRegRange(RegNum, RegWidth))
2617       return AMDGPU::NoRegister;
2618   }
2619 
2620   return getRegularReg(RegKind, RegNum, RegWidth, Loc);
2621 }
2622 
2623 unsigned AMDGPUAsmParser::ParseRegList(RegisterKind &RegKind, unsigned &RegNum,
2624                                        unsigned &RegWidth,
2625                                        SmallVectorImpl<AsmToken> &Tokens) {
2626   unsigned Reg = AMDGPU::NoRegister;
2627   auto ListLoc = getLoc();
2628 
2629   if (!skipToken(AsmToken::LBrac,
2630                  "expected a register or a list of registers")) {
2631     return AMDGPU::NoRegister;
2632   }
2633 
2634   // List of consecutive registers, e.g.: [s0,s1,s2,s3]
2635 
2636   auto Loc = getLoc();
2637   if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth))
2638     return AMDGPU::NoRegister;
2639   if (RegWidth != 1) {
2640     Error(Loc, "expected a single 32-bit register");
2641     return AMDGPU::NoRegister;
2642   }
2643 
2644   for (; trySkipToken(AsmToken::Comma); ) {
2645     RegisterKind NextRegKind;
2646     unsigned NextReg, NextRegNum, NextRegWidth;
2647     Loc = getLoc();
2648 
2649     if (!ParseAMDGPURegister(NextRegKind, NextReg,
2650                              NextRegNum, NextRegWidth,
2651                              Tokens)) {
2652       return AMDGPU::NoRegister;
2653     }
2654     if (NextRegWidth != 1) {
2655       Error(Loc, "expected a single 32-bit register");
2656       return AMDGPU::NoRegister;
2657     }
2658     if (NextRegKind != RegKind) {
2659       Error(Loc, "registers in a list must be of the same kind");
2660       return AMDGPU::NoRegister;
2661     }
2662     if (!AddNextRegisterToList(Reg, RegWidth, RegKind, NextReg, Loc))
2663       return AMDGPU::NoRegister;
2664   }
2665 
2666   if (!skipToken(AsmToken::RBrac,
2667                  "expected a comma or a closing square bracket")) {
2668     return AMDGPU::NoRegister;
2669   }
2670 
2671   if (isRegularReg(RegKind))
2672     Reg = getRegularReg(RegKind, RegNum, RegWidth, ListLoc);
2673 
2674   return Reg;
2675 }
2676 
2677 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
2678                                           unsigned &RegNum, unsigned &RegWidth,
2679                                           SmallVectorImpl<AsmToken> &Tokens) {
2680   auto Loc = getLoc();
2681   Reg = AMDGPU::NoRegister;
2682 
2683   if (isToken(AsmToken::Identifier)) {
2684     Reg = ParseSpecialReg(RegKind, RegNum, RegWidth, Tokens);
2685     if (Reg == AMDGPU::NoRegister)
2686       Reg = ParseRegularReg(RegKind, RegNum, RegWidth, Tokens);
2687   } else {
2688     Reg = ParseRegList(RegKind, RegNum, RegWidth, Tokens);
2689   }
2690 
2691   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
2692   if (Reg == AMDGPU::NoRegister) {
2693     assert(Parser.hasPendingError());
2694     return false;
2695   }
2696 
2697   if (!subtargetHasRegister(*TRI, Reg)) {
2698     if (Reg == AMDGPU::SGPR_NULL) {
2699       Error(Loc, "'null' operand is not supported on this GPU");
2700     } else {
2701       Error(Loc, "register not available on this GPU");
2702     }
2703     return false;
2704   }
2705 
2706   return true;
2707 }
2708 
2709 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
2710                                           unsigned &RegNum, unsigned &RegWidth,
2711                                           bool RestoreOnFailure /*=false*/) {
2712   Reg = AMDGPU::NoRegister;
2713 
2714   SmallVector<AsmToken, 1> Tokens;
2715   if (ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, Tokens)) {
2716     if (RestoreOnFailure) {
2717       while (!Tokens.empty()) {
2718         getLexer().UnLex(Tokens.pop_back_val());
2719       }
2720     }
2721     return true;
2722   }
2723   return false;
2724 }
2725 
2726 Optional<StringRef>
2727 AMDGPUAsmParser::getGprCountSymbolName(RegisterKind RegKind) {
2728   switch (RegKind) {
2729   case IS_VGPR:
2730     return StringRef(".amdgcn.next_free_vgpr");
2731   case IS_SGPR:
2732     return StringRef(".amdgcn.next_free_sgpr");
2733   default:
2734     return None;
2735   }
2736 }
2737 
2738 void AMDGPUAsmParser::initializeGprCountSymbol(RegisterKind RegKind) {
2739   auto SymbolName = getGprCountSymbolName(RegKind);
2740   assert(SymbolName && "initializing invalid register kind");
2741   MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);
2742   Sym->setVariableValue(MCConstantExpr::create(0, getContext()));
2743 }
2744 
2745 bool AMDGPUAsmParser::updateGprCountSymbols(RegisterKind RegKind,
2746                                             unsigned DwordRegIndex,
2747                                             unsigned RegWidth) {
2748   // Symbols are only defined for GCN targets
2749   if (AMDGPU::getIsaVersion(getSTI().getCPU()).Major < 6)
2750     return true;
2751 
2752   auto SymbolName = getGprCountSymbolName(RegKind);
2753   if (!SymbolName)
2754     return true;
2755   MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);
2756 
2757   int64_t NewMax = DwordRegIndex + RegWidth - 1;
2758   int64_t OldCount;
2759 
2760   if (!Sym->isVariable())
2761     return !Error(getLoc(),
2762                   ".amdgcn.next_free_{v,s}gpr symbols must be variable");
2763   if (!Sym->getVariableValue(false)->evaluateAsAbsolute(OldCount))
2764     return !Error(
2765         getLoc(),
2766         ".amdgcn.next_free_{v,s}gpr symbols must be absolute expressions");
2767 
2768   if (OldCount <= NewMax)
2769     Sym->setVariableValue(MCConstantExpr::create(NewMax + 1, getContext()));
2770 
2771   return true;
2772 }
2773 
2774 std::unique_ptr<AMDGPUOperand>
2775 AMDGPUAsmParser::parseRegister(bool RestoreOnFailure) {
2776   const auto &Tok = getToken();
2777   SMLoc StartLoc = Tok.getLoc();
2778   SMLoc EndLoc = Tok.getEndLoc();
2779   RegisterKind RegKind;
2780   unsigned Reg, RegNum, RegWidth;
2781 
2782   if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth)) {
2783     return nullptr;
2784   }
2785   if (isHsaAbiVersion3AndAbove(&getSTI())) {
2786     if (!updateGprCountSymbols(RegKind, RegNum, RegWidth))
2787       return nullptr;
2788   } else
2789     KernelScope.usesRegister(RegKind, RegNum, RegWidth);
2790   return AMDGPUOperand::CreateReg(this, Reg, StartLoc, EndLoc);
2791 }
2792 
2793 OperandMatchResultTy
2794 AMDGPUAsmParser::parseImm(OperandVector &Operands, bool HasSP3AbsModifier) {
2795   // TODO: add syntactic sugar for 1/(2*PI)
2796 
2797   assert(!isRegister());
2798   assert(!isModifier());
2799 
2800   const auto& Tok = getToken();
2801   const auto& NextTok = peekToken();
2802   bool IsReal = Tok.is(AsmToken::Real);
2803   SMLoc S = getLoc();
2804   bool Negate = false;
2805 
2806   if (!IsReal && Tok.is(AsmToken::Minus) && NextTok.is(AsmToken::Real)) {
2807     lex();
2808     IsReal = true;
2809     Negate = true;
2810   }
2811 
2812   if (IsReal) {
    // Floating-point expressions are not supported;
    // only floating-point literals with an
    // optional sign are allowed.
2816 
2817     StringRef Num = getTokenStr();
2818     lex();
2819 
2820     APFloat RealVal(APFloat::IEEEdouble());
2821     auto roundMode = APFloat::rmNearestTiesToEven;
2822     if (errorToBool(RealVal.convertFromString(Num, roundMode).takeError())) {
2823       return MatchOperand_ParseFail;
2824     }
2825     if (Negate)
2826       RealVal.changeSign();
2827 
2828     Operands.push_back(
2829       AMDGPUOperand::CreateImm(this, RealVal.bitcastToAPInt().getZExtValue(), S,
2830                                AMDGPUOperand::ImmTyNone, true));
2831 
2832     return MatchOperand_Success;
2833 
2834   } else {
2835     int64_t IntVal;
2836     const MCExpr *Expr;
2837     SMLoc S = getLoc();
2838 
2839     if (HasSP3AbsModifier) {
2840       // This is a workaround for handling expressions
2841       // as arguments of SP3 'abs' modifier, for example:
2842       //     |1.0|
2843       //     |-1|
2844       //     |1+x|
      // This syntax is not compatible with the syntax of standard
      // MC expressions (due to the trailing '|').
2847       SMLoc EndLoc;
2848       if (getParser().parsePrimaryExpr(Expr, EndLoc, nullptr))
2849         return MatchOperand_ParseFail;
2850     } else {
2851       if (Parser.parseExpression(Expr))
2852         return MatchOperand_ParseFail;
2853     }
2854 
2855     if (Expr->evaluateAsAbsolute(IntVal)) {
2856       Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S));
2857     } else {
2858       Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S));
2859     }
2860 
2861     return MatchOperand_Success;
2862   }
2863 
2864   return MatchOperand_NoMatch;
2865 }
2866 
2867 OperandMatchResultTy
2868 AMDGPUAsmParser::parseReg(OperandVector &Operands) {
2869   if (!isRegister())
2870     return MatchOperand_NoMatch;
2871 
2872   if (auto R = parseRegister()) {
2873     assert(R->isReg());
2874     Operands.push_back(std::move(R));
2875     return MatchOperand_Success;
2876   }
2877   return MatchOperand_ParseFail;
2878 }
2879 
2880 OperandMatchResultTy
2881 AMDGPUAsmParser::parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod) {
2882   auto res = parseReg(Operands);
2883   if (res != MatchOperand_NoMatch) {
2884     return res;
2885   } else if (isModifier()) {
2886     return MatchOperand_NoMatch;
2887   } else {
2888     return parseImm(Operands, HasSP3AbsMod);
2889   }
2890 }
2891 
2892 bool
2893 AMDGPUAsmParser::isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
2894   if (Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::LParen)) {
2895     const auto &str = Token.getString();
2896     return str == "abs" || str == "neg" || str == "sext";
2897   }
2898   return false;
2899 }
2900 
2901 bool
2902 AMDGPUAsmParser::isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const {
2903   return Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::Colon);
2904 }
2905 
2906 bool
2907 AMDGPUAsmParser::isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
2908   return isNamedOperandModifier(Token, NextToken) || Token.is(AsmToken::Pipe);
2909 }
2910 
2911 bool
2912 AMDGPUAsmParser::isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
2913   return isRegister(Token, NextToken) || isOperandModifier(Token, NextToken);
2914 }
2915 
2916 // Check if this is an operand modifier or an opcode modifier
// which may look like an expression but is not. We should
2918 // avoid parsing these modifiers as expressions. Currently
2919 // recognized sequences are:
2920 //   |...|
2921 //   abs(...)
2922 //   neg(...)
2923 //   sext(...)
2924 //   -reg
2925 //   -|...|
2926 //   -abs(...)
2927 //   name:...
2928 // Note that simple opcode modifiers like 'gds' may be parsed as
2929 // expressions; this is a special case. See getExpressionAsToken.
2930 //
2931 bool
2932 AMDGPUAsmParser::isModifier() {
2933 
2934   AsmToken Tok = getToken();
2935   AsmToken NextToken[2];
2936   peekTokens(NextToken);
2937 
2938   return isOperandModifier(Tok, NextToken[0]) ||
2939          (Tok.is(AsmToken::Minus) && isRegOrOperandModifier(NextToken[0], NextToken[1])) ||
2940          isOpcodeModifierWithVal(Tok, NextToken[0]);
2941 }
2942 
2943 // Check if the current token is an SP3 'neg' modifier.
// Currently this modifier is allowed in the following contexts:
2945 //
2946 // 1. Before a register, e.g. "-v0", "-v[...]" or "-[v0,v1]".
2947 // 2. Before an 'abs' modifier: -abs(...)
2948 // 3. Before an SP3 'abs' modifier: -|...|
2949 //
2950 // In all other cases "-" is handled as a part
2951 // of an expression that follows the sign.
2952 //
// Note: When "-" is followed by an integer literal,
// this is interpreted as integer negation rather
// than a floating-point NEG modifier applied to the literal.
// Besides being counter-intuitive, such use of the floating-point
// NEG modifier would give integer literals different meanings
// with VOP1/2/C and VOP3, for example:
//    v_exp_f32_e32 v5, -1 // VOP1: src0 = 0xFFFFFFFF
//    v_exp_f32_e64 v5, -1 // VOP3: src0 = 0x80000001
// Negative fp literals with a preceding "-" are
// handled likewise for uniformity.
2964 //
2965 bool
2966 AMDGPUAsmParser::parseSP3NegModifier() {
2967 
2968   AsmToken NextToken[2];
2969   peekTokens(NextToken);
2970 
2971   if (isToken(AsmToken::Minus) &&
2972       (isRegister(NextToken[0], NextToken[1]) ||
2973        NextToken[0].is(AsmToken::Pipe) ||
2974        isId(NextToken[0], "abs"))) {
2975     lex();
2976     return true;
2977   }
2978 
2979   return false;
2980 }
2981 
2982 OperandMatchResultTy
2983 AMDGPUAsmParser::parseRegOrImmWithFPInputMods(OperandVector &Operands,
2984                                               bool AllowImm) {
2985   bool Neg, SP3Neg;
2986   bool Abs, SP3Abs;
2987   SMLoc Loc;
2988 
2989   // Disable ambiguous constructs like '--1' etc. Should use neg(-1) instead.
2990   if (isToken(AsmToken::Minus) && peekToken().is(AsmToken::Minus)) {
2991     Error(getLoc(), "invalid syntax, expected 'neg' modifier");
2992     return MatchOperand_ParseFail;
2993   }
2994 
2995   SP3Neg = parseSP3NegModifier();
2996 
2997   Loc = getLoc();
2998   Neg = trySkipId("neg");
2999   if (Neg && SP3Neg) {
3000     Error(Loc, "expected register or immediate");
3001     return MatchOperand_ParseFail;
3002   }
3003   if (Neg && !skipToken(AsmToken::LParen, "expected left paren after neg"))
3004     return MatchOperand_ParseFail;
3005 
3006   Abs = trySkipId("abs");
3007   if (Abs && !skipToken(AsmToken::LParen, "expected left paren after abs"))
3008     return MatchOperand_ParseFail;
3009 
3010   Loc = getLoc();
3011   SP3Abs = trySkipToken(AsmToken::Pipe);
3012   if (Abs && SP3Abs) {
3013     Error(Loc, "expected register or immediate");
3014     return MatchOperand_ParseFail;
3015   }
3016 
3017   OperandMatchResultTy Res;
3018   if (AllowImm) {
3019     Res = parseRegOrImm(Operands, SP3Abs);
3020   } else {
3021     Res = parseReg(Operands);
3022   }
3023   if (Res != MatchOperand_Success) {
3024     return (SP3Neg || Neg || SP3Abs || Abs)? MatchOperand_ParseFail : Res;
3025   }
3026 
3027   if (SP3Abs && !skipToken(AsmToken::Pipe, "expected vertical bar"))
3028     return MatchOperand_ParseFail;
3029   if (Abs && !skipToken(AsmToken::RParen, "expected closing parentheses"))
3030     return MatchOperand_ParseFail;
3031   if (Neg && !skipToken(AsmToken::RParen, "expected closing parentheses"))
3032     return MatchOperand_ParseFail;
3033 
3034   AMDGPUOperand::Modifiers Mods;
3035   Mods.Abs = Abs || SP3Abs;
3036   Mods.Neg = Neg || SP3Neg;
3037 
3038   if (Mods.hasFPModifiers()) {
3039     AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
3040     if (Op.isExpr()) {
3041       Error(Op.getStartLoc(), "expected an absolute expression");
3042       return MatchOperand_ParseFail;
3043     }
3044     Op.setModifiers(Mods);
3045   }
3046   return MatchOperand_Success;
3047 }
3048 
3049 OperandMatchResultTy
3050 AMDGPUAsmParser::parseRegOrImmWithIntInputMods(OperandVector &Operands,
3051                                                bool AllowImm) {
3052   bool Sext = trySkipId("sext");
3053   if (Sext && !skipToken(AsmToken::LParen, "expected left paren after sext"))
3054     return MatchOperand_ParseFail;
3055 
3056   OperandMatchResultTy Res;
3057   if (AllowImm) {
3058     Res = parseRegOrImm(Operands);
3059   } else {
3060     Res = parseReg(Operands);
3061   }
3062   if (Res != MatchOperand_Success) {
3063     return Sext? MatchOperand_ParseFail : Res;
3064   }
3065 
3066   if (Sext && !skipToken(AsmToken::RParen, "expected closing parentheses"))
3067     return MatchOperand_ParseFail;
3068 
3069   AMDGPUOperand::Modifiers Mods;
3070   Mods.Sext = Sext;
3071 
3072   if (Mods.hasIntModifiers()) {
3073     AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
3074     if (Op.isExpr()) {
3075       Error(Op.getStartLoc(), "expected an absolute expression");
3076       return MatchOperand_ParseFail;
3077     }
3078     Op.setModifiers(Mods);
3079   }
3080 
3081   return MatchOperand_Success;
3082 }
3083 
3084 OperandMatchResultTy
3085 AMDGPUAsmParser::parseRegWithFPInputMods(OperandVector &Operands) {
3086   return parseRegOrImmWithFPInputMods(Operands, false);
3087 }
3088 
3089 OperandMatchResultTy
3090 AMDGPUAsmParser::parseRegWithIntInputMods(OperandVector &Operands) {
3091   return parseRegOrImmWithIntInputMods(Operands, false);
3092 }
3093 
3094 OperandMatchResultTy AMDGPUAsmParser::parseVReg32OrOff(OperandVector &Operands) {
3095   auto Loc = getLoc();
3096   if (trySkipId("off")) {
3097     Operands.push_back(AMDGPUOperand::CreateImm(this, 0, Loc,
3098                                                 AMDGPUOperand::ImmTyOff, false));
3099     return MatchOperand_Success;
3100   }
3101 
3102   if (!isRegister())
3103     return MatchOperand_NoMatch;
3104 
3105   std::unique_ptr<AMDGPUOperand> Reg = parseRegister();
3106   if (Reg) {
3107     Operands.push_back(std::move(Reg));
3108     return MatchOperand_Success;
3109   }
3110 
3111   return MatchOperand_ParseFail;
3112 
3113 }
3114 
3115 unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) {
3116   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
3117 
3118   if ((getForcedEncodingSize() == 32 && (TSFlags & SIInstrFlags::VOP3)) ||
3119       (getForcedEncodingSize() == 64 && !(TSFlags & SIInstrFlags::VOP3)) ||
3120       (isForcedDPP() && !(TSFlags & SIInstrFlags::DPP)) ||
3121       (isForcedSDWA() && !(TSFlags & SIInstrFlags::SDWA)) )
3122     return Match_InvalidOperand;
3123 
3124   if ((TSFlags & SIInstrFlags::VOP3) &&
3125       (TSFlags & SIInstrFlags::VOPAsmPrefer32Bit) &&
3126       getForcedEncodingSize() != 64)
3127     return Match_PreferE32;
3128 
3129   if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
3130       Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) {
3131     // v_mac_f32/16 allow only dst_sel == DWORD;
3132     auto OpNum =
3133         AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::dst_sel);
3134     const auto &Op = Inst.getOperand(OpNum);
3135     if (!Op.isImm() || Op.getImm() != AMDGPU::SDWA::SdwaSel::DWORD) {
3136       return Match_InvalidOperand;
3137     }
3138   }
3139 
3140   return Match_Success;
3141 }
3142 
3143 static ArrayRef<unsigned> getAllVariants() {
3144   static const unsigned Variants[] = {
3145     AMDGPUAsmVariants::DEFAULT, AMDGPUAsmVariants::VOP3,
3146     AMDGPUAsmVariants::SDWA, AMDGPUAsmVariants::SDWA9, AMDGPUAsmVariants::DPP
3147   };
3148 
3149   return makeArrayRef(Variants);
3150 }
3151 
3152 // Return the asm variants we should check when matching.
3153 ArrayRef<unsigned> AMDGPUAsmParser::getMatchedVariants() const {
3154   if (getForcedEncodingSize() == 32) {
3155     static const unsigned Variants[] = {AMDGPUAsmVariants::DEFAULT};
3156     return makeArrayRef(Variants);
3157   }
3158 
3159   if (isForcedVOP3()) {
3160     static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3};
3161     return makeArrayRef(Variants);
3162   }
3163 
3164   if (isForcedSDWA()) {
3165     static const unsigned Variants[] = {AMDGPUAsmVariants::SDWA,
3166                                         AMDGPUAsmVariants::SDWA9};
3167     return makeArrayRef(Variants);
3168   }
3169 
3170   if (isForcedDPP()) {
3171     static const unsigned Variants[] = {AMDGPUAsmVariants::DPP};
3172     return makeArrayRef(Variants);
3173   }
3174 
3175   return getAllVariants();
3176 }
3177 
3178 StringRef AMDGPUAsmParser::getMatchedVariantName() const {
3179   if (getForcedEncodingSize() == 32)
3180     return "e32";
3181 
3182   if (isForcedVOP3())
3183     return "e64";
3184 
3185   if (isForcedSDWA())
3186     return "sdwa";
3187 
3188   if (isForcedDPP())
3189     return "dpp";
3190 
3191   return "";
3192 }
3193 
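// Return the first implicit use of FLAT_SCR, VCC, VCC_LO, VCC_HI or M0, if
// any. Such implicit SGPR reads count toward the constant bus limit.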
3194 unsigned AMDGPUAsmParser::findImplicitSGPRReadInVOP(const MCInst &Inst) const {
3195   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
3196   const unsigned Num = Desc.getNumImplicitUses();
3197   for (unsigned i = 0; i < Num; ++i) {
3198     unsigned Reg = Desc.ImplicitUses[i];
3199     switch (Reg) {
3200     case AMDGPU::FLAT_SCR:
3201     case AMDGPU::VCC:
3202     case AMDGPU::VCC_LO:
3203     case AMDGPU::VCC_HI:
3204     case AMDGPU::M0:
3205       return Reg;
3206     default:
3207       break;
3208     }
3209   }
3210   return AMDGPU::NoRegister;
3211 }
3212 
3213 // NB: This code is correct only when used to check constant
3214 // bus limitations because GFX7 supports no f16 inline constants.
3215 // Note that there are no cases when a GFX7 opcode violates
3216 // constant bus limitations due to the use of an f16 constant.
3217 bool AMDGPUAsmParser::isInlineConstant(const MCInst &Inst,
3218                                        unsigned OpIdx) const {
3219   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
3220 
3221   if (!AMDGPU::isSISrcOperand(Desc, OpIdx)) {
3222     return false;
3223   }
3224 
3225   const MCOperand &MO = Inst.getOperand(OpIdx);
3226 
3227   int64_t Val = MO.getImm();
3228   auto OpSize = AMDGPU::getOperandSize(Desc, OpIdx);
3229 
3230   switch (OpSize) { // expected operand size
3231   case 8:
3232     return AMDGPU::isInlinableLiteral64(Val, hasInv2PiInlineImm());
3233   case 4:
3234     return AMDGPU::isInlinableLiteral32(Val, hasInv2PiInlineImm());
3235   case 2: {
3236     const unsigned OperandType = Desc.OpInfo[OpIdx].OperandType;
3237     if (OperandType == AMDGPU::OPERAND_REG_IMM_INT16 ||
3238         OperandType == AMDGPU::OPERAND_REG_INLINE_C_INT16 ||
3239         OperandType == AMDGPU::OPERAND_REG_INLINE_AC_INT16)
3240       return AMDGPU::isInlinableIntLiteral(Val);
3241 
3242     if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2INT16 ||
3243         OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2INT16 ||
3244         OperandType == AMDGPU::OPERAND_REG_IMM_V2INT16)
3245       return AMDGPU::isInlinableIntLiteralV216(Val);
3246 
3247     if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2FP16 ||
3248         OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2FP16 ||
3249         OperandType == AMDGPU::OPERAND_REG_IMM_V2FP16)
3250       return AMDGPU::isInlinableLiteralV216(Val, hasInv2PiInlineImm());
3251 
3252     return AMDGPU::isInlinableLiteral16(Val, hasInv2PiInlineImm());
3253   }
3254   default:
3255     llvm_unreachable("invalid operand size");
3256   }
3257 }
3258 
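// Return how many scalar (SGPR or literal) values an instruction may read.
// Targets before GFX10 allow one; GFX10+ allows two, except for 64-bit
// shifts, which are still limited to one.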
3259 unsigned AMDGPUAsmParser::getConstantBusLimit(unsigned Opcode) const {
3260   if (!isGFX10Plus())
3261     return 1;
3262 
3263   switch (Opcode) {
3264   // 64-bit shift instructions can use only one scalar value input
3265   case AMDGPU::V_LSHLREV_B64_e64:
3266   case AMDGPU::V_LSHLREV_B64_gfx10:
3267   case AMDGPU::V_LSHRREV_B64_e64:
3268   case AMDGPU::V_LSHRREV_B64_gfx10:
3269   case AMDGPU::V_ASHRREV_I64_e64:
3270   case AMDGPU::V_ASHRREV_I64_gfx10:
3271   case AMDGPU::V_LSHL_B64_e64:
3272   case AMDGPU::V_LSHR_B64_e64:
3273   case AMDGPU::V_ASHR_I64_e64:
3274     return 1;
3275   default:
3276     return 2;
3277   }
3278 }
3279 
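// An operand occupies the constant bus if it is an SGPR other than the null
// register, a literal that is not an inline constant, or an expression.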
3280 bool AMDGPUAsmParser::usesConstantBus(const MCInst &Inst, unsigned OpIdx) {
3281   const MCOperand &MO = Inst.getOperand(OpIdx);
3282   if (MO.isImm()) {
3283     return !isInlineConstant(Inst, OpIdx);
3284   } else if (MO.isReg()) {
3285     auto Reg = MO.getReg();
3286     const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3287     auto PReg = mc2PseudoReg(Reg);
3288     return isSGPR(PReg, TRI) && PReg != SGPR_NULL;
3289   } else {
3290     return true;
3291   }
3292 }
3293 
3294 bool
3295 AMDGPUAsmParser::validateConstantBusLimitations(const MCInst &Inst,
3296                                                 const OperandVector &Operands) {
3297   const unsigned Opcode = Inst.getOpcode();
3298   const MCInstrDesc &Desc = MII.get(Opcode);
3299   unsigned LastSGPR = AMDGPU::NoRegister;
3300   unsigned ConstantBusUseCount = 0;
3301   unsigned NumLiterals = 0;
3302   unsigned LiteralSize;
3303 
3304   if (Desc.TSFlags &
3305       (SIInstrFlags::VOPC |
3306        SIInstrFlags::VOP1 | SIInstrFlags::VOP2 |
3307        SIInstrFlags::VOP3 | SIInstrFlags::VOP3P |
3308        SIInstrFlags::SDWA)) {
3309     // Check special imm operands (used by madmk, etc)
3310     if (AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm) != -1) {
3311       ++NumLiterals;
3312       LiteralSize = 4;
3313     }
3314 
3315     SmallDenseSet<unsigned> SGPRsUsed;
3316     unsigned SGPRUsed = findImplicitSGPRReadInVOP(Inst);
3317     if (SGPRUsed != AMDGPU::NoRegister) {
3318       SGPRsUsed.insert(SGPRUsed);
3319       ++ConstantBusUseCount;
3320     }
3321 
3322     const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
3323     const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
3324     const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
3325 
3326     const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx };
3327 
3328     for (int OpIdx : OpIndices) {
3329       if (OpIdx == -1) break;
3330 
3331       const MCOperand &MO = Inst.getOperand(OpIdx);
3332       if (usesConstantBus(Inst, OpIdx)) {
3333         if (MO.isReg()) {
3334           LastSGPR = mc2PseudoReg(MO.getReg());
3335           // Pairs of registers with a partial intersection like these
3336           //   s0, s[0:1]
3337           //   flat_scratch_lo, flat_scratch
3338           //   flat_scratch_lo, flat_scratch_hi
3339           // are theoretically valid but they are disabled anyway.
3340           // Note that this code mimics SIInstrInfo::verifyInstruction
3341           if (!SGPRsUsed.count(LastSGPR)) {
3342             SGPRsUsed.insert(LastSGPR);
3343             ++ConstantBusUseCount;
3344           }
3345         } else { // Expression or a literal
3346 
3347           if (Desc.OpInfo[OpIdx].OperandType == MCOI::OPERAND_IMMEDIATE)
3348             continue; // special operand like VINTERP attr_chan
3349 
3350           // An instruction may use only one literal.
3351           // This has been validated on the previous step.
3352           // See validateVOPLiteral.
3353           // This literal may be used as more than one operand.
3354           // If all these operands are of the same size,
3355           // this literal counts as one scalar value.
3356           // Otherwise it counts as 2 scalar values.
3357           // See "GFX10 Shader Programming", section 3.6.2.3.
3358 
3359           unsigned Size = AMDGPU::getOperandSize(Desc, OpIdx);
3360           if (Size < 4) Size = 4;
3361 
3362           if (NumLiterals == 0) {
3363             NumLiterals = 1;
3364             LiteralSize = Size;
3365           } else if (LiteralSize != Size) {
3366             NumLiterals = 2;
3367           }
3368         }
3369       }
3370     }
3371   }
3372   ConstantBusUseCount += NumLiterals;
3373 
3374   if (ConstantBusUseCount <= getConstantBusLimit(Opcode))
3375     return true;
3376 
3377   SMLoc LitLoc = getLitLoc(Operands);
3378   SMLoc RegLoc = getRegLoc(LastSGPR, Operands);
3379   SMLoc Loc = (LitLoc.getPointer() < RegLoc.getPointer()) ? RegLoc : LitLoc;
3380   Error(Loc, "invalid operand (violates constant bus restrictions)");
3381   return false;
3382 }
3383 
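// For instructions whose vdst is marked earlyclobber, no source register may
// overlap the destination register.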
3384 bool
3385 AMDGPUAsmParser::validateEarlyClobberLimitations(const MCInst &Inst,
3386                                                  const OperandVector &Operands) {
3387   const unsigned Opcode = Inst.getOpcode();
3388   const MCInstrDesc &Desc = MII.get(Opcode);
3389 
3390   const int DstIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vdst);
3391   if (DstIdx == -1 ||
3392       Desc.getOperandConstraint(DstIdx, MCOI::EARLY_CLOBBER) == -1) {
3393     return true;
3394   }
3395 
3396   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3397 
3398   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
3399   const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
3400   const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
3401 
3402   assert(DstIdx != -1);
3403   const MCOperand &Dst = Inst.getOperand(DstIdx);
3404   assert(Dst.isReg());
3405 
3406   const int SrcIndices[] = { Src0Idx, Src1Idx, Src2Idx };
3407 
3408   for (int SrcIdx : SrcIndices) {
3409     if (SrcIdx == -1) break;
3410     const MCOperand &Src = Inst.getOperand(SrcIdx);
3411     if (Src.isReg()) {
3412       if (TRI->regsOverlap(Dst.getReg(), Src.getReg())) {
3413         const unsigned SrcReg = mc2PseudoReg(Src.getReg());
3414         Error(getRegLoc(SrcReg, Operands),
3415           "destination must be different than all sources");
3416         return false;
3417       }
3418     }
3419   }
3420 
3421   return true;
3422 }
3423 
3424 bool AMDGPUAsmParser::validateIntClampSupported(const MCInst &Inst) {
3425 
3426   const unsigned Opc = Inst.getOpcode();
3427   const MCInstrDesc &Desc = MII.get(Opc);
3428 
3429   if ((Desc.TSFlags & SIInstrFlags::IntClamp) != 0 && !hasIntClamp()) {
3430     int ClampIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp);
3431     assert(ClampIdx != -1);
3432     return Inst.getOperand(ClampIdx).getImm() == 0;
3433   }
3434 
3435   return true;
3436 }
3437 
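// Check that the width of vdata matches the number of components implied by
// dmask, accounting for tfe and packed d16.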
3438 bool AMDGPUAsmParser::validateMIMGDataSize(const MCInst &Inst) {
3439 
3440   const unsigned Opc = Inst.getOpcode();
3441   const MCInstrDesc &Desc = MII.get(Opc);
3442 
3443   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3444     return true;
3445 
3446   int VDataIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdata);
3447   int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
3448   int TFEIdx   = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::tfe);
3449 
3450   assert(VDataIdx != -1);
3451 
3452   if (DMaskIdx == -1 || TFEIdx == -1) // intersect_ray
3453     return true;
3454 
3455   unsigned VDataSize = AMDGPU::getRegOperandSize(getMRI(), Desc, VDataIdx);
3456   unsigned TFESize = (TFEIdx != -1 && Inst.getOperand(TFEIdx).getImm()) ? 1 : 0;
3457   unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
3458   if (DMask == 0)
3459     DMask = 1;
3460 
3461   unsigned DataSize =
3462     (Desc.TSFlags & SIInstrFlags::Gather4) ? 4 : countPopulation(DMask);
3463   if (hasPackedD16()) {
3464     int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
3465     if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm())
3466       DataSize = (DataSize + 1) / 2;
3467   }
3468 
3469   return (VDataSize / 4) == DataSize + TFESize;
3470 }
3471 
3472 bool AMDGPUAsmParser::validateMIMGAddrSize(const MCInst &Inst) {
3473   const unsigned Opc = Inst.getOpcode();
3474   const MCInstrDesc &Desc = MII.get(Opc);
3475 
3476   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0 || !isGFX10Plus())
3477     return true;
3478 
3479   const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc);
3480 
3481   const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
3482       AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode);
3483   int VAddr0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr0);
3484   int SrsrcIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::srsrc);
3485   int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
3486   int A16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::a16);
3487 
3488   assert(VAddr0Idx != -1);
3489   assert(SrsrcIdx != -1);
3490   assert(SrsrcIdx > VAddr0Idx);
3491 
3492   if (DimIdx == -1)
3493     return true; // intersect_ray
3494 
3495   unsigned Dim = Inst.getOperand(DimIdx).getImm();
3496   const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim);
3497   bool IsNSA = SrsrcIdx - VAddr0Idx > 1;
3498   unsigned ActualAddrSize =
3499       IsNSA ? SrsrcIdx - VAddr0Idx
3500             : AMDGPU::getRegOperandSize(getMRI(), Desc, VAddr0Idx) / 4;
3501   bool IsA16 = (A16Idx != -1 && Inst.getOperand(A16Idx).getImm());
3502 
3503   unsigned ExpectedAddrSize =
3504       AMDGPU::getAddrSizeMIMGOp(BaseOpcode, DimInfo, IsA16, hasG16());
3505 
3506   if (!IsNSA) {
3507     if (ExpectedAddrSize > 8)
3508       ExpectedAddrSize = 16;
3509 
3510     // Allow oversized 8 VGPR vaddr when only 5/6/7 VGPRs are required.
3511     // This provides backward compatibility for assembly created
3512     // before 160b/192b/224b types were directly supported.
3513     if (ActualAddrSize == 8 && (ExpectedAddrSize >= 5 && ExpectedAddrSize <= 7))
3514       return true;
3515   }
3516 
3517   return ActualAddrSize == ExpectedAddrSize;
3518 }
3519 
3520 bool AMDGPUAsmParser::validateMIMGAtomicDMask(const MCInst &Inst) {
3521 
3522   const unsigned Opc = Inst.getOpcode();
3523   const MCInstrDesc &Desc = MII.get(Opc);
3524 
3525   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3526     return true;
3527   if (!Desc.mayLoad() || !Desc.mayStore())
3528     return true; // Not atomic
3529 
3530   int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
3531   unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
3532 
3533   // This is an incomplete check because image_atomic_cmpswap
3534   // may only use 0x3 and 0xf while other atomic operations
3535   // may use 0x1 and 0x3. However, these limitations are
3536   // verified when we check that dmask matches dst size.
3537   return DMask == 0x1 || DMask == 0x3 || DMask == 0xf;
3538 }
3539 
3540 bool AMDGPUAsmParser::validateMIMGGatherDMask(const MCInst &Inst) {
3541 
3542   const unsigned Opc = Inst.getOpcode();
3543   const MCInstrDesc &Desc = MII.get(Opc);
3544 
3545   if ((Desc.TSFlags & SIInstrFlags::Gather4) == 0)
3546     return true;
3547 
3548   int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
3549   unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
3550 
3551   // GATHER4 instructions use dmask in a different fashion compared to
3552   // other MIMG instructions. The only useful DMASK values are
3553   // 1=red, 2=green, 4=blue, 8=alpha. (e.g. 1 returns
3554   // (red,red,red,red) etc.) The ISA document doesn't mention
3555   // this.
3556   return DMask == 0x1 || DMask == 0x2 || DMask == 0x4 || DMask == 0x8;
3557 }
3558 
3559 bool AMDGPUAsmParser::validateMIMGMSAA(const MCInst &Inst) {
3560   const unsigned Opc = Inst.getOpcode();
3561   const MCInstrDesc &Desc = MII.get(Opc);
3562 
3563   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3564     return true;
3565 
3566   const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc);
3567   const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
3568       AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode);
3569 
3570   if (!BaseOpcode->MSAA)
3571     return true;
3572 
3573   int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
3574   assert(DimIdx != -1);
3575 
3576   unsigned Dim = Inst.getOperand(DimIdx).getImm();
3577   const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim);
3578 
3579   return DimInfo->MSAA;
3580 }
3581 
3582 static bool IsMovrelsSDWAOpcode(const unsigned Opcode)
3583 {
3584   switch (Opcode) {
3585   case AMDGPU::V_MOVRELS_B32_sdwa_gfx10:
3586   case AMDGPU::V_MOVRELSD_B32_sdwa_gfx10:
3587   case AMDGPU::V_MOVRELSD_2_B32_sdwa_gfx10:
3588     return true;
3589   default:
3590     return false;
3591   }
3592 }
3593 
3594 // movrels* opcodes should only allow VGPRs as src0.
3595 // This is specified in .td description for vop1/vop3,
3596 // but sdwa is handled differently. See isSDWAOperand.
3597 bool AMDGPUAsmParser::validateMovrels(const MCInst &Inst,
3598                                       const OperandVector &Operands) {
3599 
3600   const unsigned Opc = Inst.getOpcode();
3601   const MCInstrDesc &Desc = MII.get(Opc);
3602 
3603   if ((Desc.TSFlags & SIInstrFlags::SDWA) == 0 || !IsMovrelsSDWAOpcode(Opc))
3604     return true;
3605 
3606   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
3607   assert(Src0Idx != -1);
3608 
3609   SMLoc ErrLoc;
3610   const MCOperand &Src0 = Inst.getOperand(Src0Idx);
3611   if (Src0.isReg()) {
3612     auto Reg = mc2PseudoReg(Src0.getReg());
3613     const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3614     if (!isSGPR(Reg, TRI))
3615       return true;
3616     ErrLoc = getRegLoc(Reg, Operands);
3617   } else {
3618     ErrLoc = getConstLoc(Operands);
3619   }
3620 
3621   Error(ErrLoc, "source operand must be a VGPR");
3622   return false;
3623 }
3624 
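// v_accvgpr_write accepts only a VGPR or an inline constant as src0;
// reject SGPR sources.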
3625 bool AMDGPUAsmParser::validateMAIAccWrite(const MCInst &Inst,
3626                                           const OperandVector &Operands) {
3627 
3628   const unsigned Opc = Inst.getOpcode();
3629 
3630   if (Opc != AMDGPU::V_ACCVGPR_WRITE_B32_vi)
3631     return true;
3632 
3633   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
3634   assert(Src0Idx != -1);
3635 
3636   const MCOperand &Src0 = Inst.getOperand(Src0Idx);
3637   if (!Src0.isReg())
3638     return true;
3639 
3640   auto Reg = mc2PseudoReg(Src0.getReg());
3641   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3642   if (isSGPR(Reg, TRI)) {
3643     Error(getRegLoc(Reg, Operands),
3644           "source operand must be either a VGPR or an inline constant");
3645     return false;
3646   }
3647 
3648   return true;
3649 }
3650 
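// For MFMA results wider than 128 bits, src2 (the accumulator) must either
// be the same register as vdst or not overlap it at all.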
3651 bool AMDGPUAsmParser::validateMFMA(const MCInst &Inst,
3652                                    const OperandVector &Operands) {
3653   const unsigned Opc = Inst.getOpcode();
3654   const MCInstrDesc &Desc = MII.get(Opc);
3655 
3656   if ((Desc.TSFlags & SIInstrFlags::IsMAI) == 0)
3657     return true;
3658 
3659   const int Src2Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2);
3660   if (Src2Idx == -1)
3661     return true;
3662 
3663   const MCOperand &Src2 = Inst.getOperand(Src2Idx);
3664   if (!Src2.isReg())
3665     return true;
3666 
3667   MCRegister Src2Reg = Src2.getReg();
3668   MCRegister DstReg = Inst.getOperand(0).getReg();
3669   if (Src2Reg == DstReg)
3670     return true;
3671 
3672   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3673   if (TRI->getRegClass(Desc.OpInfo[0].RegClass).getSizeInBits() <= 128)
3674     return true;
3675 
3676   if (TRI->regsOverlap(Src2Reg, DstReg)) {
3677     Error(getRegLoc(mc2PseudoReg(Src2Reg), Operands),
3678           "source 2 operand must not partially overlap with dst");
3679     return false;
3680   }
3681 
3682   return true;
3683 }
3684 
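// v_div_scale_* (VOP3B) instructions do not accept the ABS source modifier.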
3685 bool AMDGPUAsmParser::validateDivScale(const MCInst &Inst) {
3686   switch (Inst.getOpcode()) {
3687   default:
3688     return true;
3689   case V_DIV_SCALE_F32_gfx6_gfx7:
3690   case V_DIV_SCALE_F32_vi:
3691   case V_DIV_SCALE_F32_gfx10:
3692   case V_DIV_SCALE_F64_gfx6_gfx7:
3693   case V_DIV_SCALE_F64_vi:
3694   case V_DIV_SCALE_F64_gfx10:
3695     break;
3696   }
3697 
3698   // TODO: Check that src0 = src1 or src2.
3699 
3700   for (auto Name : {AMDGPU::OpName::src0_modifiers,
3701                     AMDGPU::OpName::src1_modifiers,
3702                     AMDGPU::OpName::src2_modifiers}) {
3703     if (Inst.getOperand(AMDGPU::getNamedOperandIdx(Inst.getOpcode(), Name))
3704             .getImm() &
3705         SISrcMods::ABS) {
3706       return false;
3707     }
3708   }
3709 
3710   return true;
3711 }
3712 
3713 bool AMDGPUAsmParser::validateMIMGD16(const MCInst &Inst) {
3714 
3715   const unsigned Opc = Inst.getOpcode();
3716   const MCInstrDesc &Desc = MII.get(Opc);
3717 
3718   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3719     return true;
3720 
3721   int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
3722   if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm()) {
3723     if (isCI() || isSI())
3724       return false;
3725   }
3726 
3727   return true;
3728 }
3729 
3730 bool AMDGPUAsmParser::validateMIMGDim(const MCInst &Inst) {
3731   const unsigned Opc = Inst.getOpcode();
3732   const MCInstrDesc &Desc = MII.get(Opc);
3733 
3734   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3735     return true;
3736 
3737   int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
3738   if (DimIdx < 0)
3739     return true;
3740 
3741   long Imm = Inst.getOperand(DimIdx).getImm();
3742   if (Imm < 0 || Imm >= 8)
3743     return false;
3744 
3745   return true;
3746 }
3747 
3748 static bool IsRevOpcode(const unsigned Opcode)
3749 {
3750   switch (Opcode) {
3751   case AMDGPU::V_SUBREV_F32_e32:
3752   case AMDGPU::V_SUBREV_F32_e64:
3753   case AMDGPU::V_SUBREV_F32_e32_gfx10:
3754   case AMDGPU::V_SUBREV_F32_e32_gfx6_gfx7:
3755   case AMDGPU::V_SUBREV_F32_e32_vi:
3756   case AMDGPU::V_SUBREV_F32_e64_gfx10:
3757   case AMDGPU::V_SUBREV_F32_e64_gfx6_gfx7:
3758   case AMDGPU::V_SUBREV_F32_e64_vi:
3759 
3760   case AMDGPU::V_SUBREV_CO_U32_e32:
3761   case AMDGPU::V_SUBREV_CO_U32_e64:
3762   case AMDGPU::V_SUBREV_I32_e32_gfx6_gfx7:
3763   case AMDGPU::V_SUBREV_I32_e64_gfx6_gfx7:
3764 
3765   case AMDGPU::V_SUBBREV_U32_e32:
3766   case AMDGPU::V_SUBBREV_U32_e64:
3767   case AMDGPU::V_SUBBREV_U32_e32_gfx6_gfx7:
3768   case AMDGPU::V_SUBBREV_U32_e32_vi:
3769   case AMDGPU::V_SUBBREV_U32_e64_gfx6_gfx7:
3770   case AMDGPU::V_SUBBREV_U32_e64_vi:
3771 
3772   case AMDGPU::V_SUBREV_U32_e32:
3773   case AMDGPU::V_SUBREV_U32_e64:
3774   case AMDGPU::V_SUBREV_U32_e32_gfx9:
3775   case AMDGPU::V_SUBREV_U32_e32_vi:
3776   case AMDGPU::V_SUBREV_U32_e64_gfx9:
3777   case AMDGPU::V_SUBREV_U32_e64_vi:
3778 
3779   case AMDGPU::V_SUBREV_F16_e32:
3780   case AMDGPU::V_SUBREV_F16_e64:
3781   case AMDGPU::V_SUBREV_F16_e32_gfx10:
3782   case AMDGPU::V_SUBREV_F16_e32_vi:
3783   case AMDGPU::V_SUBREV_F16_e64_gfx10:
3784   case AMDGPU::V_SUBREV_F16_e64_vi:
3785 
3786   case AMDGPU::V_SUBREV_U16_e32:
3787   case AMDGPU::V_SUBREV_U16_e64:
3788   case AMDGPU::V_SUBREV_U16_e32_vi:
3789   case AMDGPU::V_SUBREV_U16_e64_vi:
3790 
3791   case AMDGPU::V_SUBREV_CO_U32_e32_gfx9:
3792   case AMDGPU::V_SUBREV_CO_U32_e64_gfx10:
3793   case AMDGPU::V_SUBREV_CO_U32_e64_gfx9:
3794 
3795   case AMDGPU::V_SUBBREV_CO_U32_e32_gfx9:
3796   case AMDGPU::V_SUBBREV_CO_U32_e64_gfx9:
3797 
3798   case AMDGPU::V_SUBREV_NC_U32_e32_gfx10:
3799   case AMDGPU::V_SUBREV_NC_U32_e64_gfx10:
3800 
3801   case AMDGPU::V_SUBREV_CO_CI_U32_e32_gfx10:
3802   case AMDGPU::V_SUBREV_CO_CI_U32_e64_gfx10:
3803 
3804   case AMDGPU::V_LSHRREV_B32_e32:
3805   case AMDGPU::V_LSHRREV_B32_e64:
3806   case AMDGPU::V_LSHRREV_B32_e32_gfx6_gfx7:
3807   case AMDGPU::V_LSHRREV_B32_e64_gfx6_gfx7:
3808   case AMDGPU::V_LSHRREV_B32_e32_vi:
3809   case AMDGPU::V_LSHRREV_B32_e64_vi:
3810   case AMDGPU::V_LSHRREV_B32_e32_gfx10:
3811   case AMDGPU::V_LSHRREV_B32_e64_gfx10:
3812 
3813   case AMDGPU::V_ASHRREV_I32_e32:
3814   case AMDGPU::V_ASHRREV_I32_e64:
3815   case AMDGPU::V_ASHRREV_I32_e32_gfx10:
3816   case AMDGPU::V_ASHRREV_I32_e32_gfx6_gfx7:
3817   case AMDGPU::V_ASHRREV_I32_e32_vi:
3818   case AMDGPU::V_ASHRREV_I32_e64_gfx10:
3819   case AMDGPU::V_ASHRREV_I32_e64_gfx6_gfx7:
3820   case AMDGPU::V_ASHRREV_I32_e64_vi:
3821 
3822   case AMDGPU::V_LSHLREV_B32_e32:
3823   case AMDGPU::V_LSHLREV_B32_e64:
3824   case AMDGPU::V_LSHLREV_B32_e32_gfx10:
3825   case AMDGPU::V_LSHLREV_B32_e32_gfx6_gfx7:
3826   case AMDGPU::V_LSHLREV_B32_e32_vi:
3827   case AMDGPU::V_LSHLREV_B32_e64_gfx10:
3828   case AMDGPU::V_LSHLREV_B32_e64_gfx6_gfx7:
3829   case AMDGPU::V_LSHLREV_B32_e64_vi:
3830 
3831   case AMDGPU::V_LSHLREV_B16_e32:
3832   case AMDGPU::V_LSHLREV_B16_e64:
3833   case AMDGPU::V_LSHLREV_B16_e32_vi:
3834   case AMDGPU::V_LSHLREV_B16_e64_vi:
3835   case AMDGPU::V_LSHLREV_B16_gfx10:
3836 
3837   case AMDGPU::V_LSHRREV_B16_e32:
3838   case AMDGPU::V_LSHRREV_B16_e64:
3839   case AMDGPU::V_LSHRREV_B16_e32_vi:
3840   case AMDGPU::V_LSHRREV_B16_e64_vi:
3841   case AMDGPU::V_LSHRREV_B16_gfx10:
3842 
3843   case AMDGPU::V_ASHRREV_I16_e32:
3844   case AMDGPU::V_ASHRREV_I16_e64:
3845   case AMDGPU::V_ASHRREV_I16_e32_vi:
3846   case AMDGPU::V_ASHRREV_I16_e64_vi:
3847   case AMDGPU::V_ASHRREV_I16_gfx10:
3848 
3849   case AMDGPU::V_LSHLREV_B64_e64:
3850   case AMDGPU::V_LSHLREV_B64_gfx10:
3851   case AMDGPU::V_LSHLREV_B64_vi:
3852 
3853   case AMDGPU::V_LSHRREV_B64_e64:
3854   case AMDGPU::V_LSHRREV_B64_gfx10:
3855   case AMDGPU::V_LSHRREV_B64_vi:
3856 
3857   case AMDGPU::V_ASHRREV_I64_e64:
3858   case AMDGPU::V_ASHRREV_I64_gfx10:
3859   case AMDGPU::V_ASHRREV_I64_vi:
3860 
3861   case AMDGPU::V_PK_LSHLREV_B16:
3862   case AMDGPU::V_PK_LSHLREV_B16_gfx10:
3863   case AMDGPU::V_PK_LSHLREV_B16_vi:
3864 
3865   case AMDGPU::V_PK_LSHRREV_B16:
3866   case AMDGPU::V_PK_LSHRREV_B16_gfx10:
3867   case AMDGPU::V_PK_LSHRREV_B16_vi:
3868   case AMDGPU::V_PK_ASHRREV_I16:
3869   case AMDGPU::V_PK_ASHRREV_I16_gfx10:
3870   case AMDGPU::V_PK_ASHRREV_I16_vi:
3871     return true;
3872   default:
3873     return false;
3874   }
3875 }
3876 
3877 Optional<StringRef> AMDGPUAsmParser::validateLdsDirect(const MCInst &Inst) {
3878 
3879   using namespace SIInstrFlags;
3880   const unsigned Opcode = Inst.getOpcode();
3881   const MCInstrDesc &Desc = MII.get(Opcode);
3882 
3883   // The lds_direct register is defined so that it can be used
3884   // with 9-bit operands only. Ignore encodings which do not accept these.
3885   const auto Enc = VOP1 | VOP2 | VOP3 | VOPC | VOP3P | SIInstrFlags::SDWA;
3886   if ((Desc.TSFlags & Enc) == 0)
3887     return None;
3888 
3889   for (auto SrcName : {OpName::src0, OpName::src1, OpName::src2}) {
3890     auto SrcIdx = getNamedOperandIdx(Opcode, SrcName);
3891     if (SrcIdx == -1)
3892       break;
3893     const auto &Src = Inst.getOperand(SrcIdx);
3894     if (Src.isReg() && Src.getReg() == LDS_DIRECT) {
3895 
3896       if (isGFX90A())
3897         return StringRef("lds_direct is not supported on this GPU");
3898 
3899       if (IsRevOpcode(Opcode) || (Desc.TSFlags & SIInstrFlags::SDWA))
3900         return StringRef("lds_direct cannot be used with this instruction");
3901 
3902       if (SrcName != OpName::src0)
3903         return StringRef("lds_direct may be used as src0 only");
3904     }
3905   }
3906 
3907   return None;
3908 }
3909 
3910 SMLoc AMDGPUAsmParser::getFlatOffsetLoc(const OperandVector &Operands) const {
3911   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
3912     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
3913     if (Op.isFlatOffset())
3914       return Op.getStartLoc();
3915   }
3916   return getLoc();
3917 }
3918 
3919 bool AMDGPUAsmParser::validateFlatOffset(const MCInst &Inst,
3920                                          const OperandVector &Operands) {
3921   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
3922   if ((TSFlags & SIInstrFlags::FLAT) == 0)
3923     return true;
3924 
3925   auto Opcode = Inst.getOpcode();
3926   auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
3927   assert(OpNum != -1);
3928 
3929   const auto &Op = Inst.getOperand(OpNum);
3930   if (!hasFlatOffsets() && Op.getImm() != 0) {
3931     Error(getFlatOffsetLoc(Operands),
3932           "flat offset modifier is not supported on this GPU");
3933     return false;
3934   }
3935 
3936   // For FLAT segment the offset must be positive;
3937   // MSB is ignored and forced to zero.
3938   if (TSFlags & (SIInstrFlags::FlatGlobal | SIInstrFlags::FlatScratch)) {
3939     unsigned OffsetSize = AMDGPU::getNumFlatOffsetBits(getSTI(), true);
3940     if (!isIntN(OffsetSize, Op.getImm())) {
3941       Error(getFlatOffsetLoc(Operands),
3942             Twine("expected a ") + Twine(OffsetSize) + "-bit signed offset");
3943       return false;
3944     }
3945   } else {
3946     unsigned OffsetSize = AMDGPU::getNumFlatOffsetBits(getSTI(), false);
3947     if (!isUIntN(OffsetSize, Op.getImm())) {
3948       Error(getFlatOffsetLoc(Operands),
3949             Twine("expected a ") + Twine(OffsetSize) + "-bit unsigned offset");
3950       return false;
3951     }
3952   }
3953 
3954   return true;
3955 }
3956 
3957 SMLoc AMDGPUAsmParser::getSMEMOffsetLoc(const OperandVector &Operands) const {
3958   // Start with second operand because SMEM Offset cannot be dst or src0.
3959   for (unsigned i = 2, e = Operands.size(); i != e; ++i) {
3960     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
3961     if (Op.isSMEMOffset())
3962       return Op.getStartLoc();
3963   }
3964   return getLoc();
3965 }
3966 
3967 bool AMDGPUAsmParser::validateSMEMOffset(const MCInst &Inst,
3968                                          const OperandVector &Operands) {
3969   if (isCI() || isSI())
3970     return true;
3971 
3972   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
3973   if ((TSFlags & SIInstrFlags::SMRD) == 0)
3974     return true;
3975 
3976   auto Opcode = Inst.getOpcode();
3977   auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
3978   if (OpNum == -1)
3979     return true;
3980 
3981   const auto &Op = Inst.getOperand(OpNum);
3982   if (!Op.isImm())
3983     return true;
3984 
3985   uint64_t Offset = Op.getImm();
3986   bool IsBuffer = AMDGPU::getSMEMIsBuffer(Opcode);
3987   if (AMDGPU::isLegalSMRDEncodedUnsignedOffset(getSTI(), Offset) ||
3988       AMDGPU::isLegalSMRDEncodedSignedOffset(getSTI(), Offset, IsBuffer))
3989     return true;
3990 
3991   Error(getSMEMOffsetLoc(Operands),
3992         (isVI() || IsBuffer) ? "expected a 20-bit unsigned offset" :
3993                                "expected a 21-bit signed offset");
3994 
3995   return false;
3996 }
3997 
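// SOP2/SOPC instructions may use at most one unique literal or expression
// across src0 and src1.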
3998 bool AMDGPUAsmParser::validateSOPLiteral(const MCInst &Inst) const {
3999   unsigned Opcode = Inst.getOpcode();
4000   const MCInstrDesc &Desc = MII.get(Opcode);
4001   if (!(Desc.TSFlags & (SIInstrFlags::SOP2 | SIInstrFlags::SOPC)))
4002     return true;
4003 
4004   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
4005   const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
4006 
4007   const int OpIndices[] = { Src0Idx, Src1Idx };
4008 
4009   unsigned NumExprs = 0;
4010   unsigned NumLiterals = 0;
4011   uint32_t LiteralValue;
4012 
4013   for (int OpIdx : OpIndices) {
4014     if (OpIdx == -1) break;
4015 
4016     const MCOperand &MO = Inst.getOperand(OpIdx);
4017     // Exclude special imm operands (like the one used by s_set_gpr_idx_on)
4018     if (AMDGPU::isSISrcOperand(Desc, OpIdx)) {
4019       if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) {
4020         uint32_t Value = static_cast<uint32_t>(MO.getImm());
4021         if (NumLiterals == 0 || LiteralValue != Value) {
4022           LiteralValue = Value;
4023           ++NumLiterals;
4024         }
4025       } else if (MO.isExpr()) {
4026         ++NumExprs;
4027       }
4028     }
4029   }
4030 
4031   return NumLiterals + NumExprs <= 1;
4032 }
4033 
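// For v_permlane16_b32 and v_permlanex16_b32, only the low two op_sel bits
// are meaningful; the remaining bits must be zero.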
4034 bool AMDGPUAsmParser::validateOpSel(const MCInst &Inst) {
4035   const unsigned Opc = Inst.getOpcode();
4036   if (Opc == AMDGPU::V_PERMLANE16_B32_gfx10 ||
4037       Opc == AMDGPU::V_PERMLANEX16_B32_gfx10) {
4038     int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
4039     unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
4040 
4041     if (OpSel & ~3)
4042       return false;
4043   }
4044   return true;
4045 }
4046 
4047 bool AMDGPUAsmParser::validateDPP(const MCInst &Inst,
4048                                   const OperandVector &Operands) {
4049   const unsigned Opc = Inst.getOpcode();
4050   int DppCtrlIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dpp_ctrl);
4051   if (DppCtrlIdx < 0)
4052     return true;
4053   unsigned DppCtrl = Inst.getOperand(DppCtrlIdx).getImm();
4054 
4055   if (!AMDGPU::isLegal64BitDPPControl(DppCtrl)) {
4056     // DPP64 is supported for row_newbcast only.
4057     int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
4058     if (Src0Idx >= 0 &&
4059         getMRI()->getSubReg(Inst.getOperand(Src0Idx).getReg(), AMDGPU::sub1)) {
4060       SMLoc S = getImmLoc(AMDGPUOperand::ImmTyDppCtrl, Operands);
4061       Error(S, "64 bit dpp only supports row_newbcast");
4062       return false;
4063     }
4064   }
4065 
4066   return true;
4067 }
4068 
4069 // Check if VCC register matches wavefront size
4070 bool AMDGPUAsmParser::validateVccOperand(unsigned Reg) const {
4071   auto FB = getFeatureBits();
4072   return (FB[AMDGPU::FeatureWavefrontSize64] && Reg == AMDGPU::VCC) ||
4073     (FB[AMDGPU::FeatureWavefrontSize32] && Reg == AMDGPU::VCC_LO);
4074 }
4075 
4076 // Only one unique literal may be used. A VOP3 literal is only allowed on GFX10+.
4077 bool AMDGPUAsmParser::validateVOPLiteral(const MCInst &Inst,
4078                                          const OperandVector &Operands) {
4079   unsigned Opcode = Inst.getOpcode();
4080   const MCInstrDesc &Desc = MII.get(Opcode);
4081   const int ImmIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm);
4082   if (!(Desc.TSFlags & (SIInstrFlags::VOP3 | SIInstrFlags::VOP3P)) &&
4083       ImmIdx == -1)
4084     return true;
4085 
4086   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
4087   const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
4088   const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
4089 
4090   const int OpIndices[] = {Src0Idx, Src1Idx, Src2Idx, ImmIdx};
4091 
4092   unsigned NumExprs = 0;
4093   unsigned NumLiterals = 0;
4094   uint32_t LiteralValue;
4095 
4096   for (int OpIdx : OpIndices) {
4097     if (OpIdx == -1)
4098       continue;
4099 
4100     const MCOperand &MO = Inst.getOperand(OpIdx);
4101     if (!MO.isImm() && !MO.isExpr())
4102       continue;
4103     if (!AMDGPU::isSISrcOperand(Desc, OpIdx))
4104       continue;
4105 
4106     if (OpIdx == Src2Idx && (Desc.TSFlags & SIInstrFlags::IsMAI) &&
4107         getFeatureBits()[AMDGPU::FeatureMFMAInlineLiteralBug]) {
4108       Error(getConstLoc(Operands),
4109             "inline constants are not allowed for this operand");
4110       return false;
4111     }
4112 
4113     if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) {
4114       uint32_t Value = static_cast<uint32_t>(MO.getImm());
4115       if (NumLiterals == 0 || LiteralValue != Value) {
4116         LiteralValue = Value;
4117         ++NumLiterals;
4118       }
4119     } else if (MO.isExpr()) {
4120       ++NumExprs;
4121     }
4122   }
4123   NumLiterals += NumExprs;
4124 
4125   if (!NumLiterals)
4126     return true;
4127 
4128   if (ImmIdx == -1 && !getFeatureBits()[AMDGPU::FeatureVOP3Literal]) {
4129     Error(getLitLoc(Operands), "literal operands are not supported");
4130     return false;
4131   }
4132 
4133   if (NumLiterals > 1) {
4134     Error(getLitLoc(Operands), "only one literal operand is allowed");
4135     return false;
4136   }
4137 
4138   return true;
4139 }
4140 
4141 // Returns -1 if not a register, 0 if VGPR and 1 if AGPR.
4142 static int IsAGPROperand(const MCInst &Inst, uint16_t NameIdx,
4143                          const MCRegisterInfo *MRI) {
4144   int OpIdx = AMDGPU::getNamedOperandIdx(Inst.getOpcode(), NameIdx);
4145   if (OpIdx < 0)
4146     return -1;
4147 
4148   const MCOperand &Op = Inst.getOperand(OpIdx);
4149   if (!Op.isReg())
4150     return -1;
4151 
4152   unsigned Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0);
4153   auto Reg = Sub ? Sub : Op.getReg();
4154   const MCRegisterClass &AGPR32 = MRI->getRegClass(AMDGPU::AGPR_32RegClassID);
4155   return AGPR32.contains(Reg) ? 1 : 0;
4156 }
4157 
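// For FLAT/MUBUF/MTBUF/MIMG/DS memory instructions, check AGPR vs. VGPR
// usage: on gfx90a the data and destination operands must agree (all AGPR or
// all VGPR); on other targets AGPR data/dst operands are not allowed.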
4158 bool AMDGPUAsmParser::validateAGPRLdSt(const MCInst &Inst) const {
4159   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4160   if ((TSFlags & (SIInstrFlags::FLAT | SIInstrFlags::MUBUF |
4161                   SIInstrFlags::MTBUF | SIInstrFlags::MIMG |
4162                   SIInstrFlags::DS)) == 0)
4163     return true;
4164 
4165   uint16_t DataNameIdx = (TSFlags & SIInstrFlags::DS) ? AMDGPU::OpName::data0
4166                                                       : AMDGPU::OpName::vdata;
4167 
4168   const MCRegisterInfo *MRI = getMRI();
4169   int DstAreg = IsAGPROperand(Inst, AMDGPU::OpName::vdst, MRI);
4170   int DataAreg = IsAGPROperand(Inst, DataNameIdx, MRI);
4171 
4172   if ((TSFlags & SIInstrFlags::DS) && DataAreg >= 0) {
4173     int Data2Areg = IsAGPROperand(Inst, AMDGPU::OpName::data1, MRI);
4174     if (Data2Areg >= 0 && Data2Areg != DataAreg)
4175       return false;
4176   }
4177 
4178   auto FB = getFeatureBits();
4179   if (FB[AMDGPU::FeatureGFX90AInsts]) {
4180     if (DataAreg < 0 || DstAreg < 0)
4181       return true;
4182     return DstAreg == DataAreg;
4183   }
4184 
4185   return DstAreg < 1 && DataAreg < 1;
4186 }
4187 
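// On gfx90a, multi-register VGPR and AGPR operands must start at an
// even-numbered register.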
4188 bool AMDGPUAsmParser::validateVGPRAlign(const MCInst &Inst) const {
4189   auto FB = getFeatureBits();
4190   if (!FB[AMDGPU::FeatureGFX90AInsts])
4191     return true;
4192 
4193   const MCRegisterInfo *MRI = getMRI();
4194   const MCRegisterClass &VGPR32 = MRI->getRegClass(AMDGPU::VGPR_32RegClassID);
4195   const MCRegisterClass &AGPR32 = MRI->getRegClass(AMDGPU::AGPR_32RegClassID);
4196   for (unsigned I = 0, E = Inst.getNumOperands(); I != E; ++I) {
4197     const MCOperand &Op = Inst.getOperand(I);
4198     if (!Op.isReg())
4199       continue;
4200 
4201     unsigned Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0);
4202     if (!Sub)
4203       continue;
4204 
4205     if (VGPR32.contains(Sub) && ((Sub - AMDGPU::VGPR0) & 1))
4206       return false;
4207     if (AGPR32.contains(Sub) && ((Sub - AMDGPU::AGPR0) & 1))
4208       return false;
4209   }
4210 
4211   return true;
4212 }
4213 
4214 // gfx90a has an undocumented limitation:
4215 // DS_GWS opcodes must use even aligned registers.
4216 bool AMDGPUAsmParser::validateGWS(const MCInst &Inst,
4217                                   const OperandVector &Operands) {
4218   if (!getFeatureBits()[AMDGPU::FeatureGFX90AInsts])
4219     return true;
4220 
4221   int Opc = Inst.getOpcode();
4222   if (Opc != AMDGPU::DS_GWS_INIT_vi && Opc != AMDGPU::DS_GWS_BARRIER_vi &&
4223       Opc != AMDGPU::DS_GWS_SEMA_BR_vi)
4224     return true;
4225 
4226   const MCRegisterInfo *MRI = getMRI();
4227   const MCRegisterClass &VGPR32 = MRI->getRegClass(AMDGPU::VGPR_32RegClassID);
4228   int Data0Pos =
4229       AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::data0);
4230   assert(Data0Pos != -1);
4231   auto Reg = Inst.getOperand(Data0Pos).getReg();
4232   auto RegIdx = Reg - (VGPR32.contains(Reg) ? AMDGPU::VGPR0 : AMDGPU::AGPR0);
4233   if (RegIdx & 1) {
4234     SMLoc RegLoc = getRegLoc(Reg, Operands);
4235     Error(RegLoc, "vgpr must be even aligned");
4236     return false;
4237   }
4238 
4239   return true;
4240 }
4241 
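// Validate the cpol cache-policy bits: SMRD accepts only glc and dlc, scc is
// not available on gfx90a, returning atomics (other than MIMG) must set glc,
// and non-returning atomics must not.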
4242 bool AMDGPUAsmParser::validateCoherencyBits(const MCInst &Inst,
4243                                             const OperandVector &Operands,
4244                                             const SMLoc &IDLoc) {
4245   int CPolPos = AMDGPU::getNamedOperandIdx(Inst.getOpcode(),
4246                                            AMDGPU::OpName::cpol);
4247   if (CPolPos == -1)
4248     return true;
4249 
4250   unsigned CPol = Inst.getOperand(CPolPos).getImm();
4251 
4252   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4253   if ((TSFlags & (SIInstrFlags::SMRD)) &&
4254       (CPol & ~(AMDGPU::CPol::GLC | AMDGPU::CPol::DLC))) {
4255     Error(IDLoc, "invalid cache policy for SMRD instruction");
4256     return false;
4257   }
4258 
4259   if (isGFX90A() && (CPol & CPol::SCC)) {
4260     SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
4261     StringRef CStr(S.getPointer());
4262     S = SMLoc::getFromPointer(&CStr.data()[CStr.find("scc")]);
4263     Error(S, "scc is not supported on this GPU");
4264     return false;
4265   }
4266 
4267   if (!(TSFlags & (SIInstrFlags::IsAtomicNoRet | SIInstrFlags::IsAtomicRet)))
4268     return true;
4269 
4270   if (TSFlags & SIInstrFlags::IsAtomicRet) {
4271     if (!(TSFlags & SIInstrFlags::MIMG) && !(CPol & CPol::GLC)) {
4272       Error(IDLoc, "instruction must use glc");
4273       return false;
4274     }
4275   } else {
4276     if (CPol & CPol::GLC) {
4277       SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
4278       StringRef CStr(S.getPointer());
4279       S = SMLoc::getFromPointer(&CStr.data()[CStr.find("glc")]);
4280       Error(S, "instruction must not use glc");
4281       return false;
4282     }
4283   }
4284 
4285   return true;
4286 }
4287 
4288 bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst,
4289                                           const SMLoc &IDLoc,
4290                                           const OperandVector &Operands) {
4291   if (auto ErrMsg = validateLdsDirect(Inst)) {
4292     Error(getRegLoc(LDS_DIRECT, Operands), *ErrMsg);
4293     return false;
4294   }
4295   if (!validateSOPLiteral(Inst)) {
4296     Error(getLitLoc(Operands),
4297       "only one literal operand is allowed");
4298     return false;
4299   }
4300   if (!validateVOPLiteral(Inst, Operands)) {
4301     return false;
4302   }
4303   if (!validateConstantBusLimitations(Inst, Operands)) {
4304     return false;
4305   }
4306   if (!validateEarlyClobberLimitations(Inst, Operands)) {
4307     return false;
4308   }
4309   if (!validateIntClampSupported(Inst)) {
4310     Error(getImmLoc(AMDGPUOperand::ImmTyClampSI, Operands),
4311       "integer clamping is not supported on this GPU");
4312     return false;
4313   }
4314   if (!validateOpSel(Inst)) {
4315     Error(getImmLoc(AMDGPUOperand::ImmTyOpSel, Operands),
4316       "invalid op_sel operand");
4317     return false;
4318   }
4319   if (!validateDPP(Inst, Operands)) {
4320     return false;
4321   }
4322   // For MUBUF/MTBUF, d16 is part of the opcode, so there is nothing to validate.
4323   if (!validateMIMGD16(Inst)) {
4324     Error(getImmLoc(AMDGPUOperand::ImmTyD16, Operands),
4325       "d16 modifier is not supported on this GPU");
4326     return false;
4327   }
4328   if (!validateMIMGDim(Inst)) {
4329     Error(IDLoc, "dim modifier is required on this GPU");
4330     return false;
4331   }
4332   if (!validateMIMGMSAA(Inst)) {
4333     Error(getImmLoc(AMDGPUOperand::ImmTyDim, Operands),
4334           "invalid dim; must be MSAA type");
4335     return false;
4336   }
4337   if (!validateMIMGDataSize(Inst)) {
4338     Error(IDLoc,
4339       "image data size does not match dmask and tfe");
4340     return false;
4341   }
4342   if (!validateMIMGAddrSize(Inst)) {
4343     Error(IDLoc,
4344       "image address size does not match dim and a16");
4345     return false;
4346   }
4347   if (!validateMIMGAtomicDMask(Inst)) {
4348     Error(getImmLoc(AMDGPUOperand::ImmTyDMask, Operands),
4349       "invalid atomic image dmask");
4350     return false;
4351   }
4352   if (!validateMIMGGatherDMask(Inst)) {
4353     Error(getImmLoc(AMDGPUOperand::ImmTyDMask, Operands),
4354       "invalid image_gather dmask: only one bit must be set");
4355     return false;
4356   }
4357   if (!validateMovrels(Inst, Operands)) {
4358     return false;
4359   }
4360   if (!validateFlatOffset(Inst, Operands)) {
4361     return false;
4362   }
4363   if (!validateSMEMOffset(Inst, Operands)) {
4364     return false;
4365   }
4366   if (!validateMAIAccWrite(Inst, Operands)) {
4367     return false;
4368   }
4369   if (!validateMFMA(Inst, Operands)) {
4370     return false;
4371   }
4372   if (!validateCoherencyBits(Inst, Operands, IDLoc)) {
4373     return false;
4374   }
4375 
4376   if (!validateAGPRLdSt(Inst)) {
4377     Error(IDLoc, getFeatureBits()[AMDGPU::FeatureGFX90AInsts]
4378     ? "invalid register class: data and dst should be all VGPR or AGPR"
4379     : "invalid register class: agpr loads and stores not supported on this GPU"
4380     );
4381     return false;
4382   }
4383   if (!validateVGPRAlign(Inst)) {
4384     Error(IDLoc,
4385       "invalid register class: vgpr tuples must be 64 bit aligned");
4386     return false;
4387   }
4388   if (!validateGWS(Inst, Operands)) {
4389     return false;
4390   }
4391 
4392   if (!validateDivScale(Inst)) {
4393     Error(IDLoc, "ABS not allowed in VOP3B instructions");
4394     return false;
4395   }
4399 
4400   return true;
4401 }
4402 
4403 static std::string AMDGPUMnemonicSpellCheck(StringRef S,
4404                                             const FeatureBitset &FBS,
4405                                             unsigned VariantID = 0);
4406 
4407 static bool AMDGPUCheckMnemonic(StringRef Mnemonic,
4408                                 const FeatureBitset &AvailableFeatures,
4409                                 unsigned VariantID);
4410 
4411 bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo,
4412                                        const FeatureBitset &FBS) {
4413   return isSupportedMnemo(Mnemo, FBS, getAllVariants());
4414 }
4415 
4416 bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo,
4417                                        const FeatureBitset &FBS,
4418                                        ArrayRef<unsigned> Variants) {
4419   for (auto Variant : Variants) {
4420     if (AMDGPUCheckMnemonic(Mnemo, FBS, Variant))
4421       return true;
4422   }
4423 
4424   return false;
4425 }
4426 
4427 bool AMDGPUAsmParser::checkUnsupportedInstruction(StringRef Mnemo,
4428                                                   const SMLoc &IDLoc) {
4429   FeatureBitset FBS = ComputeAvailableFeatures(getSTI().getFeatureBits());
4430 
4431   // Check if requested instruction variant is supported.
4432   if (isSupportedMnemo(Mnemo, FBS, getMatchedVariants()))
4433     return false;
4434 
4435   // This instruction is not supported.
4436   // Clear any other pending errors because they are no longer relevant.
4437   getParser().clearPendingErrors();
4438 
4439   // Requested instruction variant is not supported.
4440   // Check if any other variants are supported.
4441   StringRef VariantName = getMatchedVariantName();
4442   if (!VariantName.empty() && isSupportedMnemo(Mnemo, FBS)) {
4443     return Error(IDLoc,
4444                  Twine(VariantName,
4445                        " variant of this instruction is not supported"));
4446   }
4447 
4448   // Finally check if this instruction is supported on any other GPU.
4449   if (isSupportedMnemo(Mnemo, FeatureBitset().set())) {
4450     return Error(IDLoc, "instruction not supported on this GPU");
4451   }
4452 
4453   // Instruction not supported on any GPU. Probably a typo.
4454   std::string Suggestion = AMDGPUMnemonicSpellCheck(Mnemo, FBS);
4455   return Error(IDLoc, "invalid instruction" + Suggestion);
4456 }
4457 
4458 bool AMDGPUAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
4459                                               OperandVector &Operands,
4460                                               MCStreamer &Out,
4461                                               uint64_t &ErrorInfo,
4462                                               bool MatchingInlineAsm) {
4463   MCInst Inst;
4464   unsigned Result = Match_Success;
4465   for (auto Variant : getMatchedVariants()) {
4466     uint64_t EI;
4467     auto R = MatchInstructionImpl(Operands, Inst, EI, MatchingInlineAsm,
4468                                   Variant);
4469     // We order match statuses from least to most specific and keep the most
4470     // specific status seen so far:
4471     // Match_MnemonicFail < Match_InvalidOperand < Match_MissingFeature < Match_PreferE32
4472     if ((R == Match_Success) ||
4473         (R == Match_PreferE32) ||
4474         (R == Match_MissingFeature && Result != Match_PreferE32) ||
4475         (R == Match_InvalidOperand && Result != Match_MissingFeature
4476                                    && Result != Match_PreferE32) ||
4477         (R == Match_MnemonicFail   && Result != Match_InvalidOperand
4478                                    && Result != Match_MissingFeature
4479                                    && Result != Match_PreferE32)) {
4480       Result = R;
4481       ErrorInfo = EI;
4482     }
4483     if (R == Match_Success)
4484       break;
4485   }
4486 
4487   if (Result == Match_Success) {
4488     if (!validateInstruction(Inst, IDLoc, Operands)) {
4489       return true;
4490     }
4491     Inst.setLoc(IDLoc);
4492     Out.emitInstruction(Inst, getSTI());
4493     return false;
4494   }
4495 
4496   StringRef Mnemo = ((AMDGPUOperand &)*Operands[0]).getToken();
4497   if (checkUnsupportedInstruction(Mnemo, IDLoc)) {
4498     return true;
4499   }
4500 
4501   switch (Result) {
4502   default: break;
4503   case Match_MissingFeature:
4504     // It has been verified that the specified instruction
4505     // mnemonic is valid. A match was found but it requires
4506     // features which are not supported on this GPU.
4507     return Error(IDLoc, "operands are not valid for this GPU or mode");
4508 
4509   case Match_InvalidOperand: {
4510     SMLoc ErrorLoc = IDLoc;
4511     if (ErrorInfo != ~0ULL) {
4512       if (ErrorInfo >= Operands.size()) {
4513         return Error(IDLoc, "too few operands for instruction");
4514       }
4515       ErrorLoc = ((AMDGPUOperand &)*Operands[ErrorInfo]).getStartLoc();
4516       if (ErrorLoc == SMLoc())
4517         ErrorLoc = IDLoc;
4518     }
4519     return Error(ErrorLoc, "invalid operand for instruction");
4520   }
4521 
4522   case Match_PreferE32:
4523     return Error(IDLoc, "internal error: instruction without _e64 suffix "
4524                         "should be encoded as e32");
4525   case Match_MnemonicFail:
4526     llvm_unreachable("Invalid instructions should have been handled already");
4527   }
4528   llvm_unreachable("Implement any new match types added!");
4529 }
4530 
4531 bool AMDGPUAsmParser::ParseAsAbsoluteExpression(uint32_t &Ret) {
4532   int64_t Tmp = -1;
4533   if (!isToken(AsmToken::Integer) && !isToken(AsmToken::Identifier)) {
4534     return true;
4535   }
4536   if (getParser().parseAbsoluteExpression(Tmp)) {
4537     return true;
4538   }
4539   Ret = static_cast<uint32_t>(Tmp);
4540   return false;
4541 }
4542 
4543 bool AMDGPUAsmParser::ParseDirectiveMajorMinor(uint32_t &Major,
4544                                                uint32_t &Minor) {
4545   if (ParseAsAbsoluteExpression(Major))
4546     return TokError("invalid major version");
4547 
4548   if (!trySkipToken(AsmToken::Comma))
4549     return TokError("minor version number required, comma expected");
4550 
4551   if (ParseAsAbsoluteExpression(Minor))
4552     return TokError("invalid minor version");
4553 
4554   return false;
4555 }
4556 
4557 bool AMDGPUAsmParser::ParseDirectiveAMDGCNTarget() {
4558   if (getSTI().getTargetTriple().getArch() != Triple::amdgcn)
4559     return TokError("directive only supported for amdgcn architecture");
4560 
4561   std::string TargetIDDirective;
4562   SMLoc TargetStart = getTok().getLoc();
4563   if (getParser().parseEscapedString(TargetIDDirective))
4564     return true;
4565 
4566   SMRange TargetRange = SMRange(TargetStart, getTok().getLoc());
4567   if (getTargetStreamer().getTargetID()->toString() != TargetIDDirective)
4568     return getParser().Error(TargetRange.Start,
4569         (Twine(".amdgcn_target directive's target id ") +
4570          Twine(TargetIDDirective) +
4571          Twine(" does not match the specified target id ") +
4572          Twine(getTargetStreamer().getTargetID()->toString())).str());
4573 
4574   return false;
4575 }
4576 
4577 bool AMDGPUAsmParser::OutOfRangeError(SMRange Range) {
4578   return Error(Range.Start, "value out of range", Range);
4579 }
4580 
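// Compute the VGPR and SGPR block counts for the kernel descriptor from the
// highest used register numbers, accounting for the extra SGPRs reserved for
// VCC, flat_scratch and XNACK, and for the fixed allocation required by the
// SGPR-init-bug workaround.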
4581 bool AMDGPUAsmParser::calculateGPRBlocks(
4582     const FeatureBitset &Features, bool VCCUsed, bool FlatScrUsed,
4583     bool XNACKUsed, Optional<bool> EnableWavefrontSize32, unsigned NextFreeVGPR,
4584     SMRange VGPRRange, unsigned NextFreeSGPR, SMRange SGPRRange,
4585     unsigned &VGPRBlocks, unsigned &SGPRBlocks) {
4586   // TODO(scott.linder): These calculations are duplicated from
4587   // AMDGPUAsmPrinter::getSIProgramInfo and could be unified.
4588   IsaVersion Version = getIsaVersion(getSTI().getCPU());
4589 
4590   unsigned NumVGPRs = NextFreeVGPR;
4591   unsigned NumSGPRs = NextFreeSGPR;
4592 
4593   if (Version.Major >= 10)
4594     NumSGPRs = 0;
4595   else {
4596     unsigned MaxAddressableNumSGPRs =
4597         IsaInfo::getAddressableNumSGPRs(&getSTI());
4598 
4599     if (Version.Major >= 8 && !Features.test(FeatureSGPRInitBug) &&
4600         NumSGPRs > MaxAddressableNumSGPRs)
4601       return OutOfRangeError(SGPRRange);
4602 
4603     NumSGPRs +=
4604         IsaInfo::getNumExtraSGPRs(&getSTI(), VCCUsed, FlatScrUsed, XNACKUsed);
4605 
4606     if ((Version.Major <= 7 || Features.test(FeatureSGPRInitBug)) &&
4607         NumSGPRs > MaxAddressableNumSGPRs)
4608       return OutOfRangeError(SGPRRange);
4609 
4610     if (Features.test(FeatureSGPRInitBug))
4611       NumSGPRs = IsaInfo::FIXED_NUM_SGPRS_FOR_INIT_BUG;
4612   }
4613 
4614   VGPRBlocks =
4615       IsaInfo::getNumVGPRBlocks(&getSTI(), NumVGPRs, EnableWavefrontSize32);
4616   SGPRBlocks = IsaInfo::getNumSGPRBlocks(&getSTI(), NumSGPRs);
4617 
4618   return false;
4619 }
4620 
4621 bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
4622   if (getSTI().getTargetTriple().getArch() != Triple::amdgcn)
4623     return TokError("directive only supported for amdgcn architecture");
4624 
4625   if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA)
4626     return TokError("directive only supported for amdhsa OS");
4627 
4628   StringRef KernelName;
4629   if (getParser().parseIdentifier(KernelName))
4630     return true;
4631 
4632   kernel_descriptor_t KD = getDefaultAmdhsaKernelDescriptor(&getSTI());
4633 
4634   StringSet<> Seen;
4635 
4636   IsaVersion IVersion = getIsaVersion(getSTI().getCPU());
4637 
4638   SMRange VGPRRange;
4639   uint64_t NextFreeVGPR = 0;
4640   uint64_t AccumOffset = 0;
4641   SMRange SGPRRange;
4642   uint64_t NextFreeSGPR = 0;
4643 
4644   // Count the number of user SGPRs implied from the enabled feature bits.
4645   unsigned ImpliedUserSGPRCount = 0;
4646 
4647   // Track if the asm explicitly contains the directive for the user SGPR
4648   // count.
4649   Optional<unsigned> ExplicitUserSGPRCount;
4650   bool ReserveVCC = true;
4651   bool ReserveFlatScr = true;
4652   Optional<bool> EnableWavefrontSize32;
4653 
4654   while (true) {
4655     while (trySkipToken(AsmToken::EndOfStatement));
4656 
4657     StringRef ID;
4658     SMRange IDRange = getTok().getLocRange();
4659     if (!parseId(ID, "expected .amdhsa_ directive or .end_amdhsa_kernel"))
4660       return true;
4661 
4662     if (ID == ".end_amdhsa_kernel")
4663       break;
4664 
4665     if (Seen.find(ID) != Seen.end())
4666       return TokError(".amdhsa_ directives cannot be repeated");
4667     Seen.insert(ID);
4668 
4669     SMLoc ValStart = getLoc();
4670     int64_t IVal;
4671     if (getParser().parseAbsoluteExpression(IVal))
4672       return true;
4673     SMLoc ValEnd = getLoc();
4674     SMRange ValRange = SMRange(ValStart, ValEnd);
4675 
4676     if (IVal < 0)
4677       return OutOfRangeError(ValRange);
4678 
4679     uint64_t Val = IVal;
4680 
4681 #define PARSE_BITS_ENTRY(FIELD, ENTRY, VALUE, RANGE)                           \
4682   if (!isUInt<ENTRY##_WIDTH>(VALUE))                                           \
4683     return OutOfRangeError(RANGE);                                             \
4684   AMDHSA_BITS_SET(FIELD, ENTRY, VALUE);
4685 
4686     if (ID == ".amdhsa_group_segment_fixed_size") {
4687       if (!isUInt<sizeof(KD.group_segment_fixed_size) * CHAR_BIT>(Val))
4688         return OutOfRangeError(ValRange);
4689       KD.group_segment_fixed_size = Val;
4690     } else if (ID == ".amdhsa_private_segment_fixed_size") {
4691       if (!isUInt<sizeof(KD.private_segment_fixed_size) * CHAR_BIT>(Val))
4692         return OutOfRangeError(ValRange);
4693       KD.private_segment_fixed_size = Val;
4694     } else if (ID == ".amdhsa_kernarg_size") {
4695       if (!isUInt<sizeof(KD.kernarg_size) * CHAR_BIT>(Val))
4696         return OutOfRangeError(ValRange);
4697       KD.kernarg_size = Val;
4698     } else if (ID == ".amdhsa_user_sgpr_count") {
4699       ExplicitUserSGPRCount = Val;
4700     } else if (ID == ".amdhsa_user_sgpr_private_segment_buffer") {
4701       if (hasArchitectedFlatScratch())
4702         return Error(IDRange.Start,
4703                      "directive is not supported with architected flat scratch",
4704                      IDRange);
4705       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4706                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER,
4707                        Val, ValRange);
4708       if (Val)
4709         ImpliedUserSGPRCount += 4;
4710     } else if (ID == ".amdhsa_user_sgpr_dispatch_ptr") {
4711       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4712                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR, Val,
4713                        ValRange);
4714       if (Val)
4715         ImpliedUserSGPRCount += 2;
4716     } else if (ID == ".amdhsa_user_sgpr_queue_ptr") {
4717       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4718                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR, Val,
4719                        ValRange);
4720       if (Val)
4721         ImpliedUserSGPRCount += 2;
4722     } else if (ID == ".amdhsa_user_sgpr_kernarg_segment_ptr") {
4723       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4724                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR,
4725                        Val, ValRange);
4726       if (Val)
4727         ImpliedUserSGPRCount += 2;
4728     } else if (ID == ".amdhsa_user_sgpr_dispatch_id") {
4729       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4730                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID, Val,
4731                        ValRange);
4732       if (Val)
4733         ImpliedUserSGPRCount += 2;
4734     } else if (ID == ".amdhsa_user_sgpr_flat_scratch_init") {
4735       if (hasArchitectedFlatScratch())
4736         return Error(IDRange.Start,
4737                      "directive is not supported with architected flat scratch",
4738                      IDRange);
4739       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4740                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT, Val,
4741                        ValRange);
4742       if (Val)
4743         ImpliedUserSGPRCount += 2;
4744     } else if (ID == ".amdhsa_user_sgpr_private_segment_size") {
4745       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4746                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE,
4747                        Val, ValRange);
4748       if (Val)
4749         ImpliedUserSGPRCount += 1;
4750     } else if (ID == ".amdhsa_wavefront_size32") {
4751       if (IVersion.Major < 10)
4752         return Error(IDRange.Start, "directive requires gfx10+", IDRange);
4753       EnableWavefrontSize32 = Val;
4754       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4755                        KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32,
4756                        Val, ValRange);
4757     } else if (ID == ".amdhsa_system_sgpr_private_segment_wavefront_offset") {
4758       if (hasArchitectedFlatScratch())
4759         return Error(IDRange.Start,
4760                      "directive is not supported with architected flat scratch",
4761                      IDRange);
4762       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4763                        COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, Val, ValRange);
4764     } else if (ID == ".amdhsa_enable_private_segment") {
4765       if (!hasArchitectedFlatScratch())
4766         return Error(
4767             IDRange.Start,
4768             "directive is not supported without architected flat scratch",
4769             IDRange);
4770       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4771                        COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, Val, ValRange);
4772     } else if (ID == ".amdhsa_system_sgpr_workgroup_id_x") {
4773       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4774                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, Val,
4775                        ValRange);
4776     } else if (ID == ".amdhsa_system_sgpr_workgroup_id_y") {
4777       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4778                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y, Val,
4779                        ValRange);
4780     } else if (ID == ".amdhsa_system_sgpr_workgroup_id_z") {
4781       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4782                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z, Val,
4783                        ValRange);
4784     } else if (ID == ".amdhsa_system_sgpr_workgroup_info") {
4785       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4786                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO, Val,
4787                        ValRange);
4788     } else if (ID == ".amdhsa_system_vgpr_workitem_id") {
4789       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4790                        COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID, Val,
4791                        ValRange);
4792     } else if (ID == ".amdhsa_next_free_vgpr") {
4793       VGPRRange = ValRange;
4794       NextFreeVGPR = Val;
4795     } else if (ID == ".amdhsa_next_free_sgpr") {
4796       SGPRRange = ValRange;
4797       NextFreeSGPR = Val;
4798     } else if (ID == ".amdhsa_accum_offset") {
4799       if (!isGFX90A())
4800         return Error(IDRange.Start, "directive requires gfx90a+", IDRange);
4801       AccumOffset = Val;
4802     } else if (ID == ".amdhsa_reserve_vcc") {
4803       if (!isUInt<1>(Val))
4804         return OutOfRangeError(ValRange);
4805       ReserveVCC = Val;
4806     } else if (ID == ".amdhsa_reserve_flat_scratch") {
4807       if (IVersion.Major < 7)
4808         return Error(IDRange.Start, "directive requires gfx7+", IDRange);
4809       if (hasArchitectedFlatScratch())
4810         return Error(IDRange.Start,
4811                      "directive is not supported with architected flat scratch",
4812                      IDRange);
4813       if (!isUInt<1>(Val))
4814         return OutOfRangeError(ValRange);
4815       ReserveFlatScr = Val;
4816     } else if (ID == ".amdhsa_reserve_xnack_mask") {
4817       if (IVersion.Major < 8)
4818         return Error(IDRange.Start, "directive requires gfx8+", IDRange);
4819       if (!isUInt<1>(Val))
4820         return OutOfRangeError(ValRange);
4821       if (Val != getTargetStreamer().getTargetID()->isXnackOnOrAny())
4822         return getParser().Error(IDRange.Start, ".amdhsa_reserve_xnack_mask does not match target id",
4823                                  IDRange);
4824     } else if (ID == ".amdhsa_float_round_mode_32") {
4825       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
4826                        COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32, Val, ValRange);
4827     } else if (ID == ".amdhsa_float_round_mode_16_64") {
4828       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
4829                        COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64, Val, ValRange);
4830     } else if (ID == ".amdhsa_float_denorm_mode_32") {
4831       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
4832                        COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32, Val, ValRange);
4833     } else if (ID == ".amdhsa_float_denorm_mode_16_64") {
4834       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
4835                        COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64, Val,
4836                        ValRange);
4837     } else if (ID == ".amdhsa_dx10_clamp") {
4838       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
4839                        COMPUTE_PGM_RSRC1_ENABLE_DX10_CLAMP, Val, ValRange);
4840     } else if (ID == ".amdhsa_ieee_mode") {
4841       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_ENABLE_IEEE_MODE,
4842                        Val, ValRange);
4843     } else if (ID == ".amdhsa_fp16_overflow") {
4844       if (IVersion.Major < 9)
4845         return Error(IDRange.Start, "directive requires gfx9+", IDRange);
4846       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FP16_OVFL, Val,
4847                        ValRange);
4848     } else if (ID == ".amdhsa_tg_split") {
4849       if (!isGFX90A())
4850         return Error(IDRange.Start, "directive requires gfx90a+", IDRange);
4851       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc3, COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT, Val,
4852                        ValRange);
4853     } else if (ID == ".amdhsa_workgroup_processor_mode") {
4854       if (IVersion.Major < 10)
4855         return Error(IDRange.Start, "directive requires gfx10+", IDRange);
4856       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_WGP_MODE, Val,
4857                        ValRange);
4858     } else if (ID == ".amdhsa_memory_ordered") {
4859       if (IVersion.Major < 10)
4860         return Error(IDRange.Start, "directive requires gfx10+", IDRange);
4861       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_MEM_ORDERED, Val,
4862                        ValRange);
4863     } else if (ID == ".amdhsa_forward_progress") {
4864       if (IVersion.Major < 10)
4865         return Error(IDRange.Start, "directive requires gfx10+", IDRange);
4866       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FWD_PROGRESS, Val,
4867                        ValRange);
4868     } else if (ID == ".amdhsa_exception_fp_ieee_invalid_op") {
4869       PARSE_BITS_ENTRY(
4870           KD.compute_pgm_rsrc2,
4871           COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION, Val,
4872           ValRange);
4873     } else if (ID == ".amdhsa_exception_fp_denorm_src") {
4874       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4875                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE,
4876                        Val, ValRange);
4877     } else if (ID == ".amdhsa_exception_fp_ieee_div_zero") {
4878       PARSE_BITS_ENTRY(
4879           KD.compute_pgm_rsrc2,
4880           COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO, Val,
4881           ValRange);
4882     } else if (ID == ".amdhsa_exception_fp_ieee_overflow") {
4883       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4884                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW,
4885                        Val, ValRange);
4886     } else if (ID == ".amdhsa_exception_fp_ieee_underflow") {
4887       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4888                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW,
4889                        Val, ValRange);
4890     } else if (ID == ".amdhsa_exception_fp_ieee_inexact") {
4891       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4892                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT,
4893                        Val, ValRange);
4894     } else if (ID == ".amdhsa_exception_int_div_zero") {
4895       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4896                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO,
4897                        Val, ValRange);
4898     } else {
4899       return Error(IDRange.Start, "unknown .amdhsa_kernel directive", IDRange);
4900     }
4901 
4902 #undef PARSE_BITS_ENTRY
4903   }
4904 
4905   if (Seen.find(".amdhsa_next_free_vgpr") == Seen.end())
4906     return TokError(".amdhsa_next_free_vgpr directive is required");
4907 
4908   if (Seen.find(".amdhsa_next_free_sgpr") == Seen.end())
4909     return TokError(".amdhsa_next_free_sgpr directive is required");
4910 
4911   unsigned VGPRBlocks;
4912   unsigned SGPRBlocks;
4913   if (calculateGPRBlocks(getFeatureBits(), ReserveVCC, ReserveFlatScr,
4914                          getTargetStreamer().getTargetID()->isXnackOnOrAny(),
4915                          EnableWavefrontSize32, NextFreeVGPR,
4916                          VGPRRange, NextFreeSGPR, SGPRRange, VGPRBlocks,
4917                          SGPRBlocks))
4918     return true;
4919 
4920   if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_WIDTH>(
4921           VGPRBlocks))
4922     return OutOfRangeError(VGPRRange);
4923   AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
4924                   COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT, VGPRBlocks);
4925 
4926   if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_WIDTH>(
4927           SGPRBlocks))
4928     return OutOfRangeError(SGPRRange);
4929   AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
4930                   COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT,
4931                   SGPRBlocks);
4932 
4933   if (ExplicitUserSGPRCount && ImpliedUserSGPRCount > *ExplicitUserSGPRCount)
4934     return TokError(".amdhsa_user_sgpr_count smaller than the count implied "
4935                     "by enabled user SGPRs");
4936 
4937   unsigned UserSGPRCount =
4938       ExplicitUserSGPRCount ? *ExplicitUserSGPRCount : ImpliedUserSGPRCount;
4939 
4940   if (!isUInt<COMPUTE_PGM_RSRC2_USER_SGPR_COUNT_WIDTH>(UserSGPRCount))
4941     return TokError("too many user SGPRs enabled");
4942   AMDHSA_BITS_SET(KD.compute_pgm_rsrc2, COMPUTE_PGM_RSRC2_USER_SGPR_COUNT,
4943                   UserSGPRCount);
4944 
4945   if (isGFX90A()) {
4946     if (Seen.find(".amdhsa_accum_offset") == Seen.end())
4947       return TokError(".amdhsa_accum_offset directive is required");
4948     if (AccumOffset < 4 || AccumOffset > 256 || (AccumOffset & 3))
4949       return TokError("accum_offset should be in range [4..256] in "
4950                       "increments of 4");
4951     if (AccumOffset > alignTo(std::max((uint64_t)1, NextFreeVGPR), 4))
4952       return TokError("accum_offset exceeds total VGPR allocation");
4953     AMDHSA_BITS_SET(KD.compute_pgm_rsrc3, COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET,
4954                     (AccumOffset / 4 - 1));
4955   }
4956 
4957   getTargetStreamer().EmitAmdhsaKernelDescriptor(
4958       getSTI(), KernelName, KD, NextFreeVGPR, NextFreeSGPR, ReserveVCC,
4959       ReserveFlatScr);
4960   return false;
4961 }
4962 
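/// Parse the legacy .hsa_code_object_version directive: a major/minor
/// version pair that is forwarded to the target streamer.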
4963 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectVersion() {
4964   uint32_t Major;
4965   uint32_t Minor;
4966 
4967   if (ParseDirectiveMajorMinor(Major, Minor))
4968     return true;
4969 
4970   getTargetStreamer().EmitDirectiveHSACodeObjectVersion(Major, Minor);
4971   return false;
4972 }
4973 
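/// Parse the legacy .hsa_code_object_isa directive. When no arguments are
/// given, the ISA version of the targeted GPU is used; otherwise the major,
/// minor, stepping, vendor name and arch name are parsed explicitly.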
4974 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectISA() {
4975   uint32_t Major;
4976   uint32_t Minor;
4977   uint32_t Stepping;
4978   StringRef VendorName;
4979   StringRef ArchName;
4980 
4981   // If this directive has no arguments, then use the ISA version for the
4982   // targeted GPU.
4983   if (isToken(AsmToken::EndOfStatement)) {
4984     AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
4985     getTargetStreamer().EmitDirectiveHSACodeObjectISAV2(ISA.Major, ISA.Minor,
4986                                                         ISA.Stepping,
4987                                                         "AMD", "AMDGPU");
4988     return false;
4989   }
4990 
4991   if (ParseDirectiveMajorMinor(Major, Minor))
4992     return true;
4993 
4994   if (!trySkipToken(AsmToken::Comma))
4995     return TokError("stepping version number required, comma expected");
4996 
4997   if (ParseAsAbsoluteExpression(Stepping))
4998     return TokError("invalid stepping version");
4999 
5000   if (!trySkipToken(AsmToken::Comma))
5001     return TokError("vendor name required, comma expected");
5002 
5003   if (!parseString(VendorName, "invalid vendor name"))
5004     return true;
5005 
5006   if (!trySkipToken(AsmToken::Comma))
5007     return TokError("arch name required, comma expected");
5008 
5009   if (!parseString(ArchName, "invalid arch name"))
5010     return true;
5011 
5012   getTargetStreamer().EmitDirectiveHSACodeObjectISAV2(Major, Minor, Stepping,
5013                                                       VendorName, ArchName);
5014   return false;
5015 }
5016 
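/// Parse a single field of an .amd_kernel_code_t block and verify that
/// wavefront-size and GFX10-only settings are consistent with the subtarget
/// features.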
5017 bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID,
5018                                                amd_kernel_code_t &Header) {
5019   // max_scratch_backing_memory_byte_size is deprecated. Ignore it while parsing
5020   // assembly for backwards compatibility.
5021   if (ID == "max_scratch_backing_memory_byte_size") {
5022     Parser.eatToEndOfStatement();
5023     return false;
5024   }
5025 
5026   SmallString<40> ErrStr;
5027   raw_svector_ostream Err(ErrStr);
5028   if (!parseAmdKernelCodeField(ID, getParser(), Header, Err)) {
5029     return TokError(Err.str());
5030   }
5031   Lex();
5032 
5033   if (ID == "enable_wavefront_size32") {
5034     if (Header.code_properties & AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32) {
5035       if (!isGFX10Plus())
5036         return TokError("enable_wavefront_size32=1 is only allowed on GFX10+");
5037       if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32])
5038         return TokError("enable_wavefront_size32=1 requires +WavefrontSize32");
5039     } else {
5040       if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64])
5041         return TokError("enable_wavefront_size32=0 requires +WavefrontSize64");
5042     }
5043   }
5044 
5045   if (ID == "wavefront_size") {
5046     if (Header.wavefront_size == 5) {
5047       if (!isGFX10Plus())
5048         return TokError("wavefront_size=5 is only allowed on GFX10+");
5049       if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32])
5050         return TokError("wavefront_size=5 requires +WavefrontSize32");
5051     } else if (Header.wavefront_size == 6) {
5052       if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64])
5053         return TokError("wavefront_size=6 requires +WavefrontSize64");
5054     }
5055   }
5056 
5057   if (ID == "enable_wgp_mode") {
5058     if (G_00B848_WGP_MODE(Header.compute_pgm_resource_registers) &&
5059         !isGFX10Plus())
5060       return TokError("enable_wgp_mode=1 is only allowed on GFX10+");
5061   }
5062 
5063   if (ID == "enable_mem_ordered") {
5064     if (G_00B848_MEM_ORDERED(Header.compute_pgm_resource_registers) &&
5065         !isGFX10Plus())
5066       return TokError("enable_mem_ordered=1 is only allowed on GFX10+");
5067   }
5068 
5069   if (ID == "enable_fwd_progress") {
5070     if (G_00B848_FWD_PROGRESS(Header.compute_pgm_resource_registers) &&
5071         !isGFX10Plus())
5072       return TokError("enable_fwd_progress=1 is only allowed on GFX10+");
5073   }
5074 
5075   return false;
5076 }
5077 
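/// ParseDirectiveAMDKernelCodeT
///  ::= .amd_kernel_code_t
///        <field entry> ...
///      .end_amd_kernel_code_t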
5078 bool AMDGPUAsmParser::ParseDirectiveAMDKernelCodeT() {
5079   amd_kernel_code_t Header;
5080   AMDGPU::initDefaultAMDKernelCodeT(Header, &getSTI());
5081 
5082   while (true) {
5083     // Lex EndOfStatement.  This is in a while loop, because lexing a comment
5084     // will set the current token to EndOfStatement.
5085     while(trySkipToken(AsmToken::EndOfStatement));
5086 
5087     StringRef ID;
5088     if (!parseId(ID, "expected value identifier or .end_amd_kernel_code_t"))
5089       return true;
5090 
5091     if (ID == ".end_amd_kernel_code_t")
5092       break;
5093 
5094     if (ParseAMDKernelCodeTValue(ID, Header))
5095       return true;
5096   }
5097 
5098   getTargetStreamer().EmitAMDKernelCodeT(Header);
5099 
5100   return false;
5101 }
5102 
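/// ParseDirectiveAMDGPUHsaKernel
///  ::= .amdgpu_hsa_kernel <symbol>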
5103 bool AMDGPUAsmParser::ParseDirectiveAMDGPUHsaKernel() {
5104   StringRef KernelName;
5105   if (!parseId(KernelName, "expected symbol name"))
5106     return true;
5107 
5108   getTargetStreamer().EmitAMDGPUSymbolType(KernelName,
5109                                            ELF::STT_AMDGPU_HSA_KERNEL);
5110 
5111   KernelScope.initialize(getContext());
5112   return false;
5113 }
5114 
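/// Parse the .amd_amdgpu_isa directive and check that the quoted target id
/// matches the target id selected by the assembler options.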
5115 bool AMDGPUAsmParser::ParseDirectiveISAVersion() {
5116   if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) {
5117     return Error(getLoc(),
5118                  ".amd_amdgpu_isa directive is not available on non-amdgcn "
5119                  "architectures");
5120   }
5121 
5122   auto TargetIDDirective = getLexer().getTok().getStringContents();
5123   if (getTargetStreamer().getTargetID()->toString() != TargetIDDirective)
5124     return Error(getParser().getTok().getLoc(), "target id must match options");
5125 
5126   getTargetStreamer().EmitISAVersion();
5127   Lex();
5128 
5129   return false;
5130 }
5131 
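/// Parse the HSA metadata block enclosed by the begin/end assembler
/// directives and emit it in V2 or V3 form depending on the code object ABI.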
5132 bool AMDGPUAsmParser::ParseDirectiveHSAMetadata() {
5133   const char *AssemblerDirectiveBegin;
5134   const char *AssemblerDirectiveEnd;
5135   std::tie(AssemblerDirectiveBegin, AssemblerDirectiveEnd) =
5136       isHsaAbiVersion3AndAbove(&getSTI())
5137           ? std::make_tuple(HSAMD::V3::AssemblerDirectiveBegin,
5138                             HSAMD::V3::AssemblerDirectiveEnd)
5139           : std::make_tuple(HSAMD::AssemblerDirectiveBegin,
5140                             HSAMD::AssemblerDirectiveEnd);
5141 
5142   if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA) {
5143     return Error(getLoc(),
5144                  (Twine(AssemblerDirectiveBegin) + Twine(" directive is "
5145                  "not available on non-amdhsa OSes")).str());
5146   }
5147 
5148   std::string HSAMetadataString;
5149   if (ParseToEndDirective(AssemblerDirectiveBegin, AssemblerDirectiveEnd,
5150                           HSAMetadataString))
5151     return true;
5152 
5153   if (isHsaAbiVersion3AndAbove(&getSTI())) {
5154     if (!getTargetStreamer().EmitHSAMetadataV3(HSAMetadataString))
5155       return Error(getLoc(), "invalid HSA metadata");
5156   } else {
5157     if (!getTargetStreamer().EmitHSAMetadataV2(HSAMetadataString))
5158       return Error(getLoc(), "invalid HSA metadata");
5159   }
5160 
5161   return false;
5162 }
5163 
5164 /// Common code to parse out a block of text (typically YAML) between start and
5165 /// end directives.
5166 bool AMDGPUAsmParser::ParseToEndDirective(const char *AssemblerDirectiveBegin,
5167                                           const char *AssemblerDirectiveEnd,
5168                                           std::string &CollectString) {
5169 
5170   raw_string_ostream CollectStream(CollectString);
5171 
5172   getLexer().setSkipSpace(false);
5173 
5174   bool FoundEnd = false;
5175   while (!isToken(AsmToken::Eof)) {
5176     while (isToken(AsmToken::Space)) {
5177       CollectStream << getTokenStr();
5178       Lex();
5179     }
5180 
5181     if (trySkipId(AssemblerDirectiveEnd)) {
5182       FoundEnd = true;
5183       break;
5184     }
5185 
5186     CollectStream << Parser.parseStringToEndOfStatement()
5187                   << getContext().getAsmInfo()->getSeparatorString();
5188 
5189     Parser.eatToEndOfStatement();
5190   }
5191 
5192   getLexer().setSkipSpace(true);
5193 
5194   if (isToken(AsmToken::Eof) && !FoundEnd) {
5195     return TokError(Twine("expected directive ") +
5196                     Twine(AssemblerDirectiveEnd) + Twine(" not found"));
5197   }
5198 
5199   CollectStream.flush();
5200   return false;
5201 }
5202 
5203 /// Parse the assembler directive for new MsgPack-format PAL metadata.
5204 bool AMDGPUAsmParser::ParseDirectivePALMetadataBegin() {
5205   std::string String;
5206   if (ParseToEndDirective(AMDGPU::PALMD::AssemblerDirectiveBegin,
5207                           AMDGPU::PALMD::AssemblerDirectiveEnd, String))
5208     return true;
5209 
5210   auto PALMetadata = getTargetStreamer().getPALMetadata();
5211   if (!PALMetadata->setFromString(String))
5212     return Error(getLoc(), "invalid PAL metadata");
5213   return false;
5214 }
5215 
5216 /// Parse the assembler directive for old linear-format PAL metadata.
5217 bool AMDGPUAsmParser::ParseDirectivePALMetadata() {
5218   if (getSTI().getTargetTriple().getOS() != Triple::AMDPAL) {
5219     return Error(getLoc(),
5220                  (Twine(PALMD::AssemblerDirective) + Twine(" directive is "
5221                  "not available on non-amdpal OSes")).str());
5222   }
5223 
5224   auto PALMetadata = getTargetStreamer().getPALMetadata();
5225   PALMetadata->setLegacy();
5226   for (;;) {
5227     uint32_t Key, Value;
5228     if (ParseAsAbsoluteExpression(Key)) {
5229       return TokError(Twine("invalid value in ") +
5230                       Twine(PALMD::AssemblerDirective));
5231     }
5232     if (!trySkipToken(AsmToken::Comma)) {
5233       return TokError(Twine("expected an even number of values in ") +
5234                       Twine(PALMD::AssemblerDirective));
5235     }
5236     if (ParseAsAbsoluteExpression(Value)) {
5237       return TokError(Twine("invalid value in ") +
5238                       Twine(PALMD::AssemblerDirective));
5239     }
5240     PALMetadata->setRegister(Key, Value);
5241     if (!trySkipToken(AsmToken::Comma))
5242       break;
5243   }
5244   return false;
5245 }
5246 
5247 /// ParseDirectiveAMDGPULDS
5248 ///  ::= .amdgpu_lds identifier ',' size_expression [',' align_expression]
5249 bool AMDGPUAsmParser::ParseDirectiveAMDGPULDS() {
5250   if (getParser().checkForValidSection())
5251     return true;
5252 
5253   StringRef Name;
5254   SMLoc NameLoc = getLoc();
5255   if (getParser().parseIdentifier(Name))
5256     return TokError("expected identifier in directive");
5257 
5258   MCSymbol *Symbol = getContext().getOrCreateSymbol(Name);
5259   if (parseToken(AsmToken::Comma, "expected ','"))
5260     return true;
5261 
5262   unsigned LocalMemorySize = AMDGPU::IsaInfo::getLocalMemorySize(&getSTI());
5263 
5264   int64_t Size;
5265   SMLoc SizeLoc = getLoc();
5266   if (getParser().parseAbsoluteExpression(Size))
5267     return true;
5268   if (Size < 0)
5269     return Error(SizeLoc, "size must be non-negative");
5270   if (Size > LocalMemorySize)
5271     return Error(SizeLoc, "size is too large");
5272 
5273   int64_t Alignment = 4;
5274   if (trySkipToken(AsmToken::Comma)) {
5275     SMLoc AlignLoc = getLoc();
5276     if (getParser().parseAbsoluteExpression(Alignment))
5277       return true;
5278     if (Alignment < 0 || !isPowerOf2_64(Alignment))
5279       return Error(AlignLoc, "alignment must be a power of two");
5280 
5281     // Alignment larger than the size of LDS is possible in theory, as long
5282     // as the linker manages to place the symbol at address 0, but we do want
5283     // to make sure the alignment fits nicely into a 32-bit integer.
5284     if (Alignment >= 1u << 31)
5285       return Error(AlignLoc, "alignment is too large");
5286   }
5287 
5288   if (parseToken(AsmToken::EndOfStatement,
5289                  "unexpected token in '.amdgpu_lds' directive"))
5290     return true;
5291 
5292   Symbol->redefineIfPossible();
5293   if (!Symbol->isUndefined())
5294     return Error(NameLoc, "invalid symbol redefinition");
5295 
5296   getTargetStreamer().emitAMDGPULDS(Symbol, Size, Align(Alignment));
5297   return false;
5298 }
5299 
5300 bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) {
5301   StringRef IDVal = DirectiveID.getString();
5302 
5303   if (isHsaAbiVersion3AndAbove(&getSTI())) {
5304     if (IDVal == ".amdhsa_kernel")
5305       return ParseDirectiveAMDHSAKernel();
5306 
5307     // TODO: Restructure/combine with PAL metadata directive.
5308     if (IDVal == AMDGPU::HSAMD::V3::AssemblerDirectiveBegin)
5309       return ParseDirectiveHSAMetadata();
5310   } else {
5311     if (IDVal == ".hsa_code_object_version")
5312       return ParseDirectiveHSACodeObjectVersion();
5313 
5314     if (IDVal == ".hsa_code_object_isa")
5315       return ParseDirectiveHSACodeObjectISA();
5316 
5317     if (IDVal == ".amd_kernel_code_t")
5318       return ParseDirectiveAMDKernelCodeT();
5319 
5320     if (IDVal == ".amdgpu_hsa_kernel")
5321       return ParseDirectiveAMDGPUHsaKernel();
5322 
5323     if (IDVal == ".amd_amdgpu_isa")
5324       return ParseDirectiveISAVersion();
5325 
5326     if (IDVal == AMDGPU::HSAMD::AssemblerDirectiveBegin)
5327       return ParseDirectiveHSAMetadata();
5328   }
5329 
5330   if (IDVal == ".amdgcn_target")
5331     return ParseDirectiveAMDGCNTarget();
5332 
5333   if (IDVal == ".amdgpu_lds")
5334     return ParseDirectiveAMDGPULDS();
5335 
5336   if (IDVal == PALMD::AssemblerDirectiveBegin)
5337     return ParseDirectivePALMetadataBegin();
5338 
5339   if (IDVal == PALMD::AssemblerDirective)
5340     return ParseDirectivePALMetadata();
5341 
5342   return true;
5343 }
5344 
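/// Return true if the physical register RegNo exists and may be used as an
/// operand on the current subtarget (e.g. flat_scratch, xnack_mask and the
/// upper SGPR pairs are only present on some generations).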
5345 bool AMDGPUAsmParser::subtargetHasRegister(const MCRegisterInfo &MRI,
5346                                            unsigned RegNo) {
5347 
5348   for (MCRegAliasIterator R(AMDGPU::TTMP12_TTMP13_TTMP14_TTMP15, &MRI, true);
5349        R.isValid(); ++R) {
5350     if (*R == RegNo)
5351       return isGFX9Plus();
5352   }
5353 
5354   // GFX10 has 2 more SGPRs 104 and 105.
5355   for (MCRegAliasIterator R(AMDGPU::SGPR104_SGPR105, &MRI, true);
5356        R.isValid(); ++R) {
5357     if (*R == RegNo)
5358       return hasSGPR104_SGPR105();
5359   }
5360 
5361   switch (RegNo) {
5362   case AMDGPU::SRC_SHARED_BASE:
5363   case AMDGPU::SRC_SHARED_LIMIT:
5364   case AMDGPU::SRC_PRIVATE_BASE:
5365   case AMDGPU::SRC_PRIVATE_LIMIT:
5366   case AMDGPU::SRC_POPS_EXITING_WAVE_ID:
5367     return isGFX9Plus();
5368   case AMDGPU::TBA:
5369   case AMDGPU::TBA_LO:
5370   case AMDGPU::TBA_HI:
5371   case AMDGPU::TMA:
5372   case AMDGPU::TMA_LO:
5373   case AMDGPU::TMA_HI:
5374     return !isGFX9Plus();
5375   case AMDGPU::XNACK_MASK:
5376   case AMDGPU::XNACK_MASK_LO:
5377   case AMDGPU::XNACK_MASK_HI:
5378     return (isVI() || isGFX9()) && getTargetStreamer().getTargetID()->isXnackSupported();
5379   case AMDGPU::SGPR_NULL:
5380     return isGFX10Plus();
5381   default:
5382     break;
5383   }
5384 
5385   if (isCI())
5386     return true;
5387 
5388   if (isSI() || isGFX10Plus()) {
5389     // No flat_scr on SI.
5390     // On GFX10 flat scratch is not a valid register operand and can only be
5391     // accessed with s_setreg/s_getreg.
5392     switch (RegNo) {
5393     case AMDGPU::FLAT_SCR:
5394     case AMDGPU::FLAT_SCR_LO:
5395     case AMDGPU::FLAT_SCR_HI:
5396       return false;
5397     default:
5398       return true;
5399     }
5400   }
5401 
5402   // VI only has 102 SGPRs, so make sure we aren't trying to use the 2 more that
5403   // SI/CI have.
5404   for (MCRegAliasIterator R(AMDGPU::SGPR102_SGPR103, &MRI, true);
5405        R.isValid(); ++R) {
5406     if (*R == RegNo)
5407       return hasSGPR102_SGPR103();
5408   }
5409 
5410   return true;
5411 }
5412 
5413 OperandMatchResultTy
5414 AMDGPUAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic,
5415                               OperandMode Mode) {
5416   // Try to parse with a custom parser
5417   OperandMatchResultTy ResTy = MatchOperandParserImpl(Operands, Mnemonic);
5418 
5419   // If we successfully parsed the operand or if there was an error parsing,
5420   // we are done.
5421   //
5422   // If we are parsing after we reach EndOfStatement then this means we
5423   // are appending default values to the Operands list.  This is only done
5424   // by custom parsers, so we shouldn't continue on to the generic parsing.
5425   if (ResTy == MatchOperand_Success || ResTy == MatchOperand_ParseFail ||
5426       isToken(AsmToken::EndOfStatement))
5427     return ResTy;
5428 
5429   SMLoc RBraceLoc;
5430   SMLoc LBraceLoc = getLoc();
5431   if (Mode == OperandMode_NSA && trySkipToken(AsmToken::LBrac)) {
5432     unsigned Prefix = Operands.size();
5433 
5434     for (;;) {
5435       auto Loc = getLoc();
5436       ResTy = parseReg(Operands);
5437       if (ResTy == MatchOperand_NoMatch)
5438         Error(Loc, "expected a register");
5439       if (ResTy != MatchOperand_Success)
5440         return MatchOperand_ParseFail;
5441 
5442       RBraceLoc = getLoc();
5443       if (trySkipToken(AsmToken::RBrac))
5444         break;
5445 
5446       if (!skipToken(AsmToken::Comma,
5447                      "expected a comma or a closing square bracket")) {
5448         return MatchOperand_ParseFail;
5449       }
5450     }
5451 
5452     if (Operands.size() - Prefix > 1) {
5453       Operands.insert(Operands.begin() + Prefix,
5454                       AMDGPUOperand::CreateToken(this, "[", LBraceLoc));
5455       Operands.push_back(AMDGPUOperand::CreateToken(this, "]", RBraceLoc));
5456     }
5457 
5458     return MatchOperand_Success;
5459   }
5460 
5461   return parseRegOrImm(Operands);
5462 }
5463 
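/// Strip a trailing _e32, _e64, _dpp or _sdwa suffix from the mnemonic,
/// recording the corresponding forced encoding, and return the bare name.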
5464 StringRef AMDGPUAsmParser::parseMnemonicSuffix(StringRef Name) {
5465   // Clear any forced encodings from the previous instruction.
5466   setForcedEncodingSize(0);
5467   setForcedDPP(false);
5468   setForcedSDWA(false);
5469 
5470   if (Name.endswith("_e64")) {
5471     setForcedEncodingSize(64);
5472     return Name.substr(0, Name.size() - 4);
5473   } else if (Name.endswith("_e32")) {
5474     setForcedEncodingSize(32);
5475     return Name.substr(0, Name.size() - 4);
5476   } else if (Name.endswith("_dpp")) {
5477     setForcedDPP(true);
5478     return Name.substr(0, Name.size() - 4);
5479   } else if (Name.endswith("_sdwa")) {
5480     setForcedSDWA(true);
5481     return Name.substr(0, Name.size() - 5);
5482   }
5483   return Name;
5484 }
5485 
5486 bool AMDGPUAsmParser::ParseInstruction(ParseInstructionInfo &Info,
5487                                        StringRef Name,
5488                                        SMLoc NameLoc, OperandVector &Operands) {
5489   // Add the instruction mnemonic
5490   Name = parseMnemonicSuffix(Name);
5491   Operands.push_back(AMDGPUOperand::CreateToken(this, Name, NameLoc));
5492 
5493   bool IsMIMG = Name.startswith("image_");
5494 
5495   while (!trySkipToken(AsmToken::EndOfStatement)) {
5496     OperandMode Mode = OperandMode_Default;
5497     if (IsMIMG && isGFX10Plus() && Operands.size() == 2)
5498       Mode = OperandMode_NSA;
5499     CPolSeen = 0;
5500     OperandMatchResultTy Res = parseOperand(Operands, Name, Mode);
5501 
5502     if (Res != MatchOperand_Success) {
5503       checkUnsupportedInstruction(Name, NameLoc);
5504       if (!Parser.hasPendingError()) {
5505         // FIXME: use real operand location rather than the current location.
5506         StringRef Msg =
5507           (Res == MatchOperand_ParseFail) ? "failed parsing operand." :
5508                                             "not a valid operand.";
5509         Error(getLoc(), Msg);
5510       }
5511       while (!trySkipToken(AsmToken::EndOfStatement)) {
5512         lex();
5513       }
5514       return true;
5515     }
5516 
5517     // Eat the comma or space if there is one.
5518     trySkipToken(AsmToken::Comma);
5519   }
5520 
5521   return false;
5522 }
5523 
5524 //===----------------------------------------------------------------------===//
5525 // Utility functions
5526 //===----------------------------------------------------------------------===//
5527 
5528 OperandMatchResultTy
5529 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, int64_t &IntVal) {
5530 
5531   if (!trySkipId(Prefix, AsmToken::Colon))
5532     return MatchOperand_NoMatch;
5533 
5534   return parseExpr(IntVal) ? MatchOperand_Success : MatchOperand_ParseFail;
5535 }
5536 
5537 OperandMatchResultTy
5538 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
5539                                     AMDGPUOperand::ImmTy ImmTy,
5540                                     bool (*ConvertResult)(int64_t&)) {
5541   SMLoc S = getLoc();
5542   int64_t Value = 0;
5543 
5544   OperandMatchResultTy Res = parseIntWithPrefix(Prefix, Value);
5545   if (Res != MatchOperand_Success)
5546     return Res;
5547 
5548   if (ConvertResult && !ConvertResult(Value)) {
5549     Error(S, "invalid " + StringRef(Prefix) + " value.");
5550   }
5551 
5552   Operands.push_back(AMDGPUOperand::CreateImm(this, Value, S, ImmTy));
5553   return MatchOperand_Success;
5554 }
5555 
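/// Parse an operand of the form <Prefix>:[b0,b1,...] where every element
/// must be 0 or 1; up to four bits are packed into a single immediate.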
5556 OperandMatchResultTy
5557 AMDGPUAsmParser::parseOperandArrayWithPrefix(const char *Prefix,
5558                                              OperandVector &Operands,
5559                                              AMDGPUOperand::ImmTy ImmTy,
5560                                              bool (*ConvertResult)(int64_t&)) {
5561   SMLoc S = getLoc();
5562   if (!trySkipId(Prefix, AsmToken::Colon))
5563     return MatchOperand_NoMatch;
5564 
5565   if (!skipToken(AsmToken::LBrac, "expected a left square bracket"))
5566     return MatchOperand_ParseFail;
5567 
5568   unsigned Val = 0;
5569   const unsigned MaxSize = 4;
5570 
5571   // FIXME: How to verify the number of elements matches the number of src
5572   // operands?
5573   for (int I = 0; ; ++I) {
5574     int64_t Op;
5575     SMLoc Loc = getLoc();
5576     if (!parseExpr(Op))
5577       return MatchOperand_ParseFail;
5578 
5579     if (Op != 0 && Op != 1) {
5580       Error(Loc, "invalid " + StringRef(Prefix) + " value.");
5581       return MatchOperand_ParseFail;
5582     }
5583 
5584     Val |= (Op << I);
5585 
5586     if (trySkipToken(AsmToken::RBrac))
5587       break;
5588 
5589     if (I + 1 == MaxSize) {
5590       Error(getLoc(), "expected a closing square bracket");
5591       return MatchOperand_ParseFail;
5592     }
5593 
5594     if (!skipToken(AsmToken::Comma, "expected a comma"))
5595       return MatchOperand_ParseFail;
5596   }
5597 
5598   Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S, ImmTy));
5599   return MatchOperand_Success;
5600 }
5601 
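/// Parse a named bit modifier: "<Name>" sets the bit and "no<Name>" clears
/// it. Returns NoMatch if neither form is present.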
5602 OperandMatchResultTy
5603 AMDGPUAsmParser::parseNamedBit(StringRef Name, OperandVector &Operands,
5604                                AMDGPUOperand::ImmTy ImmTy) {
5605   int64_t Bit;
5606   SMLoc S = getLoc();
5607 
5608   if (trySkipId(Name)) {
5609     Bit = 1;
5610   } else if (trySkipId("no", Name)) {
5611     Bit = 0;
5612   } else {
5613     return MatchOperand_NoMatch;
5614   }
5615 
5616   if (Name == "r128" && !hasMIMG_R128()) {
5617     Error(S, "r128 modifier is not supported on this GPU");
5618     return MatchOperand_ParseFail;
5619   }
5620   if (Name == "a16" && !isGFX9() && !hasGFX10A16()) {
5621     Error(S, "a16 modifier is not supported on this GPU");
5622     return MatchOperand_ParseFail;
5623   }
5624 
5625   if (isGFX9() && ImmTy == AMDGPUOperand::ImmTyA16)
5626     ImmTy = AMDGPUOperand::ImmTyR128A16;
5627 
5628   Operands.push_back(AMDGPUOperand::CreateImm(this, Bit, S, ImmTy));
5629   return MatchOperand_Success;
5630 }
5631 
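/// Parse a cache policy modifier (glc/slc/dlc/scc or its "no" form), reject
/// duplicates and modifiers unsupported on this subtarget, and fold the
/// result into a single CPol immediate operand.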
5632 OperandMatchResultTy
5633 AMDGPUAsmParser::parseCPol(OperandVector &Operands) {
5634   unsigned CPolOn = 0;
5635   unsigned CPolOff = 0;
5636   SMLoc S = getLoc();
5637 
5638   if (trySkipId("glc"))
5639     CPolOn = AMDGPU::CPol::GLC;
5640   else if (trySkipId("noglc"))
5641     CPolOff = AMDGPU::CPol::GLC;
5642   else if (trySkipId("slc"))
5643     CPolOn = AMDGPU::CPol::SLC;
5644   else if (trySkipId("noslc"))
5645     CPolOff = AMDGPU::CPol::SLC;
5646   else if (trySkipId("dlc"))
5647     CPolOn = AMDGPU::CPol::DLC;
5648   else if (trySkipId("nodlc"))
5649     CPolOff = AMDGPU::CPol::DLC;
5650   else if (trySkipId("scc"))
5651     CPolOn = AMDGPU::CPol::SCC;
5652   else if (trySkipId("noscc"))
5653     CPolOff = AMDGPU::CPol::SCC;
5654   else
5655     return MatchOperand_NoMatch;
5656 
5657   if (!isGFX10Plus() && ((CPolOn | CPolOff) & AMDGPU::CPol::DLC)) {
5658     Error(S, "dlc modifier is not supported on this GPU");
5659     return MatchOperand_ParseFail;
5660   }
5661 
5662   if (!isGFX90A() && ((CPolOn | CPolOff) & AMDGPU::CPol::SCC)) {
5663     Error(S, "scc modifier is not supported on this GPU");
5664     return MatchOperand_ParseFail;
5665   }
5666 
5667   if (CPolSeen & (CPolOn | CPolOff)) {
5668     Error(S, "duplicate cache policy modifier");
5669     return MatchOperand_ParseFail;
5670   }
5671 
5672   CPolSeen |= (CPolOn | CPolOff);
5673 
5674   for (unsigned I = 1; I != Operands.size(); ++I) {
5675     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
5676     if (Op.isCPol()) {
5677       Op.setImm((Op.getImm() | CPolOn) & ~CPolOff);
5678       return MatchOperand_Success;
5679     }
5680   }
5681 
5682   Operands.push_back(AMDGPUOperand::CreateImm(this, CPolOn, S,
5683                                               AMDGPUOperand::ImmTyCPol));
5684 
5685   return MatchOperand_Success;
5686 }
5687 
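/// Append the immediate recorded in OptionalIdx for ImmT, or Default if that
/// operand was not present in the source.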
5688 static void addOptionalImmOperand(
5689   MCInst& Inst, const OperandVector& Operands,
5690   AMDGPUAsmParser::OptionalImmIndexMap& OptionalIdx,
5691   AMDGPUOperand::ImmTy ImmT,
5692   int64_t Default = 0) {
5693   auto i = OptionalIdx.find(ImmT);
5694   if (i != OptionalIdx.end()) {
5695     unsigned Idx = i->second;
5696     ((AMDGPUOperand &)*Operands[Idx]).addImmOperands(Inst, 1);
5697   } else {
5698     Inst.addOperand(MCOperand::createImm(Default));
5699   }
5700 }
5701 
5702 OperandMatchResultTy
5703 AMDGPUAsmParser::parseStringWithPrefix(StringRef Prefix,
5704                                        StringRef &Value,
5705                                        SMLoc &StringLoc) {
5706   if (!trySkipId(Prefix, AsmToken::Colon))
5707     return MatchOperand_NoMatch;
5708 
5709   StringLoc = getLoc();
5710   return parseId(Value, "expected an identifier") ? MatchOperand_Success
5711                                                   : MatchOperand_ParseFail;
5712 }
5713 
5714 //===----------------------------------------------------------------------===//
5715 // MTBUF format
5716 //===----------------------------------------------------------------------===//
5717 
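/// Try to parse "<Pref>:<value>". Returns false on a parse error or an
/// out-of-range value; Fmt is left unchanged when the prefix is absent.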
5718 bool AMDGPUAsmParser::tryParseFmt(const char *Pref,
5719                                   int64_t MaxVal,
5720                                   int64_t &Fmt) {
5721   int64_t Val;
5722   SMLoc Loc = getLoc();
5723 
5724   auto Res = parseIntWithPrefix(Pref, Val);
5725   if (Res == MatchOperand_ParseFail)
5726     return false;
5727   if (Res == MatchOperand_NoMatch)
5728     return true;
5729 
5730   if (Val < 0 || Val > MaxVal) {
5731     Error(Loc, Twine("out of range ", StringRef(Pref)));
5732     return false;
5733   }
5734 
5735   Fmt = Val;
5736   return true;
5737 }
5738 
5739 // dfmt and nfmt (in a tbuffer instruction) are parsed as one to allow their
5740 // values to live in a joint format operand in the MCInst encoding.
5741 OperandMatchResultTy
5742 AMDGPUAsmParser::parseDfmtNfmt(int64_t &Format) {
5743   using namespace llvm::AMDGPU::MTBUFFormat;
5744 
5745   int64_t Dfmt = DFMT_UNDEF;
5746   int64_t Nfmt = NFMT_UNDEF;
5747 
5748   // dfmt and nfmt can appear in either order, and each is optional.
5749   for (int I = 0; I < 2; ++I) {
5750     if (Dfmt == DFMT_UNDEF && !tryParseFmt("dfmt", DFMT_MAX, Dfmt))
5751       return MatchOperand_ParseFail;
5752 
5753     if (Nfmt == NFMT_UNDEF && !tryParseFmt("nfmt", NFMT_MAX, Nfmt)) {
5754       return MatchOperand_ParseFail;
5755     }
5756     // Skip optional comma between dfmt/nfmt
5757     // but guard against 2 commas following each other.
5758     if ((Dfmt == DFMT_UNDEF) != (Nfmt == NFMT_UNDEF) &&
5759         !peekToken().is(AsmToken::Comma)) {
5760       trySkipToken(AsmToken::Comma);
5761     }
5762   }
5763 
5764   if (Dfmt == DFMT_UNDEF && Nfmt == NFMT_UNDEF)
5765     return MatchOperand_NoMatch;
5766 
5767   Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt;
5768   Nfmt = (Nfmt == NFMT_UNDEF) ? NFMT_DEFAULT : Nfmt;
5769 
5770   Format = encodeDfmtNfmt(Dfmt, Nfmt);
5771   return MatchOperand_Success;
5772 }
5773 
5774 OperandMatchResultTy
5775 AMDGPUAsmParser::parseUfmt(int64_t &Format) {
5776   using namespace llvm::AMDGPU::MTBUFFormat;
5777 
5778   int64_t Fmt = UFMT_UNDEF;
5779 
5780   if (!tryParseFmt("format", UFMT_MAX, Fmt))
5781     return MatchOperand_ParseFail;
5782 
5783   if (Fmt == UFMT_UNDEF)
5784     return MatchOperand_NoMatch;
5785 
5786   Format = Fmt;
5787   return MatchOperand_Success;
5788 }
5789 
5790 bool AMDGPUAsmParser::matchDfmtNfmt(int64_t &Dfmt,
5791                                     int64_t &Nfmt,
5792                                     StringRef FormatStr,
5793                                     SMLoc Loc) {
5794   using namespace llvm::AMDGPU::MTBUFFormat;
5795   int64_t Format;
5796 
5797   Format = getDfmt(FormatStr);
5798   if (Format != DFMT_UNDEF) {
5799     Dfmt = Format;
5800     return true;
5801   }
5802 
5803   Format = getNfmt(FormatStr, getSTI());
5804   if (Format != NFMT_UNDEF) {
5805     Nfmt = Format;
5806     return true;
5807   }
5808 
5809   Error(Loc, "unsupported format");
5810   return false;
5811 }
5812 
5813 OperandMatchResultTy
5814 AMDGPUAsmParser::parseSymbolicSplitFormat(StringRef FormatStr,
5815                                           SMLoc FormatLoc,
5816                                           int64_t &Format) {
5817   using namespace llvm::AMDGPU::MTBUFFormat;
5818 
5819   int64_t Dfmt = DFMT_UNDEF;
5820   int64_t Nfmt = NFMT_UNDEF;
5821   if (!matchDfmtNfmt(Dfmt, Nfmt, FormatStr, FormatLoc))
5822     return MatchOperand_ParseFail;
5823 
5824   if (trySkipToken(AsmToken::Comma)) {
5825     StringRef Str;
5826     SMLoc Loc = getLoc();
5827     if (!parseId(Str, "expected a format string") ||
5828         !matchDfmtNfmt(Dfmt, Nfmt, Str, Loc)) {
5829       return MatchOperand_ParseFail;
5830     }
5831     if (Dfmt == DFMT_UNDEF) {
5832       Error(Loc, "duplicate numeric format");
5833       return MatchOperand_ParseFail;
5834     } else if (Nfmt == NFMT_UNDEF) {
5835       Error(Loc, "duplicate data format");
5836       return MatchOperand_ParseFail;
5837     }
5838   }
5839 
5840   Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt;
5841   Nfmt = (Nfmt == NFMT_UNDEF) ? NFMT_DEFAULT : Nfmt;
5842 
5843   if (isGFX10Plus()) {
5844     auto Ufmt = convertDfmtNfmt2Ufmt(Dfmt, Nfmt);
5845     if (Ufmt == UFMT_UNDEF) {
5846       Error(FormatLoc, "unsupported format");
5847       return MatchOperand_ParseFail;
5848     }
5849     Format = Ufmt;
5850   } else {
5851     Format = encodeDfmtNfmt(Dfmt, Nfmt);
5852   }
5853 
5854   return MatchOperand_Success;
5855 }
5856 
5857 OperandMatchResultTy
5858 AMDGPUAsmParser::parseSymbolicUnifiedFormat(StringRef FormatStr,
5859                                             SMLoc Loc,
5860                                             int64_t &Format) {
5861   using namespace llvm::AMDGPU::MTBUFFormat;
5862 
5863   auto Id = getUnifiedFormat(FormatStr);
5864   if (Id == UFMT_UNDEF)
5865     return MatchOperand_NoMatch;
5866 
5867   if (!isGFX10Plus()) {
5868     Error(Loc, "unified format is not supported on this GPU");
5869     return MatchOperand_ParseFail;
5870   }
5871 
5872   Format = Id;
5873   return MatchOperand_Success;
5874 }
5875 
5876 OperandMatchResultTy
5877 AMDGPUAsmParser::parseNumericFormat(int64_t &Format) {
5878   using namespace llvm::AMDGPU::MTBUFFormat;
5879   SMLoc Loc = getLoc();
5880 
5881   if (!parseExpr(Format))
5882     return MatchOperand_ParseFail;
5883   if (!isValidFormatEncoding(Format, getSTI())) {
5884     Error(Loc, "out of range format");
5885     return MatchOperand_ParseFail;
5886   }
5887 
5888   return MatchOperand_Success;
5889 }
5890 
5891 OperandMatchResultTy
5892 AMDGPUAsmParser::parseSymbolicOrNumericFormat(int64_t &Format) {
5893   using namespace llvm::AMDGPU::MTBUFFormat;
5894 
5895   if (!trySkipId("format", AsmToken::Colon))
5896     return MatchOperand_NoMatch;
5897 
5898   if (trySkipToken(AsmToken::LBrac)) {
5899     StringRef FormatStr;
5900     SMLoc Loc = getLoc();
5901     if (!parseId(FormatStr, "expected a format string"))
5902       return MatchOperand_ParseFail;
5903 
5904     auto Res = parseSymbolicUnifiedFormat(FormatStr, Loc, Format);
5905     if (Res == MatchOperand_NoMatch)
5906       Res = parseSymbolicSplitFormat(FormatStr, Loc, Format);
5907     if (Res != MatchOperand_Success)
5908       return Res;
5909 
5910     if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
5911       return MatchOperand_ParseFail;
5912 
5913     return MatchOperand_Success;
5914   }
5915 
5916   return parseNumericFormat(Format);
5917 }
5918 
5919 OperandMatchResultTy
5920 AMDGPUAsmParser::parseFORMAT(OperandVector &Operands) {
5921   using namespace llvm::AMDGPU::MTBUFFormat;
5922 
5923   int64_t Format = getDefaultFormatEncoding(getSTI());
5924   OperandMatchResultTy Res;
5925   SMLoc Loc = getLoc();
5926 
5927   // Parse legacy format syntax.
5928   Res = isGFX10Plus() ? parseUfmt(Format) : parseDfmtNfmt(Format);
5929   if (Res == MatchOperand_ParseFail)
5930     return Res;
5931 
5932   bool FormatFound = (Res == MatchOperand_Success);
5933 
5934   Operands.push_back(
5935     AMDGPUOperand::CreateImm(this, Format, Loc, AMDGPUOperand::ImmTyFORMAT));
5936 
5937   if (FormatFound)
5938     trySkipToken(AsmToken::Comma);
5939 
5940   if (isToken(AsmToken::EndOfStatement)) {
5941     // We are expecting an soffset operand,
5942     // but let the matcher handle the error.
5943     return MatchOperand_Success;
5944   }
5945 
5946   // Parse soffset.
5947   Res = parseRegOrImm(Operands);
5948   if (Res != MatchOperand_Success)
5949     return Res;
5950 
5951   trySkipToken(AsmToken::Comma);
5952 
5953   if (!FormatFound) {
5954     Res = parseSymbolicOrNumericFormat(Format);
5955     if (Res == MatchOperand_ParseFail)
5956       return Res;
5957     if (Res == MatchOperand_Success) {
5958       auto Size = Operands.size();
5959       AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands[Size - 2]);
5960       assert(Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyFORMAT);
5961       Op.setImm(Format);
5962     }
5963     return MatchOperand_Success;
5964   }
5965 
5966   if (isId("format") && peekToken().is(AsmToken::Colon)) {
5967     Error(getLoc(), "duplicate format");
5968     return MatchOperand_ParseFail;
5969   }
5970   return MatchOperand_Success;
5971 }
5972 
5973 //===----------------------------------------------------------------------===//
5974 // ds
5975 //===----------------------------------------------------------------------===//
5976 
5977 void AMDGPUAsmParser::cvtDSOffset01(MCInst &Inst,
5978                                     const OperandVector &Operands) {
5979   OptionalImmIndexMap OptionalIdx;
5980 
5981   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
5982     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
5983 
5984     // Add the register arguments
5985     if (Op.isReg()) {
5986       Op.addRegOperands(Inst, 1);
5987       continue;
5988     }
5989 
5990     // Handle optional arguments
5991     OptionalIdx[Op.getImmTy()] = i;
5992   }
5993 
5994   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset0);
5995   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset1);
5996   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS);
5997 
5998   Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0
5999 }
6000 
6001 void AMDGPUAsmParser::cvtDSImpl(MCInst &Inst, const OperandVector &Operands,
6002                                 bool IsGdsHardcoded) {
6003   OptionalImmIndexMap OptionalIdx;
6004 
6005   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
6006     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
6007 
6008     // Add the register arguments
6009     if (Op.isReg()) {
6010       Op.addRegOperands(Inst, 1);
6011       continue;
6012     }
6013 
6014     if (Op.isToken() && Op.getToken() == "gds") {
6015       IsGdsHardcoded = true;
6016       continue;
6017     }
6018 
6019     // Handle optional arguments
6020     OptionalIdx[Op.getImmTy()] = i;
6021   }
6022 
6023   AMDGPUOperand::ImmTy OffsetType =
6024     (Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_gfx10 ||
6025      Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_gfx6_gfx7 ||
6026      Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_vi) ? AMDGPUOperand::ImmTySwizzle :
6027                                                       AMDGPUOperand::ImmTyOffset;
6028 
6029   addOptionalImmOperand(Inst, Operands, OptionalIdx, OffsetType);
6030 
6031   if (!IsGdsHardcoded) {
6032     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS);
6033   }
6034   Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0
6035 }
6036 
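/// Convert parsed export operands to an MCInst, computing the enable mask
/// from the non-off sources and handling the compressed (compr) form.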
6037 void AMDGPUAsmParser::cvtExp(MCInst &Inst, const OperandVector &Operands) {
6038   OptionalImmIndexMap OptionalIdx;
6039 
6040   unsigned OperandIdx[4];
6041   unsigned EnMask = 0;
6042   int SrcIdx = 0;
6043 
6044   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
6045     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
6046 
6047     // Add the register arguments
6048     if (Op.isReg()) {
6049       assert(SrcIdx < 4);
6050       OperandIdx[SrcIdx] = Inst.size();
6051       Op.addRegOperands(Inst, 1);
6052       ++SrcIdx;
6053       continue;
6054     }
6055 
6056     if (Op.isOff()) {
6057       assert(SrcIdx < 4);
6058       OperandIdx[SrcIdx] = Inst.size();
6059       Inst.addOperand(MCOperand::createReg(AMDGPU::NoRegister));
6060       ++SrcIdx;
6061       continue;
6062     }
6063 
6064     if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyExpTgt) {
6065       Op.addImmOperands(Inst, 1);
6066       continue;
6067     }
6068 
6069     if (Op.isToken() && Op.getToken() == "done")
6070       continue;
6071 
6072     // Handle optional arguments
6073     OptionalIdx[Op.getImmTy()] = i;
6074   }
6075 
6076   assert(SrcIdx == 4);
6077 
6078   bool Compr = false;
6079   if (OptionalIdx.find(AMDGPUOperand::ImmTyExpCompr) != OptionalIdx.end()) {
6080     Compr = true;
6081     Inst.getOperand(OperandIdx[1]) = Inst.getOperand(OperandIdx[2]);
6082     Inst.getOperand(OperandIdx[2]).setReg(AMDGPU::NoRegister);
6083     Inst.getOperand(OperandIdx[3]).setReg(AMDGPU::NoRegister);
6084   }
6085 
6086   for (auto i = 0; i < SrcIdx; ++i) {
6087     if (Inst.getOperand(OperandIdx[i]).getReg() != AMDGPU::NoRegister) {
6088       EnMask |= Compr? (0x3 << i * 2) : (0x1 << i);
6089     }
6090   }
6091 
6092   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpVM);
6093   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpCompr);
6094 
6095   Inst.addOperand(MCOperand::createImm(EnMask));
6096 }
6097 
6098 //===----------------------------------------------------------------------===//
6099 // s_waitcnt
6100 //===----------------------------------------------------------------------===//
6101 
6102 static bool
6103 encodeCnt(
6104   const AMDGPU::IsaVersion ISA,
6105   int64_t &IntVal,
6106   int64_t CntVal,
6107   bool Saturate,
6108   unsigned (*encode)(const IsaVersion &Version, unsigned, unsigned),
6109   unsigned (*decode)(const IsaVersion &Version, unsigned))
6110 {
6111   bool Failed = false;
6112 
6113   IntVal = encode(ISA, IntVal, CntVal);
6114   if (CntVal != decode(ISA, IntVal)) {
6115     if (Saturate) {
6116       IntVal = encode(ISA, IntVal, -1);
6117     } else {
6118       Failed = true;
6119     }
6120   }
6121   return Failed;
6122 }
6123 
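/// Parse one counter term of an s_waitcnt operand, e.g. vmcnt(0), and merge
/// its encoded value into IntVal. The *_sat counter names clamp values that
/// are too large instead of reporting an error.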
6124 bool AMDGPUAsmParser::parseCnt(int64_t &IntVal) {
6125 
6126   SMLoc CntLoc = getLoc();
6127   StringRef CntName = getTokenStr();
6128 
6129   if (!skipToken(AsmToken::Identifier, "expected a counter name") ||
6130       !skipToken(AsmToken::LParen, "expected a left parenthesis"))
6131     return false;
6132 
6133   int64_t CntVal;
6134   SMLoc ValLoc = getLoc();
6135   if (!parseExpr(CntVal))
6136     return false;
6137 
6138   AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
6139 
6140   bool Failed = true;
6141   bool Sat = CntName.endswith("_sat");
6142 
6143   if (CntName == "vmcnt" || CntName == "vmcnt_sat") {
6144     Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeVmcnt, decodeVmcnt);
6145   } else if (CntName == "expcnt" || CntName == "expcnt_sat") {
6146     Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeExpcnt, decodeExpcnt);
6147   } else if (CntName == "lgkmcnt" || CntName == "lgkmcnt_sat") {
6148     Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeLgkmcnt, decodeLgkmcnt);
6149   } else {
6150     Error(CntLoc, "invalid counter name " + CntName);
6151     return false;
6152   }
6153 
6154   if (Failed) {
6155     Error(ValLoc, "too large value for " + CntName);
6156     return false;
6157   }
6158 
6159   if (!skipToken(AsmToken::RParen, "expected a closing parenthesis"))
6160     return false;
6161 
6162   if (trySkipToken(AsmToken::Amp) || trySkipToken(AsmToken::Comma)) {
6163     if (isToken(AsmToken::EndOfStatement)) {
6164       Error(getLoc(), "expected a counter name");
6165       return false;
6166     }
6167   }
6168 
6169   return true;
6170 }
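// Illustrative counter expressions accepted by parseCnt above, assuming the
// standard s_waitcnt syntax:
//   s_waitcnt vmcnt(0)
//   s_waitcnt vmcnt(1) & lgkmcnt(2)
//   s_waitcnt expcnt_sat(100)   // '_sat' clamps to the field maximum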
6171 
6172 OperandMatchResultTy
6173 AMDGPUAsmParser::parseSWaitCntOps(OperandVector &Operands) {
6174   AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
6175   int64_t Waitcnt = getWaitcntBitMask(ISA);
6176   SMLoc S = getLoc();
6177 
6178   if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) {
6179     while (!isToken(AsmToken::EndOfStatement)) {
6180       if (!parseCnt(Waitcnt))
6181         return MatchOperand_ParseFail;
6182     }
6183   } else {
6184     if (!parseExpr(Waitcnt))
6185       return MatchOperand_ParseFail;
6186   }
6187 
6188   Operands.push_back(AMDGPUOperand::CreateImm(this, Waitcnt, S));
6189   return MatchOperand_Success;
6190 }
6191 
6192 bool
6193 AMDGPUOperand::isSWaitCnt() const {
6194   return isImm();
6195 }
6196 
6197 //===----------------------------------------------------------------------===//
6198 // hwreg
6199 //===----------------------------------------------------------------------===//
6200 
6201 bool
6202 AMDGPUAsmParser::parseHwregBody(OperandInfoTy &HwReg,
6203                                 OperandInfoTy &Offset,
6204                                 OperandInfoTy &Width) {
6205   using namespace llvm::AMDGPU::Hwreg;
6206 
6207   // The register may be specified by name or using a numeric code
6208   HwReg.Loc = getLoc();
6209   if (isToken(AsmToken::Identifier) &&
6210       (HwReg.Id = getHwregId(getTokenStr(), getSTI())) >= 0) {
6211     HwReg.IsSymbolic = true;
6212     lex(); // skip register name
6213   } else if (!parseExpr(HwReg.Id, "a register name")) {
6214     return false;
6215   }
6216 
6217   if (trySkipToken(AsmToken::RParen))
6218     return true;
6219 
6220   // parse optional params
6221   if (!skipToken(AsmToken::Comma, "expected a comma or a closing parenthesis"))
6222     return false;
6223 
6224   Offset.Loc = getLoc();
6225   if (!parseExpr(Offset.Id))
6226     return false;
6227 
6228   if (!skipToken(AsmToken::Comma, "expected a comma"))
6229     return false;
6230 
6231   Width.Loc = getLoc();
6232   return parseExpr(Width.Id) &&
6233          skipToken(AsmToken::RParen, "expected a closing parenthesis");
6234 }
6235 
6236 bool
6237 AMDGPUAsmParser::validateHwreg(const OperandInfoTy &HwReg,
6238                                const OperandInfoTy &Offset,
6239                                const OperandInfoTy &Width) {
6240 
6241   using namespace llvm::AMDGPU::Hwreg;
6242 
6243   if (HwReg.IsSymbolic && !isValidHwreg(HwReg.Id, getSTI())) {
6244     Error(HwReg.Loc,
6245           "specified hardware register is not supported on this GPU");
6246     return false;
6247   }
6248   if (!isValidHwreg(HwReg.Id)) {
6249     Error(HwReg.Loc,
6250           "invalid code of hardware register: only 6-bit values are legal");
6251     return false;
6252   }
6253   if (!isValidHwregOffset(Offset.Id)) {
6254     Error(Offset.Loc, "invalid bit offset: only 5-bit values are legal");
6255     return false;
6256   }
6257   if (!isValidHwregWidth(Width.Id)) {
6258     Error(Width.Loc,
6259           "invalid bitfield width: only values from 1 to 32 are legal");
6260     return false;
6261   }
6262   return true;
6263 }
6264 
6265 OperandMatchResultTy
6266 AMDGPUAsmParser::parseHwreg(OperandVector &Operands) {
6267   using namespace llvm::AMDGPU::Hwreg;
6268 
6269   int64_t ImmVal = 0;
6270   SMLoc Loc = getLoc();
6271 
6272   if (trySkipId("hwreg", AsmToken::LParen)) {
6273     OperandInfoTy HwReg(ID_UNKNOWN_);
6274     OperandInfoTy Offset(OFFSET_DEFAULT_);
6275     OperandInfoTy Width(WIDTH_DEFAULT_);
6276     if (parseHwregBody(HwReg, Offset, Width) &&
6277         validateHwreg(HwReg, Offset, Width)) {
6278       ImmVal = encodeHwreg(HwReg.Id, Offset.Id, Width.Id);
6279     } else {
6280       return MatchOperand_ParseFail;
6281     }
6282   } else if (parseExpr(ImmVal, "a hwreg macro")) {
6283     if (ImmVal < 0 || !isUInt<16>(ImmVal)) {
6284       Error(Loc, "invalid immediate: only 16-bit values are legal");
6285       return MatchOperand_ParseFail;
6286     }
6287   } else {
6288     return MatchOperand_ParseFail;
6289   }
6290 
6291   Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTyHwreg));
6292   return MatchOperand_Success;
6293 }
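// Illustrative hwreg operands accepted by parseHwreg above, assuming the
// standard s_getreg/s_setreg syntax (register names vary by GPU):
//   s_getreg_b32 s0, hwreg(HW_REG_MODE)          // symbolic name only
//   s_getreg_b32 s0, hwreg(HW_REG_MODE, 0, 4)    // explicit offset and width
//   s_getreg_b32 s0, hwreg(5, 1, 3)              // numeric register code
//   s_getreg_b32 s0, 0x1234                      // raw 16-bit immediate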
6294 
6295 bool AMDGPUOperand::isHwreg() const {
6296   return isImmTy(ImmTyHwreg);
6297 }
6298 
6299 //===----------------------------------------------------------------------===//
6300 // sendmsg
6301 //===----------------------------------------------------------------------===//
6302 
6303 bool
6304 AMDGPUAsmParser::parseSendMsgBody(OperandInfoTy &Msg,
6305                                   OperandInfoTy &Op,
6306                                   OperandInfoTy &Stream) {
6307   using namespace llvm::AMDGPU::SendMsg;
6308 
6309   Msg.Loc = getLoc();
6310   if (isToken(AsmToken::Identifier) && (Msg.Id = getMsgId(getTokenStr())) >= 0) {
6311     Msg.IsSymbolic = true;
6312     lex(); // skip message name
6313   } else if (!parseExpr(Msg.Id, "a message name")) {
6314     return false;
6315   }
6316 
6317   if (trySkipToken(AsmToken::Comma)) {
6318     Op.IsDefined = true;
6319     Op.Loc = getLoc();
6320     if (isToken(AsmToken::Identifier) &&
6321         (Op.Id = getMsgOpId(Msg.Id, getTokenStr())) >= 0) {
6322       lex(); // skip operation name
6323     } else if (!parseExpr(Op.Id, "an operation name")) {
6324       return false;
6325     }
6326 
6327     if (trySkipToken(AsmToken::Comma)) {
6328       Stream.IsDefined = true;
6329       Stream.Loc = getLoc();
6330       if (!parseExpr(Stream.Id))
6331         return false;
6332     }
6333   }
6334 
6335   return skipToken(AsmToken::RParen, "expected a closing parenthesis");
6336 }
6337 
6338 bool
6339 AMDGPUAsmParser::validateSendMsg(const OperandInfoTy &Msg,
6340                                  const OperandInfoTy &Op,
6341                                  const OperandInfoTy &Stream) {
6342   using namespace llvm::AMDGPU::SendMsg;
6343 
6344   // Validation strictness depends on whether the message is specified
6345   // in symbolic or in numeric form. In the latter case we only check
6346   // that the operands can be encoded.
6347   bool Strict = Msg.IsSymbolic;
6348 
6349   if (!isValidMsgId(Msg.Id, getSTI(), Strict)) {
6350     Error(Msg.Loc, "invalid message id");
6351     return false;
6352   }
6353   if (Strict && (msgRequiresOp(Msg.Id) != Op.IsDefined)) {
6354     if (Op.IsDefined) {
6355       Error(Op.Loc, "message does not support operations");
6356     } else {
6357       Error(Msg.Loc, "missing message operation");
6358     }
6359     return false;
6360   }
6361   if (!isValidMsgOp(Msg.Id, Op.Id, getSTI(), Strict)) {
6362     Error(Op.Loc, "invalid operation id");
6363     return false;
6364   }
6365   if (Strict && !msgSupportsStream(Msg.Id, Op.Id) && Stream.IsDefined) {
6366     Error(Stream.Loc, "message operation does not support streams");
6367     return false;
6368   }
6369   if (!isValidMsgStream(Msg.Id, Op.Id, Stream.Id, getSTI(), Strict)) {
6370     Error(Stream.Loc, "invalid message stream id");
6371     return false;
6372   }
6373   return true;
6374 }
6375 
6376 OperandMatchResultTy
6377 AMDGPUAsmParser::parseSendMsgOp(OperandVector &Operands) {
6378   using namespace llvm::AMDGPU::SendMsg;
6379 
6380   int64_t ImmVal = 0;
6381   SMLoc Loc = getLoc();
6382 
6383   if (trySkipId("sendmsg", AsmToken::LParen)) {
6384     OperandInfoTy Msg(ID_UNKNOWN_);
6385     OperandInfoTy Op(OP_NONE_);
6386     OperandInfoTy Stream(STREAM_ID_NONE_);
6387     if (parseSendMsgBody(Msg, Op, Stream) &&
6388         validateSendMsg(Msg, Op, Stream)) {
6389       ImmVal = encodeMsg(Msg.Id, Op.Id, Stream.Id);
6390     } else {
6391       return MatchOperand_ParseFail;
6392     }
6393   } else if (parseExpr(ImmVal, "a sendmsg macro")) {
6394     if (ImmVal < 0 || !isUInt<16>(ImmVal)) {
6395       Error(Loc, "invalid immediate: only 16-bit values are legal");
6396       return MatchOperand_ParseFail;
6397     }
6398   } else {
6399     return MatchOperand_ParseFail;
6400   }
6401 
6402   Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTySendMsg));
6403   return MatchOperand_Success;
6404 }
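// Illustrative sendmsg operands accepted by parseSendMsgOp above, assuming
// the standard s_sendmsg syntax (message names vary by GPU):
//   s_sendmsg sendmsg(MSG_INTERRUPT)
//   s_sendmsg sendmsg(MSG_GS, GS_OP_EMIT, 0)
//   s_sendmsg 0x22                               // raw 16-bit immediate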
6405 
6406 bool AMDGPUOperand::isSendMsg() const {
6407   return isImmTy(ImmTySendMsg);
6408 }
6409 
6410 //===----------------------------------------------------------------------===//
6411 // v_interp
6412 //===----------------------------------------------------------------------===//
6413 
6414 OperandMatchResultTy AMDGPUAsmParser::parseInterpSlot(OperandVector &Operands) {
6415   StringRef Str;
6416   SMLoc S = getLoc();
6417 
6418   if (!parseId(Str))
6419     return MatchOperand_NoMatch;
6420 
6421   int Slot = StringSwitch<int>(Str)
6422     .Case("p10", 0)
6423     .Case("p20", 1)
6424     .Case("p0", 2)
6425     .Default(-1);
6426 
6427   if (Slot == -1) {
6428     Error(S, "invalid interpolation slot");
6429     return MatchOperand_ParseFail;
6430   }
6431 
6432   Operands.push_back(AMDGPUOperand::CreateImm(this, Slot, S,
6433                                               AMDGPUOperand::ImmTyInterpSlot));
6434   return MatchOperand_Success;
6435 }
6436 
6437 OperandMatchResultTy AMDGPUAsmParser::parseInterpAttr(OperandVector &Operands) {
6438   StringRef Str;
6439   SMLoc S = getLoc();
6440 
6441   if (!parseId(Str))
6442     return MatchOperand_NoMatch;
6443 
6444   if (!Str.startswith("attr")) {
6445     Error(S, "invalid interpolation attribute");
6446     return MatchOperand_ParseFail;
6447   }
6448 
6449   StringRef Chan = Str.take_back(2);
6450   int AttrChan = StringSwitch<int>(Chan)
6451     .Case(".x", 0)
6452     .Case(".y", 1)
6453     .Case(".z", 2)
6454     .Case(".w", 3)
6455     .Default(-1);
6456   if (AttrChan == -1) {
6457     Error(S, "invalid or missing interpolation attribute channel");
6458     return MatchOperand_ParseFail;
6459   }
6460 
6461   Str = Str.drop_back(2).drop_front(4);
6462 
6463   uint8_t Attr;
6464   if (Str.getAsInteger(10, Attr)) {
6465     Error(S, "invalid or missing interpolation attribute number");
6466     return MatchOperand_ParseFail;
6467   }
6468 
6469   if (Attr > 63) {
6470     Error(S, "out of bounds interpolation attribute number");
6471     return MatchOperand_ParseFail;
6472   }
6473 
6474   SMLoc SChan = SMLoc::getFromPointer(Chan.data());
6475 
6476   Operands.push_back(AMDGPUOperand::CreateImm(this, Attr, S,
6477                                               AMDGPUOperand::ImmTyInterpAttr));
6478   Operands.push_back(AMDGPUOperand::CreateImm(this, AttrChan, SChan,
6479                                               AMDGPUOperand::ImmTyAttrChan));
6480   return MatchOperand_Success;
6481 }
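// Illustrative v_interp operands handled by the two parsers above, assuming
// the standard syntax (slot p10/p20/p0, attribute 0..63, channel .x/.y/.z/.w):
//   v_interp_p1_f32 v0, v1, attr0.x
//   v_interp_mov_f32 v0, p10, attr3.w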
6482 
6483 //===----------------------------------------------------------------------===//
6484 // exp
6485 //===----------------------------------------------------------------------===//
6486 
6487 OperandMatchResultTy AMDGPUAsmParser::parseExpTgt(OperandVector &Operands) {
6488   using namespace llvm::AMDGPU::Exp;
6489 
6490   StringRef Str;
6491   SMLoc S = getLoc();
6492 
6493   if (!parseId(Str))
6494     return MatchOperand_NoMatch;
6495 
6496   unsigned Id = getTgtId(Str);
6497   if (Id == ET_INVALID || !isSupportedTgtId(Id, getSTI())) {
6498     Error(S, (Id == ET_INVALID) ?
6499                 "invalid exp target" :
6500                 "exp target is not supported on this GPU");
6501     return MatchOperand_ParseFail;
6502   }
6503 
6504   Operands.push_back(AMDGPUOperand::CreateImm(this, Id, S,
6505                                               AMDGPUOperand::ImmTyExpTgt));
6506   return MatchOperand_Success;
6507 }
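// Illustrative exp targets accepted above (availability varies by GPU):
//   exp mrt0 ...   exp mrtz ...   exp null ...   exp pos0 ...   exp param31 ...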
6508 
6509 //===----------------------------------------------------------------------===//
6510 // parser helpers
6511 //===----------------------------------------------------------------------===//
6512 
6513 bool
6514 AMDGPUAsmParser::isId(const AsmToken &Token, const StringRef Id) const {
6515   return Token.is(AsmToken::Identifier) && Token.getString() == Id;
6516 }
6517 
6518 bool
6519 AMDGPUAsmParser::isId(const StringRef Id) const {
6520   return isId(getToken(), Id);
6521 }
6522 
6523 bool
6524 AMDGPUAsmParser::isToken(const AsmToken::TokenKind Kind) const {
6525   return getTokenKind() == Kind;
6526 }
6527 
6528 bool
6529 AMDGPUAsmParser::trySkipId(const StringRef Id) {
6530   if (isId(Id)) {
6531     lex();
6532     return true;
6533   }
6534   return false;
6535 }
6536 
6537 bool
6538 AMDGPUAsmParser::trySkipId(const StringRef Pref, const StringRef Id) {
6539   if (isToken(AsmToken::Identifier)) {
6540     StringRef Tok = getTokenStr();
6541     if (Tok.startswith(Pref) && Tok.drop_front(Pref.size()) == Id) {
6542       lex();
6543       return true;
6544     }
6545   }
6546   return false;
6547 }
6548 
6549 bool
6550 AMDGPUAsmParser::trySkipId(const StringRef Id, const AsmToken::TokenKind Kind) {
6551   if (isId(Id) && peekToken().is(Kind)) {
6552     lex();
6553     lex();
6554     return true;
6555   }
6556   return false;
6557 }
6558 
6559 bool
6560 AMDGPUAsmParser::trySkipToken(const AsmToken::TokenKind Kind) {
6561   if (isToken(Kind)) {
6562     lex();
6563     return true;
6564   }
6565   return false;
6566 }
6567 
6568 bool
6569 AMDGPUAsmParser::skipToken(const AsmToken::TokenKind Kind,
6570                            const StringRef ErrMsg) {
6571   if (!trySkipToken(Kind)) {
6572     Error(getLoc(), ErrMsg);
6573     return false;
6574   }
6575   return true;
6576 }
6577 
6578 bool
6579 AMDGPUAsmParser::parseExpr(int64_t &Imm, StringRef Expected) {
6580   SMLoc S = getLoc();
6581 
6582   const MCExpr *Expr;
6583   if (Parser.parseExpression(Expr))
6584     return false;
6585 
6586   if (Expr->evaluateAsAbsolute(Imm))
6587     return true;
6588 
6589   if (Expected.empty()) {
6590     Error(S, "expected absolute expression");
6591   } else {
6592     Error(S, Twine("expected ", Expected) +
6593              Twine(" or an absolute expression"));
6594   }
6595   return false;
6596 }
6597 
6598 bool
6599 AMDGPUAsmParser::parseExpr(OperandVector &Operands) {
6600   SMLoc S = getLoc();
6601 
6602   const MCExpr *Expr;
6603   if (Parser.parseExpression(Expr))
6604     return false;
6605 
6606   int64_t IntVal;
6607   if (Expr->evaluateAsAbsolute(IntVal)) {
6608     Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S));
6609   } else {
6610     Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S));
6611   }
6612   return true;
6613 }
6614 
6615 bool
6616 AMDGPUAsmParser::parseString(StringRef &Val, const StringRef ErrMsg) {
6617   if (isToken(AsmToken::String)) {
6618     Val = getToken().getStringContents();
6619     lex();
6620     return true;
6621   } else {
6622     Error(getLoc(), ErrMsg);
6623     return false;
6624   }
6625 }
6626 
6627 bool
6628 AMDGPUAsmParser::parseId(StringRef &Val, const StringRef ErrMsg) {
6629   if (isToken(AsmToken::Identifier)) {
6630     Val = getTokenStr();
6631     lex();
6632     return true;
6633   } else {
6634     if (!ErrMsg.empty())
6635       Error(getLoc(), ErrMsg);
6636     return false;
6637   }
6638 }
6639 
6640 AsmToken
6641 AMDGPUAsmParser::getToken() const {
6642   return Parser.getTok();
6643 }
6644 
6645 AsmToken
6646 AMDGPUAsmParser::peekToken() {
6647   return isToken(AsmToken::EndOfStatement) ? getToken() : getLexer().peekTok();
6648 }
6649 
6650 void
6651 AMDGPUAsmParser::peekTokens(MutableArrayRef<AsmToken> Tokens) {
6652   auto TokCount = getLexer().peekTokens(Tokens);
6653 
6654   for (auto Idx = TokCount; Idx < Tokens.size(); ++Idx)
6655     Tokens[Idx] = AsmToken(AsmToken::Error, "");
6656 }
6657 
6658 AsmToken::TokenKind
6659 AMDGPUAsmParser::getTokenKind() const {
6660   return getLexer().getKind();
6661 }
6662 
6663 SMLoc
6664 AMDGPUAsmParser::getLoc() const {
6665   return getToken().getLoc();
6666 }
6667 
6668 StringRef
6669 AMDGPUAsmParser::getTokenStr() const {
6670   return getToken().getString();
6671 }
6672 
6673 void
6674 AMDGPUAsmParser::lex() {
6675   Parser.Lex();
6676 }
6677 
6678 SMLoc
6679 AMDGPUAsmParser::getOperandLoc(std::function<bool(const AMDGPUOperand&)> Test,
6680                                const OperandVector &Operands) const {
6681   for (unsigned i = Operands.size() - 1; i > 0; --i) {
6682     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
6683     if (Test(Op))
6684       return Op.getStartLoc();
6685   }
6686   return ((AMDGPUOperand &)*Operands[0]).getStartLoc();
6687 }
6688 
6689 SMLoc
6690 AMDGPUAsmParser::getImmLoc(AMDGPUOperand::ImmTy Type,
6691                            const OperandVector &Operands) const {
6692   auto Test = [=](const AMDGPUOperand& Op) { return Op.isImmTy(Type); };
6693   return getOperandLoc(Test, Operands);
6694 }
6695 
6696 SMLoc
6697 AMDGPUAsmParser::getRegLoc(unsigned Reg,
6698                            const OperandVector &Operands) const {
6699   auto Test = [=](const AMDGPUOperand& Op) {
6700     return Op.isRegKind() && Op.getReg() == Reg;
6701   };
6702   return getOperandLoc(Test, Operands);
6703 }
6704 
6705 SMLoc
6706 AMDGPUAsmParser::getLitLoc(const OperandVector &Operands) const {
6707   auto Test = [](const AMDGPUOperand& Op) {
6708     return Op.isImmKindLiteral() || Op.isExpr();
6709   };
6710   return getOperandLoc(Test, Operands);
6711 }
6712 
6713 SMLoc
6714 AMDGPUAsmParser::getConstLoc(const OperandVector &Operands) const {
6715   auto Test = [](const AMDGPUOperand& Op) {
6716     return Op.isImmKindConst();
6717   };
6718   return getOperandLoc(Test, Operands);
6719 }
6720 
6721 //===----------------------------------------------------------------------===//
6722 // swizzle
6723 //===----------------------------------------------------------------------===//
6724 
6725 LLVM_READNONE
6726 static unsigned
6727 encodeBitmaskPerm(const unsigned AndMask,
6728                   const unsigned OrMask,
6729                   const unsigned XorMask) {
6730   using namespace llvm::AMDGPU::Swizzle;
6731 
6732   return BITMASK_PERM_ENC |
6733          (AndMask << BITMASK_AND_SHIFT) |
6734          (OrMask  << BITMASK_OR_SHIFT)  |
6735          (XorMask << BITMASK_XOR_SHIFT);
6736 }
6737 
6738 bool
6739 AMDGPUAsmParser::parseSwizzleOperand(int64_t &Op,
6740                                      const unsigned MinVal,
6741                                      const unsigned MaxVal,
6742                                      const StringRef ErrMsg,
6743                                      SMLoc &Loc) {
6744   if (!skipToken(AsmToken::Comma, "expected a comma")) {
6745     return false;
6746   }
6747   Loc = getLoc();
6748   if (!parseExpr(Op)) {
6749     return false;
6750   }
6751   if (Op < MinVal || Op > MaxVal) {
6752     Error(Loc, ErrMsg);
6753     return false;
6754   }
6755 
6756   return true;
6757 }
6758 
6759 bool
6760 AMDGPUAsmParser::parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
6761                                       const unsigned MinVal,
6762                                       const unsigned MaxVal,
6763                                       const StringRef ErrMsg) {
6764   SMLoc Loc;
6765   for (unsigned i = 0; i < OpNum; ++i) {
6766     if (!parseSwizzleOperand(Op[i], MinVal, MaxVal, ErrMsg, Loc))
6767       return false;
6768   }
6769 
6770   return true;
6771 }
6772 
6773 bool
6774 AMDGPUAsmParser::parseSwizzleQuadPerm(int64_t &Imm) {
6775   using namespace llvm::AMDGPU::Swizzle;
6776 
6777   int64_t Lane[LANE_NUM];
6778   if (parseSwizzleOperands(LANE_NUM, Lane, 0, LANE_MAX,
6779                            "expected a 2-bit lane id")) {
6780     Imm = QUAD_PERM_ENC;
6781     for (unsigned I = 0; I < LANE_NUM; ++I) {
6782       Imm |= Lane[I] << (LANE_SHIFT * I);
6783     }
6784     return true;
6785   }
6786   return false;
6787 }
6788 
6789 bool
6790 AMDGPUAsmParser::parseSwizzleBroadcast(int64_t &Imm) {
6791   using namespace llvm::AMDGPU::Swizzle;
6792 
6793   SMLoc Loc;
6794   int64_t GroupSize;
6795   int64_t LaneIdx;
6796 
6797   if (!parseSwizzleOperand(GroupSize,
6798                            2, 32,
6799                            "group size must be in the interval [2,32]",
6800                            Loc)) {
6801     return false;
6802   }
6803   if (!isPowerOf2_64(GroupSize)) {
6804     Error(Loc, "group size must be a power of two");
6805     return false;
6806   }
6807   if (parseSwizzleOperand(LaneIdx,
6808                           0, GroupSize - 1,
6809                           "lane id must be in the interval [0,group size - 1]",
6810                           Loc)) {
6811     Imm = encodeBitmaskPerm(BITMASK_MAX - GroupSize + 1, LaneIdx, 0);
6812     return true;
6813   }
6814   return false;
6815 }
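// Worked example (illustrative): BROADCAST with group size 4 and lane 2 yields
// and_mask = BITMASK_MAX - 4 + 1 = 0b11100, or_mask = 2, xor_mask = 0, i.e.
// every lane in a group of four reads from lane 2 of its group.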
6816 
6817 bool
6818 AMDGPUAsmParser::parseSwizzleReverse(int64_t &Imm) {
6819   using namespace llvm::AMDGPU::Swizzle;
6820 
6821   SMLoc Loc;
6822   int64_t GroupSize;
6823 
6824   if (!parseSwizzleOperand(GroupSize,
6825                            2, 32,
6826                            "group size must be in the interval [2,32]",
6827                            Loc)) {
6828     return false;
6829   }
6830   if (!isPowerOf2_64(GroupSize)) {
6831     Error(Loc, "group size must be a power of two");
6832     return false;
6833   }
6834 
6835   Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize - 1);
6836   return true;
6837 }
6838 
6839 bool
6840 AMDGPUAsmParser::parseSwizzleSwap(int64_t &Imm) {
6841   using namespace llvm::AMDGPU::Swizzle;
6842 
6843   SMLoc Loc;
6844   int64_t GroupSize;
6845 
6846   if (!parseSwizzleOperand(GroupSize,
6847                            1, 16,
6848                            "group size must be in the interval [1,16]",
6849                            Loc)) {
6850     return false;
6851   }
6852   if (!isPowerOf2_64(GroupSize)) {
6853     Error(Loc, "group size must be a power of two");
6854     return false;
6855   }
6856 
6857   Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize);
6858   return true;
6859 }
6860 
6861 bool
6862 AMDGPUAsmParser::parseSwizzleBitmaskPerm(int64_t &Imm) {
6863   using namespace llvm::AMDGPU::Swizzle;
6864 
6865   if (!skipToken(AsmToken::Comma, "expected a comma")) {
6866     return false;
6867   }
6868 
6869   StringRef Ctl;
6870   SMLoc StrLoc = getLoc();
6871   if (!parseString(Ctl)) {
6872     return false;
6873   }
6874   if (Ctl.size() != BITMASK_WIDTH) {
6875     Error(StrLoc, "expected a 5-character mask");
6876     return false;
6877   }
6878 
6879   unsigned AndMask = 0;
6880   unsigned OrMask = 0;
6881   unsigned XorMask = 0;
6882 
6883   for (size_t i = 0; i < Ctl.size(); ++i) {
6884     unsigned Mask = 1 << (BITMASK_WIDTH - 1 - i);
6885     switch(Ctl[i]) {
6886     default:
6887       Error(StrLoc, "invalid mask");
6888       return false;
6889     case '0':
6890       break;
6891     case '1':
6892       OrMask |= Mask;
6893       break;
6894     case 'p':
6895       AndMask |= Mask;
6896       break;
6897     case 'i':
6898       AndMask |= Mask;
6899       XorMask |= Mask;
6900       break;
6901     }
6902   }
6903 
6904   Imm = encodeBitmaskPerm(AndMask, OrMask, XorMask);
6905   return true;
6906 }
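// Illustrative example: swizzle(BITMASK_PERM, "01pi0") maps the control string
// to lane-id bits from most to least significant, where '0' forces a bit to 0,
// '1' forces it to 1, 'p' preserves it, and 'i' inverts it.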
6907 
6908 bool
6909 AMDGPUAsmParser::parseSwizzleOffset(int64_t &Imm) {
6910 
6911   SMLoc OffsetLoc = getLoc();
6912 
6913   if (!parseExpr(Imm, "a swizzle macro")) {
6914     return false;
6915   }
6916   if (!isUInt<16>(Imm)) {
6917     Error(OffsetLoc, "expected a 16-bit offset");
6918     return false;
6919   }
6920   return true;
6921 }
6922 
6923 bool
6924 AMDGPUAsmParser::parseSwizzleMacro(int64_t &Imm) {
6925   using namespace llvm::AMDGPU::Swizzle;
6926 
6927   if (skipToken(AsmToken::LParen, "expected a left parenthesis")) {
6928 
6929     SMLoc ModeLoc = getLoc();
6930     bool Ok = false;
6931 
6932     if (trySkipId(IdSymbolic[ID_QUAD_PERM])) {
6933       Ok = parseSwizzleQuadPerm(Imm);
6934     } else if (trySkipId(IdSymbolic[ID_BITMASK_PERM])) {
6935       Ok = parseSwizzleBitmaskPerm(Imm);
6936     } else if (trySkipId(IdSymbolic[ID_BROADCAST])) {
6937       Ok = parseSwizzleBroadcast(Imm);
6938     } else if (trySkipId(IdSymbolic[ID_SWAP])) {
6939       Ok = parseSwizzleSwap(Imm);
6940     } else if (trySkipId(IdSymbolic[ID_REVERSE])) {
6941       Ok = parseSwizzleReverse(Imm);
6942     } else {
6943       Error(ModeLoc, "expected a swizzle mode");
6944     }
6945 
6946     return Ok && skipToken(AsmToken::RParen, "expected a closing parenthesis");
6947   }
6948 
6949   return false;
6950 }
6951 
6952 OperandMatchResultTy
6953 AMDGPUAsmParser::parseSwizzleOp(OperandVector &Operands) {
6954   SMLoc S = getLoc();
6955   int64_t Imm = 0;
6956 
6957   if (trySkipId("offset")) {
6958 
6959     bool Ok = false;
6960     if (skipToken(AsmToken::Colon, "expected a colon")) {
6961       if (trySkipId("swizzle")) {
6962         Ok = parseSwizzleMacro(Imm);
6963       } else {
6964         Ok = parseSwizzleOffset(Imm);
6965       }
6966     }
6967 
6968     Operands.push_back(AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTySwizzle));
6969 
6970     return Ok ? MatchOperand_Success : MatchOperand_ParseFail;
6971   } else {
6972     // Swizzle "offset" operand is optional.
6973     // If it is omitted, try parsing other optional operands.
6974     return parseOptionalOpr(Operands);
6975   }
6976 }
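// Illustrative ds_swizzle_b32 operands accepted above, assuming the standard
// swizzle macro syntax:
//   ds_swizzle_b32 v0, v1 offset:swizzle(QUAD_PERM, 0, 1, 2, 3)
//   ds_swizzle_b32 v0, v1 offset:swizzle(BROADCAST, 8, 0)
//   ds_swizzle_b32 v0, v1 offset:0x8000          // raw 16-bit offset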
6977 
6978 bool
6979 AMDGPUOperand::isSwizzle() const {
6980   return isImmTy(ImmTySwizzle);
6981 }
6982 
6983 //===----------------------------------------------------------------------===//
6984 // VGPR Index Mode
6985 //===----------------------------------------------------------------------===//
6986 
6987 int64_t AMDGPUAsmParser::parseGPRIdxMacro() {
6988 
6989   using namespace llvm::AMDGPU::VGPRIndexMode;
6990 
6991   if (trySkipToken(AsmToken::RParen)) {
6992     return OFF;
6993   }
6994 
6995   int64_t Imm = 0;
6996 
6997   while (true) {
6998     unsigned Mode = 0;
6999     SMLoc S = getLoc();
7000 
7001     for (unsigned ModeId = ID_MIN; ModeId <= ID_MAX; ++ModeId) {
7002       if (trySkipId(IdSymbolic[ModeId])) {
7003         Mode = 1 << ModeId;
7004         break;
7005       }
7006     }
7007 
7008     if (Mode == 0) {
7009       Error(S, (Imm == 0) ?
7010                "expected a VGPR index mode or a closing parenthesis" :
7011                "expected a VGPR index mode");
7012       return UNDEF;
7013     }
7014 
7015     if (Imm & Mode) {
7016       Error(S, "duplicate VGPR index mode");
7017       return UNDEF;
7018     }
7019     Imm |= Mode;
7020 
7021     if (trySkipToken(AsmToken::RParen))
7022       break;
7023     if (!skipToken(AsmToken::Comma,
7024                    "expected a comma or a closing parenthesis"))
7025       return UNDEF;
7026   }
7027 
7028   return Imm;
7029 }
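// Illustrative gpr_idx operands accepted above, assuming the standard
// s_set_gpr_idx_on syntax:
//   s_set_gpr_idx_on s0, gpr_idx(SRC0)
//   s_set_gpr_idx_on s0, gpr_idx(SRC0, SRC1, DST)
//   s_set_gpr_idx_on s0, 9                       // raw 4-bit mode immediate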
7030 
7031 OperandMatchResultTy
7032 AMDGPUAsmParser::parseGPRIdxMode(OperandVector &Operands) {
7033 
7034   using namespace llvm::AMDGPU::VGPRIndexMode;
7035 
7036   int64_t Imm = 0;
7037   SMLoc S = getLoc();
7038 
7039   if (trySkipId("gpr_idx", AsmToken::LParen)) {
7040     Imm = parseGPRIdxMacro();
7041     if (Imm == UNDEF)
7042       return MatchOperand_ParseFail;
7043   } else {
7044     if (getParser().parseAbsoluteExpression(Imm))
7045       return MatchOperand_ParseFail;
7046     if (Imm < 0 || !isUInt<4>(Imm)) {
7047       Error(S, "invalid immediate: only 4-bit values are legal");
7048       return MatchOperand_ParseFail;
7049     }
7050   }
7051 
7052   Operands.push_back(
7053       AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyGprIdxMode));
7054   return MatchOperand_Success;
7055 }
7056 
7057 bool AMDGPUOperand::isGPRIdxMode() const {
7058   return isImmTy(ImmTyGprIdxMode);
7059 }
7060 
7061 //===----------------------------------------------------------------------===//
7062 // sopp branch targets
7063 //===----------------------------------------------------------------------===//
7064 
7065 OperandMatchResultTy
7066 AMDGPUAsmParser::parseSOppBrTarget(OperandVector &Operands) {
7067 
7068   // Make sure we are not parsing something
7069   // that looks like a label or an expression but is not.
7070   // This will improve error messages.
7071   if (isRegister() || isModifier())
7072     return MatchOperand_NoMatch;
7073 
7074   if (!parseExpr(Operands))
7075     return MatchOperand_ParseFail;
7076 
7077   AMDGPUOperand &Opr = ((AMDGPUOperand &)*Operands[Operands.size() - 1]);
7078   assert(Opr.isImm() || Opr.isExpr());
7079   SMLoc Loc = Opr.getStartLoc();
7080 
7081   // Currently we do not support arbitrary expressions as branch targets.
7082   // Only labels and absolute expressions are accepted.
7083   if (Opr.isExpr() && !Opr.isSymbolRefExpr()) {
7084     Error(Loc, "expected an absolute expression or a label");
7085   } else if (Opr.isImm() && !Opr.isS16Imm()) {
7086     Error(Loc, "expected a 16-bit signed jump offset");
7087   }
7088 
7089   return MatchOperand_Success;
7090 }
7091 
7092 //===----------------------------------------------------------------------===//
7093 // Boolean holding registers
7094 //===----------------------------------------------------------------------===//
7095 
7096 OperandMatchResultTy
7097 AMDGPUAsmParser::parseBoolReg(OperandVector &Operands) {
7098   return parseReg(Operands);
7099 }
7100 
7101 //===----------------------------------------------------------------------===//
7102 // mubuf
7103 //===----------------------------------------------------------------------===//
7104 
7105 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultCPol() const {
7106   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyCPol);
7107 }
7108 
7109 void AMDGPUAsmParser::cvtMubufImpl(MCInst &Inst,
7110                                    const OperandVector &Operands,
7111                                    bool IsAtomic,
7112                                    bool IsLds) {
7113   bool IsLdsOpcode = IsLds;
7114   bool HasLdsModifier = false;
7115   OptionalImmIndexMap OptionalIdx;
7116   unsigned FirstOperandIdx = 1;
7117   bool IsAtomicReturn = false;
7118 
7119   if (IsAtomic) {
7120     for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) {
7121       AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
7122       if (!Op.isCPol())
7123         continue;
7124       IsAtomicReturn = Op.getImm() & AMDGPU::CPol::GLC;
7125       break;
7126     }
7127 
7128     if (!IsAtomicReturn) {
7129       int NewOpc = AMDGPU::getAtomicNoRetOp(Inst.getOpcode());
7130       if (NewOpc != -1)
7131         Inst.setOpcode(NewOpc);
7132     }
7133 
7134     IsAtomicReturn = MII.get(Inst.getOpcode()).TSFlags &
7135                      SIInstrFlags::IsAtomicRet;
7136   }
7137 
7138   for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) {
7139     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
7140 
7141     // Add the register arguments
7142     if (Op.isReg()) {
7143       Op.addRegOperands(Inst, 1);
7144       // Insert a tied src for the atomic return dst.
7145       // This cannot be postponed because subsequent calls to
7146       // addImmOperands rely on the correct number of MC operands.
7147       if (IsAtomicReturn && i == FirstOperandIdx)
7148         Op.addRegOperands(Inst, 1);
7149       continue;
7150     }
7151 
7152     // Handle the case where soffset is an immediate
7153     if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
7154       Op.addImmOperands(Inst, 1);
7155       continue;
7156     }
7157 
7158     HasLdsModifier |= Op.isLDS();
7159 
7160     // Handle tokens like 'offen' which are sometimes hard-coded into the
7161     // asm string.  There are no MCInst operands for these.
7162     if (Op.isToken()) {
7163       continue;
7164     }
7165     assert(Op.isImm());
7166 
7167     // Handle optional arguments
7168     OptionalIdx[Op.getImmTy()] = i;
7169   }
7170 
7171   // This is a workaround for an LLVM quirk that may result in an
7172   // incorrect instruction being selected. The lds and non-lds versions of
7173   // MUBUF instructions are identical except that lds versions have a
7174   // mandatory 'lds' modifier. However, this modifier follows optional
7175   // modifiers, and the LLVM asm matcher regards the 'lds' modifier as
7176   // optional. As a result, an lds version of an opcode may be selected
7177   // even if the instruction has no 'lds' modifier.
7178   if (IsLdsOpcode && !HasLdsModifier) {
7179     int NoLdsOpcode = AMDGPU::getMUBUFNoLdsInst(Inst.getOpcode());
7180     if (NoLdsOpcode != -1) { // Got lds version - correct it.
7181       Inst.setOpcode(NoLdsOpcode);
7182       IsLdsOpcode = false;
7183     }
7184   }
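  // For example (illustrative): a buffer_load_dword written without an 'lds'
  // token must not keep an *_LDS opcode that the matcher may have picked
  // because it treats 'lds' as optional.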
7185 
7186   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset);
7187   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol, 0);
7188 
7189   if (!IsLdsOpcode) { // tfe is not legal with lds opcodes
7190     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
7191   }
7192   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySWZ);
7193 }
7194 
7195 void AMDGPUAsmParser::cvtMtbuf(MCInst &Inst, const OperandVector &Operands) {
7196   OptionalImmIndexMap OptionalIdx;
7197 
7198   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
7199     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
7200 
7201     // Add the register arguments
7202     if (Op.isReg()) {
7203       Op.addRegOperands(Inst, 1);
7204       continue;
7205     }
7206 
7207     // Handle the case where soffset is an immediate
7208     if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
7209       Op.addImmOperands(Inst, 1);
7210       continue;
7211     }
7212 
7213     // Handle tokens like 'offen' which are sometimes hard-coded into the
7214     // asm string.  There are no MCInst operands for these.
7215     if (Op.isToken()) {
7216       continue;
7217     }
7218     assert(Op.isImm());
7219 
7220     // Handle optional arguments
7221     OptionalIdx[Op.getImmTy()] = i;
7222   }
7223 
7224   addOptionalImmOperand(Inst, Operands, OptionalIdx,
7225                         AMDGPUOperand::ImmTyOffset);
7226   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyFORMAT);
7227   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol, 0);
7228   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
7229   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySWZ);
7230 }
7231 
7232 //===----------------------------------------------------------------------===//
7233 // mimg
7234 //===----------------------------------------------------------------------===//
7235 
7236 void AMDGPUAsmParser::cvtMIMG(MCInst &Inst, const OperandVector &Operands,
7237                               bool IsAtomic) {
7238   unsigned I = 1;
7239   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
7240   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
7241     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
7242   }
7243 
7244   if (IsAtomic) {
7245     // Add src, same as dst
7246     assert(Desc.getNumDefs() == 1);
7247     ((AMDGPUOperand &)*Operands[I - 1]).addRegOperands(Inst, 1);
7248   }
7249 
7250   OptionalImmIndexMap OptionalIdx;
7251 
7252   for (unsigned E = Operands.size(); I != E; ++I) {
7253     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
7254 
7255     // Add the register arguments
7256     if (Op.isReg()) {
7257       Op.addRegOperands(Inst, 1);
7258     } else if (Op.isImmModifier()) {
7259       OptionalIdx[Op.getImmTy()] = I;
7260     } else if (!Op.isToken()) {
7261       llvm_unreachable("unexpected operand type");
7262     }
7263   }
7264 
7265   bool IsGFX10Plus = isGFX10Plus();
7266 
7267   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDMask);
7268   if (IsGFX10Plus)
7269     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDim, -1);
7270   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyUNorm);
7271   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol);
7272   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyR128A16);
7273   if (IsGFX10Plus)
7274     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyA16);
7275   if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::tfe) != -1)
7276     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
7277   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyLWE);
7278   if (!IsGFX10Plus)
7279     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDA);
7280   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyD16);
7281 }
7282 
7283 void AMDGPUAsmParser::cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands) {
7284   cvtMIMG(Inst, Operands, true);
7285 }
7286 
7287 void AMDGPUAsmParser::cvtSMEMAtomic(MCInst &Inst, const OperandVector &Operands) {
7288   OptionalImmIndexMap OptionalIdx;
7289   bool IsAtomicReturn = false;
7290 
7291   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
7292     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
7293     if (!Op.isCPol())
7294       continue;
7295     IsAtomicReturn = Op.getImm() & AMDGPU::CPol::GLC;
7296     break;
7297   }
7298 
7299   if (!IsAtomicReturn) {
7300     int NewOpc = AMDGPU::getAtomicNoRetOp(Inst.getOpcode());
7301     if (NewOpc != -1)
7302       Inst.setOpcode(NewOpc);
7303   }
7304 
7305   IsAtomicReturn = MII.get(Inst.getOpcode()).TSFlags &
7306                    SIInstrFlags::IsAtomicRet;
7307 
7308   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
7309     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
7310 
7311     // Add the register arguments
7312     if (Op.isReg()) {
7313       Op.addRegOperands(Inst, 1);
7314       if (IsAtomicReturn && i == 1)
7315         Op.addRegOperands(Inst, 1);
7316       continue;
7317     }
7318 
7319     // Handle the case where soffset is an immediate
7320     if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
7321       Op.addImmOperands(Inst, 1);
7322       continue;
7323     }
7324 
7325     // Handle tokens like 'offen' which are sometimes hard-coded into the
7326     // asm string.  There are no MCInst operands for these.
7327     if (Op.isToken()) {
7328       continue;
7329     }
7330     assert(Op.isImm());
7331 
7332     // Handle optional arguments
7333     OptionalIdx[Op.getImmTy()] = i;
7334   }
7335 
7336   if ((int)Inst.getNumOperands() <=
7337       AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::offset))
7338     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset);
7339   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol, 0);
7340 }
7341 
7342 void AMDGPUAsmParser::cvtIntersectRay(MCInst &Inst,
7343                                       const OperandVector &Operands) {
7344   for (unsigned I = 1; I < Operands.size(); ++I) {
7345     auto &Operand = (AMDGPUOperand &)*Operands[I];
7346     if (Operand.isReg())
7347       Operand.addRegOperands(Inst, 1);
7348   }
7349 
7350   Inst.addOperand(MCOperand::createImm(1)); // a16
7351 }
7352 
7353 //===----------------------------------------------------------------------===//
7354 // smrd
7355 //===----------------------------------------------------------------------===//
7356 
7357 bool AMDGPUOperand::isSMRDOffset8() const {
7358   return isImm() && isUInt<8>(getImm());
7359 }
7360 
7361 bool AMDGPUOperand::isSMEMOffset() const {
7362   return isImm(); // Offset range is checked later by validator.
7363 }
7364 
7365 bool AMDGPUOperand::isSMRDLiteralOffset() const {
7366   // 32-bit literals are only supported on CI, and we only want to use them
7367   // when the offset does not fit in 8 bits.
7368   return isImm() && !isUInt<8>(getImm()) && isUInt<32>(getImm());
7369 }
7370 
7371 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDOffset8() const {
7372   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
7373 }
7374 
7375 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMEMOffset() const {
7376   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
7377 }
7378 
7379 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDLiteralOffset() const {
7380   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
7381 }
7382 
7383 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultFlatOffset() const {
7384   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
7385 }
7386 
7387 //===----------------------------------------------------------------------===//
7388 // vop3
7389 //===----------------------------------------------------------------------===//
7390 
7391 static bool ConvertOmodMul(int64_t &Mul) {
7392   if (Mul != 1 && Mul != 2 && Mul != 4)
7393     return false;
7394 
7395   Mul >>= 1;
7396   return true;
7397 }
7398 
7399 static bool ConvertOmodDiv(int64_t &Div) {
7400   if (Div == 1) {
7401     Div = 0;
7402     return true;
7403   }
7404 
7405   if (Div == 2) {
7406     Div = 3;
7407     return true;
7408   }
7409 
7410   return false;
7411 }
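// Illustrative mapping of the assembler omod syntax to the 2-bit OMOD field
// (assuming the usual SP3 forms): omitted -> 0, mul:2 -> 1, mul:4 -> 2,
// div:2 -> 3.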
7412 
7413 // Both bound_ctrl:0 and bound_ctrl:1 are encoded as 1.
7414 // This is intentional and ensures compatibility with sp3.
7415 // See bug 35397 for details.
7416 static bool ConvertBoundCtrl(int64_t &BoundCtrl) {
7417   if (BoundCtrl == 0 || BoundCtrl == 1) {
7418     BoundCtrl = 1;
7419     return true;
7420   }
7421   return false;
7422 }
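// Illustrative example: in a DPP instruction such as
//   v_mov_b32_dpp v0, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf bound_ctrl:0
// writing bound_ctrl:0 or bound_ctrl:1 produces the same encoded value of 1.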
7423 
7424 // Note: the order in this table matches the order of operands in AsmString.
7425 static const OptionalOperand AMDGPUOptionalOperandTable[] = {
7426   {"offen",   AMDGPUOperand::ImmTyOffen, true, nullptr},
7427   {"idxen",   AMDGPUOperand::ImmTyIdxen, true, nullptr},
7428   {"addr64",  AMDGPUOperand::ImmTyAddr64, true, nullptr},
7429   {"offset0", AMDGPUOperand::ImmTyOffset0, false, nullptr},
7430   {"offset1", AMDGPUOperand::ImmTyOffset1, false, nullptr},
7431   {"gds",     AMDGPUOperand::ImmTyGDS, true, nullptr},
7432   {"lds",     AMDGPUOperand::ImmTyLDS, true, nullptr},
7433   {"offset",  AMDGPUOperand::ImmTyOffset, false, nullptr},
7434   {"inst_offset", AMDGPUOperand::ImmTyInstOffset, false, nullptr},
7435   {"",        AMDGPUOperand::ImmTyCPol, false, nullptr},
7436   {"swz",     AMDGPUOperand::ImmTySWZ, true, nullptr},
7437   {"tfe",     AMDGPUOperand::ImmTyTFE, true, nullptr},
7438   {"d16",     AMDGPUOperand::ImmTyD16, true, nullptr},
7439   {"high",    AMDGPUOperand::ImmTyHigh, true, nullptr},
7440   {"clamp",   AMDGPUOperand::ImmTyClampSI, true, nullptr},
7441   {"omod",    AMDGPUOperand::ImmTyOModSI, false, ConvertOmodMul},
7442   {"unorm",   AMDGPUOperand::ImmTyUNorm, true, nullptr},
7443   {"da",      AMDGPUOperand::ImmTyDA,    true, nullptr},
7444   {"r128",    AMDGPUOperand::ImmTyR128A16,  true, nullptr},
7445   {"a16",     AMDGPUOperand::ImmTyA16,  true, nullptr},
7446   {"lwe",     AMDGPUOperand::ImmTyLWE,   true, nullptr},
7447   {"d16",     AMDGPUOperand::ImmTyD16,   true, nullptr},
7448   {"dmask",   AMDGPUOperand::ImmTyDMask, false, nullptr},
7449   {"dim",     AMDGPUOperand::ImmTyDim,   false, nullptr},
7450   {"row_mask",   AMDGPUOperand::ImmTyDppRowMask, false, nullptr},
7451   {"bank_mask",  AMDGPUOperand::ImmTyDppBankMask, false, nullptr},
7452   {"bound_ctrl", AMDGPUOperand::ImmTyDppBoundCtrl, false, ConvertBoundCtrl},
7453   {"fi",         AMDGPUOperand::ImmTyDppFi, false, nullptr},
7454   {"dst_sel",    AMDGPUOperand::ImmTySdwaDstSel, false, nullptr},
7455   {"src0_sel",   AMDGPUOperand::ImmTySdwaSrc0Sel, false, nullptr},
7456   {"src1_sel",   AMDGPUOperand::ImmTySdwaSrc1Sel, false, nullptr},
7457   {"dst_unused", AMDGPUOperand::ImmTySdwaDstUnused, false, nullptr},
7458   {"compr", AMDGPUOperand::ImmTyExpCompr, true, nullptr },
7459   {"vm", AMDGPUOperand::ImmTyExpVM, true, nullptr},
7460   {"op_sel", AMDGPUOperand::ImmTyOpSel, false, nullptr},
7461   {"op_sel_hi", AMDGPUOperand::ImmTyOpSelHi, false, nullptr},
7462   {"neg_lo", AMDGPUOperand::ImmTyNegLo, false, nullptr},
7463   {"neg_hi", AMDGPUOperand::ImmTyNegHi, false, nullptr},
7464   {"blgp", AMDGPUOperand::ImmTyBLGP, false, nullptr},
7465   {"cbsz", AMDGPUOperand::ImmTyCBSZ, false, nullptr},
7466   {"abid", AMDGPUOperand::ImmTyABID, false, nullptr}
7467 };
7468 
7469 void AMDGPUAsmParser::onBeginOfFile() {
7470   if (!getParser().getStreamer().getTargetStreamer() ||
7471       getSTI().getTargetTriple().getArch() == Triple::r600)
7472     return;
7473 
7474   if (!getTargetStreamer().getTargetID())
7475     getTargetStreamer().initializeTargetID(getSTI(), getSTI().getFeatureString());
7476 
7477   if (isHsaAbiVersion3AndAbove(&getSTI()))
7478     getTargetStreamer().EmitDirectiveAMDGCNTarget();
7479 }
7480 
7481 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOperand(OperandVector &Operands) {
7482 
7483   OperandMatchResultTy res = parseOptionalOpr(Operands);
7484 
7485   // This is a hack to enable hardcoded mandatory operands that follow
7486   // optional operands.
7487   //
7488   // The current design assumes that all operands after the first optional
7489   // operand are also optional. However, the implementation of some
7490   // instructions violates this rule (see e.g. flat/global atomics, which
7491   // have hardcoded 'glc' operands).
7492   //
7493   // To alleviate this problem, we have to (implicitly) parse extra operands
7494   // so the autogenerated parser never hits a hardcoded mandatory operand.
7495 
7496   for (unsigned i = 0; i < MAX_OPR_LOOKAHEAD; ++i) {
7497     if (res != MatchOperand_Success ||
7498         isToken(AsmToken::EndOfStatement))
7499       break;
7500 
7501     trySkipToken(AsmToken::Comma);
7502     res = parseOptionalOpr(Operands);
7503   }
7504 
7505   return res;
7506 }
7507 
7508 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOpr(OperandVector &Operands) {
7509   OperandMatchResultTy res;
7510   for (const OptionalOperand &Op : AMDGPUOptionalOperandTable) {
7511     // try to parse any optional operand here
7512     if (Op.IsBit) {
7513       res = parseNamedBit(Op.Name, Operands, Op.Type);
7514     } else if (Op.Type == AMDGPUOperand::ImmTyOModSI) {
7515       res = parseOModOperand(Operands);
7516     } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstSel ||
7517                Op.Type == AMDGPUOperand::ImmTySdwaSrc0Sel ||
7518                Op.Type == AMDGPUOperand::ImmTySdwaSrc1Sel) {
7519       res = parseSDWASel(Operands, Op.Name, Op.Type);
7520     } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstUnused) {
7521       res = parseSDWADstUnused(Operands);
7522     } else if (Op.Type == AMDGPUOperand::ImmTyOpSel ||
7523                Op.Type == AMDGPUOperand::ImmTyOpSelHi ||
7524                Op.Type == AMDGPUOperand::ImmTyNegLo ||
7525                Op.Type == AMDGPUOperand::ImmTyNegHi) {
7526       res = parseOperandArrayWithPrefix(Op.Name, Operands, Op.Type,
7527                                         Op.ConvertResult);
7528     } else if (Op.Type == AMDGPUOperand::ImmTyDim) {
7529       res = parseDim(Operands);
7530     } else if (Op.Type == AMDGPUOperand::ImmTyCPol) {
7531       res = parseCPol(Operands);
7532     } else {
7533       res = parseIntWithPrefix(Op.Name, Operands, Op.Type, Op.ConvertResult);
7534     }
7535     if (res != MatchOperand_NoMatch) {
7536       return res;
7537     }
7538   }
7539   return MatchOperand_NoMatch;
7540 }
7541 
7542 OperandMatchResultTy AMDGPUAsmParser::parseOModOperand(OperandVector &Operands) {
7543   StringRef Name = getTokenStr();
7544   if (Name == "mul") {
7545     return parseIntWithPrefix("mul", Operands,
7546                               AMDGPUOperand::ImmTyOModSI, ConvertOmodMul);
7547   }
7548 
7549   if (Name == "div") {
7550     return parseIntWithPrefix("div", Operands,
7551                               AMDGPUOperand::ImmTyOModSI, ConvertOmodDiv);
7552   }
7553 
7554   return MatchOperand_NoMatch;
7555 }
7556 
7557 void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands) {
7558   cvtVOP3P(Inst, Operands);
7559 
7560   int Opc = Inst.getOpcode();
7561 
7562   int SrcNum;
7563   const int Ops[] = { AMDGPU::OpName::src0,
7564                       AMDGPU::OpName::src1,
7565                       AMDGPU::OpName::src2 };
7566   for (SrcNum = 0;
7567        SrcNum < 3 && AMDGPU::getNamedOperandIdx(Opc, Ops[SrcNum]) != -1;
7568        ++SrcNum);
7569   assert(SrcNum > 0);
7570 
7571   int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
7572   unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
7573 
7574   if ((OpSel & (1 << SrcNum)) != 0) {
7575     int ModIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers);
7576     uint32_t ModVal = Inst.getOperand(ModIdx).getImm();
7577     Inst.getOperand(ModIdx).setImm(ModVal | SISrcMods::DST_OP_SEL);
7578   }
7579 }
7580 
7581 static bool isRegOrImmWithInputMods(const MCInstrDesc &Desc, unsigned OpNum) {
7582       // 1. This operand is an input-modifiers operand
7583   return Desc.OpInfo[OpNum].OperandType == AMDGPU::OPERAND_INPUT_MODS
7584       // 2. This is not the last operand
7585       && Desc.NumOperands > (OpNum + 1)
7586       // 3. The next operand has a register class
7587       && Desc.OpInfo[OpNum + 1].RegClass != -1
7588       // 4. The next register is not tied to any other operand
7589       && Desc.getOperandConstraint(OpNum + 1, MCOI::OperandConstraint::TIED_TO) == -1;
7590 }
7591 
7592 void AMDGPUAsmParser::cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands)
7593 {
7594   OptionalImmIndexMap OptionalIdx;
7595   unsigned Opc = Inst.getOpcode();
7596 
7597   unsigned I = 1;
7598   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
7599   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
7600     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
7601   }
7602 
7603   for (unsigned E = Operands.size(); I != E; ++I) {
7604     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
7605     if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
7606       Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
7607     } else if (Op.isInterpSlot() ||
7608                Op.isInterpAttr() ||
7609                Op.isAttrChan()) {
7610       Inst.addOperand(MCOperand::createImm(Op.getImm()));
7611     } else if (Op.isImmModifier()) {
7612       OptionalIdx[Op.getImmTy()] = I;
7613     } else {
7614       llvm_unreachable("unhandled operand type");
7615     }
7616   }
7617 
7618   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::high) != -1) {
7619     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyHigh);
7620   }
7621 
7622   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) {
7623     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI);
7624   }
7625 
7626   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) {
7627     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI);
7628   }
7629 }
7630 
7631 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands,
7632                               OptionalImmIndexMap &OptionalIdx) {
7633   unsigned Opc = Inst.getOpcode();
7634 
7635   unsigned I = 1;
7636   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
7637   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
7638     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
7639   }
7640 
7641   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers) != -1) {
7642     // This instruction has src modifiers
7643     for (unsigned E = Operands.size(); I != E; ++I) {
7644       AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
7645       if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
7646         Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
7647       } else if (Op.isImmModifier()) {
7648         OptionalIdx[Op.getImmTy()] = I;
7649       } else if (Op.isRegOrImm()) {
7650         Op.addRegOrImmOperands(Inst, 1);
7651       } else {
7652         llvm_unreachable("unhandled operand type");
7653       }
7654     }
7655   } else {
7656     // No src modifiers
7657     for (unsigned E = Operands.size(); I != E; ++I) {
7658       AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
7659       if (Op.isMod()) {
7660         OptionalIdx[Op.getImmTy()] = I;
7661       } else {
7662         Op.addRegOrImmOperands(Inst, 1);
7663       }
7664     }
7665   }
7666 
7667   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) {
7668     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI);
7669   }
7670 
7671   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) {
7672     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI);
7673   }
7674 
7675   // Special case for v_mac_{f16, f32} and v_fmac_{f16, f32} (gfx906/gfx10+):
7676   // these opcodes have a src2 register operand that is tied to the dst
7677   // operand. The assembler does not allow modifiers for this operand, so
7678   // src2_modifiers must be 0.
7679   if (Opc == AMDGPU::V_MAC_F32_e64_gfx6_gfx7 ||
7680       Opc == AMDGPU::V_MAC_F32_e64_gfx10 ||
7681       Opc == AMDGPU::V_MAC_F32_e64_vi ||
7682       Opc == AMDGPU::V_MAC_LEGACY_F32_e64_gfx6_gfx7 ||
7683       Opc == AMDGPU::V_MAC_LEGACY_F32_e64_gfx10 ||
7684       Opc == AMDGPU::V_MAC_F16_e64_vi ||
7685       Opc == AMDGPU::V_FMAC_F64_e64_gfx90a ||
7686       Opc == AMDGPU::V_FMAC_F32_e64_gfx10 ||
7687       Opc == AMDGPU::V_FMAC_F32_e64_vi ||
7688       Opc == AMDGPU::V_FMAC_LEGACY_F32_e64_gfx10 ||
7689       Opc == AMDGPU::V_FMAC_F16_e64_gfx10) {
7690     auto it = Inst.begin();
7691     std::advance(it, AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2_modifiers));
7692     it = Inst.insert(it, MCOperand::createImm(0)); // no modifiers for src2
7693     ++it;
7694     // Copy the operand to ensure it's not invalidated when Inst grows.
7695     Inst.insert(it, MCOperand(Inst.getOperand(0))); // src2 = dst
7696   }
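  // For example (illustrative), "v_mac_f32_e64 v0, v1, v2" ends up with an
  // implicit src2_modifiers of 0 and src2 tied to the destination v0.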
7697 }
7698 
7699 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands) {
7700   OptionalImmIndexMap OptionalIdx;
7701   cvtVOP3(Inst, Operands, OptionalIdx);
7702 }
7703 
7704 void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands,
7705                                OptionalImmIndexMap &OptIdx) {
7706   const int Opc = Inst.getOpcode();
7707   const MCInstrDesc &Desc = MII.get(Opc);
7708 
7709   const bool IsPacked = (Desc.TSFlags & SIInstrFlags::IsPacked) != 0;
7710 
7711   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst_in) != -1) {
7712     assert(!IsPacked);
7713     Inst.addOperand(Inst.getOperand(0));
7714   }
7715 
7716   // FIXME: This is messy. Parse the modifiers as if this were a normal VOP3
7717   // instruction, and then figure out where to actually put the modifiers.
7718 
7719   int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
7720   if (OpSelIdx != -1) {
7721     addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSel);
7722   }
7723 
7724   int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi);
7725   if (OpSelHiIdx != -1) {
7726     int DefaultVal = IsPacked ? -1 : 0;
7727     addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSelHi,
7728                           DefaultVal);
7729   }
7730 
7731   int NegLoIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_lo);
7732   if (NegLoIdx != -1) {
7733     addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegLo);
7734     addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegHi);
7735   }
7736 
7737   const int Ops[] = { AMDGPU::OpName::src0,
7738                       AMDGPU::OpName::src1,
7739                       AMDGPU::OpName::src2 };
7740   const int ModOps[] = { AMDGPU::OpName::src0_modifiers,
7741                          AMDGPU::OpName::src1_modifiers,
7742                          AMDGPU::OpName::src2_modifiers };
7743 
7744   unsigned OpSel = 0;
7745   unsigned OpSelHi = 0;
7746   unsigned NegLo = 0;
7747   unsigned NegHi = 0;
7748 
7749   if (OpSelIdx != -1)
7750     OpSel = Inst.getOperand(OpSelIdx).getImm();
7751 
7752   if (OpSelHiIdx != -1)
7753     OpSelHi = Inst.getOperand(OpSelHiIdx).getImm();
7754 
7755   if (NegLoIdx != -1) {
7756     int NegHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_hi);
7757     NegLo = Inst.getOperand(NegLoIdx).getImm();
7758     NegHi = Inst.getOperand(NegHiIdx).getImm();
7759   }
7760 
7761   for (int J = 0; J < 3; ++J) {
7762     int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]);
7763     if (OpIdx == -1)
7764       break;
7765 
7766     uint32_t ModVal = 0;
7767 
7768     if ((OpSel & (1 << J)) != 0)
7769       ModVal |= SISrcMods::OP_SEL_0;
7770 
7771     if ((OpSelHi & (1 << J)) != 0)
7772       ModVal |= SISrcMods::OP_SEL_1;
7773 
7774     if ((NegLo & (1 << J)) != 0)
7775       ModVal |= SISrcMods::NEG;
7776 
7777     if ((NegHi & (1 << J)) != 0)
7778       ModVal |= SISrcMods::NEG_HI;
7779 
7780     int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]);
7781 
7782     Inst.getOperand(ModIdx).setImm(Inst.getOperand(ModIdx).getImm() | ModVal);
7783   }
7784 }
7785 
7786 void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands) {
7787   OptionalImmIndexMap OptIdx;
7788   cvtVOP3(Inst, Operands, OptIdx);
7789   cvtVOP3P(Inst, Operands, OptIdx);
7790 }
7791 
7792 //===----------------------------------------------------------------------===//
7793 // dpp
7794 //===----------------------------------------------------------------------===//
7795 
7796 bool AMDGPUOperand::isDPP8() const {
7797   return isImmTy(ImmTyDPP8);
7798 }
7799 
7800 bool AMDGPUOperand::isDPPCtrl() const {
7801   using namespace AMDGPU::DPP;
7802 
7803   bool result = isImm() && getImmTy() == ImmTyDppCtrl && isUInt<9>(getImm());
7804   if (result) {
7805     int64_t Imm = getImm();
7806     return (Imm >= DppCtrl::QUAD_PERM_FIRST && Imm <= DppCtrl::QUAD_PERM_LAST) ||
7807            (Imm >= DppCtrl::ROW_SHL_FIRST && Imm <= DppCtrl::ROW_SHL_LAST) ||
7808            (Imm >= DppCtrl::ROW_SHR_FIRST && Imm <= DppCtrl::ROW_SHR_LAST) ||
7809            (Imm >= DppCtrl::ROW_ROR_FIRST && Imm <= DppCtrl::ROW_ROR_LAST) ||
7810            (Imm == DppCtrl::WAVE_SHL1) ||
7811            (Imm == DppCtrl::WAVE_ROL1) ||
7812            (Imm == DppCtrl::WAVE_SHR1) ||
7813            (Imm == DppCtrl::WAVE_ROR1) ||
7814            (Imm == DppCtrl::ROW_MIRROR) ||
7815            (Imm == DppCtrl::ROW_HALF_MIRROR) ||
7816            (Imm == DppCtrl::BCAST15) ||
7817            (Imm == DppCtrl::BCAST31) ||
7818            (Imm >= DppCtrl::ROW_SHARE_FIRST && Imm <= DppCtrl::ROW_SHARE_LAST) ||
7819            (Imm >= DppCtrl::ROW_XMASK_FIRST && Imm <= DppCtrl::ROW_XMASK_LAST);
7820   }
7821   return false;
7822 }
7823 
7824 //===----------------------------------------------------------------------===//
7825 // mAI
7826 //===----------------------------------------------------------------------===//
7827 
7828 bool AMDGPUOperand::isBLGP() const {
7829   return isImm() && getImmTy() == ImmTyBLGP && isUInt<3>(getImm());
7830 }
7831 
7832 bool AMDGPUOperand::isCBSZ() const {
7833   return isImm() && getImmTy() == ImmTyCBSZ && isUInt<3>(getImm());
7834 }
7835 
7836 bool AMDGPUOperand::isABID() const {
7837   return isImm() && getImmTy() == ImmTyABID && isUInt<4>(getImm());
7838 }
7839 
7840 bool AMDGPUOperand::isS16Imm() const {
7841   return isImm() && (isInt<16>(getImm()) || isUInt<16>(getImm()));
7842 }
7843 
7844 bool AMDGPUOperand::isU16Imm() const {
7845   return isImm() && isUInt<16>(getImm());
7846 }
7847 
7848 //===----------------------------------------------------------------------===//
7849 // dim
7850 //===----------------------------------------------------------------------===//
7851 
7852 bool AMDGPUAsmParser::parseDimId(unsigned &Encoding) {
7853   // We want to allow "dim:1D" etc.,
7854   // but the initial 1 is tokenized as an integer.
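  // Accepted forms include, e.g., "dim:2D" and "dim:SQ_RSRC_IMG_2D"
  // (illustrative); the SQ_RSRC_IMG_ prefix is stripped below before the
  // asm-suffix lookup.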
7855   std::string Token;
7856   if (isToken(AsmToken::Integer)) {
7857     SMLoc Loc = getToken().getEndLoc();
7858     Token = std::string(getTokenStr());
7859     lex();
7860     if (getLoc() != Loc)
7861       return false;
7862   }
7863 
7864   StringRef Suffix;
7865   if (!parseId(Suffix))
7866     return false;
7867   Token += Suffix;
7868 
7869   StringRef DimId = Token;
7870   if (DimId.startswith("SQ_RSRC_IMG_"))
7871     DimId = DimId.drop_front(12);
7872 
7873   const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByAsmSuffix(DimId);
7874   if (!DimInfo)
7875     return false;
7876 
7877   Encoding = DimInfo->Encoding;
7878   return true;
7879 }
7880 
7881 OperandMatchResultTy AMDGPUAsmParser::parseDim(OperandVector &Operands) {
7882   if (!isGFX10Plus())
7883     return MatchOperand_NoMatch;
7884 
7885   SMLoc S = getLoc();
7886 
7887   if (!trySkipId("dim", AsmToken::Colon))
7888     return MatchOperand_NoMatch;
7889 
7890   unsigned Encoding;
7891   SMLoc Loc = getLoc();
7892   if (!parseDimId(Encoding)) {
7893     Error(Loc, "invalid dim value");
7894     return MatchOperand_ParseFail;
7895   }
7896 
7897   Operands.push_back(AMDGPUOperand::CreateImm(this, Encoding, S,
7898                                               AMDGPUOperand::ImmTyDim));
7899   return MatchOperand_Success;
7900 }
7901 
7902 //===----------------------------------------------------------------------===//
7903 // dpp
7904 //===----------------------------------------------------------------------===//
7905 
7906 OperandMatchResultTy AMDGPUAsmParser::parseDPP8(OperandVector &Operands) {
7907   SMLoc S = getLoc();
7908 
7909   if (!isGFX10Plus() || !trySkipId("dpp8", AsmToken::Colon))
7910     return MatchOperand_NoMatch;
7911 
7912   // dpp8:[%d,%d,%d,%d,%d,%d,%d,%d]
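  // Example (illustrative): dpp8:[7,6,5,4,3,2,1,0]. Each selector is a 3-bit
  // lane index; the eight selectors are packed LSB-first into one immediate
  // below.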
7913 
7914   int64_t Sels[8];
7915 
7916   if (!skipToken(AsmToken::LBrac, "expected an opening square bracket"))
7917     return MatchOperand_ParseFail;
7918 
7919   for (size_t i = 0; i < 8; ++i) {
7920     if (i > 0 && !skipToken(AsmToken::Comma, "expected a comma"))
7921       return MatchOperand_ParseFail;
7922 
7923     SMLoc Loc = getLoc();
7924     if (getParser().parseAbsoluteExpression(Sels[i]))
7925       return MatchOperand_ParseFail;
    if (Sels[i] < 0 || Sels[i] > 7) {
7927       Error(Loc, "expected a 3-bit value");
7928       return MatchOperand_ParseFail;
7929     }
7930   }
7931 
7932   if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
7933     return MatchOperand_ParseFail;
7934 
7935   unsigned DPP8 = 0;
7936   for (size_t i = 0; i < 8; ++i)
7937     DPP8 |= (Sels[i] << (i * 3));
7938 
7939   Operands.push_back(AMDGPUOperand::CreateImm(this, DPP8, S, AMDGPUOperand::ImmTyDPP8));
7940   return MatchOperand_Success;
7941 }
7942 
7943 bool
7944 AMDGPUAsmParser::isSupportedDPPCtrl(StringRef Ctrl,
7945                                     const OperandVector &Operands) {
7946   if (Ctrl == "row_newbcast")
7947     return isGFX90A();
7948 
7949   if (Ctrl == "row_share" ||
7950       Ctrl == "row_xmask")
7951     return isGFX10Plus();
7952 
7953   if (Ctrl == "wave_shl" ||
7954       Ctrl == "wave_shr" ||
7955       Ctrl == "wave_rol" ||
7956       Ctrl == "wave_ror" ||
7957       Ctrl == "row_bcast")
7958     return isVI() || isGFX9();
7959 
7960   return Ctrl == "row_mirror" ||
7961          Ctrl == "row_half_mirror" ||
7962          Ctrl == "quad_perm" ||
7963          Ctrl == "row_shl" ||
7964          Ctrl == "row_shr" ||
7965          Ctrl == "row_ror";
7966 }
7967 
7968 int64_t
7969 AMDGPUAsmParser::parseDPPCtrlPerm() {
7970   // quad_perm:[%d,%d,%d,%d]
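  // Example (illustrative): quad_perm:[0,1,2,3] packs each 2-bit lane selector
  // LSB-first, yielding 0xE4 for the identity permutation.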
7971 
7972   if (!skipToken(AsmToken::LBrac, "expected an opening square bracket"))
7973     return -1;
7974 
7975   int64_t Val = 0;
7976   for (int i = 0; i < 4; ++i) {
7977     if (i > 0 && !skipToken(AsmToken::Comma, "expected a comma"))
7978       return -1;
7979 
7980     int64_t Temp;
7981     SMLoc Loc = getLoc();
7982     if (getParser().parseAbsoluteExpression(Temp))
7983       return -1;
7984     if (Temp < 0 || Temp > 3) {
7985       Error(Loc, "expected a 2-bit value");
7986       return -1;
7987     }
7988 
    Val += (Temp << (i * 2));
7990   }
7991 
7992   if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
7993     return -1;
7994 
7995   return Val;
7996 }
7997 
7998 int64_t
7999 AMDGPUAsmParser::parseDPPCtrlSel(StringRef Ctrl) {
8000   using namespace AMDGPU::DPP;
8001 
8002   // sel:%d
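  // Examples (illustrative): row_shl:1, row_share:0, row_bcast:15. For ranged
  // controls the parsed value is OR'ed into the base control encoding below;
  // row_bcast accepts only 15 or 31.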
8003 
8004   int64_t Val;
8005   SMLoc Loc = getLoc();
8006 
8007   if (getParser().parseAbsoluteExpression(Val))
8008     return -1;
8009 
8010   struct DppCtrlCheck {
8011     int64_t Ctrl;
8012     int Lo;
8013     int Hi;
8014   };
8015 
8016   DppCtrlCheck Check = StringSwitch<DppCtrlCheck>(Ctrl)
8017     .Case("wave_shl",  {DppCtrl::WAVE_SHL1,       1,  1})
8018     .Case("wave_rol",  {DppCtrl::WAVE_ROL1,       1,  1})
8019     .Case("wave_shr",  {DppCtrl::WAVE_SHR1,       1,  1})
8020     .Case("wave_ror",  {DppCtrl::WAVE_ROR1,       1,  1})
8021     .Case("row_shl",   {DppCtrl::ROW_SHL0,        1, 15})
8022     .Case("row_shr",   {DppCtrl::ROW_SHR0,        1, 15})
8023     .Case("row_ror",   {DppCtrl::ROW_ROR0,        1, 15})
8024     .Case("row_share", {DppCtrl::ROW_SHARE_FIRST, 0, 15})
8025     .Case("row_xmask", {DppCtrl::ROW_XMASK_FIRST, 0, 15})
8026     .Case("row_newbcast", {DppCtrl::ROW_NEWBCAST_FIRST, 0, 15})
8027     .Default({-1, 0, 0});
8028 
8029   bool Valid;
8030   if (Check.Ctrl == -1) {
8031     Valid = (Ctrl == "row_bcast" && (Val == 15 || Val == 31));
    Val = (Val == 15) ? DppCtrl::BCAST15 : DppCtrl::BCAST31;
8033   } else {
8034     Valid = Check.Lo <= Val && Val <= Check.Hi;
8035     Val = (Check.Lo == Check.Hi) ? Check.Ctrl : (Check.Ctrl | Val);
8036   }
8037 
8038   if (!Valid) {
8039     Error(Loc, Twine("invalid ", Ctrl) + Twine(" value"));
8040     return -1;
8041   }
8042 
8043   return Val;
8044 }
8045 
8046 OperandMatchResultTy
8047 AMDGPUAsmParser::parseDPPCtrl(OperandVector &Operands) {
8048   using namespace AMDGPU::DPP;
8049 
8050   if (!isToken(AsmToken::Identifier) ||
8051       !isSupportedDPPCtrl(getTokenStr(), Operands))
8052     return MatchOperand_NoMatch;
8053 
8054   SMLoc S = getLoc();
8055   int64_t Val = -1;
8056   StringRef Ctrl;
8057 
8058   parseId(Ctrl);
8059 
8060   if (Ctrl == "row_mirror") {
8061     Val = DppCtrl::ROW_MIRROR;
8062   } else if (Ctrl == "row_half_mirror") {
8063     Val = DppCtrl::ROW_HALF_MIRROR;
8064   } else {
8065     if (skipToken(AsmToken::Colon, "expected a colon")) {
8066       if (Ctrl == "quad_perm") {
8067         Val = parseDPPCtrlPerm();
8068       } else {
8069         Val = parseDPPCtrlSel(Ctrl);
8070       }
8071     }
8072   }
8073 
8074   if (Val == -1)
8075     return MatchOperand_ParseFail;
8076 
8077   Operands.push_back(
8078     AMDGPUOperand::CreateImm(this, Val, S, AMDGPUOperand::ImmTyDppCtrl));
8079   return MatchOperand_Success;
8080 }
8081 
8082 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultRowMask() const {
8083   return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppRowMask);
8084 }
8085 
8086 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultEndpgmImmOperands() const {
8087   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyEndpgm);
8088 }
8089 
8090 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBankMask() const {
8091   return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppBankMask);
8092 }
8093 
8094 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBoundCtrl() const {
8095   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppBoundCtrl);
8096 }
8097 
8098 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultFI() const {
8099   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppFi);
8100 }
8101 
8102 void AMDGPUAsmParser::cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8) {
8103   OptionalImmIndexMap OptionalIdx;
8104 
8105   unsigned Opc = Inst.getOpcode();
8106   bool HasModifiers =
8107       AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers) != -1;
8108   unsigned I = 1;
8109   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
8110   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
8111     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
8112   }
8113 
8114   int Fi = 0;
8115   for (unsigned E = Operands.size(); I != E; ++I) {
8116     auto TiedTo = Desc.getOperandConstraint(Inst.getNumOperands(),
8117                                             MCOI::TIED_TO);
8118     if (TiedTo != -1) {
8119       assert((unsigned)TiedTo < Inst.getNumOperands());
      // Handle the tied 'old' or src2 operand for MAC instructions.
8121       Inst.addOperand(Inst.getOperand(TiedTo));
8122     }
8123     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
8124     // Add the register arguments
8125     if (Op.isReg() && validateVccOperand(Op.getReg())) {
      // VOP2b (v_add_u32, v_sub_u32, ...) DPP uses the "vcc" token.
      // Skip it.
8128       continue;
8129     }
8130 
8131     if (IsDPP8) {
8132       if (Op.isDPP8()) {
8133         Op.addImmOperands(Inst, 1);
8134       } else if (HasModifiers &&
8135                  isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
8136         Op.addRegWithFPInputModsOperands(Inst, 2);
8137       } else if (Op.isFI()) {
8138         Fi = Op.getImm();
8139       } else if (Op.isReg()) {
8140         Op.addRegOperands(Inst, 1);
8141       } else {
8142         llvm_unreachable("Invalid operand type");
8143       }
8144     } else {
8145       if (HasModifiers &&
8146           isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
8147         Op.addRegWithFPInputModsOperands(Inst, 2);
8148       } else if (Op.isReg()) {
8149         Op.addRegOperands(Inst, 1);
8150       } else if (Op.isDPPCtrl()) {
8151         Op.addImmOperands(Inst, 1);
8152       } else if (Op.isImm()) {
8153         // Handle optional arguments
8154         OptionalIdx[Op.getImmTy()] = I;
8155       } else {
8156         llvm_unreachable("Invalid operand type");
8157       }
8158     }
8159   }
8160 
8161   if (IsDPP8) {
8162     using namespace llvm::AMDGPU::DPP;
    Inst.addOperand(MCOperand::createImm(Fi ? DPP8_FI_1 : DPP8_FI_0));
8164   } else {
8165     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppRowMask, 0xf);
8166     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBankMask, 0xf);
8167     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBoundCtrl);
8168     if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::fi) != -1) {
8169       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppFi);
8170     }
8171   }
8172 }
8173 
8174 //===----------------------------------------------------------------------===//
8175 // sdwa
8176 //===----------------------------------------------------------------------===//
8177 
8178 OperandMatchResultTy
8179 AMDGPUAsmParser::parseSDWASel(OperandVector &Operands, StringRef Prefix,
8180                               AMDGPUOperand::ImmTy Type) {
8181   using namespace llvm::AMDGPU::SDWA;
8182 
8183   SMLoc S = getLoc();
8184   StringRef Value;
8185   OperandMatchResultTy res;
8186 
8187   SMLoc StringLoc;
8188   res = parseStringWithPrefix(Prefix, Value, StringLoc);
8189   if (res != MatchOperand_Success) {
8190     return res;
8191   }
8192 
8193   int64_t Int;
8194   Int = StringSwitch<int64_t>(Value)
8195         .Case("BYTE_0", SdwaSel::BYTE_0)
8196         .Case("BYTE_1", SdwaSel::BYTE_1)
8197         .Case("BYTE_2", SdwaSel::BYTE_2)
8198         .Case("BYTE_3", SdwaSel::BYTE_3)
8199         .Case("WORD_0", SdwaSel::WORD_0)
8200         .Case("WORD_1", SdwaSel::WORD_1)
8201         .Case("DWORD", SdwaSel::DWORD)
8202         .Default(0xffffffff);
8203 
8204   if (Int == 0xffffffff) {
8205     Error(StringLoc, "invalid " + Twine(Prefix) + " value");
8206     return MatchOperand_ParseFail;
8207   }
8208 
8209   Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, Type));
8210   return MatchOperand_Success;
8211 }
8212 
8213 OperandMatchResultTy
8214 AMDGPUAsmParser::parseSDWADstUnused(OperandVector &Operands) {
8215   using namespace llvm::AMDGPU::SDWA;
8216 
8217   SMLoc S = getLoc();
8218   StringRef Value;
8219   OperandMatchResultTy res;
8220 
8221   SMLoc StringLoc;
8222   res = parseStringWithPrefix("dst_unused", Value, StringLoc);
8223   if (res != MatchOperand_Success) {
8224     return res;
8225   }
8226 
8227   int64_t Int;
8228   Int = StringSwitch<int64_t>(Value)
8229         .Case("UNUSED_PAD", DstUnused::UNUSED_PAD)
8230         .Case("UNUSED_SEXT", DstUnused::UNUSED_SEXT)
8231         .Case("UNUSED_PRESERVE", DstUnused::UNUSED_PRESERVE)
8232         .Default(0xffffffff);
8233 
8234   if (Int == 0xffffffff) {
8235     Error(StringLoc, "invalid dst_unused value");
8236     return MatchOperand_ParseFail;
8237   }
8238 
8239   Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, AMDGPUOperand::ImmTySdwaDstUnused));
8240   return MatchOperand_Success;
8241 }
8242 
8243 void AMDGPUAsmParser::cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands) {
8244   cvtSDWA(Inst, Operands, SIInstrFlags::VOP1);
8245 }
8246 
8247 void AMDGPUAsmParser::cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands) {
8248   cvtSDWA(Inst, Operands, SIInstrFlags::VOP2);
8249 }
8250 
8251 void AMDGPUAsmParser::cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands) {
8252   cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, true, true);
8253 }
8254 
8255 void AMDGPUAsmParser::cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands) {
8256   cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, false, true);
8257 }
8258 
8259 void AMDGPUAsmParser::cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands) {
8260   cvtSDWA(Inst, Operands, SIInstrFlags::VOPC, isVI());
8261 }
8262 
8263 void AMDGPUAsmParser::cvtSDWA(MCInst &Inst, const OperandVector &Operands,
8264                               uint64_t BasicInstType,
8265                               bool SkipDstVcc,
8266                               bool SkipSrcVcc) {
8267   using namespace llvm::AMDGPU::SDWA;
8268 
8269   OptionalImmIndexMap OptionalIdx;
8270   bool SkipVcc = SkipDstVcc || SkipSrcVcc;
8271   bool SkippedVcc = false;
8272 
8273   unsigned I = 1;
8274   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
8275   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
8276     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
8277   }
8278 
8279   for (unsigned E = Operands.size(); I != E; ++I) {
8280     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
8281     if (SkipVcc && !SkippedVcc && Op.isReg() &&
8282         (Op.getReg() == AMDGPU::VCC || Op.getReg() == AMDGPU::VCC_LO)) {
      // VOP2b (v_add_u32, v_sub_u32, ...) SDWA uses the "vcc" token as dst.
      // Skip it if it is the 2nd operand (e.g. v_add_i32_sdwa v1, vcc, v2, v3)
      // or the 4th operand (v_addc_u32_sdwa v1, vcc, v2, v3, vcc).
      // Skip VCC only if we did not skip it on the previous iteration.
      // Note that src0 and src1 occupy 2 slots each because of modifiers.
8288       if (BasicInstType == SIInstrFlags::VOP2 &&
8289           ((SkipDstVcc && Inst.getNumOperands() == 1) ||
8290            (SkipSrcVcc && Inst.getNumOperands() == 5))) {
8291         SkippedVcc = true;
8292         continue;
8293       } else if (BasicInstType == SIInstrFlags::VOPC &&
8294                  Inst.getNumOperands() == 0) {
8295         SkippedVcc = true;
8296         continue;
8297       }
8298     }
8299     if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
8300       Op.addRegOrImmWithInputModsOperands(Inst, 2);
8301     } else if (Op.isImm()) {
8302       // Handle optional arguments
8303       OptionalIdx[Op.getImmTy()] = I;
8304     } else {
8305       llvm_unreachable("Invalid operand type");
8306     }
8307     SkippedVcc = false;
8308   }
8309 
8310   if (Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx10 &&
8311       Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx9 &&
8312       Inst.getOpcode() != AMDGPU::V_NOP_sdwa_vi) {
    // v_nop_sdwa_vi/gfx9/gfx10 has no optional sdwa arguments
8314     switch (BasicInstType) {
8315     case SIInstrFlags::VOP1:
8316       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
8317       if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) {
8318         addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
8319       }
8320       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD);
8321       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE);
8322       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
8323       break;
8324 
8325     case SIInstrFlags::VOP2:
8326       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
8327       if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) {
8328         addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
8329       }
8330       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD);
8331       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE);
8332       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
8333       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD);
8334       break;
8335 
8336     case SIInstrFlags::VOPC:
8337       if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::clamp) != -1)
8338         addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
8339       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
8340       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD);
8341       break;
8342 
8343     default:
8344       llvm_unreachable("Invalid instruction type. Only VOP1, VOP2 and VOPC allowed");
8345     }
8346   }
8347 
  // Special case v_mac_{f16, f32}:
  // these have a src2 register operand that is tied to the dst operand.
8350   if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
8351       Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi)  {
8352     auto it = Inst.begin();
8353     std::advance(
8354       it, AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::src2));
8355     Inst.insert(it, Inst.getOperand(0)); // src2 = dst
8356   }
8357 }
8358 
8359 //===----------------------------------------------------------------------===//
8360 // mAI
8361 //===----------------------------------------------------------------------===//
8362 
8363 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBLGP() const {
8364   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyBLGP);
8365 }
8366 
8367 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultCBSZ() const {
8368   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyCBSZ);
8369 }
8370 
8371 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultABID() const {
8372   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyABID);
8373 }
8374 
8375 /// Force static initialization.
8376 extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUAsmParser() {
8377   RegisterMCAsmParser<AMDGPUAsmParser> A(getTheAMDGPUTarget());
8378   RegisterMCAsmParser<AMDGPUAsmParser> B(getTheGCNTarget());
8379 }
8380 
8381 #define GET_REGISTER_MATCHER
8382 #define GET_MATCHER_IMPLEMENTATION
8383 #define GET_MNEMONIC_SPELL_CHECKER
8384 #define GET_MNEMONIC_CHECKER
8385 #include "AMDGPUGenAsmMatcher.inc"
8386 
// This function must be defined after the auto-generated include so that the
// MatchClassKind enum is defined.
8389 unsigned AMDGPUAsmParser::validateTargetOperandClass(MCParsedAsmOperand &Op,
8390                                                      unsigned Kind) {
  // Tokens like "glc" are parsed as immediate operands in ParseOperand().
  // But MatchInstructionImpl() expects a token and fails to validate the
  // operand. This method checks whether we were given an immediate operand
  // when the matcher expects the corresponding token.
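  // Illustrative example: in "buffer_load_dword v0, v1, s[0:3], 0 idxen" the
  // trailing "idxen" is parsed as an immediate operand, and MCK_idxen below
  // accepts it via Operand.isIdxen().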
8395   AMDGPUOperand &Operand = (AMDGPUOperand&)Op;
8396   switch (Kind) {
8397   case MCK_addr64:
8398     return Operand.isAddr64() ? Match_Success : Match_InvalidOperand;
8399   case MCK_gds:
8400     return Operand.isGDS() ? Match_Success : Match_InvalidOperand;
8401   case MCK_lds:
8402     return Operand.isLDS() ? Match_Success : Match_InvalidOperand;
8403   case MCK_idxen:
8404     return Operand.isIdxen() ? Match_Success : Match_InvalidOperand;
8405   case MCK_offen:
8406     return Operand.isOffen() ? Match_Success : Match_InvalidOperand;
8407   case MCK_SSrcB32:
    // When operands have expression values, they return true for isToken,
    // because a token and an expression cannot be distinguished at parse time.
    // MatchInstructionImpl() always tries to match an operand as a token when
    // isToken returns true, and when the name of the expression is not a valid
    // token the match fails, so we need to handle that case here.
8414     return Operand.isSSrcB32() ? Match_Success : Match_InvalidOperand;
8415   case MCK_SSrcF32:
8416     return Operand.isSSrcF32() ? Match_Success : Match_InvalidOperand;
8417   case MCK_SoppBrTarget:
8418     return Operand.isSoppBrTarget() ? Match_Success : Match_InvalidOperand;
8419   case MCK_VReg32OrOff:
8420     return Operand.isVReg32OrOff() ? Match_Success : Match_InvalidOperand;
8421   case MCK_InterpSlot:
8422     return Operand.isInterpSlot() ? Match_Success : Match_InvalidOperand;
8423   case MCK_Attr:
8424     return Operand.isInterpAttr() ? Match_Success : Match_InvalidOperand;
8425   case MCK_AttrChan:
8426     return Operand.isAttrChan() ? Match_Success : Match_InvalidOperand;
8427   case MCK_ImmSMEMOffset:
8428     return Operand.isSMEMOffset() ? Match_Success : Match_InvalidOperand;
8429   case MCK_SReg_64:
8430   case MCK_SReg_64_XEXEC:
    // Null is defined as a 32-bit register, but it should also be accepted
    // for 64-bit operands. The following code enables it for SReg_64 operands
    // used as source and destination; remaining source operands are handled
    // in isInlinableImm.
8436     return Operand.isNull() ? Match_Success : Match_InvalidOperand;
8437   default:
8438     return Match_InvalidOperand;
8439   }
8440 }
8441 
8442 //===----------------------------------------------------------------------===//
8443 // endpgm
8444 //===----------------------------------------------------------------------===//
8445 
8446 OperandMatchResultTy AMDGPUAsmParser::parseEndpgmOp(OperandVector &Operands) {
8447   SMLoc S = getLoc();
8448   int64_t Imm = 0;
8449 
8450   if (!parseExpr(Imm)) {
    // The operand is optional; if not present, default to 0.
8452     Imm = 0;
8453   }
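  // Illustrative usage: both "s_endpgm" and "s_endpgm 1" are accepted; the
  // immediate defaults to 0 when omitted.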
8454 
8455   if (!isUInt<16>(Imm)) {
8456     Error(S, "expected a 16-bit value");
8457     return MatchOperand_ParseFail;
8458   }
8459 
8460   Operands.push_back(
8461       AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyEndpgm));
8462   return MatchOperand_Success;
8463 }
8464 
8465 bool AMDGPUOperand::isEndpgm() const { return isImmTy(ImmTyEndpgm); }
8466