1 //===- AMDGPUAsmParser.cpp - Parse SI asm to MCInst instructions ----------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "AMDKernelCodeT.h"
10 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
11 #include "MCTargetDesc/AMDGPUTargetStreamer.h"
12 #include "SIDefines.h"
13 #include "SIInstrInfo.h"
14 #include "SIRegisterInfo.h"
15 #include "TargetInfo/AMDGPUTargetInfo.h"
16 #include "Utils/AMDGPUAsmUtils.h"
17 #include "Utils/AMDGPUBaseInfo.h"
18 #include "Utils/AMDKernelCodeTUtils.h"
19 #include "llvm/ADT/APFloat.h"
20 #include "llvm/ADT/SmallBitVector.h"
21 #include "llvm/ADT/StringSet.h"
22 #include "llvm/ADT/Twine.h"
23 #include "llvm/BinaryFormat/ELF.h"
24 #include "llvm/MC/MCAsmInfo.h"
25 #include "llvm/MC/MCContext.h"
26 #include "llvm/MC/MCExpr.h"
27 #include "llvm/MC/MCInst.h"
28 #include "llvm/MC/MCParser/MCAsmParser.h"
29 #include "llvm/MC/MCParser/MCParsedAsmOperand.h"
30 #include "llvm/MC/MCParser/MCTargetAsmParser.h"
31 #include "llvm/MC/MCSymbol.h"
32 #include "llvm/MC/TargetRegistry.h"
33 #include "llvm/Support/AMDGPUMetadata.h"
34 #include "llvm/Support/AMDHSAKernelDescriptor.h"
35 #include "llvm/Support/Casting.h"
36 #include "llvm/Support/MachineValueType.h"
37 #include "llvm/Support/TargetParser.h"
38 
39 using namespace llvm;
40 using namespace llvm::AMDGPU;
41 using namespace llvm::amdhsa;
42 
43 namespace {
44 
45 class AMDGPUAsmParser;
46 
47 enum RegisterKind { IS_UNKNOWN, IS_VGPR, IS_SGPR, IS_AGPR, IS_TTMP, IS_SPECIAL };
48 
49 //===----------------------------------------------------------------------===//
50 // Operand
51 //===----------------------------------------------------------------------===//
52 
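// An AMDGPU-specific parsed assembly operand. Each operand is one of four
// kinds (token, immediate, register, or expression) and, for immediates and
// registers, may carry source modifiers (abs/neg/sext).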
53 class AMDGPUOperand : public MCParsedAsmOperand {
54   enum KindTy {
55     Token,
56     Immediate,
57     Register,
58     Expression
59   } Kind;
60 
61   SMLoc StartLoc, EndLoc;
62   const AMDGPUAsmParser *AsmParser;
63 
64 public:
65   AMDGPUOperand(KindTy Kind_, const AMDGPUAsmParser *AsmParser_)
66       : Kind(Kind_), AsmParser(AsmParser_) {}
67 
68   using Ptr = std::unique_ptr<AMDGPUOperand>;
69 
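  // Source operand modifiers. FP modifiers (abs/neg) and the integer modifier
  // (sext) are mutually exclusive on a given operand.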
70   struct Modifiers {
71     bool Abs = false;
72     bool Neg = false;
73     bool Sext = false;
74 
75     bool hasFPModifiers() const { return Abs || Neg; }
76     bool hasIntModifiers() const { return Sext; }
77     bool hasModifiers() const { return hasFPModifiers() || hasIntModifiers(); }
78 
79     int64_t getFPModifiersOperand() const {
80       int64_t Operand = 0;
81       Operand |= Abs ? SISrcMods::ABS : 0u;
82       Operand |= Neg ? SISrcMods::NEG : 0u;
83       return Operand;
84     }
85 
86     int64_t getIntModifiersOperand() const {
87       int64_t Operand = 0;
88       Operand |= Sext ? SISrcMods::SEXT : 0u;
89       return Operand;
90     }
91 
92     int64_t getModifiersOperand() const {
93       assert(!(hasFPModifiers() && hasIntModifiers())
94            && "fp and int modifiers should not be used simultaneously");
95       if (hasFPModifiers()) {
96         return getFPModifiersOperand();
97       } else if (hasIntModifiers()) {
98         return getIntModifiersOperand();
99       } else {
100         return 0;
101       }
102     }
103 
104     friend raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods);
105   };
106 
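  // Identifies what an immediate operand encodes, e.g. a named instruction
  // modifier such as 'gds' or 'offset', as opposed to a plain value
  // (ImmTyNone).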
107   enum ImmTy {
108     ImmTyNone,
109     ImmTyGDS,
110     ImmTyLDS,
111     ImmTyOffen,
112     ImmTyIdxen,
113     ImmTyAddr64,
114     ImmTyOffset,
115     ImmTyInstOffset,
116     ImmTyOffset0,
117     ImmTyOffset1,
118     ImmTyCPol,
119     ImmTySWZ,
120     ImmTyTFE,
121     ImmTyD16,
122     ImmTyClampSI,
123     ImmTyOModSI,
124     ImmTyDPP8,
125     ImmTyDppCtrl,
126     ImmTyDppRowMask,
127     ImmTyDppBankMask,
128     ImmTyDppBoundCtrl,
129     ImmTyDppFi,
130     ImmTySdwaDstSel,
131     ImmTySdwaSrc0Sel,
132     ImmTySdwaSrc1Sel,
133     ImmTySdwaDstUnused,
134     ImmTyDMask,
135     ImmTyDim,
136     ImmTyUNorm,
137     ImmTyDA,
138     ImmTyR128A16,
139     ImmTyA16,
140     ImmTyLWE,
141     ImmTyExpTgt,
142     ImmTyExpCompr,
143     ImmTyExpVM,
144     ImmTyFORMAT,
145     ImmTyHwreg,
146     ImmTyOff,
147     ImmTySendMsg,
148     ImmTyInterpSlot,
149     ImmTyInterpAttr,
150     ImmTyAttrChan,
151     ImmTyOpSel,
152     ImmTyOpSelHi,
153     ImmTyNegLo,
154     ImmTyNegHi,
155     ImmTySwizzle,
156     ImmTyGprIdxMode,
157     ImmTyHigh,
158     ImmTyBLGP,
159     ImmTyCBSZ,
160     ImmTyABID,
161     ImmTyEndpgm,
162   };
163 
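  // Tracks how a parsed immediate ended up being encoded: as an inline
  // constant (ImmKindTyConst) or as a literal (ImmKindTyLiteral).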
164   enum ImmKindTy {
165     ImmKindTyNone,
166     ImmKindTyLiteral,
167     ImmKindTyConst,
168   };
169 
170 private:
171   struct TokOp {
172     const char *Data;
173     unsigned Length;
174   };
175 
176   struct ImmOp {
177     int64_t Val;
178     ImmTy Type;
179     bool IsFPImm;
180     mutable ImmKindTy Kind;
181     Modifiers Mods;
182   };
183 
184   struct RegOp {
185     unsigned RegNo;
186     Modifiers Mods;
187   };
188 
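  // Payload; the active member is selected by Kind.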
189   union {
190     TokOp Tok;
191     ImmOp Imm;
192     RegOp Reg;
193     const MCExpr *Expr;
194   };
195 
196 public:
197   bool isToken() const override {
198     if (Kind == Token)
199       return true;
200 
201     // When parsing operands, we can't always tell if something was meant to be
202     // a token, like 'gds', or an expression that references a global variable.
203     // In this case, we assume the string is an expression, and if we need to
204     // interpret it as a token, then we treat the symbol name as the token.
205     return isSymbolRefExpr();
206   }
207 
208   bool isSymbolRefExpr() const {
209     return isExpr() && Expr && isa<MCSymbolRefExpr>(Expr);
210   }
211 
212   bool isImm() const override {
213     return Kind == Immediate;
214   }
215 
216   void setImmKindNone() const {
217     assert(isImm());
218     Imm.Kind = ImmKindTyNone;
219   }
220 
221   void setImmKindLiteral() const {
222     assert(isImm());
223     Imm.Kind = ImmKindTyLiteral;
224   }
225 
226   void setImmKindConst() const {
227     assert(isImm());
228     Imm.Kind = ImmKindTyConst;
229   }
230 
231   bool IsImmKindLiteral() const {
232     return isImm() && Imm.Kind == ImmKindTyLiteral;
233   }
234 
235   bool isImmKindConst() const {
236     return isImm() && Imm.Kind == ImmKindTyConst;
237   }
238 
239   bool isInlinableImm(MVT type) const;
240   bool isLiteralImm(MVT type) const;
241 
242   bool isRegKind() const {
243     return Kind == Register;
244   }
245 
246   bool isReg() const override {
247     return isRegKind() && !hasModifiers();
248   }
249 
250   bool isRegOrInline(unsigned RCID, MVT type) const {
251     return isRegClass(RCID) || isInlinableImm(type);
252   }
253 
254   bool isRegOrImmWithInputMods(unsigned RCID, MVT type) const {
255     return isRegOrInline(RCID, type) || isLiteralImm(type);
256   }
257 
258   bool isRegOrImmWithInt16InputMods() const {
259     return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i16);
260   }
261 
262   bool isRegOrImmWithInt32InputMods() const {
263     return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i32);
264   }
265 
266   bool isRegOrImmWithInt64InputMods() const {
267     return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::i64);
268   }
269 
270   bool isRegOrImmWithFP16InputMods() const {
271     return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f16);
272   }
273 
274   bool isRegOrImmWithFP32InputMods() const {
275     return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f32);
276   }
277 
278   bool isRegOrImmWithFP64InputMods() const {
279     return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::f64);
280   }
281 
282   bool isVReg() const {
283     return isRegClass(AMDGPU::VGPR_32RegClassID) ||
284            isRegClass(AMDGPU::VReg_64RegClassID) ||
285            isRegClass(AMDGPU::VReg_96RegClassID) ||
286            isRegClass(AMDGPU::VReg_128RegClassID) ||
287            isRegClass(AMDGPU::VReg_160RegClassID) ||
288            isRegClass(AMDGPU::VReg_192RegClassID) ||
289            isRegClass(AMDGPU::VReg_256RegClassID) ||
290            isRegClass(AMDGPU::VReg_512RegClassID) ||
291            isRegClass(AMDGPU::VReg_1024RegClassID);
292   }
293 
294   bool isVReg32() const {
295     return isRegClass(AMDGPU::VGPR_32RegClassID);
296   }
297 
298   bool isVReg32OrOff() const {
299     return isOff() || isVReg32();
300   }
301 
302   bool isNull() const {
303     return isRegKind() && getReg() == AMDGPU::SGPR_NULL;
304   }
305 
306   bool isVRegWithInputMods() const;
307 
308   bool isSDWAOperand(MVT type) const;
309   bool isSDWAFP16Operand() const;
310   bool isSDWAFP32Operand() const;
311   bool isSDWAInt16Operand() const;
312   bool isSDWAInt32Operand() const;
313 
314   bool isImmTy(ImmTy ImmT) const {
315     return isImm() && Imm.Type == ImmT;
316   }
317 
318   bool isImmModifier() const {
319     return isImm() && Imm.Type != ImmTyNone;
320   }
321 
322   bool isClampSI() const { return isImmTy(ImmTyClampSI); }
323   bool isOModSI() const { return isImmTy(ImmTyOModSI); }
324   bool isDMask() const { return isImmTy(ImmTyDMask); }
325   bool isDim() const { return isImmTy(ImmTyDim); }
326   bool isUNorm() const { return isImmTy(ImmTyUNorm); }
327   bool isDA() const { return isImmTy(ImmTyDA); }
328   bool isR128A16() const { return isImmTy(ImmTyR128A16); }
329   bool isGFX10A16() const { return isImmTy(ImmTyA16); }
330   bool isLWE() const { return isImmTy(ImmTyLWE); }
331   bool isOff() const { return isImmTy(ImmTyOff); }
332   bool isExpTgt() const { return isImmTy(ImmTyExpTgt); }
333   bool isExpVM() const { return isImmTy(ImmTyExpVM); }
334   bool isExpCompr() const { return isImmTy(ImmTyExpCompr); }
335   bool isOffen() const { return isImmTy(ImmTyOffen); }
336   bool isIdxen() const { return isImmTy(ImmTyIdxen); }
337   bool isAddr64() const { return isImmTy(ImmTyAddr64); }
338   bool isOffset() const { return isImmTy(ImmTyOffset) && isUInt<16>(getImm()); }
339   bool isOffset0() const { return isImmTy(ImmTyOffset0) && isUInt<8>(getImm()); }
340   bool isOffset1() const { return isImmTy(ImmTyOffset1) && isUInt<8>(getImm()); }
341 
342   bool isFlatOffset() const { return isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset); }
343   bool isGDS() const { return isImmTy(ImmTyGDS); }
344   bool isLDS() const { return isImmTy(ImmTyLDS); }
345   bool isCPol() const { return isImmTy(ImmTyCPol); }
346   bool isSWZ() const { return isImmTy(ImmTySWZ); }
347   bool isTFE() const { return isImmTy(ImmTyTFE); }
348   bool isD16() const { return isImmTy(ImmTyD16); }
349   bool isFORMAT() const { return isImmTy(ImmTyFORMAT) && isUInt<7>(getImm()); }
350   bool isBankMask() const { return isImmTy(ImmTyDppBankMask); }
351   bool isRowMask() const { return isImmTy(ImmTyDppRowMask); }
352   bool isBoundCtrl() const { return isImmTy(ImmTyDppBoundCtrl); }
353   bool isFI() const { return isImmTy(ImmTyDppFi); }
354   bool isSDWADstSel() const { return isImmTy(ImmTySdwaDstSel); }
355   bool isSDWASrc0Sel() const { return isImmTy(ImmTySdwaSrc0Sel); }
356   bool isSDWASrc1Sel() const { return isImmTy(ImmTySdwaSrc1Sel); }
357   bool isSDWADstUnused() const { return isImmTy(ImmTySdwaDstUnused); }
358   bool isInterpSlot() const { return isImmTy(ImmTyInterpSlot); }
359   bool isInterpAttr() const { return isImmTy(ImmTyInterpAttr); }
360   bool isAttrChan() const { return isImmTy(ImmTyAttrChan); }
361   bool isOpSel() const { return isImmTy(ImmTyOpSel); }
362   bool isOpSelHi() const { return isImmTy(ImmTyOpSelHi); }
363   bool isNegLo() const { return isImmTy(ImmTyNegLo); }
364   bool isNegHi() const { return isImmTy(ImmTyNegHi); }
365   bool isHigh() const { return isImmTy(ImmTyHigh); }
366 
367   bool isMod() const {
368     return isClampSI() || isOModSI();
369   }
370 
371   bool isRegOrImm() const {
372     return isReg() || isImm();
373   }
374 
375   bool isRegClass(unsigned RCID) const;
376 
377   bool isInlineValue() const;
378 
379   bool isRegOrInlineNoMods(unsigned RCID, MVT type) const {
380     return isRegOrInline(RCID, type) && !hasModifiers();
381   }
382 
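  // The predicates below mirror the TableGen source-operand classes:
  // SCSrc* / VCSrc* / VISrc* / AISrc* accept a scalar / vector-or-scalar /
  // vector / accumulator register or an inline constant, while the
  // corresponding SSrc* / VSrc* forms additionally accept a literal.
  // The suffix gives the expected width and type.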
383   bool isSCSrcB16() const {
384     return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i16);
385   }
386 
387   bool isSCSrcV2B16() const {
388     return isSCSrcB16();
389   }
390 
391   bool isSCSrcB32() const {
392     return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i32);
393   }
394 
395   bool isSCSrcB64() const {
396     return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::i64);
397   }
398 
399   bool isBoolReg() const;
400 
401   bool isSCSrcF16() const {
402     return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f16);
403   }
404 
405   bool isSCSrcV2F16() const {
406     return isSCSrcF16();
407   }
408 
409   bool isSCSrcF32() const {
410     return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f32);
411   }
412 
413   bool isSCSrcF64() const {
414     return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::f64);
415   }
416 
417   bool isSSrcB32() const {
418     return isSCSrcB32() || isLiteralImm(MVT::i32) || isExpr();
419   }
420 
421   bool isSSrcB16() const {
422     return isSCSrcB16() || isLiteralImm(MVT::i16);
423   }
424 
425   bool isSSrcV2B16() const {
426     llvm_unreachable("cannot happen");
427     return isSSrcB16();
428   }
429 
430   bool isSSrcB64() const {
431     // TODO: Find out how SALU supports extension of 32-bit literals to 64 bits.
432     // See isVSrc64().
433     return isSCSrcB64() || isLiteralImm(MVT::i64);
434   }
435 
436   bool isSSrcF32() const {
437     return isSCSrcB32() || isLiteralImm(MVT::f32) || isExpr();
438   }
439 
440   bool isSSrcF64() const {
441     return isSCSrcB64() || isLiteralImm(MVT::f64);
442   }
443 
444   bool isSSrcF16() const {
445     return isSCSrcB16() || isLiteralImm(MVT::f16);
446   }
447 
448   bool isSSrcV2F16() const {
449     llvm_unreachable("cannot happen");
450     return isSSrcF16();
451   }
452 
453   bool isSSrcV2FP32() const {
454     llvm_unreachable("cannot happen");
455     return isSSrcF32();
456   }
457 
458   bool isSCSrcV2FP32() const {
459     llvm_unreachable("cannot happen");
460     return isSCSrcF32();
461   }
462 
463   bool isSSrcV2INT32() const {
464     llvm_unreachable("cannot happen");
465     return isSSrcB32();
466   }
467 
468   bool isSCSrcV2INT32() const {
469     llvm_unreachable("cannot happen");
470     return isSCSrcB32();
471   }
472 
473   bool isSSrcOrLdsB32() const {
474     return isRegOrInlineNoMods(AMDGPU::SRegOrLds_32RegClassID, MVT::i32) ||
475            isLiteralImm(MVT::i32) || isExpr();
476   }
477 
478   bool isVCSrcB32() const {
479     return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i32);
480   }
481 
482   bool isVCSrcB64() const {
483     return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::i64);
484   }
485 
486   bool isVCSrcB16() const {
487     return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i16);
488   }
489 
490   bool isVCSrcV2B16() const {
491     return isVCSrcB16();
492   }
493 
494   bool isVCSrcF32() const {
495     return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f32);
496   }
497 
498   bool isVCSrcF64() const {
499     return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::f64);
500   }
501 
502   bool isVCSrcF16() const {
503     return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f16);
504   }
505 
506   bool isVCSrcV2F16() const {
507     return isVCSrcF16();
508   }
509 
510   bool isVSrcB32() const {
511     return isVCSrcF32() || isLiteralImm(MVT::i32) || isExpr();
512   }
513 
514   bool isVSrcB64() const {
515     return isVCSrcF64() || isLiteralImm(MVT::i64);
516   }
517 
518   bool isVSrcB16() const {
519     return isVCSrcB16() || isLiteralImm(MVT::i16);
520   }
521 
522   bool isVSrcV2B16() const {
523     return isVSrcB16() || isLiteralImm(MVT::v2i16);
524   }
525 
526   bool isVCSrcV2FP32() const {
527     return isVCSrcF64();
528   }
529 
530   bool isVSrcV2FP32() const {
531     return isVSrcF64() || isLiteralImm(MVT::v2f32);
532   }
533 
534   bool isVCSrcV2INT32() const {
535     return isVCSrcB64();
536   }
537 
538   bool isVSrcV2INT32() const {
539     return isVSrcB64() || isLiteralImm(MVT::v2i32);
540   }
541 
542   bool isVSrcF32() const {
543     return isVCSrcF32() || isLiteralImm(MVT::f32) || isExpr();
544   }
545 
546   bool isVSrcF64() const {
547     return isVCSrcF64() || isLiteralImm(MVT::f64);
548   }
549 
550   bool isVSrcF16() const {
551     return isVCSrcF16() || isLiteralImm(MVT::f16);
552   }
553 
554   bool isVSrcV2F16() const {
555     return isVSrcF16() || isLiteralImm(MVT::v2f16);
556   }
557 
558   bool isVISrcB32() const {
559     return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i32);
560   }
561 
562   bool isVISrcB16() const {
563     return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i16);
564   }
565 
566   bool isVISrcV2B16() const {
567     return isVISrcB16();
568   }
569 
570   bool isVISrcF32() const {
571     return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f32);
572   }
573 
574   bool isVISrcF16() const {
575     return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f16);
576   }
577 
578   bool isVISrcV2F16() const {
579     return isVISrcF16() || isVISrcB32();
580   }
581 
582   bool isVISrc_64B64() const {
583     return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i64);
584   }
585 
586   bool isVISrc_64F64() const {
587     return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f64);
588   }
589 
590   bool isVISrc_64V2FP32() const {
591     return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f32);
592   }
593 
594   bool isVISrc_64V2INT32() const {
595     return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i32);
596   }
597 
598   bool isVISrc_256B64() const {
599     return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i64);
600   }
601 
602   bool isVISrc_256F64() const {
603     return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f64);
604   }
605 
606   bool isVISrc_128B16() const {
607     return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i16);
608   }
609 
610   bool isVISrc_128V2B16() const {
611     return isVISrc_128B16();
612   }
613 
614   bool isVISrc_128B32() const {
615     return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i32);
616   }
617 
618   bool isVISrc_128F32() const {
619     return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f32);
620   }
621 
622   bool isVISrc_256V2FP32() const {
623     return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f32);
624   }
625 
626   bool isVISrc_256V2INT32() const {
627     return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i32);
628   }
629 
630   bool isVISrc_512B32() const {
631     return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i32);
632   }
633 
634   bool isVISrc_512B16() const {
635     return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i16);
636   }
637 
638   bool isVISrc_512V2B16() const {
639     return isVISrc_512B16();
640   }
641 
642   bool isVISrc_512F32() const {
643     return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f32);
644   }
645 
646   bool isVISrc_512F16() const {
647     return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f16);
648   }
649 
650   bool isVISrc_512V2F16() const {
651     return isVISrc_512F16() || isVISrc_512B32();
652   }
653 
654   bool isVISrc_1024B32() const {
655     return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i32);
656   }
657 
658   bool isVISrc_1024B16() const {
659     return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i16);
660   }
661 
662   bool isVISrc_1024V2B16() const {
663     return isVISrc_1024B16();
664   }
665 
666   bool isVISrc_1024F32() const {
667     return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f32);
668   }
669 
670   bool isVISrc_1024F16() const {
671     return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f16);
672   }
673 
674   bool isVISrc_1024V2F16() const {
675     return isVISrc_1024F16() || isVISrc_1024B32();
676   }
677 
678   bool isAISrcB32() const {
679     return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i32);
680   }
681 
682   bool isAISrcB16() const {
683     return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i16);
684   }
685 
686   bool isAISrcV2B16() const {
687     return isAISrcB16();
688   }
689 
690   bool isAISrcF32() const {
691     return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f32);
692   }
693 
694   bool isAISrcF16() const {
695     return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f16);
696   }
697 
698   bool isAISrcV2F16() const {
699     return isAISrcF16() || isAISrcB32();
700   }
701 
702   bool isAISrc_64B64() const {
703     return isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::i64);
704   }
705 
706   bool isAISrc_64F64() const {
707     return isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::f64);
708   }
709 
710   bool isAISrc_128B32() const {
711     return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i32);
712   }
713 
714   bool isAISrc_128B16() const {
715     return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i16);
716   }
717 
718   bool isAISrc_128V2B16() const {
719     return isAISrc_128B16();
720   }
721 
722   bool isAISrc_128F32() const {
723     return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f32);
724   }
725 
726   bool isAISrc_128F16() const {
727     return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f16);
728   }
729 
730   bool isAISrc_128V2F16() const {
731     return isAISrc_128F16() || isAISrc_128B32();
732   }
733 
734   bool isVISrc_128F16() const {
735     return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f16);
736   }
737 
738   bool isVISrc_128V2F16() const {
739     return isVISrc_128F16() || isVISrc_128B32();
740   }
741 
742   bool isAISrc_256B64() const {
743     return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::i64);
744   }
745 
746   bool isAISrc_256F64() const {
747     return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::f64);
748   }
749 
750   bool isAISrc_512B32() const {
751     return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i32);
752   }
753 
754   bool isAISrc_512B16() const {
755     return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i16);
756   }
757 
758   bool isAISrc_512V2B16() const {
759     return isAISrc_512B16();
760   }
761 
762   bool isAISrc_512F32() const {
763     return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f32);
764   }
765 
766   bool isAISrc_512F16() const {
767     return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f16);
768   }
769 
770   bool isAISrc_512V2F16() const {
771     return isAISrc_512F16() || isAISrc_512B32();
772   }
773 
774   bool isAISrc_1024B32() const {
775     return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i32);
776   }
777 
778   bool isAISrc_1024B16() const {
779     return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i16);
780   }
781 
782   bool isAISrc_1024V2B16() const {
783     return isAISrc_1024B16();
784   }
785 
786   bool isAISrc_1024F32() const {
787     return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f32);
788   }
789 
790   bool isAISrc_1024F16() const {
791     return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f16);
792   }
793 
794   bool isAISrc_1024V2F16() const {
795     return isAISrc_1024F16() || isAISrc_1024B32();
796   }
797 
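  // KImm operands are immediates that are always encoded as a literal
  // constant (e.g. the fixed constant of v_madmk/v_fmamk), never as an
  // inline constant.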
798   bool isKImmFP32() const {
799     return isLiteralImm(MVT::f32);
800   }
801 
802   bool isKImmFP16() const {
803     return isLiteralImm(MVT::f16);
804   }
805 
806   bool isMem() const override {
807     return false;
808   }
809 
810   bool isExpr() const {
811     return Kind == Expression;
812   }
813 
814   bool isSoppBrTarget() const {
815     return isExpr() || isImm();
816   }
817 
818   bool isSWaitCnt() const;
819   bool isHwreg() const;
820   bool isSendMsg() const;
821   bool isSwizzle() const;
822   bool isSMRDOffset8() const;
823   bool isSMEMOffset() const;
824   bool isSMRDLiteralOffset() const;
825   bool isDPP8() const;
826   bool isDPPCtrl() const;
827   bool isBLGP() const;
828   bool isCBSZ() const;
829   bool isABID() const;
830   bool isGPRIdxMode() const;
831   bool isS16Imm() const;
832   bool isU16Imm() const;
833   bool isEndpgm() const;
834 
835   StringRef getExpressionAsToken() const {
836     assert(isExpr());
837     const MCSymbolRefExpr *S = cast<MCSymbolRefExpr>(Expr);
838     return S->getSymbol().getName();
839   }
840 
841   StringRef getToken() const {
842     assert(isToken());
843 
844     if (Kind == Expression)
845       return getExpressionAsToken();
846 
847     return StringRef(Tok.Data, Tok.Length);
848   }
849 
850   int64_t getImm() const {
851     assert(isImm());
852     return Imm.Val;
853   }
854 
855   void setImm(int64_t Val) {
856     assert(isImm());
857     Imm.Val = Val;
858   }
859 
860   ImmTy getImmTy() const {
861     assert(isImm());
862     return Imm.Type;
863   }
864 
865   unsigned getReg() const override {
866     assert(isRegKind());
867     return Reg.RegNo;
868   }
869 
870   SMLoc getStartLoc() const override {
871     return StartLoc;
872   }
873 
874   SMLoc getEndLoc() const override {
875     return EndLoc;
876   }
877 
878   SMRange getLocRange() const {
879     return SMRange(StartLoc, EndLoc);
880   }
881 
882   Modifiers getModifiers() const {
883     assert(isRegKind() || isImmTy(ImmTyNone));
884     return isRegKind() ? Reg.Mods : Imm.Mods;
885   }
886 
887   void setModifiers(Modifiers Mods) {
888     assert(isRegKind() || isImmTy(ImmTyNone));
889     if (isRegKind())
890       Reg.Mods = Mods;
891     else
892       Imm.Mods = Mods;
893   }
894 
895   bool hasModifiers() const {
896     return getModifiers().hasModifiers();
897   }
898 
899   bool hasFPModifiers() const {
900     return getModifiers().hasFPModifiers();
901   }
902 
903   bool hasIntModifiers() const {
904     return getModifiers().hasIntModifiers();
905   }
906 
907   uint64_t applyInputFPModifiers(uint64_t Val, unsigned Size) const;
908 
909   void addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers = true) const;
910 
911   void addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const;
912 
913   template <unsigned Bitwidth>
914   void addKImmFPOperands(MCInst &Inst, unsigned N) const;
915 
916   void addKImmFP16Operands(MCInst &Inst, unsigned N) const {
917     addKImmFPOperands<16>(Inst, N);
918   }
919 
920   void addKImmFP32Operands(MCInst &Inst, unsigned N) const {
921     addKImmFPOperands<32>(Inst, N);
922   }
923 
924   void addRegOperands(MCInst &Inst, unsigned N) const;
925 
926   void addBoolRegOperands(MCInst &Inst, unsigned N) const {
927     addRegOperands(Inst, N);
928   }
929 
930   void addRegOrImmOperands(MCInst &Inst, unsigned N) const {
931     if (isRegKind())
932       addRegOperands(Inst, N);
933     else if (isExpr())
934       Inst.addOperand(MCOperand::createExpr(Expr));
935     else
936       addImmOperands(Inst, N);
937   }
938 
939   void addRegOrImmWithInputModsOperands(MCInst &Inst, unsigned N) const {
940     Modifiers Mods = getModifiers();
941     Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
942     if (isRegKind()) {
943       addRegOperands(Inst, N);
944     } else {
945       addImmOperands(Inst, N, false);
946     }
947   }
948 
949   void addRegOrImmWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
950     assert(!hasIntModifiers());
951     addRegOrImmWithInputModsOperands(Inst, N);
952   }
953 
954   void addRegOrImmWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
955     assert(!hasFPModifiers());
956     addRegOrImmWithInputModsOperands(Inst, N);
957   }
958 
959   void addRegWithInputModsOperands(MCInst &Inst, unsigned N) const {
960     Modifiers Mods = getModifiers();
961     Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
962     assert(isRegKind());
963     addRegOperands(Inst, N);
964   }
965 
966   void addRegWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
967     assert(!hasIntModifiers());
968     addRegWithInputModsOperands(Inst, N);
969   }
970 
971   void addRegWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
972     assert(!hasFPModifiers());
973     addRegWithInputModsOperands(Inst, N);
974   }
975 
976   void addSoppBrTargetOperands(MCInst &Inst, unsigned N) const {
977     if (isImm())
978       addImmOperands(Inst, N);
979     else {
980       assert(isExpr());
981       Inst.addOperand(MCOperand::createExpr(Expr));
982     }
983   }
984 
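  // Debug printing support.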
985   static void printImmTy(raw_ostream& OS, ImmTy Type) {
986     switch (Type) {
987     case ImmTyNone: OS << "None"; break;
988     case ImmTyGDS: OS << "GDS"; break;
989     case ImmTyLDS: OS << "LDS"; break;
990     case ImmTyOffen: OS << "Offen"; break;
991     case ImmTyIdxen: OS << "Idxen"; break;
992     case ImmTyAddr64: OS << "Addr64"; break;
993     case ImmTyOffset: OS << "Offset"; break;
994     case ImmTyInstOffset: OS << "InstOffset"; break;
995     case ImmTyOffset0: OS << "Offset0"; break;
996     case ImmTyOffset1: OS << "Offset1"; break;
997     case ImmTyCPol: OS << "CPol"; break;
998     case ImmTySWZ: OS << "SWZ"; break;
999     case ImmTyTFE: OS << "TFE"; break;
1000     case ImmTyD16: OS << "D16"; break;
1001     case ImmTyFORMAT: OS << "FORMAT"; break;
1002     case ImmTyClampSI: OS << "ClampSI"; break;
1003     case ImmTyOModSI: OS << "OModSI"; break;
1004     case ImmTyDPP8: OS << "DPP8"; break;
1005     case ImmTyDppCtrl: OS << "DppCtrl"; break;
1006     case ImmTyDppRowMask: OS << "DppRowMask"; break;
1007     case ImmTyDppBankMask: OS << "DppBankMask"; break;
1008     case ImmTyDppBoundCtrl: OS << "DppBoundCtrl"; break;
1009     case ImmTyDppFi: OS << "FI"; break;
1010     case ImmTySdwaDstSel: OS << "SdwaDstSel"; break;
1011     case ImmTySdwaSrc0Sel: OS << "SdwaSrc0Sel"; break;
1012     case ImmTySdwaSrc1Sel: OS << "SdwaSrc1Sel"; break;
1013     case ImmTySdwaDstUnused: OS << "SdwaDstUnused"; break;
1014     case ImmTyDMask: OS << "DMask"; break;
1015     case ImmTyDim: OS << "Dim"; break;
1016     case ImmTyUNorm: OS << "UNorm"; break;
1017     case ImmTyDA: OS << "DA"; break;
1018     case ImmTyR128A16: OS << "R128A16"; break;
1019     case ImmTyA16: OS << "A16"; break;
1020     case ImmTyLWE: OS << "LWE"; break;
1021     case ImmTyOff: OS << "Off"; break;
1022     case ImmTyExpTgt: OS << "ExpTgt"; break;
1023     case ImmTyExpCompr: OS << "ExpCompr"; break;
1024     case ImmTyExpVM: OS << "ExpVM"; break;
1025     case ImmTyHwreg: OS << "Hwreg"; break;
1026     case ImmTySendMsg: OS << "SendMsg"; break;
1027     case ImmTyInterpSlot: OS << "InterpSlot"; break;
1028     case ImmTyInterpAttr: OS << "InterpAttr"; break;
1029     case ImmTyAttrChan: OS << "AttrChan"; break;
1030     case ImmTyOpSel: OS << "OpSel"; break;
1031     case ImmTyOpSelHi: OS << "OpSelHi"; break;
1032     case ImmTyNegLo: OS << "NegLo"; break;
1033     case ImmTyNegHi: OS << "NegHi"; break;
1034     case ImmTySwizzle: OS << "Swizzle"; break;
1035     case ImmTyGprIdxMode: OS << "GprIdxMode"; break;
1036     case ImmTyHigh: OS << "High"; break;
1037     case ImmTyBLGP: OS << "BLGP"; break;
1038     case ImmTyCBSZ: OS << "CBSZ"; break;
1039     case ImmTyABID: OS << "ABID"; break;
1040     case ImmTyEndpgm: OS << "Endpgm"; break;
1041     }
1042   }
1043 
1044   void print(raw_ostream &OS) const override {
1045     switch (Kind) {
1046     case Register:
1047       OS << "<register " << getReg() << " mods: " << Reg.Mods << '>';
1048       break;
1049     case Immediate:
1050       OS << '<' << getImm();
1051       if (getImmTy() != ImmTyNone) {
1052         OS << " type: "; printImmTy(OS, getImmTy());
1053       }
1054       OS << " mods: " << Imm.Mods << '>';
1055       break;
1056     case Token:
1057       OS << '\'' << getToken() << '\'';
1058       break;
1059     case Expression:
1060       OS << "<expr " << *Expr << '>';
1061       break;
1062     }
1063   }
1064 
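  // Factory methods; each creates an operand of the corresponding kind and
  // records its source location for diagnostics.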
1065   static AMDGPUOperand::Ptr CreateImm(const AMDGPUAsmParser *AsmParser,
1066                                       int64_t Val, SMLoc Loc,
1067                                       ImmTy Type = ImmTyNone,
1068                                       bool IsFPImm = false) {
1069     auto Op = std::make_unique<AMDGPUOperand>(Immediate, AsmParser);
1070     Op->Imm.Val = Val;
1071     Op->Imm.IsFPImm = IsFPImm;
1072     Op->Imm.Kind = ImmKindTyNone;
1073     Op->Imm.Type = Type;
1074     Op->Imm.Mods = Modifiers();
1075     Op->StartLoc = Loc;
1076     Op->EndLoc = Loc;
1077     return Op;
1078   }
1079 
1080   static AMDGPUOperand::Ptr CreateToken(const AMDGPUAsmParser *AsmParser,
1081                                         StringRef Str, SMLoc Loc,
1082                                         bool HasExplicitEncodingSize = true) {
1083     auto Res = std::make_unique<AMDGPUOperand>(Token, AsmParser);
1084     Res->Tok.Data = Str.data();
1085     Res->Tok.Length = Str.size();
1086     Res->StartLoc = Loc;
1087     Res->EndLoc = Loc;
1088     return Res;
1089   }
1090 
1091   static AMDGPUOperand::Ptr CreateReg(const AMDGPUAsmParser *AsmParser,
1092                                       unsigned RegNo, SMLoc S,
1093                                       SMLoc E) {
1094     auto Op = std::make_unique<AMDGPUOperand>(Register, AsmParser);
1095     Op->Reg.RegNo = RegNo;
1096     Op->Reg.Mods = Modifiers();
1097     Op->StartLoc = S;
1098     Op->EndLoc = E;
1099     return Op;
1100   }
1101 
1102   static AMDGPUOperand::Ptr CreateExpr(const AMDGPUAsmParser *AsmParser,
1103                                        const class MCExpr *Expr, SMLoc S) {
1104     auto Op = std::make_unique<AMDGPUOperand>(Expression, AsmParser);
1105     Op->Expr = Expr;
1106     Op->StartLoc = S;
1107     Op->EndLoc = S;
1108     return Op;
1109   }
1110 };
1111 
1112 raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods) {
1113   OS << "abs:" << Mods.Abs << " neg:" << Mods.Neg << " sext:" << Mods.Sext;
1114   return OS;
1115 }
1116 
1117 //===----------------------------------------------------------------------===//
1118 // AsmParser
1119 //===----------------------------------------------------------------------===//
1120 
1121 // Holds info related to the current kernel, e.g. the count of SGPRs used.
1122 // A kernel scope begins at an .amdgpu_hsa_kernel directive and ends at the
1123 // next .amdgpu_hsa_kernel directive or at end of file.
1124 class KernelScopeInfo {
1125   int SgprIndexUnusedMin = -1;
1126   int VgprIndexUnusedMin = -1;
1127   MCContext *Ctx = nullptr;
1128 
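  // Record a use of SGPR #i: bump the first-unused index and publish it via
  // the .kernel.sgpr_count symbol.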
1129   void usesSgprAt(int i) {
1130     if (i >= SgprIndexUnusedMin) {
1131       SgprIndexUnusedMin = ++i;
1132       if (Ctx) {
1133         MCSymbol* const Sym =
1134           Ctx->getOrCreateSymbol(Twine(".kernel.sgpr_count"));
1135         Sym->setVariableValue(MCConstantExpr::create(SgprIndexUnusedMin, *Ctx));
1136       }
1137     }
1138   }
1139 
1140   void usesVgprAt(int i) {
1141     if (i >= VgprIndexUnusedMin) {
1142       VgprIndexUnusedMin = ++i;
1143       if (Ctx) {
1144         MCSymbol* const Sym =
1145           Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count"));
1146         Sym->setVariableValue(MCConstantExpr::create(VgprIndexUnusedMin, *Ctx));
1147       }
1148     }
1149   }
1150 
1151 public:
1152   KernelScopeInfo() = default;
1153 
1154   void initialize(MCContext &Context) {
1155     Ctx = &Context;
1156     usesSgprAt(SgprIndexUnusedMin = -1);
1157     usesVgprAt(VgprIndexUnusedMin = -1);
1158   }
1159 
1160   void usesRegister(RegisterKind RegKind, unsigned DwordRegIndex, unsigned RegWidth) {
1161     switch (RegKind) {
1162       case IS_SGPR: usesSgprAt(DwordRegIndex + RegWidth - 1); break;
1163       case IS_AGPR: // fall through
1164       case IS_VGPR: usesVgprAt(DwordRegIndex + RegWidth - 1); break;
1165       default: break;
1166     }
1167   }
1168 };
1169 
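// Target assembly parser for AMDGPU. In addition to instruction parsing and
// validation, it handles AMDGPU-specific directives such as the HSA kernel
// descriptor, amd_kernel_code_t, and metadata blocks.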
1170 class AMDGPUAsmParser : public MCTargetAsmParser {
1171   MCAsmParser &Parser;
1172 
1173   // Number of extra operands parsed after the first optional operand.
1174   // This may be necessary to skip hardcoded mandatory operands.
1175   static const unsigned MAX_OPR_LOOKAHEAD = 8;
1176 
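  // Encoding or variant forced by a mnemonic suffix (e.g. _e32/_e64, _dpp,
  // _sdwa), if any.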
1177   unsigned ForcedEncodingSize = 0;
1178   bool ForcedDPP = false;
1179   bool ForcedSDWA = false;
1180   KernelScopeInfo KernelScope;
1181   unsigned CPolSeen;
1182 
1183   /// @name Auto-generated Match Functions
1184   /// {
1185 
1186 #define GET_ASSEMBLER_HEADER
1187 #include "AMDGPUGenAsmMatcher.inc"
1188 
1189   /// }
1190 
1191 private:
1192   bool ParseAsAbsoluteExpression(uint32_t &Ret);
1193   bool OutOfRangeError(SMRange Range);
1194   /// Calculate VGPR/SGPR blocks required for given target, reserved
1195   /// registers, and user-specified NextFreeXGPR values.
1196   ///
1197   /// \param Features [in] Target features, used for bug corrections.
1198   /// \param VCCUsed [in] Whether VCC special SGPR is reserved.
1199   /// \param FlatScrUsed [in] Whether FLAT_SCRATCH special SGPR is reserved.
1200   /// \param XNACKUsed [in] Whether XNACK_MASK special SGPR is reserved.
1201   /// \param EnableWavefrontSize32 [in] Value of ENABLE_WAVEFRONT_SIZE32 kernel
1202   /// descriptor field, if valid.
1203   /// \param NextFreeVGPR [in] Max VGPR number referenced, plus one.
1204   /// \param VGPRRange [in] Token range, used for VGPR diagnostics.
1205   /// \param NextFreeSGPR [in] Max SGPR number referenced, plus one.
1206   /// \param SGPRRange [in] Token range, used for SGPR diagnostics.
1207   /// \param VGPRBlocks [out] Result VGPR block count.
1208   /// \param SGPRBlocks [out] Result SGPR block count.
1209   bool calculateGPRBlocks(const FeatureBitset &Features, bool VCCUsed,
1210                           bool FlatScrUsed, bool XNACKUsed,
1211                           Optional<bool> EnableWavefrontSize32, unsigned NextFreeVGPR,
1212                           SMRange VGPRRange, unsigned NextFreeSGPR,
1213                           SMRange SGPRRange, unsigned &VGPRBlocks,
1214                           unsigned &SGPRBlocks);
1215   bool ParseDirectiveAMDGCNTarget();
1216   bool ParseDirectiveAMDHSAKernel();
1217   bool ParseDirectiveMajorMinor(uint32_t &Major, uint32_t &Minor);
1218   bool ParseDirectiveHSACodeObjectVersion();
1219   bool ParseDirectiveHSACodeObjectISA();
1220   bool ParseAMDKernelCodeTValue(StringRef ID, amd_kernel_code_t &Header);
1221   bool ParseDirectiveAMDKernelCodeT();
1222   // TODO: Possibly make subtargetHasRegister const.
1223   bool subtargetHasRegister(const MCRegisterInfo &MRI, unsigned RegNo);
1224   bool ParseDirectiveAMDGPUHsaKernel();
1225 
1226   bool ParseDirectiveISAVersion();
1227   bool ParseDirectiveHSAMetadata();
1228   bool ParseDirectivePALMetadataBegin();
1229   bool ParseDirectivePALMetadata();
1230   bool ParseDirectiveAMDGPULDS();
1231 
1232   /// Common code to parse out a block of text (typically YAML) between start and
1233   /// end directives.
1234   bool ParseToEndDirective(const char *AssemblerDirectiveBegin,
1235                            const char *AssemblerDirectiveEnd,
1236                            std::string &CollectString);
1237 
1238   bool AddNextRegisterToList(unsigned& Reg, unsigned& RegWidth,
1239                              RegisterKind RegKind, unsigned Reg1, SMLoc Loc);
1240   bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
1241                            unsigned &RegNum, unsigned &RegWidth,
1242                            bool RestoreOnFailure = false);
1243   bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
1244                            unsigned &RegNum, unsigned &RegWidth,
1245                            SmallVectorImpl<AsmToken> &Tokens);
1246   unsigned ParseRegularReg(RegisterKind &RegKind, unsigned &RegNum,
1247                            unsigned &RegWidth,
1248                            SmallVectorImpl<AsmToken> &Tokens);
1249   unsigned ParseSpecialReg(RegisterKind &RegKind, unsigned &RegNum,
1250                            unsigned &RegWidth,
1251                            SmallVectorImpl<AsmToken> &Tokens);
1252   unsigned ParseRegList(RegisterKind &RegKind, unsigned &RegNum,
1253                         unsigned &RegWidth, SmallVectorImpl<AsmToken> &Tokens);
1254   bool ParseRegRange(unsigned& Num, unsigned& Width);
1255   unsigned getRegularReg(RegisterKind RegKind,
1256                          unsigned RegNum,
1257                          unsigned RegWidth,
1258                          SMLoc Loc);
1259 
1260   bool isRegister();
1261   bool isRegister(const AsmToken &Token, const AsmToken &NextToken) const;
1262   Optional<StringRef> getGprCountSymbolName(RegisterKind RegKind);
1263   void initializeGprCountSymbol(RegisterKind RegKind);
1264   bool updateGprCountSymbols(RegisterKind RegKind, unsigned DwordRegIndex,
1265                              unsigned RegWidth);
1266   void cvtMubufImpl(MCInst &Inst, const OperandVector &Operands,
1267                     bool IsAtomic, bool IsLds = false);
1268   void cvtDSImpl(MCInst &Inst, const OperandVector &Operands,
1269                  bool IsGdsHardcoded);
1270 
1271 public:
1272   enum AMDGPUMatchResultTy {
1273     Match_PreferE32 = FIRST_TARGET_MATCH_RESULT_TY
1274   };
1275   enum OperandMode {
1276     OperandMode_Default,
1277     OperandMode_NSA,
1278   };
1279 
1280   using OptionalImmIndexMap = std::map<AMDGPUOperand::ImmTy, unsigned>;
1281 
1282   AMDGPUAsmParser(const MCSubtargetInfo &STI, MCAsmParser &_Parser,
1283                const MCInstrInfo &MII,
1284                const MCTargetOptions &Options)
1285       : MCTargetAsmParser(Options, STI, MII), Parser(_Parser) {
1286     MCAsmParserExtension::Initialize(Parser);
1287 
1288     if (getFeatureBits().none()) {
1289       // Set default features.
1290       copySTI().ToggleFeature("southern-islands");
1291     }
1292 
1293     setAvailableFeatures(ComputeAvailableFeatures(getFeatureBits()));
1294 
1295     {
1296       // TODO: make these pre-defined variables read-only.
1297       // Currently there is no suitable machinery in core llvm-mc for this.
1298       // MCSymbol::isRedefinable is intended for another purpose, and
1299       // AsmParser::parseDirectiveSet() cannot be specialized for a specific target.
1300       AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
1301       MCContext &Ctx = getContext();
1302       if (ISA.Major >= 6 && isHsaAbiVersion3AndAbove(&getSTI())) {
1303         MCSymbol *Sym =
1304             Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_number"));
1305         Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
1306         Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_minor"));
1307         Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
1308         Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_stepping"));
1309         Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
1310       } else {
1311         MCSymbol *Sym =
1312             Ctx.getOrCreateSymbol(Twine(".option.machine_version_major"));
1313         Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
1314         Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_minor"));
1315         Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
1316         Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_stepping"));
1317         Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
1318       }
1319       if (ISA.Major >= 6 && isHsaAbiVersion3AndAbove(&getSTI())) {
1320         initializeGprCountSymbol(IS_VGPR);
1321         initializeGprCountSymbol(IS_SGPR);
1322       } else
1323         KernelScope.initialize(getContext());
1324     }
1325   }
1326 
1327   bool hasMIMG_R128() const {
1328     return AMDGPU::hasMIMG_R128(getSTI());
1329   }
1330 
1331   bool hasPackedD16() const {
1332     return AMDGPU::hasPackedD16(getSTI());
1333   }
1334 
1335   bool hasGFX10A16() const {
1336     return AMDGPU::hasGFX10A16(getSTI());
1337   }
1338 
1339   bool hasG16() const { return AMDGPU::hasG16(getSTI()); }
1340 
1341   bool isSI() const {
1342     return AMDGPU::isSI(getSTI());
1343   }
1344 
1345   bool isCI() const {
1346     return AMDGPU::isCI(getSTI());
1347   }
1348 
1349   bool isVI() const {
1350     return AMDGPU::isVI(getSTI());
1351   }
1352 
1353   bool isGFX9() const {
1354     return AMDGPU::isGFX9(getSTI());
1355   }
1356 
1357   bool isGFX90A() const {
1358     return AMDGPU::isGFX90A(getSTI());
1359   }
1360 
1361   bool isGFX9Plus() const {
1362     return AMDGPU::isGFX9Plus(getSTI());
1363   }
1364 
1365   bool isGFX10() const {
1366     return AMDGPU::isGFX10(getSTI());
1367   }
1368 
1369   bool isGFX10Plus() const { return AMDGPU::isGFX10Plus(getSTI()); }
1370 
1371   bool isGFX10_BEncoding() const {
1372     return AMDGPU::isGFX10_BEncoding(getSTI());
1373   }
1374 
1375   bool hasInv2PiInlineImm() const {
1376     return getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm];
1377   }
1378 
1379   bool hasFlatOffsets() const {
1380     return getFeatureBits()[AMDGPU::FeatureFlatInstOffsets];
1381   }
1382 
1383   bool hasArchitectedFlatScratch() const {
1384     return getFeatureBits()[AMDGPU::FeatureArchitectedFlatScratch];
1385   }
1386 
1387   bool hasSGPR102_SGPR103() const {
1388     return !isVI() && !isGFX9();
1389   }
1390 
1391   bool hasSGPR104_SGPR105() const { return isGFX10Plus(); }
1392 
1393   bool hasIntClamp() const {
1394     return getFeatureBits()[AMDGPU::FeatureIntClamp];
1395   }
1396 
1397   AMDGPUTargetStreamer &getTargetStreamer() {
1398     MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer();
1399     return static_cast<AMDGPUTargetStreamer &>(TS);
1400   }
1401 
1402   const MCRegisterInfo *getMRI() const {
1403     // We need this const_cast because for some reason getContext() is not const
1404     // in MCAsmParser.
1405     return const_cast<AMDGPUAsmParser*>(this)->getContext().getRegisterInfo();
1406   }
1407 
1408   const MCInstrInfo *getMII() const {
1409     return &MII;
1410   }
1411 
1412   const FeatureBitset &getFeatureBits() const {
1413     return getSTI().getFeatureBits();
1414   }
1415 
1416   void setForcedEncodingSize(unsigned Size) { ForcedEncodingSize = Size; }
1417   void setForcedDPP(bool ForceDPP_) { ForcedDPP = ForceDPP_; }
1418   void setForcedSDWA(bool ForceSDWA_) { ForcedSDWA = ForceSDWA_; }
1419 
1420   unsigned getForcedEncodingSize() const { return ForcedEncodingSize; }
1421   bool isForcedVOP3() const { return ForcedEncodingSize == 64; }
1422   bool isForcedDPP() const { return ForcedDPP; }
1423   bool isForcedSDWA() const { return ForcedSDWA; }
1424   ArrayRef<unsigned> getMatchedVariants() const;
1425   StringRef getMatchedVariantName() const;
1426 
1427   std::unique_ptr<AMDGPUOperand> parseRegister(bool RestoreOnFailure = false);
1428   bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc,
1429                      bool RestoreOnFailure);
1430   bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override;
1431   OperandMatchResultTy tryParseRegister(unsigned &RegNo, SMLoc &StartLoc,
1432                                         SMLoc &EndLoc) override;
1433   unsigned checkTargetMatchPredicate(MCInst &Inst) override;
1434   unsigned validateTargetOperandClass(MCParsedAsmOperand &Op,
1435                                       unsigned Kind) override;
1436   bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
1437                                OperandVector &Operands, MCStreamer &Out,
1438                                uint64_t &ErrorInfo,
1439                                bool MatchingInlineAsm) override;
1440   bool ParseDirective(AsmToken DirectiveID) override;
1441   OperandMatchResultTy parseOperand(OperandVector &Operands, StringRef Mnemonic,
1442                                     OperandMode Mode = OperandMode_Default);
1443   StringRef parseMnemonicSuffix(StringRef Name);
1444   bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
1445                         SMLoc NameLoc, OperandVector &Operands) override;
1446   //bool ProcessInstruction(MCInst &Inst);
1447 
1448   OperandMatchResultTy parseIntWithPrefix(const char *Prefix, int64_t &Int);
1449 
1450   OperandMatchResultTy
1451   parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
1452                      AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
1453                      bool (*ConvertResult)(int64_t &) = nullptr);
1454 
1455   OperandMatchResultTy
1456   parseOperandArrayWithPrefix(const char *Prefix,
1457                               OperandVector &Operands,
1458                               AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
1459                               bool (*ConvertResult)(int64_t&) = nullptr);
1460 
1461   OperandMatchResultTy
1462   parseNamedBit(StringRef Name, OperandVector &Operands,
1463                 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone);
1464   OperandMatchResultTy parseCPol(OperandVector &Operands);
1465   OperandMatchResultTy parseStringWithPrefix(StringRef Prefix,
1466                                              StringRef &Value,
1467                                              SMLoc &StringLoc);
1468 
1469   bool isModifier();
1470   bool isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1471   bool isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1472   bool isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1473   bool isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const;
1474   bool parseSP3NegModifier();
1475   OperandMatchResultTy parseImm(OperandVector &Operands, bool HasSP3AbsModifier = false);
1476   OperandMatchResultTy parseReg(OperandVector &Operands);
1477   OperandMatchResultTy parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod = false);
1478   OperandMatchResultTy parseRegOrImmWithFPInputMods(OperandVector &Operands, bool AllowImm = true);
1479   OperandMatchResultTy parseRegOrImmWithIntInputMods(OperandVector &Operands, bool AllowImm = true);
1480   OperandMatchResultTy parseRegWithFPInputMods(OperandVector &Operands);
1481   OperandMatchResultTy parseRegWithIntInputMods(OperandVector &Operands);
1482   OperandMatchResultTy parseVReg32OrOff(OperandVector &Operands);
1483   OperandMatchResultTy parseDfmtNfmt(int64_t &Format);
1484   OperandMatchResultTy parseUfmt(int64_t &Format);
1485   OperandMatchResultTy parseSymbolicSplitFormat(StringRef FormatStr, SMLoc Loc, int64_t &Format);
1486   OperandMatchResultTy parseSymbolicUnifiedFormat(StringRef FormatStr, SMLoc Loc, int64_t &Format);
1487   OperandMatchResultTy parseFORMAT(OperandVector &Operands);
1488   OperandMatchResultTy parseSymbolicOrNumericFormat(int64_t &Format);
1489   OperandMatchResultTy parseNumericFormat(int64_t &Format);
1490   bool tryParseFmt(const char *Pref, int64_t MaxVal, int64_t &Val);
1491   bool matchDfmtNfmt(int64_t &Dfmt, int64_t &Nfmt, StringRef FormatStr, SMLoc Loc);
1492 
1493   void cvtDSOffset01(MCInst &Inst, const OperandVector &Operands);
1494   void cvtDS(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, false); }
1495   void cvtDSGds(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, true); }
1496   void cvtExp(MCInst &Inst, const OperandVector &Operands);
1497 
1498   bool parseCnt(int64_t &IntVal);
1499   OperandMatchResultTy parseSWaitCntOps(OperandVector &Operands);
1500   OperandMatchResultTy parseHwreg(OperandVector &Operands);
1501 
1502 private:
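  // A parsed field of a composite operand such as hwreg() or sendmsg(),
  // together with its location for diagnostics.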
1503   struct OperandInfoTy {
1504     SMLoc Loc;
1505     int64_t Id;
1506     bool IsSymbolic = false;
1507     bool IsDefined = false;
1508 
1509     OperandInfoTy(int64_t Id_) : Id(Id_) {}
1510   };
1511 
1512   bool parseSendMsgBody(OperandInfoTy &Msg, OperandInfoTy &Op, OperandInfoTy &Stream);
1513   bool validateSendMsg(const OperandInfoTy &Msg,
1514                        const OperandInfoTy &Op,
1515                        const OperandInfoTy &Stream);
1516 
1517   bool parseHwregBody(OperandInfoTy &HwReg,
1518                       OperandInfoTy &Offset,
1519                       OperandInfoTy &Width);
1520   bool validateHwreg(const OperandInfoTy &HwReg,
1521                      const OperandInfoTy &Offset,
1522                      const OperandInfoTy &Width);
1523 
1524   SMLoc getFlatOffsetLoc(const OperandVector &Operands) const;
1525   SMLoc getSMEMOffsetLoc(const OperandVector &Operands) const;
1526 
1527   SMLoc getOperandLoc(std::function<bool(const AMDGPUOperand&)> Test,
1528                       const OperandVector &Operands) const;
1529   SMLoc getImmLoc(AMDGPUOperand::ImmTy Type, const OperandVector &Operands) const;
1530   SMLoc getRegLoc(unsigned Reg, const OperandVector &Operands) const;
1531   SMLoc getLitLoc(const OperandVector &Operands) const;
1532   SMLoc getConstLoc(const OperandVector &Operands) const;
1533 
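  // Semantic checks applied to a matched MCInst before it is emitted.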
1534   bool validateInstruction(const MCInst &Inst, const SMLoc &IDLoc, const OperandVector &Operands);
1535   bool validateFlatOffset(const MCInst &Inst, const OperandVector &Operands);
1536   bool validateSMEMOffset(const MCInst &Inst, const OperandVector &Operands);
1537   bool validateSOPLiteral(const MCInst &Inst) const;
1538   bool validateConstantBusLimitations(const MCInst &Inst, const OperandVector &Operands);
1539   bool validateEarlyClobberLimitations(const MCInst &Inst, const OperandVector &Operands);
1540   bool validateIntClampSupported(const MCInst &Inst);
1541   bool validateMIMGAtomicDMask(const MCInst &Inst);
1542   bool validateMIMGGatherDMask(const MCInst &Inst);
1543   bool validateMovrels(const MCInst &Inst, const OperandVector &Operands);
1544   bool validateMIMGDataSize(const MCInst &Inst);
1545   bool validateMIMGAddrSize(const MCInst &Inst);
1546   bool validateMIMGD16(const MCInst &Inst);
1547   bool validateMIMGDim(const MCInst &Inst);
1548   bool validateMIMGMSAA(const MCInst &Inst);
1549   bool validateOpSel(const MCInst &Inst);
1550   bool validateDPP(const MCInst &Inst, const OperandVector &Operands);
1551   bool validateVccOperand(unsigned Reg) const;
1552   bool validateVOPLiteral(const MCInst &Inst, const OperandVector &Operands);
1553   bool validateMAIAccWrite(const MCInst &Inst, const OperandVector &Operands);
1554   bool validateMFMA(const MCInst &Inst, const OperandVector &Operands);
1555   bool validateAGPRLdSt(const MCInst &Inst) const;
1556   bool validateVGPRAlign(const MCInst &Inst) const;
1557   bool validateGWS(const MCInst &Inst, const OperandVector &Operands);
1558   bool validateDivScale(const MCInst &Inst);
1559   bool validateCoherencyBits(const MCInst &Inst, const OperandVector &Operands,
1560                              const SMLoc &IDLoc);
1561   Optional<StringRef> validateLdsDirect(const MCInst &Inst);
1562   unsigned getConstantBusLimit(unsigned Opcode) const;
1563   bool usesConstantBus(const MCInst &Inst, unsigned OpIdx);
1564   bool isInlineConstant(const MCInst &Inst, unsigned OpIdx) const;
1565   unsigned findImplicitSGPRReadInVOP(const MCInst &Inst) const;
1566 
1567   bool isSupportedMnemo(StringRef Mnemo,
1568                         const FeatureBitset &FBS);
1569   bool isSupportedMnemo(StringRef Mnemo,
1570                         const FeatureBitset &FBS,
1571                         ArrayRef<unsigned> Variants);
1572   bool checkUnsupportedInstruction(StringRef Name, const SMLoc &IDLoc);
1573 
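  // Low-level token inspection and consumption helpers used by the custom
  // operand parsers.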
1574   bool isId(const StringRef Id) const;
1575   bool isId(const AsmToken &Token, const StringRef Id) const;
1576   bool isToken(const AsmToken::TokenKind Kind) const;
1577   bool trySkipId(const StringRef Id);
1578   bool trySkipId(const StringRef Pref, const StringRef Id);
1579   bool trySkipId(const StringRef Id, const AsmToken::TokenKind Kind);
1580   bool trySkipToken(const AsmToken::TokenKind Kind);
1581   bool skipToken(const AsmToken::TokenKind Kind, const StringRef ErrMsg);
1582   bool parseString(StringRef &Val, const StringRef ErrMsg = "expected a string");
1583   bool parseId(StringRef &Val, const StringRef ErrMsg = "");
1584 
1585   void peekTokens(MutableArrayRef<AsmToken> Tokens);
1586   AsmToken::TokenKind getTokenKind() const;
1587   bool parseExpr(int64_t &Imm, StringRef Expected = "");
1588   bool parseExpr(OperandVector &Operands);
1589   StringRef getTokenStr() const;
1590   AsmToken peekToken();
1591   AsmToken getToken() const;
1592   SMLoc getLoc() const;
1593   void lex();
1594 
1595 public:
1596   void onBeginOfFile() override;
1597 
1598   OperandMatchResultTy parseOptionalOperand(OperandVector &Operands);
1599   OperandMatchResultTy parseOptionalOpr(OperandVector &Operands);
1600 
1601   OperandMatchResultTy parseExpTgt(OperandVector &Operands);
1602   OperandMatchResultTy parseSendMsgOp(OperandVector &Operands);
1603   OperandMatchResultTy parseInterpSlot(OperandVector &Operands);
1604   OperandMatchResultTy parseInterpAttr(OperandVector &Operands);
1605   OperandMatchResultTy parseSOppBrTarget(OperandVector &Operands);
1606   OperandMatchResultTy parseBoolReg(OperandVector &Operands);
1607 
1608   bool parseSwizzleOperand(int64_t &Op,
1609                            const unsigned MinVal,
1610                            const unsigned MaxVal,
1611                            const StringRef ErrMsg,
1612                            SMLoc &Loc);
1613   bool parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
1614                             const unsigned MinVal,
1615                             const unsigned MaxVal,
1616                             const StringRef ErrMsg);
1617   OperandMatchResultTy parseSwizzleOp(OperandVector &Operands);
1618   bool parseSwizzleOffset(int64_t &Imm);
1619   bool parseSwizzleMacro(int64_t &Imm);
1620   bool parseSwizzleQuadPerm(int64_t &Imm);
1621   bool parseSwizzleBitmaskPerm(int64_t &Imm);
1622   bool parseSwizzleBroadcast(int64_t &Imm);
1623   bool parseSwizzleSwap(int64_t &Imm);
1624   bool parseSwizzleReverse(int64_t &Imm);
1625 
1626   OperandMatchResultTy parseGPRIdxMode(OperandVector &Operands);
1627   int64_t parseGPRIdxMacro();
1628 
1629   void cvtMubuf(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false); }
1630   void cvtMubufAtomic(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true); }
1631   void cvtMubufLds(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, true); }
1632   void cvtMtbuf(MCInst &Inst, const OperandVector &Operands);
1633 
1634   AMDGPUOperand::Ptr defaultCPol() const;
1635 
1636   AMDGPUOperand::Ptr defaultSMRDOffset8() const;
1637   AMDGPUOperand::Ptr defaultSMEMOffset() const;
1638   AMDGPUOperand::Ptr defaultSMRDLiteralOffset() const;
1639   AMDGPUOperand::Ptr defaultFlatOffset() const;
1640 
1641   OperandMatchResultTy parseOModOperand(OperandVector &Operands);
1642 
1643   void cvtVOP3(MCInst &Inst, const OperandVector &Operands,
1644                OptionalImmIndexMap &OptionalIdx);
1645   void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands);
1646   void cvtVOP3(MCInst &Inst, const OperandVector &Operands);
1647   void cvtVOP3P(MCInst &Inst, const OperandVector &Operands);
1648   void cvtVOP3P(MCInst &Inst, const OperandVector &Operands,
1649                 OptionalImmIndexMap &OptionalIdx);
1650 
1651   void cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands);
1652 
1653   void cvtMIMG(MCInst &Inst, const OperandVector &Operands,
1654                bool IsAtomic = false);
1655   void cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands);
1656   void cvtIntersectRay(MCInst &Inst, const OperandVector &Operands);
1657 
1658   void cvtSMEMAtomic(MCInst &Inst, const OperandVector &Operands);
1659 
1660   bool parseDimId(unsigned &Encoding);
1661   OperandMatchResultTy parseDim(OperandVector &Operands);
1662   OperandMatchResultTy parseDPP8(OperandVector &Operands);
1663   OperandMatchResultTy parseDPPCtrl(OperandVector &Operands);
1664   bool isSupportedDPPCtrl(StringRef Ctrl, const OperandVector &Operands);
1665   int64_t parseDPPCtrlSel(StringRef Ctrl);
1666   int64_t parseDPPCtrlPerm();
1667   AMDGPUOperand::Ptr defaultRowMask() const;
1668   AMDGPUOperand::Ptr defaultBankMask() const;
1669   AMDGPUOperand::Ptr defaultBoundCtrl() const;
1670   AMDGPUOperand::Ptr defaultFI() const;
1671   void cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8 = false);
1672   void cvtDPP8(MCInst &Inst, const OperandVector &Operands) { cvtDPP(Inst, Operands, true); }
1673 
1674   OperandMatchResultTy parseSDWASel(OperandVector &Operands, StringRef Prefix,
1675                                     AMDGPUOperand::ImmTy Type);
1676   OperandMatchResultTy parseSDWADstUnused(OperandVector &Operands);
1677   void cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands);
1678   void cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands);
1679   void cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands);
1680   void cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands);
1681   void cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands);
1682   void cvtSDWA(MCInst &Inst, const OperandVector &Operands,
1683                uint64_t BasicInstType,
1684                bool SkipDstVcc = false,
1685                bool SkipSrcVcc = false);
1686 
1687   AMDGPUOperand::Ptr defaultBLGP() const;
1688   AMDGPUOperand::Ptr defaultCBSZ() const;
1689   AMDGPUOperand::Ptr defaultABID() const;
1690 
1691   OperandMatchResultTy parseEndpgmOp(OperandVector &Operands);
1692   AMDGPUOperand::Ptr defaultEndpgmImmOperands() const;
1693 };
1694 
1695 struct OptionalOperand {
1696   const char *Name;
1697   AMDGPUOperand::ImmTy Type;
1698   bool IsBit;
1699   bool (*ConvertResult)(int64_t&);
1700 };
1701 
1702 } // end anonymous namespace
1703 
1704 // May be called with integer type with equivalent bitwidth.
1705 static const fltSemantics *getFltSemantics(unsigned Size) {
1706   switch (Size) {
1707   case 4:
1708     return &APFloat::IEEEsingle();
1709   case 8:
1710     return &APFloat::IEEEdouble();
1711   case 2:
1712     return &APFloat::IEEEhalf();
1713   default:
1714     llvm_unreachable("unsupported fp type");
1715   }
1716 }
1717 
1718 static const fltSemantics *getFltSemantics(MVT VT) {
1719   return getFltSemantics(VT.getSizeInBits() / 8);
1720 }
1721 
1722 static const fltSemantics *getOpFltSemantics(uint8_t OperandType) {
1723   switch (OperandType) {
1724   case AMDGPU::OPERAND_REG_IMM_INT32:
1725   case AMDGPU::OPERAND_REG_IMM_FP32:
1726   case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED:
1727   case AMDGPU::OPERAND_REG_INLINE_C_INT32:
1728   case AMDGPU::OPERAND_REG_INLINE_C_FP32:
1729   case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
1730   case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
1731   case AMDGPU::OPERAND_REG_INLINE_C_V2FP32:
1732   case AMDGPU::OPERAND_REG_IMM_V2FP32:
1733   case AMDGPU::OPERAND_REG_INLINE_C_V2INT32:
1734   case AMDGPU::OPERAND_REG_IMM_V2INT32:
1735   case AMDGPU::OPERAND_KIMM32:
1736     return &APFloat::IEEEsingle();
1737   case AMDGPU::OPERAND_REG_IMM_INT64:
1738   case AMDGPU::OPERAND_REG_IMM_FP64:
1739   case AMDGPU::OPERAND_REG_INLINE_C_INT64:
1740   case AMDGPU::OPERAND_REG_INLINE_C_FP64:
1741   case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
1742     return &APFloat::IEEEdouble();
1743   case AMDGPU::OPERAND_REG_IMM_INT16:
1744   case AMDGPU::OPERAND_REG_IMM_FP16:
1745   case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED:
1746   case AMDGPU::OPERAND_REG_INLINE_C_INT16:
1747   case AMDGPU::OPERAND_REG_INLINE_C_FP16:
1748   case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
1749   case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
1750   case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
1751   case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
1752   case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
1753   case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
1754   case AMDGPU::OPERAND_REG_IMM_V2INT16:
1755   case AMDGPU::OPERAND_REG_IMM_V2FP16:
1756   case AMDGPU::OPERAND_KIMM16:
1757     return &APFloat::IEEEhalf();
1758   default:
1759     llvm_unreachable("unsupported fp type");
1760   }
1761 }
1762 
1763 //===----------------------------------------------------------------------===//
1764 // Operand
1765 //===----------------------------------------------------------------------===//
1766 
1767 static bool canLosslesslyConvertToFPType(APFloat &FPLiteral, MVT VT) {
1768   bool Lost;
1769 
1770   // Convert the literal to the floating-point semantics of the target type
1771   APFloat::opStatus Status = FPLiteral.convert(*getFltSemantics(VT),
1772                                                APFloat::rmNearestTiesToEven,
1773                                                &Lost);
1774   // We allow precision loss but not overflow or underflow
1775   if (Status != APFloat::opOK &&
1776       Lost &&
1777       ((Status & APFloat::opOverflow)  != 0 ||
1778        (Status & APFloat::opUnderflow) != 0)) {
1779     return false;
1780   }
1781 
1782   return true;
1783 }
1784 
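// Return true if truncating Val to Size bits is lossless, i.e. Val is
// representable in Size bits as either an unsigned or a signed integer.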
1785 static bool isSafeTruncation(int64_t Val, unsigned Size) {
1786   return isUIntN(Size, Val) || isIntN(Size, Val);
1787 }
1788 
1789 static bool isInlineableLiteralOp16(int64_t Val, MVT VT, bool HasInv2Pi) {
1790   if (VT.getScalarType() == MVT::i16) {
1791     // FP immediates do not work correctly for i16 operands, so only integer
1792     // inline constants are accepted.
1792     return isInlinableIntLiteral(Val);
1793   }
1794 
1795   // f16/v2f16 operands work correctly for all values.
1796   return AMDGPU::isInlinableLiteral16(Val, HasInv2Pi);
1797 }
1798 
1799 bool AMDGPUOperand::isInlinableImm(MVT type) const {
1800 
1801   // This is a hack to enable named inline values like
1802   // shared_base with both 32-bit and 64-bit operands.
1803   // Note that these values are defined as
1804   // 32-bit operands only.
1805   if (isInlineValue()) {
1806     return true;
1807   }
1808 
1809   if (!isImmTy(ImmTyNone)) {
1810     // Only plain immediates are inlinable (e.g. "clamp" attribute is not)
1811     return false;
1812   }
1813   // TODO: We should avoid using host float here. It would be better to
1814   // check the float bit values which is what a few other places do.
1815   // We've had bot failures before due to weird NaN support on mips hosts.
1816 
1817   APInt Literal(64, Imm.Val);
1818 
1819   if (Imm.IsFPImm) { // We got fp literal token
1820     if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
1821       return AMDGPU::isInlinableLiteral64(Imm.Val,
1822                                           AsmParser->hasInv2PiInlineImm());
1823     }
1824 
1825     APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
1826     if (!canLosslesslyConvertToFPType(FPLiteral, type))
1827       return false;
1828 
1829     if (type.getScalarSizeInBits() == 16) {
1830       return isInlineableLiteralOp16(
1831         static_cast<int16_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
1832         type, AsmParser->hasInv2PiInlineImm());
1833     }
1834 
1835     // Check if single precision literal is inlinable
1836     return AMDGPU::isInlinableLiteral32(
1837       static_cast<int32_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
1838       AsmParser->hasInv2PiInlineImm());
1839   }
1840 
1841   // We got int literal token.
1842   if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
1843     return AMDGPU::isInlinableLiteral64(Imm.Val,
1844                                         AsmParser->hasInv2PiInlineImm());
1845   }
1846 
1847   if (!isSafeTruncation(Imm.Val, type.getScalarSizeInBits())) {
1848     return false;
1849   }
1850 
1851   if (type.getScalarSizeInBits() == 16) {
1852     return isInlineableLiteralOp16(
1853       static_cast<int16_t>(Literal.getLoBits(16).getSExtValue()),
1854       type, AsmParser->hasInv2PiInlineImm());
1855   }
1856 
1857   return AMDGPU::isInlinableLiteral32(
1858     static_cast<int32_t>(Literal.getLoBits(32).getZExtValue()),
1859     AsmParser->hasInv2PiInlineImm());
1860 }
1861 
1862 bool AMDGPUOperand::isLiteralImm(MVT type) const {
1863   // Check that this immediate can be added as literal
1864   if (!isImmTy(ImmTyNone)) {
1865     return false;
1866   }
1867 
1868   if (!Imm.IsFPImm) {
1869     // We got int literal token.
1870 
1871     if (type == MVT::f64 && hasFPModifiers()) {
1872       // FP modifiers cannot be applied to int literals while preserving the same
1873       // semantics for VOP1/2/C and VOP3, because of integer truncation. To avoid
1874       // ambiguity, disable these cases.
1875       return false;
1876     }
1877 
1878     unsigned Size = type.getSizeInBits();
1879     if (Size == 64)
1880       Size = 32;
1881 
1882     // FIXME: 64-bit operands can zero extend, sign extend, or pad zeroes for FP
1883     // types.
1884     return isSafeTruncation(Imm.Val, Size);
1885   }
1886 
1887   // We got fp literal token
1888   if (type == MVT::f64) { // Expected 64-bit fp operand
1889     // The low 32 bits of the literal would be set to zero, but such literals are accepted
1890     return true;
1891   }
1892 
1893   if (type == MVT::i64) { // Expected 64-bit int operand
1894     // We don't allow fp literals in 64-bit integer instructions. It is
1895     // unclear how we should encode them.
1896     return false;
1897   }
1898 
1899   // We allow fp literals with f16x2 operands assuming that the specified
1900   // literal goes into the lower half and the upper half is zero. We also
1901   // require that the literal can be losslessly converted to f16.
1902   MVT ExpectedType = (type == MVT::v2f16)? MVT::f16 :
1903                      (type == MVT::v2i16)? MVT::i16 :
1904                      (type == MVT::v2f32)? MVT::f32 : type;
1905 
1906   APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
1907   return canLosslesslyConvertToFPType(FPLiteral, ExpectedType);
1908 }
1909 
1910 bool AMDGPUOperand::isRegClass(unsigned RCID) const {
1911   return isRegKind() && AsmParser->getMRI()->getRegClass(RCID).contains(getReg());
1912 }
1913 
1914 bool AMDGPUOperand::isVRegWithInputMods() const {
1915   return isRegClass(AMDGPU::VGPR_32RegClassID) ||
1916          // GFX90A allows DPP on 64-bit operands.
1917          (isRegClass(AMDGPU::VReg_64RegClassID) &&
1918           AsmParser->getFeatureBits()[AMDGPU::Feature64BitDPP]);
1919 }
1920 
1921 bool AMDGPUOperand::isSDWAOperand(MVT type) const {
1922   if (AsmParser->isVI())
1923     return isVReg32();
1924   else if (AsmParser->isGFX9Plus())
1925     return isRegClass(AMDGPU::VS_32RegClassID) || isInlinableImm(type);
1926   else
1927     return false;
1928 }
1929 
1930 bool AMDGPUOperand::isSDWAFP16Operand() const {
1931   return isSDWAOperand(MVT::f16);
1932 }
1933 
1934 bool AMDGPUOperand::isSDWAFP32Operand() const {
1935   return isSDWAOperand(MVT::f32);
1936 }
1937 
1938 bool AMDGPUOperand::isSDWAInt16Operand() const {
1939   return isSDWAOperand(MVT::i16);
1940 }
1941 
1942 bool AMDGPUOperand::isSDWAInt32Operand() const {
1943   return isSDWAOperand(MVT::i32);
1944 }
1945 
1946 bool AMDGPUOperand::isBoolReg() const {
1947   auto FB = AsmParser->getFeatureBits();
1948   return isReg() && ((FB[AMDGPU::FeatureWavefrontSize64] && isSCSrcB64()) ||
1949                      (FB[AMDGPU::FeatureWavefrontSize32] && isSCSrcB32()));
1950 }
1951 
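// Apply the parsed 'abs'/'neg' FP modifiers to the raw bit pattern of an
// immediate of the given byte Size by clearing or flipping its sign bit.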
1952 uint64_t AMDGPUOperand::applyInputFPModifiers(uint64_t Val, unsigned Size) const
1953 {
1954   assert(isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
1955   assert(Size == 2 || Size == 4 || Size == 8);
1956 
1957   const uint64_t FpSignMask = (1ULL << (Size * 8 - 1));
1958 
1959   if (Imm.Mods.Abs) {
1960     Val &= ~FpSignMask;
1961   }
1962   if (Imm.Mods.Neg) {
1963     Val ^= FpSignMask;
1964   }
1965 
1966   return Val;
1967 }
1968 
1969 void AMDGPUOperand::addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers) const {
1970   if (AMDGPU::isSISrcOperand(AsmParser->getMII()->get(Inst.getOpcode()),
1971                              Inst.getNumOperands())) {
1972     addLiteralImmOperand(Inst, Imm.Val,
1973                          ApplyModifiers &
1974                          isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
1975   } else {
1976     assert(!isImmTy(ImmTyNone) || !hasModifiers());
1977     Inst.addOperand(MCOperand::createImm(Imm.Val));
1978     setImmKindNone();
1979   }
1980 }
1981 
1982 void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const {
1983   const auto& InstDesc = AsmParser->getMII()->get(Inst.getOpcode());
1984   auto OpNum = Inst.getNumOperands();
1985   // Check that this operand accepts literals
1986   assert(AMDGPU::isSISrcOperand(InstDesc, OpNum));
1987 
1988   if (ApplyModifiers) {
1989     assert(AMDGPU::isSISrcFPOperand(InstDesc, OpNum));
1990     const unsigned Size = Imm.IsFPImm ? sizeof(double) : getOperandSize(InstDesc, OpNum);
1991     Val = applyInputFPModifiers(Val, Size);
1992   }
1993 
1994   APInt Literal(64, Val);
1995   uint8_t OpTy = InstDesc.OpInfo[OpNum].OperandType;
1996 
1997   if (Imm.IsFPImm) { // We got fp literal token
1998     switch (OpTy) {
1999     case AMDGPU::OPERAND_REG_IMM_INT64:
2000     case AMDGPU::OPERAND_REG_IMM_FP64:
2001     case AMDGPU::OPERAND_REG_INLINE_C_INT64:
2002     case AMDGPU::OPERAND_REG_INLINE_C_FP64:
2003     case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
2004       if (AMDGPU::isInlinableLiteral64(Literal.getZExtValue(),
2005                                        AsmParser->hasInv2PiInlineImm())) {
2006         Inst.addOperand(MCOperand::createImm(Literal.getZExtValue()));
2007         setImmKindConst();
2008         return;
2009       }
2010 
2011       // Non-inlineable
2012       if (AMDGPU::isSISrcFPOperand(InstDesc, OpNum)) { // Expected 64-bit fp operand
2013         // For fp operands we check if low 32 bits are zeros
2014         if (Literal.getLoBits(32) != 0) {
2015           const_cast<AMDGPUAsmParser *>(AsmParser)->Warning(Inst.getLoc(),
2016           "Can't encode literal as exact 64-bit floating-point operand. "
2017           "Low 32-bits will be set to zero");
2018         }
2019 
2020         Inst.addOperand(MCOperand::createImm(Literal.lshr(32).getZExtValue()));
2021         setImmKindLiteral();
2022         return;
2023       }
2024 
2025       // We don't allow fp literals in 64-bit integer instructions. It is
2026       // unclear how we should encode them. This case should be checked earlier
2027       // in predicate methods (isLiteralImm())
2028       llvm_unreachable("fp literal in 64-bit integer instruction.");
2029 
2030     case AMDGPU::OPERAND_REG_IMM_INT32:
2031     case AMDGPU::OPERAND_REG_IMM_FP32:
2032     case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED:
2033     case AMDGPU::OPERAND_REG_INLINE_C_INT32:
2034     case AMDGPU::OPERAND_REG_INLINE_C_FP32:
2035     case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
2036     case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
2037     case AMDGPU::OPERAND_REG_IMM_INT16:
2038     case AMDGPU::OPERAND_REG_IMM_FP16:
2039     case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED:
2040     case AMDGPU::OPERAND_REG_INLINE_C_INT16:
2041     case AMDGPU::OPERAND_REG_INLINE_C_FP16:
2042     case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
2043     case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
2044     case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
2045     case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
2046     case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
2047     case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
2048     case AMDGPU::OPERAND_REG_IMM_V2INT16:
2049     case AMDGPU::OPERAND_REG_IMM_V2FP16:
2050     case AMDGPU::OPERAND_REG_INLINE_C_V2FP32:
2051     case AMDGPU::OPERAND_REG_IMM_V2FP32:
2052     case AMDGPU::OPERAND_REG_INLINE_C_V2INT32:
2053     case AMDGPU::OPERAND_REG_IMM_V2INT32:
2054     case AMDGPU::OPERAND_KIMM32:
2055     case AMDGPU::OPERAND_KIMM16: {
2056       bool lost;
2057       APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
2058       // Convert the literal to the operand's floating-point semantics
2059       FPLiteral.convert(*getOpFltSemantics(OpTy),
2060                         APFloat::rmNearestTiesToEven, &lost);
2061       // We allow precision loss but not overflow or underflow. This should be
2062       // checked earlier in isLiteralImm()
2063 
2064       uint64_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue();
2065       Inst.addOperand(MCOperand::createImm(ImmVal));
2066       setImmKindLiteral();
2067       return;
2068     }
2069     default:
2070       llvm_unreachable("invalid operand size");
2071     }
2072 
2073     return;
2074   }
2075 
2076   // We got int literal token.
2077   // Only sign extend inline immediates.
2078   switch (OpTy) {
2079   case AMDGPU::OPERAND_REG_IMM_INT32:
2080   case AMDGPU::OPERAND_REG_IMM_FP32:
2081   case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED:
2082   case AMDGPU::OPERAND_REG_INLINE_C_INT32:
2083   case AMDGPU::OPERAND_REG_INLINE_C_FP32:
2084   case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
2085   case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
2086   case AMDGPU::OPERAND_REG_IMM_V2INT16:
2087   case AMDGPU::OPERAND_REG_IMM_V2FP16:
2088   case AMDGPU::OPERAND_REG_IMM_V2FP32:
2089   case AMDGPU::OPERAND_REG_INLINE_C_V2FP32:
2090   case AMDGPU::OPERAND_REG_IMM_V2INT32:
2091   case AMDGPU::OPERAND_REG_INLINE_C_V2INT32:
2092     if (isSafeTruncation(Val, 32) &&
2093         AMDGPU::isInlinableLiteral32(static_cast<int32_t>(Val),
2094                                      AsmParser->hasInv2PiInlineImm())) {
2095       Inst.addOperand(MCOperand::createImm(Val));
2096       setImmKindConst();
2097       return;
2098     }
2099 
2100     Inst.addOperand(MCOperand::createImm(Val & 0xffffffff));
2101     setImmKindLiteral();
2102     return;
2103 
2104   case AMDGPU::OPERAND_REG_IMM_INT64:
2105   case AMDGPU::OPERAND_REG_IMM_FP64:
2106   case AMDGPU::OPERAND_REG_INLINE_C_INT64:
2107   case AMDGPU::OPERAND_REG_INLINE_C_FP64:
2108   case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
2109     if (AMDGPU::isInlinableLiteral64(Val, AsmParser->hasInv2PiInlineImm())) {
2110       Inst.addOperand(MCOperand::createImm(Val));
2111       setImmKindConst();
2112       return;
2113     }
2114 
2115     Inst.addOperand(MCOperand::createImm(Lo_32(Val)));
2116     setImmKindLiteral();
2117     return;
2118 
2119   case AMDGPU::OPERAND_REG_IMM_INT16:
2120   case AMDGPU::OPERAND_REG_IMM_FP16:
2121   case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED:
2122   case AMDGPU::OPERAND_REG_INLINE_C_INT16:
2123   case AMDGPU::OPERAND_REG_INLINE_C_FP16:
2124   case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
2125   case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
2126     if (isSafeTruncation(Val, 16) &&
2127         AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val),
2128                                      AsmParser->hasInv2PiInlineImm())) {
2129       Inst.addOperand(MCOperand::createImm(Val));
2130       setImmKindConst();
2131       return;
2132     }
2133 
2134     Inst.addOperand(MCOperand::createImm(Val & 0xffff));
2135     setImmKindLiteral();
2136     return;
2137 
2138   case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
2139   case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
2140   case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
2141   case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: {
2142     assert(isSafeTruncation(Val, 16));
2143     assert(AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val),
2144                                         AsmParser->hasInv2PiInlineImm()));
2145 
2146     Inst.addOperand(MCOperand::createImm(Val));
2147     return;
2148   }
2149   case AMDGPU::OPERAND_KIMM32:
2150     Inst.addOperand(MCOperand::createImm(Literal.getLoBits(32).getZExtValue()));
2151     setImmKindNone();
2152     return;
2153   case AMDGPU::OPERAND_KIMM16:
2154     Inst.addOperand(MCOperand::createImm(Literal.getLoBits(16).getZExtValue()));
2155     setImmKindNone();
2156     return;
2157   default:
2158     llvm_unreachable("invalid operand size");
2159   }
2160 }
2161 
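// Add a KIMM operand of the given bit width: integer literals are truncated
// to Bitwidth bits, FP literals are converted to the matching FP semantics.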
2162 template <unsigned Bitwidth>
2163 void AMDGPUOperand::addKImmFPOperands(MCInst &Inst, unsigned N) const {
2164   APInt Literal(64, Imm.Val);
2165   setImmKindNone();
2166 
2167   if (!Imm.IsFPImm) {
2168     // We got int literal token.
2169     Inst.addOperand(MCOperand::createImm(Literal.getLoBits(Bitwidth).getZExtValue()));
2170     return;
2171   }
2172 
2173   bool Lost;
2174   APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
2175   FPLiteral.convert(*getFltSemantics(Bitwidth / 8),
2176                     APFloat::rmNearestTiesToEven, &Lost);
2177   Inst.addOperand(MCOperand::createImm(FPLiteral.bitcastToAPInt().getZExtValue()));
2178 }
2179 
2180 void AMDGPUOperand::addRegOperands(MCInst &Inst, unsigned N) const {
2181   Inst.addOperand(MCOperand::createReg(AMDGPU::getMCReg(getReg(), AsmParser->getSTI())));
2182 }
2183 
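// Check if Reg is one of the named inline values (shared_base, vccz, null,
// etc.) that may be used in place of an inline immediate.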
2184 static bool isInlineValue(unsigned Reg) {
2185   switch (Reg) {
2186   case AMDGPU::SRC_SHARED_BASE:
2187   case AMDGPU::SRC_SHARED_LIMIT:
2188   case AMDGPU::SRC_PRIVATE_BASE:
2189   case AMDGPU::SRC_PRIVATE_LIMIT:
2190   case AMDGPU::SRC_POPS_EXITING_WAVE_ID:
2191     return true;
2192   case AMDGPU::SRC_VCCZ:
2193   case AMDGPU::SRC_EXECZ:
2194   case AMDGPU::SRC_SCC:
2195     return true;
2196   case AMDGPU::SGPR_NULL:
2197     return true;
2198   default:
2199     return false;
2200   }
2201 }
2202 
2203 bool AMDGPUOperand::isInlineValue() const {
2204   return isRegKind() && ::isInlineValue(getReg());
2205 }
2206 
2207 //===----------------------------------------------------------------------===//
2208 // AsmParser
2209 //===----------------------------------------------------------------------===//
2210 
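// Map a register kind and width (in 32-bit registers) to the corresponding
// register class ID, or -1 if no class of that width exists.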
2211 static int getRegClass(RegisterKind Is, unsigned RegWidth) {
2212   if (Is == IS_VGPR) {
2213     switch (RegWidth) {
2214       default: return -1;
2215       case 1: return AMDGPU::VGPR_32RegClassID;
2216       case 2: return AMDGPU::VReg_64RegClassID;
2217       case 3: return AMDGPU::VReg_96RegClassID;
2218       case 4: return AMDGPU::VReg_128RegClassID;
2219       case 5: return AMDGPU::VReg_160RegClassID;
2220       case 6: return AMDGPU::VReg_192RegClassID;
2221       case 7: return AMDGPU::VReg_224RegClassID;
2222       case 8: return AMDGPU::VReg_256RegClassID;
2223       case 16: return AMDGPU::VReg_512RegClassID;
2224       case 32: return AMDGPU::VReg_1024RegClassID;
2225     }
2226   } else if (Is == IS_TTMP) {
2227     switch (RegWidth) {
2228       default: return -1;
2229       case 1: return AMDGPU::TTMP_32RegClassID;
2230       case 2: return AMDGPU::TTMP_64RegClassID;
2231       case 4: return AMDGPU::TTMP_128RegClassID;
2232       case 8: return AMDGPU::TTMP_256RegClassID;
2233       case 16: return AMDGPU::TTMP_512RegClassID;
2234     }
2235   } else if (Is == IS_SGPR) {
2236     switch (RegWidth) {
2237       default: return -1;
2238       case 1: return AMDGPU::SGPR_32RegClassID;
2239       case 2: return AMDGPU::SGPR_64RegClassID;
2240       case 3: return AMDGPU::SGPR_96RegClassID;
2241       case 4: return AMDGPU::SGPR_128RegClassID;
2242       case 5: return AMDGPU::SGPR_160RegClassID;
2243       case 6: return AMDGPU::SGPR_192RegClassID;
2244       case 7: return AMDGPU::SGPR_224RegClassID;
2245       case 8: return AMDGPU::SGPR_256RegClassID;
2246       case 16: return AMDGPU::SGPR_512RegClassID;
2247     }
2248   } else if (Is == IS_AGPR) {
2249     switch (RegWidth) {
2250       default: return -1;
2251       case 1: return AMDGPU::AGPR_32RegClassID;
2252       case 2: return AMDGPU::AReg_64RegClassID;
2253       case 3: return AMDGPU::AReg_96RegClassID;
2254       case 4: return AMDGPU::AReg_128RegClassID;
2255       case 5: return AMDGPU::AReg_160RegClassID;
2256       case 6: return AMDGPU::AReg_192RegClassID;
2257       case 7: return AMDGPU::AReg_224RegClassID;
2258       case 8: return AMDGPU::AReg_256RegClassID;
2259       case 16: return AMDGPU::AReg_512RegClassID;
2260       case 32: return AMDGPU::AReg_1024RegClassID;
2261     }
2262   }
2263   return -1;
2264 }
2265 
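// Look up a special (non-indexed) register such as exec, vcc or m0 by name.
// Returns AMDGPU::NoRegister if the name is not recognized.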
2266 static unsigned getSpecialRegForName(StringRef RegName) {
2267   return StringSwitch<unsigned>(RegName)
2268     .Case("exec", AMDGPU::EXEC)
2269     .Case("vcc", AMDGPU::VCC)
2270     .Case("flat_scratch", AMDGPU::FLAT_SCR)
2271     .Case("xnack_mask", AMDGPU::XNACK_MASK)
2272     .Case("shared_base", AMDGPU::SRC_SHARED_BASE)
2273     .Case("src_shared_base", AMDGPU::SRC_SHARED_BASE)
2274     .Case("shared_limit", AMDGPU::SRC_SHARED_LIMIT)
2275     .Case("src_shared_limit", AMDGPU::SRC_SHARED_LIMIT)
2276     .Case("private_base", AMDGPU::SRC_PRIVATE_BASE)
2277     .Case("src_private_base", AMDGPU::SRC_PRIVATE_BASE)
2278     .Case("private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
2279     .Case("src_private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
2280     .Case("pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
2281     .Case("src_pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
2282     .Case("lds_direct", AMDGPU::LDS_DIRECT)
2283     .Case("src_lds_direct", AMDGPU::LDS_DIRECT)
2284     .Case("m0", AMDGPU::M0)
2285     .Case("vccz", AMDGPU::SRC_VCCZ)
2286     .Case("src_vccz", AMDGPU::SRC_VCCZ)
2287     .Case("execz", AMDGPU::SRC_EXECZ)
2288     .Case("src_execz", AMDGPU::SRC_EXECZ)
2289     .Case("scc", AMDGPU::SRC_SCC)
2290     .Case("src_scc", AMDGPU::SRC_SCC)
2291     .Case("tba", AMDGPU::TBA)
2292     .Case("tma", AMDGPU::TMA)
2293     .Case("flat_scratch_lo", AMDGPU::FLAT_SCR_LO)
2294     .Case("flat_scratch_hi", AMDGPU::FLAT_SCR_HI)
2295     .Case("xnack_mask_lo", AMDGPU::XNACK_MASK_LO)
2296     .Case("xnack_mask_hi", AMDGPU::XNACK_MASK_HI)
2297     .Case("vcc_lo", AMDGPU::VCC_LO)
2298     .Case("vcc_hi", AMDGPU::VCC_HI)
2299     .Case("exec_lo", AMDGPU::EXEC_LO)
2300     .Case("exec_hi", AMDGPU::EXEC_HI)
2301     .Case("tma_lo", AMDGPU::TMA_LO)
2302     .Case("tma_hi", AMDGPU::TMA_HI)
2303     .Case("tba_lo", AMDGPU::TBA_LO)
2304     .Case("tba_hi", AMDGPU::TBA_HI)
2305     .Case("pc", AMDGPU::PC_REG)
2306     .Case("null", AMDGPU::SGPR_NULL)
2307     .Default(AMDGPU::NoRegister);
2308 }
2309 
2310 bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
2311                                     SMLoc &EndLoc, bool RestoreOnFailure) {
2312   auto R = parseRegister();
2313   if (!R) return true;
2314   assert(R->isReg());
2315   RegNo = R->getReg();
2316   StartLoc = R->getStartLoc();
2317   EndLoc = R->getEndLoc();
2318   return false;
2319 }
2320 
2321 bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
2322                                     SMLoc &EndLoc) {
2323   return ParseRegister(RegNo, StartLoc, EndLoc, /*RestoreOnFailure=*/false);
2324 }
2325 
2326 OperandMatchResultTy AMDGPUAsmParser::tryParseRegister(unsigned &RegNo,
2327                                                        SMLoc &StartLoc,
2328                                                        SMLoc &EndLoc) {
2329   bool Result =
2330       ParseRegister(RegNo, StartLoc, EndLoc, /*RestoreOnFailure=*/true);
2331   bool PendingErrors = getParser().hasPendingError();
2332   getParser().clearPendingErrors();
2333   if (PendingErrors)
2334     return MatchOperand_ParseFail;
2335   if (Result)
2336     return MatchOperand_NoMatch;
2337   return MatchOperand_Success;
2338 }
2339 
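// Append register Reg1 to the register list being assembled: special
// registers may only form a known LO/HI pair, while regular registers must
// have consecutive indices and simply grow RegWidth.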
2340 bool AMDGPUAsmParser::AddNextRegisterToList(unsigned &Reg, unsigned &RegWidth,
2341                                             RegisterKind RegKind, unsigned Reg1,
2342                                             SMLoc Loc) {
2343   switch (RegKind) {
2344   case IS_SPECIAL:
2345     if (Reg == AMDGPU::EXEC_LO && Reg1 == AMDGPU::EXEC_HI) {
2346       Reg = AMDGPU::EXEC;
2347       RegWidth = 2;
2348       return true;
2349     }
2350     if (Reg == AMDGPU::FLAT_SCR_LO && Reg1 == AMDGPU::FLAT_SCR_HI) {
2351       Reg = AMDGPU::FLAT_SCR;
2352       RegWidth = 2;
2353       return true;
2354     }
2355     if (Reg == AMDGPU::XNACK_MASK_LO && Reg1 == AMDGPU::XNACK_MASK_HI) {
2356       Reg = AMDGPU::XNACK_MASK;
2357       RegWidth = 2;
2358       return true;
2359     }
2360     if (Reg == AMDGPU::VCC_LO && Reg1 == AMDGPU::VCC_HI) {
2361       Reg = AMDGPU::VCC;
2362       RegWidth = 2;
2363       return true;
2364     }
2365     if (Reg == AMDGPU::TBA_LO && Reg1 == AMDGPU::TBA_HI) {
2366       Reg = AMDGPU::TBA;
2367       RegWidth = 2;
2368       return true;
2369     }
2370     if (Reg == AMDGPU::TMA_LO && Reg1 == AMDGPU::TMA_HI) {
2371       Reg = AMDGPU::TMA;
2372       RegWidth = 2;
2373       return true;
2374     }
2375     Error(Loc, "register does not fit in the list");
2376     return false;
2377   case IS_VGPR:
2378   case IS_SGPR:
2379   case IS_AGPR:
2380   case IS_TTMP:
2381     if (Reg1 != Reg + RegWidth) {
2382       Error(Loc, "registers in a list must have consecutive indices");
2383       return false;
2384     }
2385     RegWidth++;
2386     return true;
2387   default:
2388     llvm_unreachable("unexpected register kind");
2389   }
2390 }
2391 
2392 struct RegInfo {
2393   StringLiteral Name;
2394   RegisterKind Kind;
2395 };
2396 
2397 static constexpr RegInfo RegularRegisters[] = {
2398   {{"v"},    IS_VGPR},
2399   {{"s"},    IS_SGPR},
2400   {{"ttmp"}, IS_TTMP},
2401   {{"acc"},  IS_AGPR},
2402   {{"a"},    IS_AGPR},
2403 };
2404 
2405 static bool isRegularReg(RegisterKind Kind) {
2406   return Kind == IS_VGPR ||
2407          Kind == IS_SGPR ||
2408          Kind == IS_TTMP ||
2409          Kind == IS_AGPR;
2410 }
2411 
2412 static const RegInfo* getRegularRegInfo(StringRef Str) {
2413   for (const RegInfo &Reg : RegularRegisters)
2414     if (Str.startswith(Reg.Name))
2415       return &Reg;
2416   return nullptr;
2417 }
2418 
2419 static bool getRegNum(StringRef Str, unsigned& Num) {
2420   return !Str.getAsInteger(10, Num);
2421 }
2422 
2423 bool
2424 AMDGPUAsmParser::isRegister(const AsmToken &Token,
2425                             const AsmToken &NextToken) const {
2426 
2427   // A list of consecutive registers: [s0,s1,s2,s3]
2428   if (Token.is(AsmToken::LBrac))
2429     return true;
2430 
2431   if (!Token.is(AsmToken::Identifier))
2432     return false;
2433 
2434   // A single register like s0 or a range of registers like s[0:1]
2435 
2436   StringRef Str = Token.getString();
2437   const RegInfo *Reg = getRegularRegInfo(Str);
2438   if (Reg) {
2439     StringRef RegName = Reg->Name;
2440     StringRef RegSuffix = Str.substr(RegName.size());
2441     if (!RegSuffix.empty()) {
2442       unsigned Num;
2443       // A single register with an index: rXX
2444       if (getRegNum(RegSuffix, Num))
2445         return true;
2446     } else {
2447       // A range of registers: r[XX:YY].
2448       if (NextToken.is(AsmToken::LBrac))
2449         return true;
2450     }
2451   }
2452 
2453   return getSpecialRegForName(Str) != AMDGPU::NoRegister;
2454 }
2455 
2456 bool
2457 AMDGPUAsmParser::isRegister()
2458 {
2459   return isRegister(getToken(), peekToken());
2460 }
2461 
2462 unsigned
2463 AMDGPUAsmParser::getRegularReg(RegisterKind RegKind,
2464                                unsigned RegNum,
2465                                unsigned RegWidth,
2466                                SMLoc Loc) {
2467 
2468   assert(isRegularReg(RegKind));
2469 
2470   unsigned AlignSize = 1;
2471   if (RegKind == IS_SGPR || RegKind == IS_TTMP) {
2472     // SGPR and TTMP registers must be aligned.
2473     // Max required alignment is 4 dwords.
2474     AlignSize = std::min(RegWidth, 4u);
2475   }
2476 
2477   if (RegNum % AlignSize != 0) {
2478     Error(Loc, "invalid register alignment");
2479     return AMDGPU::NoRegister;
2480   }
2481 
2482   unsigned RegIdx = RegNum / AlignSize;
2483   int RCID = getRegClass(RegKind, RegWidth);
2484   if (RCID == -1) {
2485     Error(Loc, "invalid or unsupported register size");
2486     return AMDGPU::NoRegister;
2487   }
2488 
2489   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
2490   const MCRegisterClass RC = TRI->getRegClass(RCID);
2491   if (RegIdx >= RC.getNumRegs()) {
2492     Error(Loc, "register index is out of range");
2493     return AMDGPU::NoRegister;
2494   }
2495 
2496   return RC.getRegister(RegIdx);
2497 }
2498 
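// Parse a bracketed register index or range such as [XX] or [XX:YY].
// On success, Num holds the first index and Width the number of registers.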
2499 bool
2500 AMDGPUAsmParser::ParseRegRange(unsigned& Num, unsigned& Width) {
2501   int64_t RegLo, RegHi;
2502   if (!skipToken(AsmToken::LBrac, "missing register index"))
2503     return false;
2504 
2505   SMLoc FirstIdxLoc = getLoc();
2506   SMLoc SecondIdxLoc;
2507 
2508   if (!parseExpr(RegLo))
2509     return false;
2510 
2511   if (trySkipToken(AsmToken::Colon)) {
2512     SecondIdxLoc = getLoc();
2513     if (!parseExpr(RegHi))
2514       return false;
2515   } else {
2516     RegHi = RegLo;
2517   }
2518 
2519   if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
2520     return false;
2521 
2522   if (!isUInt<32>(RegLo)) {
2523     Error(FirstIdxLoc, "invalid register index");
2524     return false;
2525   }
2526 
2527   if (!isUInt<32>(RegHi)) {
2528     Error(SecondIdxLoc, "invalid register index");
2529     return false;
2530   }
2531 
2532   if (RegLo > RegHi) {
2533     Error(FirstIdxLoc, "first register index should not exceed second index");
2534     return false;
2535   }
2536 
2537   Num = static_cast<unsigned>(RegLo);
2538   Width = (RegHi - RegLo) + 1;
2539   return true;
2540 }
2541 
2542 unsigned AMDGPUAsmParser::ParseSpecialReg(RegisterKind &RegKind,
2543                                           unsigned &RegNum, unsigned &RegWidth,
2544                                           SmallVectorImpl<AsmToken> &Tokens) {
2545   assert(isToken(AsmToken::Identifier));
2546   unsigned Reg = getSpecialRegForName(getTokenStr());
2547   if (Reg) {
2548     RegNum = 0;
2549     RegWidth = 1;
2550     RegKind = IS_SPECIAL;
2551     Tokens.push_back(getToken());
2552     lex(); // skip register name
2553   }
2554   return Reg;
2555 }
2556 
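// Parse a regular register operand: either a single register like 'v0'
// or a range like 'v[0:3]'.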
2557 unsigned AMDGPUAsmParser::ParseRegularReg(RegisterKind &RegKind,
2558                                           unsigned &RegNum, unsigned &RegWidth,
2559                                           SmallVectorImpl<AsmToken> &Tokens) {
2560   assert(isToken(AsmToken::Identifier));
2561   StringRef RegName = getTokenStr();
2562   auto Loc = getLoc();
2563 
2564   const RegInfo *RI = getRegularRegInfo(RegName);
2565   if (!RI) {
2566     Error(Loc, "invalid register name");
2567     return AMDGPU::NoRegister;
2568   }
2569 
2570   Tokens.push_back(getToken());
2571   lex(); // skip register name
2572 
2573   RegKind = RI->Kind;
2574   StringRef RegSuffix = RegName.substr(RI->Name.size());
2575   if (!RegSuffix.empty()) {
2576     // Single 32-bit register: vXX.
2577     if (!getRegNum(RegSuffix, RegNum)) {
2578       Error(Loc, "invalid register index");
2579       return AMDGPU::NoRegister;
2580     }
2581     RegWidth = 1;
2582   } else {
2583     // Range of registers: v[XX:YY]. ":YY" is optional.
2584     if (!ParseRegRange(RegNum, RegWidth))
2585       return AMDGPU::NoRegister;
2586   }
2587 
2588   return getRegularReg(RegKind, RegNum, RegWidth, Loc);
2589 }
2590 
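// Parse a list of consecutive 32-bit registers in square brackets,
// e.g. [s0,s1,s2,s3], and fold it into a single wide register.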
2591 unsigned AMDGPUAsmParser::ParseRegList(RegisterKind &RegKind, unsigned &RegNum,
2592                                        unsigned &RegWidth,
2593                                        SmallVectorImpl<AsmToken> &Tokens) {
2594   unsigned Reg = AMDGPU::NoRegister;
2595   auto ListLoc = getLoc();
2596 
2597   if (!skipToken(AsmToken::LBrac,
2598                  "expected a register or a list of registers")) {
2599     return AMDGPU::NoRegister;
2600   }
2601 
2602   // List of consecutive registers, e.g.: [s0,s1,s2,s3]
2603 
2604   auto Loc = getLoc();
2605   if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth))
2606     return AMDGPU::NoRegister;
2607   if (RegWidth != 1) {
2608     Error(Loc, "expected a single 32-bit register");
2609     return AMDGPU::NoRegister;
2610   }
2611 
2612   for (; trySkipToken(AsmToken::Comma); ) {
2613     RegisterKind NextRegKind;
2614     unsigned NextReg, NextRegNum, NextRegWidth;
2615     Loc = getLoc();
2616 
2617     if (!ParseAMDGPURegister(NextRegKind, NextReg,
2618                              NextRegNum, NextRegWidth,
2619                              Tokens)) {
2620       return AMDGPU::NoRegister;
2621     }
2622     if (NextRegWidth != 1) {
2623       Error(Loc, "expected a single 32-bit register");
2624       return AMDGPU::NoRegister;
2625     }
2626     if (NextRegKind != RegKind) {
2627       Error(Loc, "registers in a list must be of the same kind");
2628       return AMDGPU::NoRegister;
2629     }
2630     if (!AddNextRegisterToList(Reg, RegWidth, RegKind, NextReg, Loc))
2631       return AMDGPU::NoRegister;
2632   }
2633 
2634   if (!skipToken(AsmToken::RBrac,
2635                  "expected a comma or a closing square bracket")) {
2636     return AMDGPU::NoRegister;
2637   }
2638 
2639   if (isRegularReg(RegKind))
2640     Reg = getRegularReg(RegKind, RegNum, RegWidth, ListLoc);
2641 
2642   return Reg;
2643 }
2644 
2645 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
2646                                           unsigned &RegNum, unsigned &RegWidth,
2647                                           SmallVectorImpl<AsmToken> &Tokens) {
2648   auto Loc = getLoc();
2649   Reg = AMDGPU::NoRegister;
2650 
2651   if (isToken(AsmToken::Identifier)) {
2652     Reg = ParseSpecialReg(RegKind, RegNum, RegWidth, Tokens);
2653     if (Reg == AMDGPU::NoRegister)
2654       Reg = ParseRegularReg(RegKind, RegNum, RegWidth, Tokens);
2655   } else {
2656     Reg = ParseRegList(RegKind, RegNum, RegWidth, Tokens);
2657   }
2658 
2659   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
2660   if (Reg == AMDGPU::NoRegister) {
2661     assert(Parser.hasPendingError());
2662     return false;
2663   }
2664 
2665   if (!subtargetHasRegister(*TRI, Reg)) {
2666     if (Reg == AMDGPU::SGPR_NULL) {
2667       Error(Loc, "'null' operand is not supported on this GPU");
2668     } else {
2669       Error(Loc, "register not available on this GPU");
2670     }
2671     return false;
2672   }
2673 
2674   return true;
2675 }
2676 
2677 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
2678                                           unsigned &RegNum, unsigned &RegWidth,
2679                                           bool RestoreOnFailure /*=false*/) {
2680   Reg = AMDGPU::NoRegister;
2681 
2682   SmallVector<AsmToken, 1> Tokens;
2683   if (ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, Tokens)) {
2684     if (RestoreOnFailure) {
2685       while (!Tokens.empty()) {
2686         getLexer().UnLex(Tokens.pop_back_val());
2687       }
2688     }
2689     return true;
2690   }
2691   return false;
2692 }
2693 
2694 Optional<StringRef>
2695 AMDGPUAsmParser::getGprCountSymbolName(RegisterKind RegKind) {
2696   switch (RegKind) {
2697   case IS_VGPR:
2698     return StringRef(".amdgcn.next_free_vgpr");
2699   case IS_SGPR:
2700     return StringRef(".amdgcn.next_free_sgpr");
2701   default:
2702     return None;
2703   }
2704 }
2705 
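// Define the .amdgcn.next_free_{v,s}gpr symbol for the given register kind
// with an initial value of 0.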
2706 void AMDGPUAsmParser::initializeGprCountSymbol(RegisterKind RegKind) {
2707   auto SymbolName = getGprCountSymbolName(RegKind);
2708   assert(SymbolName && "initializing invalid register kind");
2709   MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);
2710   Sym->setVariableValue(MCConstantExpr::create(0, getContext()));
2711 }
2712 
2713 bool AMDGPUAsmParser::updateGprCountSymbols(RegisterKind RegKind,
2714                                             unsigned DwordRegIndex,
2715                                             unsigned RegWidth) {
2716   // Symbols are only defined for GCN targets
2717   if (AMDGPU::getIsaVersion(getSTI().getCPU()).Major < 6)
2718     return true;
2719 
2720   auto SymbolName = getGprCountSymbolName(RegKind);
2721   if (!SymbolName)
2722     return true;
2723   MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);
2724 
2725   int64_t NewMax = DwordRegIndex + RegWidth - 1;
2726   int64_t OldCount;
2727 
2728   if (!Sym->isVariable())
2729     return !Error(getLoc(),
2730                   ".amdgcn.next_free_{v,s}gpr symbols must be variable");
2731   if (!Sym->getVariableValue(false)->evaluateAsAbsolute(OldCount))
2732     return !Error(
2733         getLoc(),
2734         ".amdgcn.next_free_{v,s}gpr symbols must be absolute expressions");
2735 
2736   if (OldCount <= NewMax)
2737     Sym->setVariableValue(MCConstantExpr::create(NewMax + 1, getContext()));
2738 
2739   return true;
2740 }
2741 
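// Parse a register operand and record its usage so that either the
// .amdgcn.next_free_{v,s}gpr symbols or the legacy kernel scope tracking
// stay up to date.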
2742 std::unique_ptr<AMDGPUOperand>
2743 AMDGPUAsmParser::parseRegister(bool RestoreOnFailure) {
2744   const auto &Tok = getToken();
2745   SMLoc StartLoc = Tok.getLoc();
2746   SMLoc EndLoc = Tok.getEndLoc();
2747   RegisterKind RegKind;
2748   unsigned Reg, RegNum, RegWidth;
2749 
2750   if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth)) {
2751     return nullptr;
2752   }
2753   if (isHsaAbiVersion3AndAbove(&getSTI())) {
2754     if (!updateGprCountSymbols(RegKind, RegNum, RegWidth))
2755       return nullptr;
2756   } else
2757     KernelScope.usesRegister(RegKind, RegNum, RegWidth);
2758   return AMDGPUOperand::CreateReg(this, Reg, StartLoc, EndLoc);
2759 }
2760 
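// Parse an immediate operand: either a floating-point literal with an
// optional leading '-', or an integer/symbolic MC expression.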
2761 OperandMatchResultTy
2762 AMDGPUAsmParser::parseImm(OperandVector &Operands, bool HasSP3AbsModifier) {
2763   // TODO: add syntactic sugar for 1/(2*PI)
2764 
2765   assert(!isRegister());
2766   assert(!isModifier());
2767 
2768   const auto& Tok = getToken();
2769   const auto& NextTok = peekToken();
2770   bool IsReal = Tok.is(AsmToken::Real);
2771   SMLoc S = getLoc();
2772   bool Negate = false;
2773 
2774   if (!IsReal && Tok.is(AsmToken::Minus) && NextTok.is(AsmToken::Real)) {
2775     lex();
2776     IsReal = true;
2777     Negate = true;
2778   }
2779 
2780   if (IsReal) {
2781     // Floating-point expressions are not supported.
2782     // Can only allow floating-point literals with an
2783     // optional sign.
2784 
2785     StringRef Num = getTokenStr();
2786     lex();
2787 
2788     APFloat RealVal(APFloat::IEEEdouble());
2789     auto roundMode = APFloat::rmNearestTiesToEven;
2790     if (errorToBool(RealVal.convertFromString(Num, roundMode).takeError())) {
2791       return MatchOperand_ParseFail;
2792     }
2793     if (Negate)
2794       RealVal.changeSign();
2795 
2796     Operands.push_back(
2797       AMDGPUOperand::CreateImm(this, RealVal.bitcastToAPInt().getZExtValue(), S,
2798                                AMDGPUOperand::ImmTyNone, true));
2799 
2800     return MatchOperand_Success;
2801 
2802   } else {
2803     int64_t IntVal;
2804     const MCExpr *Expr;
2805     SMLoc S = getLoc();
2806 
2807     if (HasSP3AbsModifier) {
2808       // This is a workaround for handling expressions
2809       // as arguments of SP3 'abs' modifier, for example:
2810       //     |1.0|
2811       //     |-1|
2812       //     |1+x|
2813       // This syntax is not compatible with syntax of standard
2814       // MC expressions (due to the trailing '|').
2815       SMLoc EndLoc;
2816       if (getParser().parsePrimaryExpr(Expr, EndLoc, nullptr))
2817         return MatchOperand_ParseFail;
2818     } else {
2819       if (Parser.parseExpression(Expr))
2820         return MatchOperand_ParseFail;
2821     }
2822 
2823     if (Expr->evaluateAsAbsolute(IntVal)) {
2824       Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S));
2825     } else {
2826       Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S));
2827     }
2828 
2829     return MatchOperand_Success;
2830   }
2831 
2832   return MatchOperand_NoMatch;
2833 }
2834 
2835 OperandMatchResultTy
2836 AMDGPUAsmParser::parseReg(OperandVector &Operands) {
2837   if (!isRegister())
2838     return MatchOperand_NoMatch;
2839 
2840   if (auto R = parseRegister()) {
2841     assert(R->isReg());
2842     Operands.push_back(std::move(R));
2843     return MatchOperand_Success;
2844   }
2845   return MatchOperand_ParseFail;
2846 }
2847 
2848 OperandMatchResultTy
2849 AMDGPUAsmParser::parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod) {
2850   auto res = parseReg(Operands);
2851   if (res != MatchOperand_NoMatch) {
2852     return res;
2853   } else if (isModifier()) {
2854     return MatchOperand_NoMatch;
2855   } else {
2856     return parseImm(Operands, HasSP3AbsMod);
2857   }
2858 }
2859 
2860 bool
2861 AMDGPUAsmParser::isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
2862   if (Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::LParen)) {
2863     const auto &str = Token.getString();
2864     return str == "abs" || str == "neg" || str == "sext";
2865   }
2866   return false;
2867 }
2868 
2869 bool
2870 AMDGPUAsmParser::isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const {
2871   return Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::Colon);
2872 }
2873 
2874 bool
2875 AMDGPUAsmParser::isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
2876   return isNamedOperandModifier(Token, NextToken) || Token.is(AsmToken::Pipe);
2877 }
2878 
2879 bool
2880 AMDGPUAsmParser::isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
2881   return isRegister(Token, NextToken) || isOperandModifier(Token, NextToken);
2882 }
2883 
2884 // Check if this is an operand modifier or an opcode modifier
2885 // which may look like an expression but is not. We should
2886 // avoid parsing these modifiers as expressions. Currently
2887 // recognized sequences are:
2888 //   |...|
2889 //   abs(...)
2890 //   neg(...)
2891 //   sext(...)
2892 //   -reg
2893 //   -|...|
2894 //   -abs(...)
2895 //   name:...
2896 // Note that simple opcode modifiers like 'gds' may be parsed as
2897 // expressions; this is a special case. See getExpressionAsToken.
2898 //
2899 bool
2900 AMDGPUAsmParser::isModifier() {
2901 
2902   AsmToken Tok = getToken();
2903   AsmToken NextToken[2];
2904   peekTokens(NextToken);
2905 
2906   return isOperandModifier(Tok, NextToken[0]) ||
2907          (Tok.is(AsmToken::Minus) && isRegOrOperandModifier(NextToken[0], NextToken[1])) ||
2908          isOpcodeModifierWithVal(Tok, NextToken[0]);
2909 }
2910 
2911 // Check if the current token is an SP3 'neg' modifier.
2912 // Currently this modifier is allowed in the following context:
2913 //
2914 // 1. Before a register, e.g. "-v0", "-v[...]" or "-[v0,v1]".
2915 // 2. Before an 'abs' modifier: -abs(...)
2916 // 3. Before an SP3 'abs' modifier: -|...|
2917 //
2918 // In all other cases "-" is handled as a part
2919 // of an expression that follows the sign.
2920 //
2921 // Note: When "-" is followed by an integer literal N,
2922 // this is interpreted as integer negation rather
2923 // than a floating-point NEG modifier applied to N.
2924 // Besides being counter-intuitive, such use of a floating-point
2925 // NEG modifier would have resulted in different meanings
2926 // of integer literals used with VOP1/2/C and VOP3,
2927 // for example:
2928 //    v_exp_f32_e32 v5, -1 // VOP1: src0 = 0xFFFFFFFF
2929 //    v_exp_f32_e64 v5, -1 // VOP3: src0 = 0x80000001
2930 // Negative fp literals with a preceding "-" are
2931 // handled likewise for uniformity.
2932 //
2933 bool
2934 AMDGPUAsmParser::parseSP3NegModifier() {
2935 
2936   AsmToken NextToken[2];
2937   peekTokens(NextToken);
2938 
2939   if (isToken(AsmToken::Minus) &&
2940       (isRegister(NextToken[0], NextToken[1]) ||
2941        NextToken[0].is(AsmToken::Pipe) ||
2942        isId(NextToken[0], "abs"))) {
2943     lex();
2944     return true;
2945   }
2946 
2947   return false;
2948 }
2949 
2950 OperandMatchResultTy
2951 AMDGPUAsmParser::parseRegOrImmWithFPInputMods(OperandVector &Operands,
2952                                               bool AllowImm) {
2953   bool Neg, SP3Neg;
2954   bool Abs, SP3Abs;
2955   SMLoc Loc;
2956 
2957   // Disable ambiguous constructs like '--1' etc. Should use neg(-1) instead.
2958   if (isToken(AsmToken::Minus) && peekToken().is(AsmToken::Minus)) {
2959     Error(getLoc(), "invalid syntax, expected 'neg' modifier");
2960     return MatchOperand_ParseFail;
2961   }
2962 
2963   SP3Neg = parseSP3NegModifier();
2964 
2965   Loc = getLoc();
2966   Neg = trySkipId("neg");
2967   if (Neg && SP3Neg) {
2968     Error(Loc, "expected register or immediate");
2969     return MatchOperand_ParseFail;
2970   }
2971   if (Neg && !skipToken(AsmToken::LParen, "expected left paren after neg"))
2972     return MatchOperand_ParseFail;
2973 
2974   Abs = trySkipId("abs");
2975   if (Abs && !skipToken(AsmToken::LParen, "expected left paren after abs"))
2976     return MatchOperand_ParseFail;
2977 
2978   Loc = getLoc();
2979   SP3Abs = trySkipToken(AsmToken::Pipe);
2980   if (Abs && SP3Abs) {
2981     Error(Loc, "expected register or immediate");
2982     return MatchOperand_ParseFail;
2983   }
2984 
2985   OperandMatchResultTy Res;
2986   if (AllowImm) {
2987     Res = parseRegOrImm(Operands, SP3Abs);
2988   } else {
2989     Res = parseReg(Operands);
2990   }
2991   if (Res != MatchOperand_Success) {
2992     return (SP3Neg || Neg || SP3Abs || Abs)? MatchOperand_ParseFail : Res;
2993   }
2994 
2995   if (SP3Abs && !skipToken(AsmToken::Pipe, "expected vertical bar"))
2996     return MatchOperand_ParseFail;
2997   if (Abs && !skipToken(AsmToken::RParen, "expected closing parentheses"))
2998     return MatchOperand_ParseFail;
2999   if (Neg && !skipToken(AsmToken::RParen, "expected closing parentheses"))
3000     return MatchOperand_ParseFail;
3001 
3002   AMDGPUOperand::Modifiers Mods;
3003   Mods.Abs = Abs || SP3Abs;
3004   Mods.Neg = Neg || SP3Neg;
3005 
3006   if (Mods.hasFPModifiers()) {
3007     AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
3008     if (Op.isExpr()) {
3009       Error(Op.getStartLoc(), "expected an absolute expression");
3010       return MatchOperand_ParseFail;
3011     }
3012     Op.setModifiers(Mods);
3013   }
3014   return MatchOperand_Success;
3015 }
3016 
3017 OperandMatchResultTy
3018 AMDGPUAsmParser::parseRegOrImmWithIntInputMods(OperandVector &Operands,
3019                                                bool AllowImm) {
3020   bool Sext = trySkipId("sext");
3021   if (Sext && !skipToken(AsmToken::LParen, "expected left paren after sext"))
3022     return MatchOperand_ParseFail;
3023 
3024   OperandMatchResultTy Res;
3025   if (AllowImm) {
3026     Res = parseRegOrImm(Operands);
3027   } else {
3028     Res = parseReg(Operands);
3029   }
3030   if (Res != MatchOperand_Success) {
3031     return Sext? MatchOperand_ParseFail : Res;
3032   }
3033 
3034   if (Sext && !skipToken(AsmToken::RParen, "expected closing parentheses"))
3035     return MatchOperand_ParseFail;
3036 
3037   AMDGPUOperand::Modifiers Mods;
3038   Mods.Sext = Sext;
3039 
3040   if (Mods.hasIntModifiers()) {
3041     AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
3042     if (Op.isExpr()) {
3043       Error(Op.getStartLoc(), "expected an absolute expression");
3044       return MatchOperand_ParseFail;
3045     }
3046     Op.setModifiers(Mods);
3047   }
3048 
3049   return MatchOperand_Success;
3050 }
3051 
3052 OperandMatchResultTy
3053 AMDGPUAsmParser::parseRegWithFPInputMods(OperandVector &Operands) {
3054   return parseRegOrImmWithFPInputMods(Operands, false);
3055 }
3056 
3057 OperandMatchResultTy
3058 AMDGPUAsmParser::parseRegWithIntInputMods(OperandVector &Operands) {
3059   return parseRegOrImmWithIntInputMods(Operands, false);
3060 }
3061 
3062 OperandMatchResultTy AMDGPUAsmParser::parseVReg32OrOff(OperandVector &Operands) {
3063   auto Loc = getLoc();
3064   if (trySkipId("off")) {
3065     Operands.push_back(AMDGPUOperand::CreateImm(this, 0, Loc,
3066                                                 AMDGPUOperand::ImmTyOff, false));
3067     return MatchOperand_Success;
3068   }
3069 
3070   if (!isRegister())
3071     return MatchOperand_NoMatch;
3072 
3073   std::unique_ptr<AMDGPUOperand> Reg = parseRegister();
3074   if (Reg) {
3075     Operands.push_back(std::move(Reg));
3076     return MatchOperand_Success;
3077   }
3078 
3079   return MatchOperand_ParseFail;
3081 }
3082 
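// Reject matches that contradict a forced encoding suffix (_e32/_e64/_dpp/
// _sdwa) and enforce additional target-specific operand restrictions.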
3083 unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) {
3084   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
3085 
3086   if ((getForcedEncodingSize() == 32 && (TSFlags & SIInstrFlags::VOP3)) ||
3087       (getForcedEncodingSize() == 64 && !(TSFlags & SIInstrFlags::VOP3)) ||
3088       (isForcedDPP() && !(TSFlags & SIInstrFlags::DPP)) ||
3089       (isForcedSDWA() && !(TSFlags & SIInstrFlags::SDWA)) )
3090     return Match_InvalidOperand;
3091 
3092   if ((TSFlags & SIInstrFlags::VOP3) &&
3093       (TSFlags & SIInstrFlags::VOPAsmPrefer32Bit) &&
3094       getForcedEncodingSize() != 64)
3095     return Match_PreferE32;
3096 
3097   if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
3098       Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) {
3099     // v_mac_f32/16 allow only dst_sel == DWORD;
3100     auto OpNum =
3101         AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::dst_sel);
3102     const auto &Op = Inst.getOperand(OpNum);
3103     if (!Op.isImm() || Op.getImm() != AMDGPU::SDWA::SdwaSel::DWORD) {
3104       return Match_InvalidOperand;
3105     }
3106   }
3107 
3108   return Match_Success;
3109 }
3110 
3111 static ArrayRef<unsigned> getAllVariants() {
3112   static const unsigned Variants[] = {
3113     AMDGPUAsmVariants::DEFAULT, AMDGPUAsmVariants::VOP3,
3114     AMDGPUAsmVariants::SDWA, AMDGPUAsmVariants::SDWA9, AMDGPUAsmVariants::DPP
3115   };
3116 
3117   return makeArrayRef(Variants);
3118 }
3119 
3120 // What asm variants we should check
3121 ArrayRef<unsigned> AMDGPUAsmParser::getMatchedVariants() const {
3122   if (getForcedEncodingSize() == 32) {
3123     static const unsigned Variants[] = {AMDGPUAsmVariants::DEFAULT};
3124     return makeArrayRef(Variants);
3125   }
3126 
3127   if (isForcedVOP3()) {
3128     static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3};
3129     return makeArrayRef(Variants);
3130   }
3131 
3132   if (isForcedSDWA()) {
3133     static const unsigned Variants[] = {AMDGPUAsmVariants::SDWA,
3134                                         AMDGPUAsmVariants::SDWA9};
3135     return makeArrayRef(Variants);
3136   }
3137 
3138   if (isForcedDPP()) {
3139     static const unsigned Variants[] = {AMDGPUAsmVariants::DPP};
3140     return makeArrayRef(Variants);
3141   }
3142 
3143   return getAllVariants();
3144 }
3145 
3146 StringRef AMDGPUAsmParser::getMatchedVariantName() const {
3147   if (getForcedEncodingSize() == 32)
3148     return "e32";
3149 
3150   if (isForcedVOP3())
3151     return "e64";
3152 
3153   if (isForcedSDWA())
3154     return "sdwa";
3155 
3156   if (isForcedDPP())
3157     return "dpp";
3158 
3159   return "";
3160 }
3161 
3162 unsigned AMDGPUAsmParser::findImplicitSGPRReadInVOP(const MCInst &Inst) const {
3163   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
3164   const unsigned Num = Desc.getNumImplicitUses();
3165   for (unsigned i = 0; i < Num; ++i) {
3166     unsigned Reg = Desc.ImplicitUses[i];
3167     switch (Reg) {
3168     case AMDGPU::FLAT_SCR:
3169     case AMDGPU::VCC:
3170     case AMDGPU::VCC_LO:
3171     case AMDGPU::VCC_HI:
3172     case AMDGPU::M0:
3173       return Reg;
3174     default:
3175       break;
3176     }
3177   }
3178   return AMDGPU::NoRegister;
3179 }
3180 
3181 // NB: This code is correct only when used to check constant
3182 // bus limitations because GFX7 supports no f16 inline constants.
3183 // Note that there are no cases in which a GFX7 opcode violates
3184 // constant bus limitations due to the use of an f16 constant.
3185 bool AMDGPUAsmParser::isInlineConstant(const MCInst &Inst,
3186                                        unsigned OpIdx) const {
3187   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
3188 
3189   if (!AMDGPU::isSISrcOperand(Desc, OpIdx)) {
3190     return false;
3191   }
3192 
3193   const MCOperand &MO = Inst.getOperand(OpIdx);
3194 
3195   int64_t Val = MO.getImm();
3196   auto OpSize = AMDGPU::getOperandSize(Desc, OpIdx);
3197 
3198   switch (OpSize) { // expected operand size
3199   case 8:
3200     return AMDGPU::isInlinableLiteral64(Val, hasInv2PiInlineImm());
3201   case 4:
3202     return AMDGPU::isInlinableLiteral32(Val, hasInv2PiInlineImm());
3203   case 2: {
3204     const unsigned OperandType = Desc.OpInfo[OpIdx].OperandType;
3205     if (OperandType == AMDGPU::OPERAND_REG_IMM_INT16 ||
3206         OperandType == AMDGPU::OPERAND_REG_INLINE_C_INT16 ||
3207         OperandType == AMDGPU::OPERAND_REG_INLINE_AC_INT16)
3208       return AMDGPU::isInlinableIntLiteral(Val);
3209 
3210     if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2INT16 ||
3211         OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2INT16 ||
3212         OperandType == AMDGPU::OPERAND_REG_IMM_V2INT16)
3213       return AMDGPU::isInlinableIntLiteralV216(Val);
3214 
3215     if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2FP16 ||
3216         OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2FP16 ||
3217         OperandType == AMDGPU::OPERAND_REG_IMM_V2FP16)
3218       return AMDGPU::isInlinableLiteralV216(Val, hasInv2PiInlineImm());
3219 
3220     return AMDGPU::isInlinableLiteral16(Val, hasInv2PiInlineImm());
3221   }
3222   default:
3223     llvm_unreachable("invalid operand size");
3224   }
3225 }
3226 
3227 unsigned AMDGPUAsmParser::getConstantBusLimit(unsigned Opcode) const {
3228   if (!isGFX10Plus())
3229     return 1;
3230 
3231   switch (Opcode) {
3232   // 64-bit shift instructions can use only one scalar value input
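  // For example (illustrative GFX10 syntax): "v_lshlrev_b64 v[0:1], s4, s[2:3]"
  // reads two scalar values and is rejected, while
  // "v_lshlrev_b64 v[0:1], s4, v[2:3]" reads only one and is accepted.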
3233   case AMDGPU::V_LSHLREV_B64_e64:
3234   case AMDGPU::V_LSHLREV_B64_gfx10:
3235   case AMDGPU::V_LSHRREV_B64_e64:
3236   case AMDGPU::V_LSHRREV_B64_gfx10:
3237   case AMDGPU::V_ASHRREV_I64_e64:
3238   case AMDGPU::V_ASHRREV_I64_gfx10:
3239   case AMDGPU::V_LSHL_B64_e64:
3240   case AMDGPU::V_LSHR_B64_e64:
3241   case AMDGPU::V_ASHR_I64_e64:
3242     return 1;
3243   default:
3244     return 2;
3245   }
3246 }
3247 
3248 bool AMDGPUAsmParser::usesConstantBus(const MCInst &Inst, unsigned OpIdx) {
3249   const MCOperand &MO = Inst.getOperand(OpIdx);
3250   if (MO.isImm()) {
3251     return !isInlineConstant(Inst, OpIdx);
3252   } else if (MO.isReg()) {
3253     auto Reg = MO.getReg();
3254     const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3255     auto PReg = mc2PseudoReg(Reg);
3256     return isSGPR(PReg, TRI) && PReg != SGPR_NULL;
3257   } else {
3258     return true;
3259   }
3260 }
3261 
3262 bool
3263 AMDGPUAsmParser::validateConstantBusLimitations(const MCInst &Inst,
3264                                                 const OperandVector &Operands) {
3265   const unsigned Opcode = Inst.getOpcode();
3266   const MCInstrDesc &Desc = MII.get(Opcode);
3267   unsigned LastSGPR = AMDGPU::NoRegister;
3268   unsigned ConstantBusUseCount = 0;
3269   unsigned NumLiterals = 0;
3270   unsigned LiteralSize;
3271 
3272   if (Desc.TSFlags &
3273       (SIInstrFlags::VOPC |
3274        SIInstrFlags::VOP1 | SIInstrFlags::VOP2 |
3275        SIInstrFlags::VOP3 | SIInstrFlags::VOP3P |
3276        SIInstrFlags::SDWA)) {
3277     // Check special imm operands (used by madmk, etc.)
3278     if (AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm) != -1) {
3279       ++NumLiterals;
3280       LiteralSize = 4;
3281     }
3282 
3283     SmallDenseSet<unsigned> SGPRsUsed;
3284     unsigned SGPRUsed = findImplicitSGPRReadInVOP(Inst);
3285     if (SGPRUsed != AMDGPU::NoRegister) {
3286       SGPRsUsed.insert(SGPRUsed);
3287       ++ConstantBusUseCount;
3288     }
3289 
3290     const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
3291     const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
3292     const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
3293 
3294     const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx };
3295 
3296     for (int OpIdx : OpIndices) {
3297       if (OpIdx == -1) break;
3298 
3299       const MCOperand &MO = Inst.getOperand(OpIdx);
3300       if (usesConstantBus(Inst, OpIdx)) {
3301         if (MO.isReg()) {
3302           LastSGPR = mc2PseudoReg(MO.getReg());
3303           // Pairs of registers with partial intersections such as
3304           //   s0, s[0:1]
3305           //   flat_scratch_lo, flat_scratch
3306           //   flat_scratch_lo, flat_scratch_hi
3307           // are theoretically valid but are disabled anyway.
3308           // Note that this code mimics SIInstrInfo::verifyInstruction.
3309           if (!SGPRsUsed.count(LastSGPR)) {
3310             SGPRsUsed.insert(LastSGPR);
3311             ++ConstantBusUseCount;
3312           }
3313         } else { // Expression or a literal
3314 
3315           if (Desc.OpInfo[OpIdx].OperandType == MCOI::OPERAND_IMMEDIATE)
3316             continue; // special operand like VINTERP attr_chan
3317 
3318           // An instruction may use only one literal.
3319           // This has been validated on the previous step.
3320           // See validateVOPLiteral.
3321           // This literal may be used as more than one operand.
3322           // If all these operands are of the same size,
3323           // this literal counts as one scalar value.
3324           // Otherwise it counts as 2 scalar values.
3325           // See "GFX10 Shader Programming", section 3.6.2.3.
3326 
3327           unsigned Size = AMDGPU::getOperandSize(Desc, OpIdx);
3328           if (Size < 4) Size = 4;
3329 
3330           if (NumLiterals == 0) {
3331             NumLiterals = 1;
3332             LiteralSize = Size;
3333           } else if (LiteralSize != Size) {
3334             NumLiterals = 2;
3335           }
3336         }
3337       }
3338     }
3339   }
3340   ConstantBusUseCount += NumLiterals;
3341 
3342   if (ConstantBusUseCount <= getConstantBusLimit(Opcode))
3343     return true;
3344 
3345   SMLoc LitLoc = getLitLoc(Operands);
3346   SMLoc RegLoc = getRegLoc(LastSGPR, Operands);
3347   SMLoc Loc = (LitLoc.getPointer() < RegLoc.getPointer()) ? RegLoc : LitLoc;
3348   Error(Loc, "invalid operand (violates constant bus restrictions)");
3349   return false;
3350 }
3351 
3352 bool
3353 AMDGPUAsmParser::validateEarlyClobberLimitations(const MCInst &Inst,
3354                                                  const OperandVector &Operands) {
3355   const unsigned Opcode = Inst.getOpcode();
3356   const MCInstrDesc &Desc = MII.get(Opcode);
3357 
3358   const int DstIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vdst);
3359   if (DstIdx == -1 ||
3360       Desc.getOperandConstraint(DstIdx, MCOI::EARLY_CLOBBER) == -1) {
3361     return true;
3362   }
3363 
3364   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3365 
3366   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
3367   const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
3368   const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
3369 
3370   assert(DstIdx != -1);
3371   const MCOperand &Dst = Inst.getOperand(DstIdx);
3372   assert(Dst.isReg());
3373   const unsigned DstReg = mc2PseudoReg(Dst.getReg());
3374 
3375   const int SrcIndices[] = { Src0Idx, Src1Idx, Src2Idx };
3376 
3377   for (int SrcIdx : SrcIndices) {
3378     if (SrcIdx == -1) break;
3379     const MCOperand &Src = Inst.getOperand(SrcIdx);
3380     if (Src.isReg()) {
3381       const unsigned SrcReg = mc2PseudoReg(Src.getReg());
3382       if (isRegIntersect(DstReg, SrcReg, TRI)) {
3383         Error(getRegLoc(SrcReg, Operands),
3384           "destination must be different than all sources");
3385         return false;
3386       }
3387     }
3388   }
3389 
3390   return true;
3391 }
3392 
3393 bool AMDGPUAsmParser::validateIntClampSupported(const MCInst &Inst) {
3394 
3395   const unsigned Opc = Inst.getOpcode();
3396   const MCInstrDesc &Desc = MII.get(Opc);
3397 
3398   if ((Desc.TSFlags & SIInstrFlags::IntClamp) != 0 && !hasIntClamp()) {
3399     int ClampIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp);
3400     assert(ClampIdx != -1);
3401     return Inst.getOperand(ClampIdx).getImm() == 0;
3402   }
3403 
3404   return true;
3405 }
3406 
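// The vdata operand of a MIMG instruction must supply one VGPR per dmask bit
// set (always 4 for gather4), plus one extra VGPR when tfe is enabled; the
// requirement is halved (rounded up) when d16 is used on packed-d16 targets.
// For example (illustrative): "image_load v[0:2], v[0:3], s[0:7] dmask:0x7
// unorm" writes three VGPRs because three dmask bits are set.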
3407 bool AMDGPUAsmParser::validateMIMGDataSize(const MCInst &Inst) {
3408 
3409   const unsigned Opc = Inst.getOpcode();
3410   const MCInstrDesc &Desc = MII.get(Opc);
3411 
3412   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3413     return true;
3414 
3415   int VDataIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdata);
3416   int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
3417   int TFEIdx   = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::tfe);
3418 
3419   assert(VDataIdx != -1);
3420 
3421   if (DMaskIdx == -1 || TFEIdx == -1) // intersect_ray
3422     return true;
3423 
3424   unsigned VDataSize = AMDGPU::getRegOperandSize(getMRI(), Desc, VDataIdx);
3425   unsigned TFESize = (TFEIdx != -1 && Inst.getOperand(TFEIdx).getImm()) ? 1 : 0;
3426   unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
3427   if (DMask == 0)
3428     DMask = 1;
3429 
3430   unsigned DataSize =
3431     (Desc.TSFlags & SIInstrFlags::Gather4) ? 4 : countPopulation(DMask);
3432   if (hasPackedD16()) {
3433     int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
3434     if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm())
3435       DataSize = (DataSize + 1) / 2;
3436   }
3437 
3438   return (VDataSize / 4) == DataSize + TFESize;
3439 }
3440 
3441 bool AMDGPUAsmParser::validateMIMGAddrSize(const MCInst &Inst) {
3442   const unsigned Opc = Inst.getOpcode();
3443   const MCInstrDesc &Desc = MII.get(Opc);
3444 
3445   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0 || !isGFX10Plus())
3446     return true;
3447 
3448   const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc);
3449 
3450   const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
3451       AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode);
3452   int VAddr0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr0);
3453   int SrsrcIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::srsrc);
3454   int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
3455   int A16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::a16);
3456 
3457   assert(VAddr0Idx != -1);
3458   assert(SrsrcIdx != -1);
3459   assert(SrsrcIdx > VAddr0Idx);
3460 
3461   if (DimIdx == -1)
3462     return true; // intersect_ray
3463 
3464   unsigned Dim = Inst.getOperand(DimIdx).getImm();
3465   const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim);
3466   bool IsNSA = SrsrcIdx - VAddr0Idx > 1;
3467   unsigned ActualAddrSize =
3468       IsNSA ? SrsrcIdx - VAddr0Idx
3469             : AMDGPU::getRegOperandSize(getMRI(), Desc, VAddr0Idx) / 4;
3470   bool IsA16 = (A16Idx != -1 && Inst.getOperand(A16Idx).getImm());
3471 
3472   unsigned ExpectedAddrSize =
3473       AMDGPU::getAddrSizeMIMGOp(BaseOpcode, DimInfo, IsA16, hasG16());
3474 
3475   if (!IsNSA) {
3476     if (ExpectedAddrSize > 8)
3477       ExpectedAddrSize = 16;
3478 
3479     // Allow oversized 8 VGPR vaddr when only 5/6/7 VGPRs are required.
3480     // This provides backward compatibility for assembly created
3481     // before 160b/192b/224b types were directly supported.
3482     if (ActualAddrSize == 8 && (ExpectedAddrSize >= 5 && ExpectedAddrSize <= 7))
3483       return true;
3484   }
3485 
3486   return ActualAddrSize == ExpectedAddrSize;
3487 }
3488 
3489 bool AMDGPUAsmParser::validateMIMGAtomicDMask(const MCInst &Inst) {
3490 
3491   const unsigned Opc = Inst.getOpcode();
3492   const MCInstrDesc &Desc = MII.get(Opc);
3493 
3494   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3495     return true;
3496   if (!Desc.mayLoad() || !Desc.mayStore())
3497     return true; // Not atomic
3498 
3499   int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
3500   unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
3501 
3502   // This is an incomplete check because image_atomic_cmpswap
3503   // may only use 0x3 and 0xf while other atomic operations
3504   // may use 0x1 and 0x3. However these limitations are
3505   // verified when we check that dmask matches dst size.
3506   return DMask == 0x1 || DMask == 0x3 || DMask == 0xf;
3507 }
3508 
3509 bool AMDGPUAsmParser::validateMIMGGatherDMask(const MCInst &Inst) {
3510 
3511   const unsigned Opc = Inst.getOpcode();
3512   const MCInstrDesc &Desc = MII.get(Opc);
3513 
3514   if ((Desc.TSFlags & SIInstrFlags::Gather4) == 0)
3515     return true;
3516 
3517   int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
3518   unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
3519 
3520   // GATHER4 instructions use dmask in a different fashion compared to
3521   // other MIMG instructions. The only useful DMASK values are
3522   // 1=red, 2=green, 4=blue, 8=alpha. (e.g. 1 returns
3523   // (red,red,red,red) etc.) The ISA document doesn't mention
3524   // this.
3525   return DMask == 0x1 || DMask == 0x2 || DMask == 0x4 || DMask == 0x8;
3526 }
3527 
3528 bool AMDGPUAsmParser::validateMIMGMSAA(const MCInst &Inst) {
3529   const unsigned Opc = Inst.getOpcode();
3530   const MCInstrDesc &Desc = MII.get(Opc);
3531 
3532   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3533     return true;
3534 
3535   const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc);
3536   const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
3537       AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode);
3538 
3539   if (!BaseOpcode->MSAA)
3540     return true;
3541 
3542   int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
3543   assert(DimIdx != -1);
3544 
3545   unsigned Dim = Inst.getOperand(DimIdx).getImm();
3546   const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim);
3547 
3548   return DimInfo->MSAA;
3549 }
3550 
3551 static bool IsMovrelsSDWAOpcode(const unsigned Opcode) {
3553   switch (Opcode) {
3554   case AMDGPU::V_MOVRELS_B32_sdwa_gfx10:
3555   case AMDGPU::V_MOVRELSD_B32_sdwa_gfx10:
3556   case AMDGPU::V_MOVRELSD_2_B32_sdwa_gfx10:
3557     return true;
3558   default:
3559     return false;
3560   }
3561 }
3562 
3563 // movrels* opcodes should only allow VGPRs as src0.
3564 // This is specified in the .td description for VOP1/VOP3,
3565 // but SDWA is handled differently. See isSDWAOperand.
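// For example (illustrative GFX10 SDWA syntax): "v_movrels_b32_sdwa v0, v1"
// is accepted, while "v_movrels_b32_sdwa v0, s1" is rejected with
// "source operand must be a VGPR".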
3566 bool AMDGPUAsmParser::validateMovrels(const MCInst &Inst,
3567                                       const OperandVector &Operands) {
3568 
3569   const unsigned Opc = Inst.getOpcode();
3570   const MCInstrDesc &Desc = MII.get(Opc);
3571 
3572   if ((Desc.TSFlags & SIInstrFlags::SDWA) == 0 || !IsMovrelsSDWAOpcode(Opc))
3573     return true;
3574 
3575   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
3576   assert(Src0Idx != -1);
3577 
3578   SMLoc ErrLoc;
3579   const MCOperand &Src0 = Inst.getOperand(Src0Idx);
3580   if (Src0.isReg()) {
3581     auto Reg = mc2PseudoReg(Src0.getReg());
3582     const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3583     if (!isSGPR(Reg, TRI))
3584       return true;
3585     ErrLoc = getRegLoc(Reg, Operands);
3586   } else {
3587     ErrLoc = getConstLoc(Operands);
3588   }
3589 
3590   Error(ErrLoc, "source operand must be a VGPR");
3591   return false;
3592 }
3593 
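// On gfx908, v_accvgpr_write_b32 accepts only a VGPR or an inline constant as
// src0. For example (illustrative): "v_accvgpr_write_b32 a0, v1" and
// "v_accvgpr_write_b32 a0, 1" are accepted, while "v_accvgpr_write_b32 a0, s1"
// is rejected.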
3594 bool AMDGPUAsmParser::validateMAIAccWrite(const MCInst &Inst,
3595                                           const OperandVector &Operands) {
3596 
3597   const unsigned Opc = Inst.getOpcode();
3598 
3599   if (Opc != AMDGPU::V_ACCVGPR_WRITE_B32_vi)
3600     return true;
3601 
3602   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
3603   assert(Src0Idx != -1);
3604 
3605   const MCOperand &Src0 = Inst.getOperand(Src0Idx);
3606   if (!Src0.isReg())
3607     return true;
3608 
3609   auto Reg = mc2PseudoReg(Src0.getReg());
3610   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3611   if (isSGPR(Reg, TRI)) {
3612     Error(getRegLoc(Reg, Operands),
3613           "source operand must be either a VGPR or an inline constant");
3614     return false;
3615   }
3616 
3617   return true;
3618 }
3619 
3620 bool AMDGPUAsmParser::validateMFMA(const MCInst &Inst,
3621                                    const OperandVector &Operands) {
3622   const unsigned Opc = Inst.getOpcode();
3623   const MCInstrDesc &Desc = MII.get(Opc);
3624 
3625   if ((Desc.TSFlags & SIInstrFlags::IsMAI) == 0)
3626     return true;
3627 
3628   const int Src2Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2);
3629   if (Src2Idx == -1)
3630     return true;
3631 
3632   const MCOperand &Src2 = Inst.getOperand(Src2Idx);
3633   if (!Src2.isReg())
3634     return true;
3635 
3636   MCRegister Src2Reg = Src2.getReg();
3637   MCRegister DstReg = Inst.getOperand(0).getReg();
3638   if (Src2Reg == DstReg)
3639     return true;
3640 
3641   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3642   if (TRI->getRegClass(Desc.OpInfo[0].RegClass).getSizeInBits() <= 128)
3643     return true;
3644 
3645   if (isRegIntersect(Src2Reg, DstReg, TRI)) {
3646     Error(getRegLoc(mc2PseudoReg(Src2Reg), Operands),
3647           "source 2 operand must not partially overlap with dst");
3648     return false;
3649   }
3650 
3651   return true;
3652 }
3653 
3654 bool AMDGPUAsmParser::validateDivScale(const MCInst &Inst) {
3655   switch (Inst.getOpcode()) {
3656   default:
3657     return true;
3658   case V_DIV_SCALE_F32_gfx6_gfx7:
3659   case V_DIV_SCALE_F32_vi:
3660   case V_DIV_SCALE_F32_gfx10:
3661   case V_DIV_SCALE_F64_gfx6_gfx7:
3662   case V_DIV_SCALE_F64_vi:
3663   case V_DIV_SCALE_F64_gfx10:
3664     break;
3665   }
3666 
3667   // TODO: Check that src0 = src1 or src2.
3668 
3669   for (auto Name : {AMDGPU::OpName::src0_modifiers,
3670                     AMDGPU::OpName::src1_modifiers,
3671                     AMDGPU::OpName::src2_modifiers}) {
3672     if (Inst.getOperand(AMDGPU::getNamedOperandIdx(Inst.getOpcode(), Name))
3673             .getImm() &
3674         SISrcMods::ABS) {
3675       return false;
3676     }
3677   }
3678 
3679   return true;
3680 }
3681 
3682 bool AMDGPUAsmParser::validateMIMGD16(const MCInst &Inst) {
3683 
3684   const unsigned Opc = Inst.getOpcode();
3685   const MCInstrDesc &Desc = MII.get(Opc);
3686 
3687   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3688     return true;
3689 
3690   int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
3691   if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm()) {
3692     if (isCI() || isSI())
3693       return false;
3694   }
3695 
3696   return true;
3697 }
3698 
3699 bool AMDGPUAsmParser::validateMIMGDim(const MCInst &Inst) {
3700   const unsigned Opc = Inst.getOpcode();
3701   const MCInstrDesc &Desc = MII.get(Opc);
3702 
3703   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3704     return true;
3705 
3706   int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
3707   if (DimIdx < 0)
3708     return true;
3709 
3710   int64_t Imm = Inst.getOperand(DimIdx).getImm();
3711   if (Imm < 0 || Imm >= 8)
3712     return false;
3713 
3714   return true;
3715 }
3716 
3717 static bool IsRevOpcode(const unsigned Opcode) {
3719   switch (Opcode) {
3720   case AMDGPU::V_SUBREV_F32_e32:
3721   case AMDGPU::V_SUBREV_F32_e64:
3722   case AMDGPU::V_SUBREV_F32_e32_gfx10:
3723   case AMDGPU::V_SUBREV_F32_e32_gfx6_gfx7:
3724   case AMDGPU::V_SUBREV_F32_e32_vi:
3725   case AMDGPU::V_SUBREV_F32_e64_gfx10:
3726   case AMDGPU::V_SUBREV_F32_e64_gfx6_gfx7:
3727   case AMDGPU::V_SUBREV_F32_e64_vi:
3728 
3729   case AMDGPU::V_SUBREV_CO_U32_e32:
3730   case AMDGPU::V_SUBREV_CO_U32_e64:
3731   case AMDGPU::V_SUBREV_I32_e32_gfx6_gfx7:
3732   case AMDGPU::V_SUBREV_I32_e64_gfx6_gfx7:
3733 
3734   case AMDGPU::V_SUBBREV_U32_e32:
3735   case AMDGPU::V_SUBBREV_U32_e64:
3736   case AMDGPU::V_SUBBREV_U32_e32_gfx6_gfx7:
3737   case AMDGPU::V_SUBBREV_U32_e32_vi:
3738   case AMDGPU::V_SUBBREV_U32_e64_gfx6_gfx7:
3739   case AMDGPU::V_SUBBREV_U32_e64_vi:
3740 
3741   case AMDGPU::V_SUBREV_U32_e32:
3742   case AMDGPU::V_SUBREV_U32_e64:
3743   case AMDGPU::V_SUBREV_U32_e32_gfx9:
3744   case AMDGPU::V_SUBREV_U32_e32_vi:
3745   case AMDGPU::V_SUBREV_U32_e64_gfx9:
3746   case AMDGPU::V_SUBREV_U32_e64_vi:
3747 
3748   case AMDGPU::V_SUBREV_F16_e32:
3749   case AMDGPU::V_SUBREV_F16_e64:
3750   case AMDGPU::V_SUBREV_F16_e32_gfx10:
3751   case AMDGPU::V_SUBREV_F16_e32_vi:
3752   case AMDGPU::V_SUBREV_F16_e64_gfx10:
3753   case AMDGPU::V_SUBREV_F16_e64_vi:
3754 
3755   case AMDGPU::V_SUBREV_U16_e32:
3756   case AMDGPU::V_SUBREV_U16_e64:
3757   case AMDGPU::V_SUBREV_U16_e32_vi:
3758   case AMDGPU::V_SUBREV_U16_e64_vi:
3759 
3760   case AMDGPU::V_SUBREV_CO_U32_e32_gfx9:
3761   case AMDGPU::V_SUBREV_CO_U32_e64_gfx10:
3762   case AMDGPU::V_SUBREV_CO_U32_e64_gfx9:
3763 
3764   case AMDGPU::V_SUBBREV_CO_U32_e32_gfx9:
3765   case AMDGPU::V_SUBBREV_CO_U32_e64_gfx9:
3766 
3767   case AMDGPU::V_SUBREV_NC_U32_e32_gfx10:
3768   case AMDGPU::V_SUBREV_NC_U32_e64_gfx10:
3769 
3770   case AMDGPU::V_SUBREV_CO_CI_U32_e32_gfx10:
3771   case AMDGPU::V_SUBREV_CO_CI_U32_e64_gfx10:
3772 
3773   case AMDGPU::V_LSHRREV_B32_e32:
3774   case AMDGPU::V_LSHRREV_B32_e64:
3775   case AMDGPU::V_LSHRREV_B32_e32_gfx6_gfx7:
3776   case AMDGPU::V_LSHRREV_B32_e64_gfx6_gfx7:
3777   case AMDGPU::V_LSHRREV_B32_e32_vi:
3778   case AMDGPU::V_LSHRREV_B32_e64_vi:
3779   case AMDGPU::V_LSHRREV_B32_e32_gfx10:
3780   case AMDGPU::V_LSHRREV_B32_e64_gfx10:
3781 
3782   case AMDGPU::V_ASHRREV_I32_e32:
3783   case AMDGPU::V_ASHRREV_I32_e64:
3784   case AMDGPU::V_ASHRREV_I32_e32_gfx10:
3785   case AMDGPU::V_ASHRREV_I32_e32_gfx6_gfx7:
3786   case AMDGPU::V_ASHRREV_I32_e32_vi:
3787   case AMDGPU::V_ASHRREV_I32_e64_gfx10:
3788   case AMDGPU::V_ASHRREV_I32_e64_gfx6_gfx7:
3789   case AMDGPU::V_ASHRREV_I32_e64_vi:
3790 
3791   case AMDGPU::V_LSHLREV_B32_e32:
3792   case AMDGPU::V_LSHLREV_B32_e64:
3793   case AMDGPU::V_LSHLREV_B32_e32_gfx10:
3794   case AMDGPU::V_LSHLREV_B32_e32_gfx6_gfx7:
3795   case AMDGPU::V_LSHLREV_B32_e32_vi:
3796   case AMDGPU::V_LSHLREV_B32_e64_gfx10:
3797   case AMDGPU::V_LSHLREV_B32_e64_gfx6_gfx7:
3798   case AMDGPU::V_LSHLREV_B32_e64_vi:
3799 
3800   case AMDGPU::V_LSHLREV_B16_e32:
3801   case AMDGPU::V_LSHLREV_B16_e64:
3802   case AMDGPU::V_LSHLREV_B16_e32_vi:
3803   case AMDGPU::V_LSHLREV_B16_e64_vi:
3804   case AMDGPU::V_LSHLREV_B16_gfx10:
3805 
3806   case AMDGPU::V_LSHRREV_B16_e32:
3807   case AMDGPU::V_LSHRREV_B16_e64:
3808   case AMDGPU::V_LSHRREV_B16_e32_vi:
3809   case AMDGPU::V_LSHRREV_B16_e64_vi:
3810   case AMDGPU::V_LSHRREV_B16_gfx10:
3811 
3812   case AMDGPU::V_ASHRREV_I16_e32:
3813   case AMDGPU::V_ASHRREV_I16_e64:
3814   case AMDGPU::V_ASHRREV_I16_e32_vi:
3815   case AMDGPU::V_ASHRREV_I16_e64_vi:
3816   case AMDGPU::V_ASHRREV_I16_gfx10:
3817 
3818   case AMDGPU::V_LSHLREV_B64_e64:
3819   case AMDGPU::V_LSHLREV_B64_gfx10:
3820   case AMDGPU::V_LSHLREV_B64_vi:
3821 
3822   case AMDGPU::V_LSHRREV_B64_e64:
3823   case AMDGPU::V_LSHRREV_B64_gfx10:
3824   case AMDGPU::V_LSHRREV_B64_vi:
3825 
3826   case AMDGPU::V_ASHRREV_I64_e64:
3827   case AMDGPU::V_ASHRREV_I64_gfx10:
3828   case AMDGPU::V_ASHRREV_I64_vi:
3829 
3830   case AMDGPU::V_PK_LSHLREV_B16:
3831   case AMDGPU::V_PK_LSHLREV_B16_gfx10:
3832   case AMDGPU::V_PK_LSHLREV_B16_vi:
3833 
3834   case AMDGPU::V_PK_LSHRREV_B16:
3835   case AMDGPU::V_PK_LSHRREV_B16_gfx10:
3836   case AMDGPU::V_PK_LSHRREV_B16_vi:
3837   case AMDGPU::V_PK_ASHRREV_I16:
3838   case AMDGPU::V_PK_ASHRREV_I16_gfx10:
3839   case AMDGPU::V_PK_ASHRREV_I16_vi:
3840     return true;
3841   default:
3842     return false;
3843   }
3844 }
3845 
3846 Optional<StringRef> AMDGPUAsmParser::validateLdsDirect(const MCInst &Inst) {
3847 
3848   using namespace SIInstrFlags;
3849   const unsigned Opcode = Inst.getOpcode();
3850   const MCInstrDesc &Desc = MII.get(Opcode);
3851 
3852   // lds_direct register is defined so that it can be used
3853   // with 9-bit operands only. Ignore encodings which do not accept these.
3854   const auto Enc = VOP1 | VOP2 | VOP3 | VOPC | VOP3P | SIInstrFlags::SDWA;
3855   if ((Desc.TSFlags & Enc) == 0)
3856     return None;
3857 
3858   for (auto SrcName : {OpName::src0, OpName::src1, OpName::src2}) {
3859     auto SrcIdx = getNamedOperandIdx(Opcode, SrcName);
3860     if (SrcIdx == -1)
3861       break;
3862     const auto &Src = Inst.getOperand(SrcIdx);
3863     if (Src.isReg() && Src.getReg() == LDS_DIRECT) {
3864 
3865       if (isGFX90A())
3866         return StringRef("lds_direct is not supported on this GPU");
3867 
3868       if (IsRevOpcode(Opcode) || (Desc.TSFlags & SIInstrFlags::SDWA))
3869         return StringRef("lds_direct cannot be used with this instruction");
3870 
3871       if (SrcName != OpName::src0)
3872         return StringRef("lds_direct may be used as src0 only");
3873     }
3874   }
3875 
3876   return None;
3877 }
3878 
3879 SMLoc AMDGPUAsmParser::getFlatOffsetLoc(const OperandVector &Operands) const {
3880   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
3881     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
3882     if (Op.isFlatOffset())
3883       return Op.getStartLoc();
3884   }
3885   return getLoc();
3886 }
3887 
3888 bool AMDGPUAsmParser::validateFlatOffset(const MCInst &Inst,
3889                                          const OperandVector &Operands) {
3890   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
3891   if ((TSFlags & SIInstrFlags::FLAT) == 0)
3892     return true;
3893 
3894   auto Opcode = Inst.getOpcode();
3895   auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
3896   assert(OpNum != -1);
3897 
3898   const auto &Op = Inst.getOperand(OpNum);
3899   if (!hasFlatOffsets() && Op.getImm() != 0) {
3900     Error(getFlatOffsetLoc(Operands),
3901           "flat offset modifier is not supported on this GPU");
3902     return false;
3903   }
3904 
3905   // GLOBAL and SCRATCH instructions accept a signed offset. For plain
3906   // FLAT the offset must be positive; the MSB is ignored and forced to zero.
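  // Illustrative examples (the exact offset widths depend on the subtarget):
  //   global_load_dword v0, v[2:3], off offset:-8   ; signed offset, accepted
  //   flat_load_dword   v0, v[2:3] offset:-8        ; rejected: plain FLAT
  //                                                 ; offsets are unsigned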
3907   if (TSFlags & (SIInstrFlags::FlatGlobal | SIInstrFlags::FlatScratch)) {
3908     unsigned OffsetSize = AMDGPU::getNumFlatOffsetBits(getSTI(), true);
3909     if (!isIntN(OffsetSize, Op.getImm())) {
3910       Error(getFlatOffsetLoc(Operands),
3911             Twine("expected a ") + Twine(OffsetSize) + "-bit signed offset");
3912       return false;
3913     }
3914   } else {
3915     unsigned OffsetSize = AMDGPU::getNumFlatOffsetBits(getSTI(), false);
3916     if (!isUIntN(OffsetSize, Op.getImm())) {
3917       Error(getFlatOffsetLoc(Operands),
3918             Twine("expected a ") + Twine(OffsetSize) + "-bit unsigned offset");
3919       return false;
3920     }
3921   }
3922 
3923   return true;
3924 }
3925 
3926 SMLoc AMDGPUAsmParser::getSMEMOffsetLoc(const OperandVector &Operands) const {
3927   // Start with second operand because SMEM Offset cannot be dst or src0.
3928   for (unsigned i = 2, e = Operands.size(); i != e; ++i) {
3929     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
3930     if (Op.isSMEMOffset())
3931       return Op.getStartLoc();
3932   }
3933   return getLoc();
3934 }
3935 
3936 bool AMDGPUAsmParser::validateSMEMOffset(const MCInst &Inst,
3937                                          const OperandVector &Operands) {
3938   if (isCI() || isSI())
3939     return true;
3940 
3941   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
3942   if ((TSFlags & SIInstrFlags::SMRD) == 0)
3943     return true;
3944 
3945   auto Opcode = Inst.getOpcode();
3946   auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
3947   if (OpNum == -1)
3948     return true;
3949 
3950   const auto &Op = Inst.getOperand(OpNum);
3951   if (!Op.isImm())
3952     return true;
3953 
3954   uint64_t Offset = Op.getImm();
3955   bool IsBuffer = AMDGPU::getSMEMIsBuffer(Opcode);
3956   if (AMDGPU::isLegalSMRDEncodedUnsignedOffset(getSTI(), Offset) ||
3957       AMDGPU::isLegalSMRDEncodedSignedOffset(getSTI(), Offset, IsBuffer))
3958     return true;
3959 
3960   Error(getSMEMOffsetLoc(Operands),
3961         (isVI() || IsBuffer) ? "expected a 20-bit unsigned offset" :
3962                                "expected a 21-bit signed offset");
3963 
3964   return false;
3965 }
3966 
3967 bool AMDGPUAsmParser::validateSOPLiteral(const MCInst &Inst) const {
3968   unsigned Opcode = Inst.getOpcode();
3969   const MCInstrDesc &Desc = MII.get(Opcode);
3970   if (!(Desc.TSFlags & (SIInstrFlags::SOP2 | SIInstrFlags::SOPC)))
3971     return true;
3972 
3973   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
3974   const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
3975 
3976   const int OpIndices[] = { Src0Idx, Src1Idx };
3977 
3978   unsigned NumExprs = 0;
3979   unsigned NumLiterals = 0;
3980   uint32_t LiteralValue;
3981 
3982   for (int OpIdx : OpIndices) {
3983     if (OpIdx == -1) break;
3984 
3985     const MCOperand &MO = Inst.getOperand(OpIdx);
3986     // Exclude special imm operands (like those used by s_set_gpr_idx_on)
3987     if (AMDGPU::isSISrcOperand(Desc, OpIdx)) {
3988       if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) {
3989         uint32_t Value = static_cast<uint32_t>(MO.getImm());
3990         if (NumLiterals == 0 || LiteralValue != Value) {
3991           LiteralValue = Value;
3992           ++NumLiterals;
3993         }
3994       } else if (MO.isExpr()) {
3995         ++NumExprs;
3996       }
3997     }
3998   }
3999 
4000   return NumLiterals + NumExprs <= 1;
4001 }
4002 
4003 bool AMDGPUAsmParser::validateOpSel(const MCInst &Inst) {
4004   const unsigned Opc = Inst.getOpcode();
4005   if (Opc == AMDGPU::V_PERMLANE16_B32_gfx10 ||
4006       Opc == AMDGPU::V_PERMLANEX16_B32_gfx10) {
4007     int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
4008     unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
4009 
4010     if (OpSel & ~3)
4011       return false;
4012   }
4013   return true;
4014 }
4015 
4016 bool AMDGPUAsmParser::validateDPP(const MCInst &Inst,
4017                                   const OperandVector &Operands) {
4018   const unsigned Opc = Inst.getOpcode();
4019   int DppCtrlIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dpp_ctrl);
4020   if (DppCtrlIdx < 0)
4021     return true;
4022   unsigned DppCtrl = Inst.getOperand(DppCtrlIdx).getImm();
4023 
4024   if (!AMDGPU::isLegal64BitDPPControl(DppCtrl)) {
4025     // DPP64 is supported for row_newbcast only.
4026     int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
4027     if (Src0Idx >= 0 &&
4028         getMRI()->getSubReg(Inst.getOperand(Src0Idx).getReg(), AMDGPU::sub1)) {
4029       SMLoc S = getImmLoc(AMDGPUOperand::ImmTyDppCtrl, Operands);
4030       Error(S, "64 bit dpp only supports row_newbcast");
4031       return false;
4032     }
4033   }
4034 
4035   return true;
4036 }
4037 
4038 // Check if the VCC register matches the wavefront size.
4039 bool AMDGPUAsmParser::validateVccOperand(unsigned Reg) const {
4040   auto FB = getFeatureBits();
4041   return (FB[AMDGPU::FeatureWavefrontSize64] && Reg == AMDGPU::VCC) ||
4042     (FB[AMDGPU::FeatureWavefrontSize32] && Reg == AMDGPU::VCC_LO);
4043 }
4044 
4045 // Only one unique literal may be used. A VOP3 literal is only allowed on GFX10+.
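// For example (illustrative): "v_add_f32_e64 v0, 0x11223344, v1" is accepted
// only on targets with FeatureVOP3Literal (GFX10+); elsewhere it is rejected
// with "literal operands are not supported".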
4046 bool AMDGPUAsmParser::validateVOPLiteral(const MCInst &Inst,
4047                                          const OperandVector &Operands) {
4048   unsigned Opcode = Inst.getOpcode();
4049   const MCInstrDesc &Desc = MII.get(Opcode);
4050   const int ImmIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm);
4051   if (!(Desc.TSFlags & (SIInstrFlags::VOP3 | SIInstrFlags::VOP3P)) &&
4052       ImmIdx == -1)
4053     return true;
4054 
4055   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
4056   const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
4057   const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
4058 
4059   const int OpIndices[] = {Src0Idx, Src1Idx, Src2Idx, ImmIdx};
4060 
4061   unsigned NumExprs = 0;
4062   unsigned NumLiterals = 0;
4063   uint32_t LiteralValue;
4064 
4065   for (int OpIdx : OpIndices) {
4066     if (OpIdx == -1)
4067       continue;
4068 
4069     const MCOperand &MO = Inst.getOperand(OpIdx);
4070     if (!MO.isImm() && !MO.isExpr())
4071       continue;
4072     if (!AMDGPU::isSISrcOperand(Desc, OpIdx))
4073       continue;
4074 
4075     if (OpIdx == Src2Idx && (Desc.TSFlags & SIInstrFlags::IsMAI) &&
4076         getFeatureBits()[AMDGPU::FeatureMFMAInlineLiteralBug]) {
4077       Error(getConstLoc(Operands),
4078             "inline constants are not allowed for this operand");
4079       return false;
4080     }
4081 
4082     if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) {
4083       uint32_t Value = static_cast<uint32_t>(MO.getImm());
4084       if (NumLiterals == 0 || LiteralValue != Value) {
4085         LiteralValue = Value;
4086         ++NumLiterals;
4087       }
4088     } else if (MO.isExpr()) {
4089       ++NumExprs;
4090     }
4091   }
4092   NumLiterals += NumExprs;
4093 
4094   if (!NumLiterals)
4095     return true;
4096 
4097   if (ImmIdx == -1 && !getFeatureBits()[AMDGPU::FeatureVOP3Literal]) {
4098     Error(getLitLoc(Operands), "literal operands are not supported");
4099     return false;
4100   }
4101 
4102   if (NumLiterals > 1) {
4103     Error(getLitLoc(Operands), "only one literal operand is allowed");
4104     return false;
4105   }
4106 
4107   return true;
4108 }
4109 
4110 // Returns -1 if not a register, 0 if VGPR and 1 if AGPR.
4111 static int IsAGPROperand(const MCInst &Inst, uint16_t NameIdx,
4112                          const MCRegisterInfo *MRI) {
4113   int OpIdx = AMDGPU::getNamedOperandIdx(Inst.getOpcode(), NameIdx);
4114   if (OpIdx < 0)
4115     return -1;
4116 
4117   const MCOperand &Op = Inst.getOperand(OpIdx);
4118   if (!Op.isReg())
4119     return -1;
4120 
4121   unsigned Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0);
4122   auto Reg = Sub ? Sub : Op.getReg();
4123   const MCRegisterClass &AGPR32 = MRI->getRegClass(AMDGPU::AGPR_32RegClassID);
4124   return AGPR32.contains(Reg) ? 1 : 0;
4125 }
4126 
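// For FLAT, MUBUF, MTBUF, MIMG and DS memory instructions, the data and
// destination operands must consistently use VGPRs or, on gfx90a+,
// consistently use AGPRs; mixing the two classes (or using AGPRs at all on
// targets without gfx90a instructions) is rejected.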
4127 bool AMDGPUAsmParser::validateAGPRLdSt(const MCInst &Inst) const {
4128   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4129   if ((TSFlags & (SIInstrFlags::FLAT | SIInstrFlags::MUBUF |
4130                   SIInstrFlags::MTBUF | SIInstrFlags::MIMG |
4131                   SIInstrFlags::DS)) == 0)
4132     return true;
4133 
4134   uint16_t DataNameIdx = (TSFlags & SIInstrFlags::DS) ? AMDGPU::OpName::data0
4135                                                       : AMDGPU::OpName::vdata;
4136 
4137   const MCRegisterInfo *MRI = getMRI();
4138   int DstAreg = IsAGPROperand(Inst, AMDGPU::OpName::vdst, MRI);
4139   int DataAreg = IsAGPROperand(Inst, DataNameIdx, MRI);
4140 
4141   if ((TSFlags & SIInstrFlags::DS) && DataAreg >= 0) {
4142     int Data2Areg = IsAGPROperand(Inst, AMDGPU::OpName::data1, MRI);
4143     if (Data2Areg >= 0 && Data2Areg != DataAreg)
4144       return false;
4145   }
4146 
4147   auto FB = getFeatureBits();
4148   if (FB[AMDGPU::FeatureGFX90AInsts]) {
4149     if (DataAreg < 0 || DstAreg < 0)
4150       return true;
4151     return DstAreg == DataAreg;
4152   }
4153 
4154   return DstAreg < 1 && DataAreg < 1;
4155 }
4156 
4157 bool AMDGPUAsmParser::validateVGPRAlign(const MCInst &Inst) const {
4158   auto FB = getFeatureBits();
4159   if (!FB[AMDGPU::FeatureGFX90AInsts])
4160     return true;
4161 
4162   const MCRegisterInfo *MRI = getMRI();
4163   const MCRegisterClass &VGPR32 = MRI->getRegClass(AMDGPU::VGPR_32RegClassID);
4164   const MCRegisterClass &AGPR32 = MRI->getRegClass(AMDGPU::AGPR_32RegClassID);
4165   for (unsigned I = 0, E = Inst.getNumOperands(); I != E; ++I) {
4166     const MCOperand &Op = Inst.getOperand(I);
4167     if (!Op.isReg())
4168       continue;
4169 
4170     unsigned Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0);
4171     if (!Sub)
4172       continue;
4173 
4174     if (VGPR32.contains(Sub) && ((Sub - AMDGPU::VGPR0) & 1))
4175       return false;
4176     if (AGPR32.contains(Sub) && ((Sub - AMDGPU::AGPR0) & 1))
4177       return false;
4178   }
4179 
4180   return true;
4181 }
4182 
4183 // gfx90a has an undocumented limitation:
4184 // DS_GWS opcodes must use even aligned registers.
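// For example (illustrative, assuming "ds_gws_init vN gds" syntax):
// "ds_gws_init v2 gds" is accepted, while "ds_gws_init v3 gds" is rejected
// with "vgpr must be even aligned".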
4185 bool AMDGPUAsmParser::validateGWS(const MCInst &Inst,
4186                                   const OperandVector &Operands) {
4187   if (!getFeatureBits()[AMDGPU::FeatureGFX90AInsts])
4188     return true;
4189 
4190   int Opc = Inst.getOpcode();
4191   if (Opc != AMDGPU::DS_GWS_INIT_vi && Opc != AMDGPU::DS_GWS_BARRIER_vi &&
4192       Opc != AMDGPU::DS_GWS_SEMA_BR_vi)
4193     return true;
4194 
4195   const MCRegisterInfo *MRI = getMRI();
4196   const MCRegisterClass &VGPR32 = MRI->getRegClass(AMDGPU::VGPR_32RegClassID);
4197   int Data0Pos =
4198       AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::data0);
4199   assert(Data0Pos != -1);
4200   auto Reg = Inst.getOperand(Data0Pos).getReg();
4201   auto RegIdx = Reg - (VGPR32.contains(Reg) ? AMDGPU::VGPR0 : AMDGPU::AGPR0);
4202   if (RegIdx & 1) {
4203     SMLoc RegLoc = getRegLoc(Reg, Operands);
4204     Error(RegLoc, "vgpr must be even aligned");
4205     return false;
4206   }
4207 
4208   return true;
4209 }
4210 
4211 bool AMDGPUAsmParser::validateCoherencyBits(const MCInst &Inst,
4212                                             const OperandVector &Operands,
4213                                             const SMLoc &IDLoc) {
4214   int CPolPos = AMDGPU::getNamedOperandIdx(Inst.getOpcode(),
4215                                            AMDGPU::OpName::cpol);
4216   if (CPolPos == -1)
4217     return true;
4218 
4219   unsigned CPol = Inst.getOperand(CPolPos).getImm();
4220 
4221   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4222   if ((TSFlags & (SIInstrFlags::SMRD)) &&
4223       (CPol & ~(AMDGPU::CPol::GLC | AMDGPU::CPol::DLC))) {
4224     Error(IDLoc, "invalid cache policy for SMRD instruction");
4225     return false;
4226   }
4227 
4228   if (isGFX90A() && (CPol & CPol::SCC)) {
4229     SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
4230     StringRef CStr(S.getPointer());
4231     S = SMLoc::getFromPointer(&CStr.data()[CStr.find("scc")]);
4232     Error(S, "scc is not supported on this GPU");
4233     return false;
4234   }
4235 
4236   if (!(TSFlags & (SIInstrFlags::IsAtomicNoRet | SIInstrFlags::IsAtomicRet)))
4237     return true;
4238 
4239   if (TSFlags & SIInstrFlags::IsAtomicRet) {
4240     if (!(TSFlags & SIInstrFlags::MIMG) && !(CPol & CPol::GLC)) {
4241       Error(IDLoc, "instruction must use glc");
4242       return false;
4243     }
4244   } else {
4245     if (CPol & CPol::GLC) {
4246       SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
4247       StringRef CStr(S.getPointer());
4248       S = SMLoc::getFromPointer(&CStr.data()[CStr.find("glc")]);
4249       Error(S, "instruction must not use glc");
4250       return false;
4251     }
4252   }
4253 
4254   return true;
4255 }
4256 
4257 bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst,
4258                                           const SMLoc &IDLoc,
4259                                           const OperandVector &Operands) {
4260   if (auto ErrMsg = validateLdsDirect(Inst)) {
4261     Error(getRegLoc(LDS_DIRECT, Operands), *ErrMsg);
4262     return false;
4263   }
4264   if (!validateSOPLiteral(Inst)) {
4265     Error(getLitLoc(Operands),
4266       "only one literal operand is allowed");
4267     return false;
4268   }
4269   if (!validateVOPLiteral(Inst, Operands)) {
4270     return false;
4271   }
4272   if (!validateConstantBusLimitations(Inst, Operands)) {
4273     return false;
4274   }
4275   if (!validateEarlyClobberLimitations(Inst, Operands)) {
4276     return false;
4277   }
4278   if (!validateIntClampSupported(Inst)) {
4279     Error(getImmLoc(AMDGPUOperand::ImmTyClampSI, Operands),
4280       "integer clamping is not supported on this GPU");
4281     return false;
4282   }
4283   if (!validateOpSel(Inst)) {
4284     Error(getImmLoc(AMDGPUOperand::ImmTyOpSel, Operands),
4285       "invalid op_sel operand");
4286     return false;
4287   }
4288   if (!validateDPP(Inst, Operands)) {
4289     return false;
4290   }
4291   // For MUBUF/MTBUF, d16 is part of the opcode, so there is nothing to validate.
4292   if (!validateMIMGD16(Inst)) {
4293     Error(getImmLoc(AMDGPUOperand::ImmTyD16, Operands),
4294       "d16 modifier is not supported on this GPU");
4295     return false;
4296   }
4297   if (!validateMIMGDim(Inst)) {
4298     Error(IDLoc, "dim modifier is required on this GPU");
4299     return false;
4300   }
4301   if (!validateMIMGMSAA(Inst)) {
4302     Error(getImmLoc(AMDGPUOperand::ImmTyDim, Operands),
4303           "invalid dim; must be MSAA type");
4304     return false;
4305   }
4306   if (!validateMIMGDataSize(Inst)) {
4307     Error(IDLoc,
4308       "image data size does not match dmask and tfe");
4309     return false;
4310   }
4311   if (!validateMIMGAddrSize(Inst)) {
4312     Error(IDLoc,
4313       "image address size does not match dim and a16");
4314     return false;
4315   }
4316   if (!validateMIMGAtomicDMask(Inst)) {
4317     Error(getImmLoc(AMDGPUOperand::ImmTyDMask, Operands),
4318       "invalid atomic image dmask");
4319     return false;
4320   }
4321   if (!validateMIMGGatherDMask(Inst)) {
4322     Error(getImmLoc(AMDGPUOperand::ImmTyDMask, Operands),
4323       "invalid image_gather dmask: only one bit must be set");
4324     return false;
4325   }
4326   if (!validateMovrels(Inst, Operands)) {
4327     return false;
4328   }
4329   if (!validateFlatOffset(Inst, Operands)) {
4330     return false;
4331   }
4332   if (!validateSMEMOffset(Inst, Operands)) {
4333     return false;
4334   }
4335   if (!validateMAIAccWrite(Inst, Operands)) {
4336     return false;
4337   }
4338   if (!validateMFMA(Inst, Operands)) {
4339     return false;
4340   }
4341   if (!validateCoherencyBits(Inst, Operands, IDLoc)) {
4342     return false;
4343   }
4344 
4345   if (!validateAGPRLdSt(Inst)) {
4346     Error(IDLoc, getFeatureBits()[AMDGPU::FeatureGFX90AInsts]
4347     ? "invalid register class: data and dst should be all VGPR or AGPR"
4348     : "invalid register class: agpr loads and stores not supported on this GPU"
4349     );
4350     return false;
4351   }
4352   if (!validateVGPRAlign(Inst)) {
4353     Error(IDLoc,
4354       "invalid register class: vgpr tuples must be 64 bit aligned");
4355     return false;
4356   }
4357   if (!validateGWS(Inst, Operands)) {
4358     return false;
4359   }
4360 
4361   if (!validateDivScale(Inst)) {
4362     Error(IDLoc, "ABS not allowed in VOP3B instructions");
4363     return false;
4364   }
4369   return true;
4370 }
4371 
4372 static std::string AMDGPUMnemonicSpellCheck(StringRef S,
4373                                             const FeatureBitset &FBS,
4374                                             unsigned VariantID = 0);
4375 
4376 static bool AMDGPUCheckMnemonic(StringRef Mnemonic,
4377                                 const FeatureBitset &AvailableFeatures,
4378                                 unsigned VariantID);
4379 
4380 bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo,
4381                                        const FeatureBitset &FBS) {
4382   return isSupportedMnemo(Mnemo, FBS, getAllVariants());
4383 }
4384 
4385 bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo,
4386                                        const FeatureBitset &FBS,
4387                                        ArrayRef<unsigned> Variants) {
4388   for (auto Variant : Variants) {
4389     if (AMDGPUCheckMnemonic(Mnemo, FBS, Variant))
4390       return true;
4391   }
4392 
4393   return false;
4394 }
4395 
4396 bool AMDGPUAsmParser::checkUnsupportedInstruction(StringRef Mnemo,
4397                                                   const SMLoc &IDLoc) {
4398   FeatureBitset FBS = ComputeAvailableFeatures(getSTI().getFeatureBits());
4399 
4400   // Check if requested instruction variant is supported.
4401   if (isSupportedMnemo(Mnemo, FBS, getMatchedVariants()))
4402     return false;
4403 
4404   // This instruction is not supported.
4405   // Clear any other pending errors because they are no longer relevant.
4406   getParser().clearPendingErrors();
4407 
4408   // Requested instruction variant is not supported.
4409   // Check if any other variants are supported.
4410   StringRef VariantName = getMatchedVariantName();
4411   if (!VariantName.empty() && isSupportedMnemo(Mnemo, FBS)) {
4412     return Error(IDLoc,
4413                  Twine(VariantName,
4414                        " variant of this instruction is not supported"));
4415   }
4416 
4417   // Finally check if this instruction is supported on any other GPU.
4418   if (isSupportedMnemo(Mnemo, FeatureBitset().set())) {
4419     return Error(IDLoc, "instruction not supported on this GPU");
4420   }
4421 
4422   // Instruction not supported on any GPU. Probably a typo.
4423   std::string Suggestion = AMDGPUMnemonicSpellCheck(Mnemo, FBS);
4424   return Error(IDLoc, "invalid instruction" + Suggestion);
4425 }
4426 
4427 bool AMDGPUAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
4428                                               OperandVector &Operands,
4429                                               MCStreamer &Out,
4430                                               uint64_t &ErrorInfo,
4431                                               bool MatchingInlineAsm) {
4432   MCInst Inst;
4433   unsigned Result = Match_Success;
4434   for (auto Variant : getMatchedVariants()) {
4435     uint64_t EI;
4436     auto R = MatchInstructionImpl(Operands, Inst, EI, MatchingInlineAsm,
4437                                   Variant);
4438     // We order match statuses from least to most specific and use the most
4439     // specific status as the result:
4440     // Match_MnemonicFail < Match_InvalidOperand < Match_MissingFeature < Match_PreferE32
4441     if ((R == Match_Success) ||
4442         (R == Match_PreferE32) ||
4443         (R == Match_MissingFeature && Result != Match_PreferE32) ||
4444         (R == Match_InvalidOperand && Result != Match_MissingFeature
4445                                    && Result != Match_PreferE32) ||
4446         (R == Match_MnemonicFail   && Result != Match_InvalidOperand
4447                                    && Result != Match_MissingFeature
4448                                    && Result != Match_PreferE32)) {
4449       Result = R;
4450       ErrorInfo = EI;
4451     }
4452     if (R == Match_Success)
4453       break;
4454   }
4455 
4456   if (Result == Match_Success) {
4457     if (!validateInstruction(Inst, IDLoc, Operands)) {
4458       return true;
4459     }
4460     Inst.setLoc(IDLoc);
4461     Out.emitInstruction(Inst, getSTI());
4462     return false;
4463   }
4464 
4465   StringRef Mnemo = ((AMDGPUOperand &)*Operands[0]).getToken();
4466   if (checkUnsupportedInstruction(Mnemo, IDLoc)) {
4467     return true;
4468   }
4469 
4470   switch (Result) {
4471   default: break;
4472   case Match_MissingFeature:
4473     // It has been verified that the specified instruction
4474     // mnemonic is valid. A match was found but it requires
4475     // features which are not supported on this GPU.
4476     return Error(IDLoc, "operands are not valid for this GPU or mode");
4477 
4478   case Match_InvalidOperand: {
4479     SMLoc ErrorLoc = IDLoc;
4480     if (ErrorInfo != ~0ULL) {
4481       if (ErrorInfo >= Operands.size()) {
4482         return Error(IDLoc, "too few operands for instruction");
4483       }
4484       ErrorLoc = ((AMDGPUOperand &)*Operands[ErrorInfo]).getStartLoc();
4485       if (ErrorLoc == SMLoc())
4486         ErrorLoc = IDLoc;
4487     }
4488     return Error(ErrorLoc, "invalid operand for instruction");
4489   }
4490 
4491   case Match_PreferE32:
4492     return Error(IDLoc, "internal error: instruction without _e64 suffix "
4493                         "should be encoded as e32");
4494   case Match_MnemonicFail:
4495     llvm_unreachable("Invalid instructions should have been handled already");
4496   }
4497   llvm_unreachable("Implement any new match types added!");
4498 }
4499 
4500 bool AMDGPUAsmParser::ParseAsAbsoluteExpression(uint32_t &Ret) {
4501   int64_t Tmp = -1;
4502   if (!isToken(AsmToken::Integer) && !isToken(AsmToken::Identifier)) {
4503     return true;
4504   }
4505   if (getParser().parseAbsoluteExpression(Tmp)) {
4506     return true;
4507   }
4508   Ret = static_cast<uint32_t>(Tmp);
4509   return false;
4510 }
4511 
4512 bool AMDGPUAsmParser::ParseDirectiveMajorMinor(uint32_t &Major,
4513                                                uint32_t &Minor) {
4514   if (ParseAsAbsoluteExpression(Major))
4515     return TokError("invalid major version");
4516 
4517   if (!trySkipToken(AsmToken::Comma))
4518     return TokError("minor version number required, comma expected");
4519 
4520   if (ParseAsAbsoluteExpression(Minor))
4521     return TokError("invalid minor version");
4522 
4523   return false;
4524 }
4525 
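// Parses the .amdgcn_target directive, e.g. (illustrative):
//   .amdgcn_target "amdgcn-amd-amdhsa--gfx90a"
// The quoted target id must match the target id the assembler was configured
// with.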
4526 bool AMDGPUAsmParser::ParseDirectiveAMDGCNTarget() {
4527   if (getSTI().getTargetTriple().getArch() != Triple::amdgcn)
4528     return TokError("directive only supported for amdgcn architecture");
4529 
4530   std::string TargetIDDirective;
4531   SMLoc TargetStart = getTok().getLoc();
4532   if (getParser().parseEscapedString(TargetIDDirective))
4533     return true;
4534 
4535   SMRange TargetRange = SMRange(TargetStart, getTok().getLoc());
4536   if (getTargetStreamer().getTargetID()->toString() != TargetIDDirective)
4537     return getParser().Error(TargetRange.Start,
4538         (Twine(".amdgcn_target directive's target id ") +
4539          Twine(TargetIDDirective) +
4540          Twine(" does not match the specified target id ") +
4541          Twine(getTargetStreamer().getTargetID()->toString())).str());
4542 
4543   return false;
4544 }
4545 
4546 bool AMDGPUAsmParser::OutOfRangeError(SMRange Range) {
4547   return Error(Range.Start, "value out of range", Range);
4548 }
4549 
4550 bool AMDGPUAsmParser::calculateGPRBlocks(
4551     const FeatureBitset &Features, bool VCCUsed, bool FlatScrUsed,
4552     bool XNACKUsed, Optional<bool> EnableWavefrontSize32, unsigned NextFreeVGPR,
4553     SMRange VGPRRange, unsigned NextFreeSGPR, SMRange SGPRRange,
4554     unsigned &VGPRBlocks, unsigned &SGPRBlocks) {
4555   // TODO(scott.linder): These calculations are duplicated from
4556   // AMDGPUAsmPrinter::getSIProgramInfo and could be unified.
4557   IsaVersion Version = getIsaVersion(getSTI().getCPU());
4558 
4559   unsigned NumVGPRs = NextFreeVGPR;
4560   unsigned NumSGPRs = NextFreeSGPR;
4561 
4562   if (Version.Major >= 10)
4563     NumSGPRs = 0;
4564   else {
4565     unsigned MaxAddressableNumSGPRs =
4566         IsaInfo::getAddressableNumSGPRs(&getSTI());
4567 
4568     if (Version.Major >= 8 && !Features.test(FeatureSGPRInitBug) &&
4569         NumSGPRs > MaxAddressableNumSGPRs)
4570       return OutOfRangeError(SGPRRange);
4571 
4572     NumSGPRs +=
4573         IsaInfo::getNumExtraSGPRs(&getSTI(), VCCUsed, FlatScrUsed, XNACKUsed);
4574 
4575     if ((Version.Major <= 7 || Features.test(FeatureSGPRInitBug)) &&
4576         NumSGPRs > MaxAddressableNumSGPRs)
4577       return OutOfRangeError(SGPRRange);
4578 
4579     if (Features.test(FeatureSGPRInitBug))
4580       NumSGPRs = IsaInfo::FIXED_NUM_SGPRS_FOR_INIT_BUG;
4581   }
4582 
4583   VGPRBlocks =
4584       IsaInfo::getNumVGPRBlocks(&getSTI(), NumVGPRs, EnableWavefrontSize32);
4585   SGPRBlocks = IsaInfo::getNumSGPRBlocks(&getSTI(), NumSGPRs);
4586 
4587   return false;
4588 }
4589 
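// Parses a .amdhsa_kernel ... .end_amdhsa_kernel block, e.g. (illustrative):
//   .amdhsa_kernel my_kernel
//     .amdhsa_next_free_vgpr 8
//     .amdhsa_next_free_sgpr 16
//   .end_amdhsa_kernel
// Each .amdhsa_ directive may appear at most once within the block.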
4590 bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
4591   if (getSTI().getTargetTriple().getArch() != Triple::amdgcn)
4592     return TokError("directive only supported for amdgcn architecture");
4593 
4594   if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA)
4595     return TokError("directive only supported for amdhsa OS");
4596 
4597   StringRef KernelName;
4598   if (getParser().parseIdentifier(KernelName))
4599     return true;
4600 
4601   kernel_descriptor_t KD = getDefaultAmdhsaKernelDescriptor(&getSTI());
4602 
4603   StringSet<> Seen;
4604 
4605   IsaVersion IVersion = getIsaVersion(getSTI().getCPU());
4606 
4607   SMRange VGPRRange;
4608   uint64_t NextFreeVGPR = 0;
4609   uint64_t AccumOffset = 0;
4610   SMRange SGPRRange;
4611   uint64_t NextFreeSGPR = 0;
4612 
4613   // Count the number of user SGPRs implied from the enabled feature bits.
4614   unsigned ImpliedUserSGPRCount = 0;
4615 
4616   // Track if the asm explicitly contains the directive for the user SGPR
4617   // count.
4618   Optional<unsigned> ExplicitUserSGPRCount;
4619   bool ReserveVCC = true;
4620   bool ReserveFlatScr = true;
4621   Optional<bool> EnableWavefrontSize32;
4622 
4623   while (true) {
4624     while (trySkipToken(AsmToken::EndOfStatement));
4625 
4626     StringRef ID;
4627     SMRange IDRange = getTok().getLocRange();
4628     if (!parseId(ID, "expected .amdhsa_ directive or .end_amdhsa_kernel"))
4629       return true;
4630 
4631     if (ID == ".end_amdhsa_kernel")
4632       break;
4633 
4634     if (Seen.find(ID) != Seen.end())
4635       return TokError(".amdhsa_ directives cannot be repeated");
4636     Seen.insert(ID);
4637 
4638     SMLoc ValStart = getLoc();
4639     int64_t IVal;
4640     if (getParser().parseAbsoluteExpression(IVal))
4641       return true;
4642     SMLoc ValEnd = getLoc();
4643     SMRange ValRange = SMRange(ValStart, ValEnd);
4644 
4645     if (IVal < 0)
4646       return OutOfRangeError(ValRange);
4647 
4648     uint64_t Val = IVal;
4649 
4650 #define PARSE_BITS_ENTRY(FIELD, ENTRY, VALUE, RANGE)                           \
4651   if (!isUInt<ENTRY##_WIDTH>(VALUE))                                           \
4652     return OutOfRangeError(RANGE);                                             \
4653   AMDHSA_BITS_SET(FIELD, ENTRY, VALUE);
4654 
4655     if (ID == ".amdhsa_group_segment_fixed_size") {
4656       if (!isUInt<sizeof(KD.group_segment_fixed_size) * CHAR_BIT>(Val))
4657         return OutOfRangeError(ValRange);
4658       KD.group_segment_fixed_size = Val;
4659     } else if (ID == ".amdhsa_private_segment_fixed_size") {
4660       if (!isUInt<sizeof(KD.private_segment_fixed_size) * CHAR_BIT>(Val))
4661         return OutOfRangeError(ValRange);
4662       KD.private_segment_fixed_size = Val;
4663     } else if (ID == ".amdhsa_kernarg_size") {
4664       if (!isUInt<sizeof(KD.kernarg_size) * CHAR_BIT>(Val))
4665         return OutOfRangeError(ValRange);
4666       KD.kernarg_size = Val;
4667     } else if (ID == ".amdhsa_user_sgpr_count") {
4668       ExplicitUserSGPRCount = Val;
4669     } else if (ID == ".amdhsa_user_sgpr_private_segment_buffer") {
4670       if (hasArchitectedFlatScratch())
4671         return Error(IDRange.Start,
4672                      "directive is not supported with architected flat scratch",
4673                      IDRange);
4674       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4675                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER,
4676                        Val, ValRange);
4677       if (Val)
4678         ImpliedUserSGPRCount += 4;
4679     } else if (ID == ".amdhsa_user_sgpr_dispatch_ptr") {
4680       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4681                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR, Val,
4682                        ValRange);
4683       if (Val)
4684         ImpliedUserSGPRCount += 2;
4685     } else if (ID == ".amdhsa_user_sgpr_queue_ptr") {
4686       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4687                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR, Val,
4688                        ValRange);
4689       if (Val)
4690         ImpliedUserSGPRCount += 2;
4691     } else if (ID == ".amdhsa_user_sgpr_kernarg_segment_ptr") {
4692       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4693                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR,
4694                        Val, ValRange);
4695       if (Val)
4696         ImpliedUserSGPRCount += 2;
4697     } else if (ID == ".amdhsa_user_sgpr_dispatch_id") {
4698       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4699                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID, Val,
4700                        ValRange);
4701       if (Val)
4702         ImpliedUserSGPRCount += 2;
4703     } else if (ID == ".amdhsa_user_sgpr_flat_scratch_init") {
4704       if (hasArchitectedFlatScratch())
4705         return Error(IDRange.Start,
4706                      "directive is not supported with architected flat scratch",
4707                      IDRange);
4708       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4709                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT, Val,
4710                        ValRange);
4711       if (Val)
4712         ImpliedUserSGPRCount += 2;
4713     } else if (ID == ".amdhsa_user_sgpr_private_segment_size") {
4714       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4715                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE,
4716                        Val, ValRange);
4717       if (Val)
4718         ImpliedUserSGPRCount += 1;
4719     } else if (ID == ".amdhsa_wavefront_size32") {
4720       if (IVersion.Major < 10)
4721         return Error(IDRange.Start, "directive requires gfx10+", IDRange);
4722       EnableWavefrontSize32 = Val;
4723       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4724                        KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32,
4725                        Val, ValRange);
4726     } else if (ID == ".amdhsa_system_sgpr_private_segment_wavefront_offset") {
4727       if (hasArchitectedFlatScratch())
4728         return Error(IDRange.Start,
4729                      "directive is not supported with architected flat scratch",
4730                      IDRange);
4731       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4732                        COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, Val, ValRange);
4733     } else if (ID == ".amdhsa_enable_private_segment") {
4734       if (!hasArchitectedFlatScratch())
4735         return Error(
4736             IDRange.Start,
4737             "directive is not supported without architected flat scratch",
4738             IDRange);
4739       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4740                        COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, Val, ValRange);
4741     } else if (ID == ".amdhsa_system_sgpr_workgroup_id_x") {
4742       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4743                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, Val,
4744                        ValRange);
4745     } else if (ID == ".amdhsa_system_sgpr_workgroup_id_y") {
4746       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4747                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y, Val,
4748                        ValRange);
4749     } else if (ID == ".amdhsa_system_sgpr_workgroup_id_z") {
4750       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4751                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z, Val,
4752                        ValRange);
4753     } else if (ID == ".amdhsa_system_sgpr_workgroup_info") {
4754       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4755                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO, Val,
4756                        ValRange);
4757     } else if (ID == ".amdhsa_system_vgpr_workitem_id") {
4758       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4759                        COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID, Val,
4760                        ValRange);
4761     } else if (ID == ".amdhsa_next_free_vgpr") {
4762       VGPRRange = ValRange;
4763       NextFreeVGPR = Val;
4764     } else if (ID == ".amdhsa_next_free_sgpr") {
4765       SGPRRange = ValRange;
4766       NextFreeSGPR = Val;
4767     } else if (ID == ".amdhsa_accum_offset") {
4768       if (!isGFX90A())
4769         return Error(IDRange.Start, "directive requires gfx90a+", IDRange);
4770       AccumOffset = Val;
4771     } else if (ID == ".amdhsa_reserve_vcc") {
4772       if (!isUInt<1>(Val))
4773         return OutOfRangeError(ValRange);
4774       ReserveVCC = Val;
4775     } else if (ID == ".amdhsa_reserve_flat_scratch") {
4776       if (IVersion.Major < 7)
4777         return Error(IDRange.Start, "directive requires gfx7+", IDRange);
4778       if (hasArchitectedFlatScratch())
4779         return Error(IDRange.Start,
4780                      "directive is not supported with architected flat scratch",
4781                      IDRange);
4782       if (!isUInt<1>(Val))
4783         return OutOfRangeError(ValRange);
4784       ReserveFlatScr = Val;
4785     } else if (ID == ".amdhsa_reserve_xnack_mask") {
4786       if (IVersion.Major < 8)
4787         return Error(IDRange.Start, "directive requires gfx8+", IDRange);
4788       if (!isUInt<1>(Val))
4789         return OutOfRangeError(ValRange);
4790       if (Val != getTargetStreamer().getTargetID()->isXnackOnOrAny())
4791         return getParser().Error(IDRange.Start, ".amdhsa_reserve_xnack_mask does not match target id",
4792                                  IDRange);
4793     } else if (ID == ".amdhsa_float_round_mode_32") {
4794       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
4795                        COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32, Val, ValRange);
4796     } else if (ID == ".amdhsa_float_round_mode_16_64") {
4797       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
4798                        COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64, Val, ValRange);
4799     } else if (ID == ".amdhsa_float_denorm_mode_32") {
4800       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
4801                        COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32, Val, ValRange);
4802     } else if (ID == ".amdhsa_float_denorm_mode_16_64") {
4803       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
4804                        COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64, Val,
4805                        ValRange);
4806     } else if (ID == ".amdhsa_dx10_clamp") {
4807       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
4808                        COMPUTE_PGM_RSRC1_ENABLE_DX10_CLAMP, Val, ValRange);
4809     } else if (ID == ".amdhsa_ieee_mode") {
4810       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_ENABLE_IEEE_MODE,
4811                        Val, ValRange);
4812     } else if (ID == ".amdhsa_fp16_overflow") {
4813       if (IVersion.Major < 9)
4814         return Error(IDRange.Start, "directive requires gfx9+", IDRange);
4815       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FP16_OVFL, Val,
4816                        ValRange);
4817     } else if (ID == ".amdhsa_tg_split") {
4818       if (!isGFX90A())
4819         return Error(IDRange.Start, "directive requires gfx90a+", IDRange);
4820       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc3, COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT, Val,
4821                        ValRange);
4822     } else if (ID == ".amdhsa_workgroup_processor_mode") {
4823       if (IVersion.Major < 10)
4824         return Error(IDRange.Start, "directive requires gfx10+", IDRange);
4825       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_WGP_MODE, Val,
4826                        ValRange);
4827     } else if (ID == ".amdhsa_memory_ordered") {
4828       if (IVersion.Major < 10)
4829         return Error(IDRange.Start, "directive requires gfx10+", IDRange);
4830       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_MEM_ORDERED, Val,
4831                        ValRange);
4832     } else if (ID == ".amdhsa_forward_progress") {
4833       if (IVersion.Major < 10)
4834         return Error(IDRange.Start, "directive requires gfx10+", IDRange);
4835       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FWD_PROGRESS, Val,
4836                        ValRange);
4837     } else if (ID == ".amdhsa_exception_fp_ieee_invalid_op") {
4838       PARSE_BITS_ENTRY(
4839           KD.compute_pgm_rsrc2,
4840           COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION, Val,
4841           ValRange);
4842     } else if (ID == ".amdhsa_exception_fp_denorm_src") {
4843       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4844                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE,
4845                        Val, ValRange);
4846     } else if (ID == ".amdhsa_exception_fp_ieee_div_zero") {
4847       PARSE_BITS_ENTRY(
4848           KD.compute_pgm_rsrc2,
4849           COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO, Val,
4850           ValRange);
4851     } else if (ID == ".amdhsa_exception_fp_ieee_overflow") {
4852       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4853                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW,
4854                        Val, ValRange);
4855     } else if (ID == ".amdhsa_exception_fp_ieee_underflow") {
4856       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4857                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW,
4858                        Val, ValRange);
4859     } else if (ID == ".amdhsa_exception_fp_ieee_inexact") {
4860       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4861                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT,
4862                        Val, ValRange);
4863     } else if (ID == ".amdhsa_exception_int_div_zero") {
4864       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4865                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO,
4866                        Val, ValRange);
4867     } else {
4868       return Error(IDRange.Start, "unknown .amdhsa_kernel directive", IDRange);
4869     }
4870 
4871 #undef PARSE_BITS_ENTRY
4872   }
4873 
4874   if (Seen.find(".amdhsa_next_free_vgpr") == Seen.end())
4875     return TokError(".amdhsa_next_free_vgpr directive is required");
4876 
4877   if (Seen.find(".amdhsa_next_free_sgpr") == Seen.end())
4878     return TokError(".amdhsa_next_free_sgpr directive is required");
4879 
4880   unsigned VGPRBlocks;
4881   unsigned SGPRBlocks;
4882   if (calculateGPRBlocks(getFeatureBits(), ReserveVCC, ReserveFlatScr,
4883                          getTargetStreamer().getTargetID()->isXnackOnOrAny(),
4884                          EnableWavefrontSize32, NextFreeVGPR,
4885                          VGPRRange, NextFreeSGPR, SGPRRange, VGPRBlocks,
4886                          SGPRBlocks))
4887     return true;
4888 
4889   if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_WIDTH>(
4890           VGPRBlocks))
4891     return OutOfRangeError(VGPRRange);
4892   AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
4893                   COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT, VGPRBlocks);
4894 
4895   if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_WIDTH>(
4896           SGPRBlocks))
4897     return OutOfRangeError(SGPRRange);
4898   AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
4899                   COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT,
4900                   SGPRBlocks);
4901 
4902   if (ExplicitUserSGPRCount && ImpliedUserSGPRCount > *ExplicitUserSGPRCount)
4903     return TokError(".amdhsa_user_sgpr_count smaller than implied by "
4904                     "enabled user SGPRs");
4905 
4906   unsigned UserSGPRCount =
4907       ExplicitUserSGPRCount ? *ExplicitUserSGPRCount : ImpliedUserSGPRCount;
4908 
4909   if (!isUInt<COMPUTE_PGM_RSRC2_USER_SGPR_COUNT_WIDTH>(UserSGPRCount))
4910     return TokError("too many user SGPRs enabled");
4911   AMDHSA_BITS_SET(KD.compute_pgm_rsrc2, COMPUTE_PGM_RSRC2_USER_SGPR_COUNT,
4912                   UserSGPRCount);
4913 
4914   if (isGFX90A()) {
4915     if (Seen.find(".amdhsa_accum_offset") == Seen.end())
4916       return TokError(".amdhsa_accum_offset directive is required");
4917     if (AccumOffset < 4 || AccumOffset > 256 || (AccumOffset & 3))
4918       return TokError("accum_offset should be in range [4..256] in "
4919                       "increments of 4");
4920     if (AccumOffset > alignTo(std::max((uint64_t)1, NextFreeVGPR), 4))
4921       return TokError("accum_offset exceeds total VGPR allocation");
4922     AMDHSA_BITS_SET(KD.compute_pgm_rsrc3, COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET,
4923                     (AccumOffset / 4 - 1));
4924   }
4925 
4926   getTargetStreamer().EmitAmdhsaKernelDescriptor(
4927       getSTI(), KernelName, KD, NextFreeVGPR, NextFreeSGPR, ReserveVCC,
4928       ReserveFlatScr);
4929   return false;
4930 }
4931 
4932 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectVersion() {
4933   uint32_t Major;
4934   uint32_t Minor;
4935 
4936   if (ParseDirectiveMajorMinor(Major, Minor))
4937     return true;
4938 
4939   getTargetStreamer().EmitDirectiveHSACodeObjectVersion(Major, Minor);
4940   return false;
4941 }
4942 
4943 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectISA() {
4944   uint32_t Major;
4945   uint32_t Minor;
4946   uint32_t Stepping;
4947   StringRef VendorName;
4948   StringRef ArchName;
4949 
4950   // If this directive has no arguments, then use the ISA version for the
4951   // targeted GPU.
4952   if (isToken(AsmToken::EndOfStatement)) {
4953     AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
4954     getTargetStreamer().EmitDirectiveHSACodeObjectISAV2(ISA.Major, ISA.Minor,
4955                                                         ISA.Stepping,
4956                                                         "AMD", "AMDGPU");
4957     return false;
4958   }
4959 
4960   if (ParseDirectiveMajorMinor(Major, Minor))
4961     return true;
4962 
4963   if (!trySkipToken(AsmToken::Comma))
4964     return TokError("stepping version number required, comma expected");
4965 
4966   if (ParseAsAbsoluteExpression(Stepping))
4967     return TokError("invalid stepping version");
4968 
4969   if (!trySkipToken(AsmToken::Comma))
4970     return TokError("vendor name required, comma expected");
4971 
4972   if (!parseString(VendorName, "invalid vendor name"))
4973     return true;
4974 
4975   if (!trySkipToken(AsmToken::Comma))
4976     return TokError("arch name required, comma expected");
4977 
4978   if (!parseString(ArchName, "invalid arch name"))
4979     return true;
4980 
4981   getTargetStreamer().EmitDirectiveHSACodeObjectISAV2(Major, Minor, Stepping,
4982                                                       VendorName, ArchName);
4983   return false;
4984 }
4985 
4986 bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID,
4987                                                amd_kernel_code_t &Header) {
4988   // max_scratch_backing_memory_byte_size is deprecated. Ignore it while parsing
4989   // assembly for backwards compatibility.
4990   if (ID == "max_scratch_backing_memory_byte_size") {
4991     Parser.eatToEndOfStatement();
4992     return false;
4993   }
4994 
4995   SmallString<40> ErrStr;
4996   raw_svector_ostream Err(ErrStr);
4997   if (!parseAmdKernelCodeField(ID, getParser(), Header, Err)) {
4998     return TokError(Err.str());
4999   }
5000   Lex();
5001 
5002   if (ID == "enable_wavefront_size32") {
5003     if (Header.code_properties & AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32) {
5004       if (!isGFX10Plus())
5005         return TokError("enable_wavefront_size32=1 is only allowed on GFX10+");
5006       if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32])
5007         return TokError("enable_wavefront_size32=1 requires +WavefrontSize32");
5008     } else {
5009       if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64])
5010         return TokError("enable_wavefront_size32=0 requires +WavefrontSize64");
5011     }
5012   }
5013 
5014   if (ID == "wavefront_size") {
5015     if (Header.wavefront_size == 5) {
5016       if (!isGFX10Plus())
5017         return TokError("wavefront_size=5 is only allowed on GFX10+");
5018       if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32])
5019         return TokError("wavefront_size=5 requires +WavefrontSize32");
5020     } else if (Header.wavefront_size == 6) {
5021       if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64])
5022         return TokError("wavefront_size=6 requires +WavefrontSize64");
5023     }
5024   }
5025 
5026   if (ID == "enable_wgp_mode") {
5027     if (G_00B848_WGP_MODE(Header.compute_pgm_resource_registers) &&
5028         !isGFX10Plus())
5029       return TokError("enable_wgp_mode=1 is only allowed on GFX10+");
5030   }
5031 
5032   if (ID == "enable_mem_ordered") {
5033     if (G_00B848_MEM_ORDERED(Header.compute_pgm_resource_registers) &&
5034         !isGFX10Plus())
5035       return TokError("enable_mem_ordered=1 is only allowed on GFX10+");
5036   }
5037 
5038   if (ID == "enable_fwd_progress") {
5039     if (G_00B848_FWD_PROGRESS(Header.compute_pgm_resource_registers) &&
5040         !isGFX10Plus())
5041       return TokError("enable_fwd_progress=1 is only allowed on GFX10+");
5042   }
5043 
5044   return false;
5045 }
5046 
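// Parse a legacy '.amd_kernel_code_t' block of 'key = value' fields, e.g.
// (illustrative only):
//
//   .amd_kernel_code_t
//     enable_sgpr_kernarg_segment_ptr = 1
//     wavefront_size = 6
//   .end_amd_kernel_code_t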
5047 bool AMDGPUAsmParser::ParseDirectiveAMDKernelCodeT() {
5048   amd_kernel_code_t Header;
5049   AMDGPU::initDefaultAMDKernelCodeT(Header, &getSTI());
5050 
5051   while (true) {
5052     // Lex EndOfStatement.  This is in a while loop, because lexing a comment
5053     // will set the current token to EndOfStatement.
5054     while (trySkipToken(AsmToken::EndOfStatement));
5055 
5056     StringRef ID;
5057     if (!parseId(ID, "expected value identifier or .end_amd_kernel_code_t"))
5058       return true;
5059 
5060     if (ID == ".end_amd_kernel_code_t")
5061       break;
5062 
5063     if (ParseAMDKernelCodeTValue(ID, Header))
5064       return true;
5065   }
5066 
5067   getTargetStreamer().EmitAMDKernelCodeT(Header);
5068 
5069   return false;
5070 }
5071 
5072 bool AMDGPUAsmParser::ParseDirectiveAMDGPUHsaKernel() {
5073   StringRef KernelName;
5074   if (!parseId(KernelName, "expected symbol name"))
5075     return true;
5076 
5077   getTargetStreamer().EmitAMDGPUSymbolType(KernelName,
5078                                            ELF::STT_AMDGPU_HSA_KERNEL);
5079 
5080   KernelScope.initialize(getContext());
5081   return false;
5082 }
5083 
5084 bool AMDGPUAsmParser::ParseDirectiveISAVersion() {
5085   if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) {
5086     return Error(getLoc(),
5087                  ".amd_amdgpu_isa directive is not available on non-amdgcn "
5088                  "architectures");
5089   }
5090 
5091   auto TargetIDDirective = getLexer().getTok().getStringContents();
5092   if (getTargetStreamer().getTargetID()->toString() != TargetIDDirective)
5093     return Error(getParser().getTok().getLoc(), "target id must match options");
5094 
5095   getTargetStreamer().EmitISAVersion();
5096   Lex();
5097 
5098   return false;
5099 }
5100 
5101 bool AMDGPUAsmParser::ParseDirectiveHSAMetadata() {
5102   const char *AssemblerDirectiveBegin;
5103   const char *AssemblerDirectiveEnd;
5104   std::tie(AssemblerDirectiveBegin, AssemblerDirectiveEnd) =
5105       isHsaAbiVersion3AndAbove(&getSTI())
5106           ? std::make_tuple(HSAMD::V3::AssemblerDirectiveBegin,
5107                             HSAMD::V3::AssemblerDirectiveEnd)
5108           : std::make_tuple(HSAMD::AssemblerDirectiveBegin,
5109                             HSAMD::AssemblerDirectiveEnd);
5110 
5111   if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA) {
5112     return Error(getLoc(),
5113                  (Twine(AssemblerDirectiveBegin) + Twine(" directive is "
5114                  "not available on non-amdhsa OSes")).str());
5115   }
5116 
5117   std::string HSAMetadataString;
5118   if (ParseToEndDirective(AssemblerDirectiveBegin, AssemblerDirectiveEnd,
5119                           HSAMetadataString))
5120     return true;
5121 
5122   if (isHsaAbiVersion3AndAbove(&getSTI())) {
5123     if (!getTargetStreamer().EmitHSAMetadataV3(HSAMetadataString))
5124       return Error(getLoc(), "invalid HSA metadata");
5125   } else {
5126     if (!getTargetStreamer().EmitHSAMetadataV2(HSAMetadataString))
5127       return Error(getLoc(), "invalid HSA metadata");
5128   }
5129 
5130   return false;
5131 }
5132 
5133 /// Common code to parse out a block of text (typically YAML) between start and
5134 /// end directives.
5135 bool AMDGPUAsmParser::ParseToEndDirective(const char *AssemblerDirectiveBegin,
5136                                           const char *AssemblerDirectiveEnd,
5137                                           std::string &CollectString) {
5138 
5139   raw_string_ostream CollectStream(CollectString);
5140 
5141   getLexer().setSkipSpace(false);
5142 
5143   bool FoundEnd = false;
5144   while (!isToken(AsmToken::Eof)) {
5145     while (isToken(AsmToken::Space)) {
5146       CollectStream << getTokenStr();
5147       Lex();
5148     }
5149 
5150     if (trySkipId(AssemblerDirectiveEnd)) {
5151       FoundEnd = true;
5152       break;
5153     }
5154 
5155     CollectStream << Parser.parseStringToEndOfStatement()
5156                   << getContext().getAsmInfo()->getSeparatorString();
5157 
5158     Parser.eatToEndOfStatement();
5159   }
5160 
5161   getLexer().setSkipSpace(true);
5162 
5163   if (isToken(AsmToken::Eof) && !FoundEnd) {
5164     return TokError(Twine("expected directive ") +
5165                     Twine(AssemblerDirectiveEnd) + Twine(" not found"));
5166   }
5167 
5168   CollectStream.flush();
5169   return false;
5170 }
5171 
5172 /// Parse the assembler directive for new MsgPack-format PAL metadata.
5173 bool AMDGPUAsmParser::ParseDirectivePALMetadataBegin() {
5174   std::string String;
5175   if (ParseToEndDirective(AMDGPU::PALMD::AssemblerDirectiveBegin,
5176                           AMDGPU::PALMD::AssemblerDirectiveEnd, String))
5177     return true;
5178 
5179   auto PALMetadata = getTargetStreamer().getPALMetadata();
5180   if (!PALMetadata->setFromString(String))
5181     return Error(getLoc(), "invalid PAL metadata");
5182   return false;
5183 }
5184 
5185 /// Parse the assembler directive for old linear-format PAL metadata.
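/// The directive takes an even-length, comma-separated list of register
/// key/value pairs, e.g. (illustrative values) '0x1234, 0x1, 0x5678, 0x2';
/// each pair is forwarded to PALMetadata::setRegister.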
5186 bool AMDGPUAsmParser::ParseDirectivePALMetadata() {
5187   if (getSTI().getTargetTriple().getOS() != Triple::AMDPAL) {
5188     return Error(getLoc(),
5189                  (Twine(PALMD::AssemblerDirective) + Twine(" directive is "
5190                  "not available on non-amdpal OSes")).str());
5191   }
5192 
5193   auto PALMetadata = getTargetStreamer().getPALMetadata();
5194   PALMetadata->setLegacy();
5195   for (;;) {
5196     uint32_t Key, Value;
5197     if (ParseAsAbsoluteExpression(Key)) {
5198       return TokError(Twine("invalid value in ") +
5199                       Twine(PALMD::AssemblerDirective));
5200     }
5201     if (!trySkipToken(AsmToken::Comma)) {
5202       return TokError(Twine("expected an even number of values in ") +
5203                       Twine(PALMD::AssemblerDirective));
5204     }
5205     if (ParseAsAbsoluteExpression(Value)) {
5206       return TokError(Twine("invalid value in ") +
5207                       Twine(PALMD::AssemblerDirective));
5208     }
5209     PALMetadata->setRegister(Key, Value);
5210     if (!trySkipToken(AsmToken::Comma))
5211       break;
5212   }
5213   return false;
5214 }
5215 
5216 /// ParseDirectiveAMDGPULDS
5217 ///  ::= .amdgpu_lds identifier ',' size_expression [',' align_expression]
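///  e.g. '.amdgpu_lds my_lds_var, 4096, 16' (symbol name is illustrative)
///  declares a 4096 byte LDS symbol with 16 byte alignment; the alignment
///  defaults to 4 when omitted.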
5218 bool AMDGPUAsmParser::ParseDirectiveAMDGPULDS() {
5219   if (getParser().checkForValidSection())
5220     return true;
5221 
5222   StringRef Name;
5223   SMLoc NameLoc = getLoc();
5224   if (getParser().parseIdentifier(Name))
5225     return TokError("expected identifier in directive");
5226 
5227   MCSymbol *Symbol = getContext().getOrCreateSymbol(Name);
5228   if (parseToken(AsmToken::Comma, "expected ','"))
5229     return true;
5230 
5231   unsigned LocalMemorySize = AMDGPU::IsaInfo::getLocalMemorySize(&getSTI());
5232 
5233   int64_t Size;
5234   SMLoc SizeLoc = getLoc();
5235   if (getParser().parseAbsoluteExpression(Size))
5236     return true;
5237   if (Size < 0)
5238     return Error(SizeLoc, "size must be non-negative");
5239   if (Size > LocalMemorySize)
5240     return Error(SizeLoc, "size is too large");
5241 
5242   int64_t Alignment = 4;
5243   if (trySkipToken(AsmToken::Comma)) {
5244     SMLoc AlignLoc = getLoc();
5245     if (getParser().parseAbsoluteExpression(Alignment))
5246       return true;
5247     if (Alignment < 0 || !isPowerOf2_64(Alignment))
5248       return Error(AlignLoc, "alignment must be a power of two");
5249 
5250     // Alignment larger than the size of LDS is possible in theory, as long
5251     // as the linker manages to place the symbol at address 0, but we do want
5252     // to make sure the alignment fits nicely into a 32-bit integer.
5253     if (Alignment >= 1u << 31)
5254       return Error(AlignLoc, "alignment is too large");
5255   }
5256 
5257   if (parseToken(AsmToken::EndOfStatement,
5258                  "unexpected token in '.amdgpu_lds' directive"))
5259     return true;
5260 
5261   Symbol->redefineIfPossible();
5262   if (!Symbol->isUndefined())
5263     return Error(NameLoc, "invalid symbol redefinition");
5264 
5265   getTargetStreamer().emitAMDGPULDS(Symbol, Size, Align(Alignment));
5266   return false;
5267 }
5268 
5269 bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) {
5270   StringRef IDVal = DirectiveID.getString();
5271 
5272   if (isHsaAbiVersion3AndAbove(&getSTI())) {
5273     if (IDVal == ".amdhsa_kernel")
5274      return ParseDirectiveAMDHSAKernel();
5275 
5276     // TODO: Restructure/combine with PAL metadata directive.
5277     if (IDVal == AMDGPU::HSAMD::V3::AssemblerDirectiveBegin)
5278       return ParseDirectiveHSAMetadata();
5279   } else {
5280     if (IDVal == ".hsa_code_object_version")
5281       return ParseDirectiveHSACodeObjectVersion();
5282 
5283     if (IDVal == ".hsa_code_object_isa")
5284       return ParseDirectiveHSACodeObjectISA();
5285 
5286     if (IDVal == ".amd_kernel_code_t")
5287       return ParseDirectiveAMDKernelCodeT();
5288 
5289     if (IDVal == ".amdgpu_hsa_kernel")
5290       return ParseDirectiveAMDGPUHsaKernel();
5291 
5292     if (IDVal == ".amd_amdgpu_isa")
5293       return ParseDirectiveISAVersion();
5294 
5295     if (IDVal == AMDGPU::HSAMD::AssemblerDirectiveBegin)
5296       return ParseDirectiveHSAMetadata();
5297   }
5298 
5299   if (IDVal == ".amdgcn_target")
5300     return ParseDirectiveAMDGCNTarget();
5301 
5302   if (IDVal == ".amdgpu_lds")
5303     return ParseDirectiveAMDGPULDS();
5304 
5305   if (IDVal == PALMD::AssemblerDirectiveBegin)
5306     return ParseDirectivePALMetadataBegin();
5307 
5308   if (IDVal == PALMD::AssemblerDirective)
5309     return ParseDirectivePALMetadata();
5310 
5311   return true;
5312 }
5313 
5314 bool AMDGPUAsmParser::subtargetHasRegister(const MCRegisterInfo &MRI,
5315                                            unsigned RegNo) {
5316 
5317   for (MCRegAliasIterator R(AMDGPU::TTMP12_TTMP13_TTMP14_TTMP15, &MRI, true);
5318        R.isValid(); ++R) {
5319     if (*R == RegNo)
5320       return isGFX9Plus();
5321   }
5322 
5323   // GFX10 has 2 more SGPRs, 104 and 105.
5324   for (MCRegAliasIterator R(AMDGPU::SGPR104_SGPR105, &MRI, true);
5325        R.isValid(); ++R) {
5326     if (*R == RegNo)
5327       return hasSGPR104_SGPR105();
5328   }
5329 
5330   switch (RegNo) {
5331   case AMDGPU::SRC_SHARED_BASE:
5332   case AMDGPU::SRC_SHARED_LIMIT:
5333   case AMDGPU::SRC_PRIVATE_BASE:
5334   case AMDGPU::SRC_PRIVATE_LIMIT:
5335   case AMDGPU::SRC_POPS_EXITING_WAVE_ID:
5336     return isGFX9Plus();
5337   case AMDGPU::TBA:
5338   case AMDGPU::TBA_LO:
5339   case AMDGPU::TBA_HI:
5340   case AMDGPU::TMA:
5341   case AMDGPU::TMA_LO:
5342   case AMDGPU::TMA_HI:
5343     return !isGFX9Plus();
5344   case AMDGPU::XNACK_MASK:
5345   case AMDGPU::XNACK_MASK_LO:
5346   case AMDGPU::XNACK_MASK_HI:
5347     return (isVI() || isGFX9()) && getTargetStreamer().getTargetID()->isXnackSupported();
5348   case AMDGPU::SGPR_NULL:
5349     return isGFX10Plus();
5350   default:
5351     break;
5352   }
5353 
5354   if (isCI())
5355     return true;
5356 
5357   if (isSI() || isGFX10Plus()) {
5358     // No flat_scr on SI.
5359     // On GFX10 flat scratch is not a valid register operand and can only be
5360     // accessed with s_setreg/s_getreg.
5361     switch (RegNo) {
5362     case AMDGPU::FLAT_SCR:
5363     case AMDGPU::FLAT_SCR_LO:
5364     case AMDGPU::FLAT_SCR_HI:
5365       return false;
5366     default:
5367       return true;
5368     }
5369   }
5370 
5371   // VI only has 102 SGPRs, so make sure we aren't trying to use the 2 more that
5372   // SI/CI have.
5373   for (MCRegAliasIterator R(AMDGPU::SGPR102_SGPR103, &MRI, true);
5374        R.isValid(); ++R) {
5375     if (*R == RegNo)
5376       return hasSGPR102_SGPR103();
5377   }
5378 
5379   return true;
5380 }
5381 
5382 OperandMatchResultTy
5383 AMDGPUAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic,
5384                               OperandMode Mode) {
5385   // Try to parse with a custom parser
5386   OperandMatchResultTy ResTy = MatchOperandParserImpl(Operands, Mnemonic);
5387 
5388   // If we successfully parsed the operand or if there was an error parsing,
5389   // we are done.
5390   //
5391   // If we are parsing after we reach EndOfStatement then this means we
5392   // are appending default values to the Operands list.  This is only done
5393   // by a custom parser, so we shouldn't continue on to the generic parsing.
5394   if (ResTy == MatchOperand_Success || ResTy == MatchOperand_ParseFail ||
5395       isToken(AsmToken::EndOfStatement))
5396     return ResTy;
5397 
5398   SMLoc RBraceLoc;
5399   SMLoc LBraceLoc = getLoc();
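  // In NSA (non-sequential address) mode a GFX10+ image address may be written
  // as a bracketed register list, e.g. '[v4, v9, v16]'.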
5400   if (Mode == OperandMode_NSA && trySkipToken(AsmToken::LBrac)) {
5401     unsigned Prefix = Operands.size();
5402 
5403     for (;;) {
5404       auto Loc = getLoc();
5405       ResTy = parseReg(Operands);
5406       if (ResTy == MatchOperand_NoMatch)
5407         Error(Loc, "expected a register");
5408       if (ResTy != MatchOperand_Success)
5409         return MatchOperand_ParseFail;
5410 
5411       RBraceLoc = getLoc();
5412       if (trySkipToken(AsmToken::RBrac))
5413         break;
5414 
5415       if (!skipToken(AsmToken::Comma,
5416                      "expected a comma or a closing square bracket")) {
5417         return MatchOperand_ParseFail;
5418       }
5419     }
5420 
5421     if (Operands.size() - Prefix > 1) {
5422       Operands.insert(Operands.begin() + Prefix,
5423                       AMDGPUOperand::CreateToken(this, "[", LBraceLoc));
5424       Operands.push_back(AMDGPUOperand::CreateToken(this, "]", RBraceLoc));
5425     }
5426 
5427     return MatchOperand_Success;
5428   }
5429 
5430   return parseRegOrImm(Operands);
5431 }
5432 
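// Strip a forced-encoding suffix (_e32, _e64, _dpp or _sdwa) from the
// mnemonic and remember which encoding was requested, e.g. 'v_add_f32_e64'
// forces the 64-bit encoding of v_add_f32.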
5433 StringRef AMDGPUAsmParser::parseMnemonicSuffix(StringRef Name) {
5434   // Clear any forced encodings from the previous instruction.
5435   setForcedEncodingSize(0);
5436   setForcedDPP(false);
5437   setForcedSDWA(false);
5438 
5439   if (Name.endswith("_e64")) {
5440     setForcedEncodingSize(64);
5441     return Name.substr(0, Name.size() - 4);
5442   } else if (Name.endswith("_e32")) {
5443     setForcedEncodingSize(32);
5444     return Name.substr(0, Name.size() - 4);
5445   } else if (Name.endswith("_dpp")) {
5446     setForcedDPP(true);
5447     return Name.substr(0, Name.size() - 4);
5448   } else if (Name.endswith("_sdwa")) {
5449     setForcedSDWA(true);
5450     return Name.substr(0, Name.size() - 5);
5451   }
5452   return Name;
5453 }
5454 
5455 bool AMDGPUAsmParser::ParseInstruction(ParseInstructionInfo &Info,
5456                                        StringRef Name,
5457                                        SMLoc NameLoc, OperandVector &Operands) {
5458   // Add the instruction mnemonic
5459   Name = parseMnemonicSuffix(Name);
5460   Operands.push_back(AMDGPUOperand::CreateToken(this, Name, NameLoc));
5461 
5462   bool IsMIMG = Name.startswith("image_");
5463 
5464   while (!trySkipToken(AsmToken::EndOfStatement)) {
5465     OperandMode Mode = OperandMode_Default;
5466     if (IsMIMG && isGFX10Plus() && Operands.size() == 2)
5467       Mode = OperandMode_NSA;
5468     CPolSeen = 0;
5469     OperandMatchResultTy Res = parseOperand(Operands, Name, Mode);
5470 
5471     if (Res != MatchOperand_Success) {
5472       checkUnsupportedInstruction(Name, NameLoc);
5473       if (!Parser.hasPendingError()) {
5474         // FIXME: use real operand location rather than the current location.
5475         StringRef Msg =
5476           (Res == MatchOperand_ParseFail) ? "failed parsing operand." :
5477                                             "not a valid operand.";
5478         Error(getLoc(), Msg);
5479       }
5480       while (!trySkipToken(AsmToken::EndOfStatement)) {
5481         lex();
5482       }
5483       return true;
5484     }
5485 
5486     // Eat the comma or space if there is one.
5487     trySkipToken(AsmToken::Comma);
5488   }
5489 
5490   return false;
5491 }
5492 
5493 //===----------------------------------------------------------------------===//
5494 // Utility functions
5495 //===----------------------------------------------------------------------===//
5496 
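// Parse an integer operand written as '<Prefix>:<expr>', e.g. 'offset:4095'.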
5497 OperandMatchResultTy
5498 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, int64_t &IntVal) {
5499 
5500   if (!trySkipId(Prefix, AsmToken::Colon))
5501     return MatchOperand_NoMatch;
5502 
5503   return parseExpr(IntVal) ? MatchOperand_Success : MatchOperand_ParseFail;
5504 }
5505 
5506 OperandMatchResultTy
5507 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
5508                                     AMDGPUOperand::ImmTy ImmTy,
5509                                     bool (*ConvertResult)(int64_t&)) {
5510   SMLoc S = getLoc();
5511   int64_t Value = 0;
5512 
5513   OperandMatchResultTy Res = parseIntWithPrefix(Prefix, Value);
5514   if (Res != MatchOperand_Success)
5515     return Res;
5516 
5517   if (ConvertResult && !ConvertResult(Value)) {
5518     Error(S, "invalid " + StringRef(Prefix) + " value.");
5519   }
5520 
5521   Operands.push_back(AMDGPUOperand::CreateImm(this, Value, S, ImmTy));
5522   return MatchOperand_Success;
5523 }
5524 
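// Parse a prefixed array of 0/1 values such as 'op_sel:[0,1,1,0]' (at most
// four elements) and pack the bits into a single immediate operand.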
5525 OperandMatchResultTy
5526 AMDGPUAsmParser::parseOperandArrayWithPrefix(const char *Prefix,
5527                                              OperandVector &Operands,
5528                                              AMDGPUOperand::ImmTy ImmTy,
5529                                              bool (*ConvertResult)(int64_t&)) {
5530   SMLoc S = getLoc();
5531   if (!trySkipId(Prefix, AsmToken::Colon))
5532     return MatchOperand_NoMatch;
5533 
5534   if (!skipToken(AsmToken::LBrac, "expected a left square bracket"))
5535     return MatchOperand_ParseFail;
5536 
5537   unsigned Val = 0;
5538   const unsigned MaxSize = 4;
5539 
5540   // FIXME: How to verify the number of elements matches the number of src
5541   // operands?
5542   for (int I = 0; ; ++I) {
5543     int64_t Op;
5544     SMLoc Loc = getLoc();
5545     if (!parseExpr(Op))
5546       return MatchOperand_ParseFail;
5547 
5548     if (Op != 0 && Op != 1) {
5549       Error(Loc, "invalid " + StringRef(Prefix) + " value.");
5550       return MatchOperand_ParseFail;
5551     }
5552 
5553     Val |= (Op << I);
5554 
5555     if (trySkipToken(AsmToken::RBrac))
5556       break;
5557 
5558     if (I + 1 == MaxSize) {
5559       Error(getLoc(), "expected a closing square bracket");
5560       return MatchOperand_ParseFail;
5561     }
5562 
5563     if (!skipToken(AsmToken::Comma, "expected a comma"))
5564       return MatchOperand_ParseFail;
5565   }
5566 
5567   Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S, ImmTy));
5568   return MatchOperand_Success;
5569 }
5570 
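// Parse a named single-bit modifier; the bare name (e.g. 'gds', 'r128') sets
// the bit and the 'no'-prefixed form (e.g. 'nogds') clears it.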
5571 OperandMatchResultTy
5572 AMDGPUAsmParser::parseNamedBit(StringRef Name, OperandVector &Operands,
5573                                AMDGPUOperand::ImmTy ImmTy) {
5574   int64_t Bit;
5575   SMLoc S = getLoc();
5576 
5577   if (trySkipId(Name)) {
5578     Bit = 1;
5579   } else if (trySkipId("no", Name)) {
5580     Bit = 0;
5581   } else {
5582     return MatchOperand_NoMatch;
5583   }
5584 
5585   if (Name == "r128" && !hasMIMG_R128()) {
5586     Error(S, "r128 modifier is not supported on this GPU");
5587     return MatchOperand_ParseFail;
5588   }
5589   if (Name == "a16" && !isGFX9() && !hasGFX10A16()) {
5590     Error(S, "a16 modifier is not supported on this GPU");
5591     return MatchOperand_ParseFail;
5592   }
5593 
5594   if (isGFX9() && ImmTy == AMDGPUOperand::ImmTyA16)
5595     ImmTy = AMDGPUOperand::ImmTyR128A16;
5596 
5597   Operands.push_back(AMDGPUOperand::CreateImm(this, Bit, S, ImmTy));
5598   return MatchOperand_Success;
5599 }
5600 
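// Parse cache-policy modifiers (glc, slc, dlc, scc and their 'no'-prefixed
// forms), e.g. 'buffer_load_dword v0, off, s[0:3], 0 glc slc'. All modifiers
// are accumulated into a single CPol operand; repeating a modifier is an
// error.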
5601 OperandMatchResultTy
5602 AMDGPUAsmParser::parseCPol(OperandVector &Operands) {
5603   unsigned CPolOn = 0;
5604   unsigned CPolOff = 0;
5605   SMLoc S = getLoc();
5606 
5607   if (trySkipId("glc"))
5608     CPolOn = AMDGPU::CPol::GLC;
5609   else if (trySkipId("noglc"))
5610     CPolOff = AMDGPU::CPol::GLC;
5611   else if (trySkipId("slc"))
5612     CPolOn = AMDGPU::CPol::SLC;
5613   else if (trySkipId("noslc"))
5614     CPolOff = AMDGPU::CPol::SLC;
5615   else if (trySkipId("dlc"))
5616     CPolOn = AMDGPU::CPol::DLC;
5617   else if (trySkipId("nodlc"))
5618     CPolOff = AMDGPU::CPol::DLC;
5619   else if (trySkipId("scc"))
5620     CPolOn = AMDGPU::CPol::SCC;
5621   else if (trySkipId("noscc"))
5622     CPolOff = AMDGPU::CPol::SCC;
5623   else
5624     return MatchOperand_NoMatch;
5625 
5626   if (!isGFX10Plus() && ((CPolOn | CPolOff) & AMDGPU::CPol::DLC)) {
5627     Error(S, "dlc modifier is not supported on this GPU");
5628     return MatchOperand_ParseFail;
5629   }
5630 
5631   if (!isGFX90A() && ((CPolOn | CPolOff) & AMDGPU::CPol::SCC)) {
5632     Error(S, "scc modifier is not supported on this GPU");
5633     return MatchOperand_ParseFail;
5634   }
5635 
5636   if (CPolSeen & (CPolOn | CPolOff)) {
5637     Error(S, "duplicate cache policy modifier");
5638     return MatchOperand_ParseFail;
5639   }
5640 
5641   CPolSeen |= (CPolOn | CPolOff);
5642 
5643   for (unsigned I = 1; I != Operands.size(); ++I) {
5644     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
5645     if (Op.isCPol()) {
5646       Op.setImm((Op.getImm() | CPolOn) & ~CPolOff);
5647       return MatchOperand_Success;
5648     }
5649   }
5650 
5651   Operands.push_back(AMDGPUOperand::CreateImm(this, CPolOn, S,
5652                                               AMDGPUOperand::ImmTyCPol));
5653 
5654   return MatchOperand_Success;
5655 }
5656 
5657 static void addOptionalImmOperand(
5658   MCInst& Inst, const OperandVector& Operands,
5659   AMDGPUAsmParser::OptionalImmIndexMap& OptionalIdx,
5660   AMDGPUOperand::ImmTy ImmT,
5661   int64_t Default = 0) {
5662   auto i = OptionalIdx.find(ImmT);
5663   if (i != OptionalIdx.end()) {
5664     unsigned Idx = i->second;
5665     ((AMDGPUOperand &)*Operands[Idx]).addImmOperands(Inst, 1);
5666   } else {
5667     Inst.addOperand(MCOperand::createImm(Default));
5668   }
5669 }
5670 
5671 OperandMatchResultTy
5672 AMDGPUAsmParser::parseStringWithPrefix(StringRef Prefix,
5673                                        StringRef &Value,
5674                                        SMLoc &StringLoc) {
5675   if (!trySkipId(Prefix, AsmToken::Colon))
5676     return MatchOperand_NoMatch;
5677 
5678   StringLoc = getLoc();
5679   return parseId(Value, "expected an identifier") ? MatchOperand_Success
5680                                                   : MatchOperand_ParseFail;
5681 }
5682 
5683 //===----------------------------------------------------------------------===//
5684 // MTBUF format
5685 //===----------------------------------------------------------------------===//
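// The tbuffer 'format' operand may be numeric, e.g. 'format:22', or symbolic:
// 'format:[BUF_DATA_FORMAT_32_32, BUF_NUM_FORMAT_FLOAT]' on pre-GFX10 targets
// or a unified name such as 'format:[BUF_FMT_32_32_FLOAT]' on GFX10+ (names
// as defined in the MTBUFFormat tables).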
5686 
5687 bool AMDGPUAsmParser::tryParseFmt(const char *Pref,
5688                                   int64_t MaxVal,
5689                                   int64_t &Fmt) {
5690   int64_t Val;
5691   SMLoc Loc = getLoc();
5692 
5693   auto Res = parseIntWithPrefix(Pref, Val);
5694   if (Res == MatchOperand_ParseFail)
5695     return false;
5696   if (Res == MatchOperand_NoMatch)
5697     return true;
5698 
5699   if (Val < 0 || Val > MaxVal) {
5700     Error(Loc, Twine("out of range ", StringRef(Pref)));
5701     return false;
5702   }
5703 
5704   Fmt = Val;
5705   return true;
5706 }
5707 
5708 // dfmt and nfmt (in a tbuffer instruction) are parsed as one to allow their
5709 // values to live in a joint format operand in the MCInst encoding.
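// For example 'dfmt:6, nfmt:1' and 'nfmt:1, dfmt:6' are equivalent; either
// field may be omitted, in which case its default encoding is used.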
5710 OperandMatchResultTy
5711 AMDGPUAsmParser::parseDfmtNfmt(int64_t &Format) {
5712   using namespace llvm::AMDGPU::MTBUFFormat;
5713 
5714   int64_t Dfmt = DFMT_UNDEF;
5715   int64_t Nfmt = NFMT_UNDEF;
5716 
5717   // dfmt and nfmt can appear in either order, and each is optional.
5718   for (int I = 0; I < 2; ++I) {
5719     if (Dfmt == DFMT_UNDEF && !tryParseFmt("dfmt", DFMT_MAX, Dfmt))
5720       return MatchOperand_ParseFail;
5721 
5722     if (Nfmt == NFMT_UNDEF && !tryParseFmt("nfmt", NFMT_MAX, Nfmt)) {
5723       return MatchOperand_ParseFail;
5724     }
5725     // Skip optional comma between dfmt/nfmt
5726     // but guard against 2 commas following each other.
5727     if ((Dfmt == DFMT_UNDEF) != (Nfmt == NFMT_UNDEF) &&
5728         !peekToken().is(AsmToken::Comma)) {
5729       trySkipToken(AsmToken::Comma);
5730     }
5731   }
5732 
5733   if (Dfmt == DFMT_UNDEF && Nfmt == NFMT_UNDEF)
5734     return MatchOperand_NoMatch;
5735 
5736   Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt;
5737   Nfmt = (Nfmt == NFMT_UNDEF) ? NFMT_DEFAULT : Nfmt;
5738 
5739   Format = encodeDfmtNfmt(Dfmt, Nfmt);
5740   return MatchOperand_Success;
5741 }
5742 
5743 OperandMatchResultTy
5744 AMDGPUAsmParser::parseUfmt(int64_t &Format) {
5745   using namespace llvm::AMDGPU::MTBUFFormat;
5746 
5747   int64_t Fmt = UFMT_UNDEF;
5748 
5749   if (!tryParseFmt("format", UFMT_MAX, Fmt))
5750     return MatchOperand_ParseFail;
5751 
5752   if (Fmt == UFMT_UNDEF)
5753     return MatchOperand_NoMatch;
5754 
5755   Format = Fmt;
5756   return MatchOperand_Success;
5757 }
5758 
5759 bool AMDGPUAsmParser::matchDfmtNfmt(int64_t &Dfmt,
5760                                     int64_t &Nfmt,
5761                                     StringRef FormatStr,
5762                                     SMLoc Loc) {
5763   using namespace llvm::AMDGPU::MTBUFFormat;
5764   int64_t Format;
5765 
5766   Format = getDfmt(FormatStr);
5767   if (Format != DFMT_UNDEF) {
5768     Dfmt = Format;
5769     return true;
5770   }
5771 
5772   Format = getNfmt(FormatStr, getSTI());
5773   if (Format != NFMT_UNDEF) {
5774     Nfmt = Format;
5775     return true;
5776   }
5777 
5778   Error(Loc, "unsupported format");
5779   return false;
5780 }
5781 
5782 OperandMatchResultTy
5783 AMDGPUAsmParser::parseSymbolicSplitFormat(StringRef FormatStr,
5784                                           SMLoc FormatLoc,
5785                                           int64_t &Format) {
5786   using namespace llvm::AMDGPU::MTBUFFormat;
5787 
5788   int64_t Dfmt = DFMT_UNDEF;
5789   int64_t Nfmt = NFMT_UNDEF;
5790   if (!matchDfmtNfmt(Dfmt, Nfmt, FormatStr, FormatLoc))
5791     return MatchOperand_ParseFail;
5792 
5793   if (trySkipToken(AsmToken::Comma)) {
5794     StringRef Str;
5795     SMLoc Loc = getLoc();
5796     if (!parseId(Str, "expected a format string") ||
5797         !matchDfmtNfmt(Dfmt, Nfmt, Str, Loc)) {
5798       return MatchOperand_ParseFail;
5799     }
5800     if (Dfmt == DFMT_UNDEF) {
5801       Error(Loc, "duplicate numeric format");
5802       return MatchOperand_ParseFail;
5803     } else if (Nfmt == NFMT_UNDEF) {
5804       Error(Loc, "duplicate data format");
5805       return MatchOperand_ParseFail;
5806     }
5807   }
5808 
5809   Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt;
5810   Nfmt = (Nfmt == NFMT_UNDEF) ? NFMT_DEFAULT : Nfmt;
5811 
5812   if (isGFX10Plus()) {
5813     auto Ufmt = convertDfmtNfmt2Ufmt(Dfmt, Nfmt);
5814     if (Ufmt == UFMT_UNDEF) {
5815       Error(FormatLoc, "unsupported format");
5816       return MatchOperand_ParseFail;
5817     }
5818     Format = Ufmt;
5819   } else {
5820     Format = encodeDfmtNfmt(Dfmt, Nfmt);
5821   }
5822 
5823   return MatchOperand_Success;
5824 }
5825 
5826 OperandMatchResultTy
5827 AMDGPUAsmParser::parseSymbolicUnifiedFormat(StringRef FormatStr,
5828                                             SMLoc Loc,
5829                                             int64_t &Format) {
5830   using namespace llvm::AMDGPU::MTBUFFormat;
5831 
5832   auto Id = getUnifiedFormat(FormatStr);
5833   if (Id == UFMT_UNDEF)
5834     return MatchOperand_NoMatch;
5835 
5836   if (!isGFX10Plus()) {
5837     Error(Loc, "unified format is not supported on this GPU");
5838     return MatchOperand_ParseFail;
5839   }
5840 
5841   Format = Id;
5842   return MatchOperand_Success;
5843 }
5844 
5845 OperandMatchResultTy
5846 AMDGPUAsmParser::parseNumericFormat(int64_t &Format) {
5847   using namespace llvm::AMDGPU::MTBUFFormat;
5848   SMLoc Loc = getLoc();
5849 
5850   if (!parseExpr(Format))
5851     return MatchOperand_ParseFail;
5852   if (!isValidFormatEncoding(Format, getSTI())) {
5853     Error(Loc, "out of range format");
5854     return MatchOperand_ParseFail;
5855   }
5856 
5857   return MatchOperand_Success;
5858 }
5859 
5860 OperandMatchResultTy
5861 AMDGPUAsmParser::parseSymbolicOrNumericFormat(int64_t &Format) {
5862   using namespace llvm::AMDGPU::MTBUFFormat;
5863 
5864   if (!trySkipId("format", AsmToken::Colon))
5865     return MatchOperand_NoMatch;
5866 
5867   if (trySkipToken(AsmToken::LBrac)) {
5868     StringRef FormatStr;
5869     SMLoc Loc = getLoc();
5870     if (!parseId(FormatStr, "expected a format string"))
5871       return MatchOperand_ParseFail;
5872 
5873     auto Res = parseSymbolicUnifiedFormat(FormatStr, Loc, Format);
5874     if (Res == MatchOperand_NoMatch)
5875       Res = parseSymbolicSplitFormat(FormatStr, Loc, Format);
5876     if (Res != MatchOperand_Success)
5877       return Res;
5878 
5879     if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
5880       return MatchOperand_ParseFail;
5881 
5882     return MatchOperand_Success;
5883   }
5884 
5885   return parseNumericFormat(Format);
5886 }
5887 
5888 OperandMatchResultTy
5889 AMDGPUAsmParser::parseFORMAT(OperandVector &Operands) {
5890   using namespace llvm::AMDGPU::MTBUFFormat;
5891 
5892   int64_t Format = getDefaultFormatEncoding(getSTI());
5893   OperandMatchResultTy Res;
5894   SMLoc Loc = getLoc();
5895 
5896   // Parse legacy format syntax.
5897   Res = isGFX10Plus() ? parseUfmt(Format) : parseDfmtNfmt(Format);
5898   if (Res == MatchOperand_ParseFail)
5899     return Res;
5900 
5901   bool FormatFound = (Res == MatchOperand_Success);
5902 
5903   Operands.push_back(
5904     AMDGPUOperand::CreateImm(this, Format, Loc, AMDGPUOperand::ImmTyFORMAT));
5905 
5906   if (FormatFound)
5907     trySkipToken(AsmToken::Comma);
5908 
5909   if (isToken(AsmToken::EndOfStatement)) {
5910     // We are expecting an soffset operand,
5911     // but let the matcher handle the error.
5912     return MatchOperand_Success;
5913   }
5914 
5915   // Parse soffset.
5916   Res = parseRegOrImm(Operands);
5917   if (Res != MatchOperand_Success)
5918     return Res;
5919 
5920   trySkipToken(AsmToken::Comma);
5921 
5922   if (!FormatFound) {
5923     Res = parseSymbolicOrNumericFormat(Format);
5924     if (Res == MatchOperand_ParseFail)
5925       return Res;
5926     if (Res == MatchOperand_Success) {
5927       auto Size = Operands.size();
5928       AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands[Size - 2]);
5929       assert(Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyFORMAT);
5930       Op.setImm(Format);
5931     }
5932     return MatchOperand_Success;
5933   }
5934 
5935   if (isId("format") && peekToken().is(AsmToken::Colon)) {
5936     Error(getLoc(), "duplicate format");
5937     return MatchOperand_ParseFail;
5938   }
5939   return MatchOperand_Success;
5940 }
5941 
5942 //===----------------------------------------------------------------------===//
5943 // ds
5944 //===----------------------------------------------------------------------===//
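// DS instructions take optional 'offset:' (or 'offset0:'/'offset1:' for the
// two-address forms) and 'gds' modifiers, e.g.
// 'ds_write2_b32 v1, v2, v3 offset0:4 offset1:8'.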
5945 
5946 void AMDGPUAsmParser::cvtDSOffset01(MCInst &Inst,
5947                                     const OperandVector &Operands) {
5948   OptionalImmIndexMap OptionalIdx;
5949 
5950   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
5951     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
5952 
5953     // Add the register arguments
5954     if (Op.isReg()) {
5955       Op.addRegOperands(Inst, 1);
5956       continue;
5957     }
5958 
5959     // Handle optional arguments
5960     OptionalIdx[Op.getImmTy()] = i;
5961   }
5962 
5963   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset0);
5964   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset1);
5965   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS);
5966 
5967   Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0
5968 }
5969 
5970 void AMDGPUAsmParser::cvtDSImpl(MCInst &Inst, const OperandVector &Operands,
5971                                 bool IsGdsHardcoded) {
5972   OptionalImmIndexMap OptionalIdx;
5973 
5974   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
5975     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
5976 
5977     // Add the register arguments
5978     if (Op.isReg()) {
5979       Op.addRegOperands(Inst, 1);
5980       continue;
5981     }
5982 
5983     if (Op.isToken() && Op.getToken() == "gds") {
5984       IsGdsHardcoded = true;
5985       continue;
5986     }
5987 
5988     // Handle optional arguments
5989     OptionalIdx[Op.getImmTy()] = i;
5990   }
5991 
5992   AMDGPUOperand::ImmTy OffsetType =
5993     (Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_gfx10 ||
5994      Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_gfx6_gfx7 ||
5995      Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_vi) ? AMDGPUOperand::ImmTySwizzle :
5996                                                       AMDGPUOperand::ImmTyOffset;
5997 
5998   addOptionalImmOperand(Inst, Operands, OptionalIdx, OffsetType);
5999 
6000   if (!IsGdsHardcoded) {
6001     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS);
6002   }
6003   Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0
6004 }
6005 
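// Convert parsed export operands into an MCInst: the 'en' mask is derived
// from which sources are live registers rather than 'off', and for 'compr'
// exports the third source is moved into the second slot and the upper
// source slots are cleared.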
6006 void AMDGPUAsmParser::cvtExp(MCInst &Inst, const OperandVector &Operands) {
6007   OptionalImmIndexMap OptionalIdx;
6008 
6009   unsigned OperandIdx[4];
6010   unsigned EnMask = 0;
6011   int SrcIdx = 0;
6012 
6013   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
6014     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
6015 
6016     // Add the register arguments
6017     if (Op.isReg()) {
6018       assert(SrcIdx < 4);
6019       OperandIdx[SrcIdx] = Inst.size();
6020       Op.addRegOperands(Inst, 1);
6021       ++SrcIdx;
6022       continue;
6023     }
6024 
6025     if (Op.isOff()) {
6026       assert(SrcIdx < 4);
6027       OperandIdx[SrcIdx] = Inst.size();
6028       Inst.addOperand(MCOperand::createReg(AMDGPU::NoRegister));
6029       ++SrcIdx;
6030       continue;
6031     }
6032 
6033     if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyExpTgt) {
6034       Op.addImmOperands(Inst, 1);
6035       continue;
6036     }
6037 
6038     if (Op.isToken() && Op.getToken() == "done")
6039       continue;
6040 
6041     // Handle optional arguments
6042     OptionalIdx[Op.getImmTy()] = i;
6043   }
6044 
6045   assert(SrcIdx == 4);
6046 
6047   bool Compr = false;
6048   if (OptionalIdx.find(AMDGPUOperand::ImmTyExpCompr) != OptionalIdx.end()) {
6049     Compr = true;
6050     Inst.getOperand(OperandIdx[1]) = Inst.getOperand(OperandIdx[2]);
6051     Inst.getOperand(OperandIdx[2]).setReg(AMDGPU::NoRegister);
6052     Inst.getOperand(OperandIdx[3]).setReg(AMDGPU::NoRegister);
6053   }
6054 
6055   for (auto i = 0; i < SrcIdx; ++i) {
6056     if (Inst.getOperand(OperandIdx[i]).getReg() != AMDGPU::NoRegister) {
6057       EnMask |= Compr ? (0x3 << i * 2) : (0x1 << i);
6058     }
6059   }
6060 
6061   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpVM);
6062   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpCompr);
6063 
6064   Inst.addOperand(MCOperand::createImm(EnMask));
6065 }
6066 
6067 //===----------------------------------------------------------------------===//
6068 // s_waitcnt
6069 //===----------------------------------------------------------------------===//
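// The s_waitcnt operand is either a raw immediate or a list of named
// counters, e.g. 's_waitcnt vmcnt(0) lgkmcnt(0)'. The '_sat' suffixed counter
// names clamp too-large values instead of reporting an error.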
6070 
6071 static bool
6072 encodeCnt(
6073   const AMDGPU::IsaVersion ISA,
6074   int64_t &IntVal,
6075   int64_t CntVal,
6076   bool Saturate,
6077   unsigned (*encode)(const IsaVersion &Version, unsigned, unsigned),
6078   unsigned (*decode)(const IsaVersion &Version, unsigned))
6079 {
6080   bool Failed = false;
6081 
6082   IntVal = encode(ISA, IntVal, CntVal);
6083   if (CntVal != decode(ISA, IntVal)) {
6084     if (Saturate) {
6085       IntVal = encode(ISA, IntVal, -1);
6086     } else {
6087       Failed = true;
6088     }
6089   }
6090   return Failed;
6091 }
6092 
6093 bool AMDGPUAsmParser::parseCnt(int64_t &IntVal) {
6094 
6095   SMLoc CntLoc = getLoc();
6096   StringRef CntName = getTokenStr();
6097 
6098   if (!skipToken(AsmToken::Identifier, "expected a counter name") ||
6099       !skipToken(AsmToken::LParen, "expected a left parenthesis"))
6100     return false;
6101 
6102   int64_t CntVal;
6103   SMLoc ValLoc = getLoc();
6104   if (!parseExpr(CntVal))
6105     return false;
6106 
6107   AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
6108 
6109   bool Failed = true;
6110   bool Sat = CntName.endswith("_sat");
6111 
6112   if (CntName == "vmcnt" || CntName == "vmcnt_sat") {
6113     Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeVmcnt, decodeVmcnt);
6114   } else if (CntName == "expcnt" || CntName == "expcnt_sat") {
6115     Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeExpcnt, decodeExpcnt);
6116   } else if (CntName == "lgkmcnt" || CntName == "lgkmcnt_sat") {
6117     Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeLgkmcnt, decodeLgkmcnt);
6118   } else {
6119     Error(CntLoc, "invalid counter name " + CntName);
6120     return false;
6121   }
6122 
6123   if (Failed) {
6124     Error(ValLoc, "too large value for " + CntName);
6125     return false;
6126   }
6127 
6128   if (!skipToken(AsmToken::RParen, "expected a closing parenthesis"))
6129     return false;
6130 
6131   if (trySkipToken(AsmToken::Amp) || trySkipToken(AsmToken::Comma)) {
6132     if (isToken(AsmToken::EndOfStatement)) {
6133       Error(getLoc(), "expected a counter name");
6134       return false;
6135     }
6136   }
6137 
6138   return true;
6139 }
6140 
6141 OperandMatchResultTy
6142 AMDGPUAsmParser::parseSWaitCntOps(OperandVector &Operands) {
6143   AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
6144   int64_t Waitcnt = getWaitcntBitMask(ISA);
6145   SMLoc S = getLoc();
6146 
6147   if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) {
6148     while (!isToken(AsmToken::EndOfStatement)) {
6149       if (!parseCnt(Waitcnt))
6150         return MatchOperand_ParseFail;
6151     }
6152   } else {
6153     if (!parseExpr(Waitcnt))
6154       return MatchOperand_ParseFail;
6155   }
6156 
6157   Operands.push_back(AMDGPUOperand::CreateImm(this, Waitcnt, S));
6158   return MatchOperand_Success;
6159 }
6160 
6161 bool
6162 AMDGPUOperand::isSWaitCnt() const {
6163   return isImm();
6164 }
6165 
6166 //===----------------------------------------------------------------------===//
6167 // hwreg
6168 //===----------------------------------------------------------------------===//
6169 
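// Illustrative hwreg operand forms handled below (assumed typical usage):
//   hwreg(HW_REG_MODE)               // symbolic name, default offset/width
//   hwreg(HW_REG_MODE, 0, 4)         // explicit bit offset and width
//   hwreg(5, 1, 6)                   // numeric register code
//   0x1f81                           // raw 16-bit immediate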
6170 bool
6171 AMDGPUAsmParser::parseHwregBody(OperandInfoTy &HwReg,
6172                                 OperandInfoTy &Offset,
6173                                 OperandInfoTy &Width) {
6174   using namespace llvm::AMDGPU::Hwreg;
6175 
6176   // The register may be specified by name or using a numeric code
6177   HwReg.Loc = getLoc();
6178   if (isToken(AsmToken::Identifier) &&
6179       (HwReg.Id = getHwregId(getTokenStr())) >= 0) {
6180     HwReg.IsSymbolic = true;
6181     lex(); // skip register name
6182   } else if (!parseExpr(HwReg.Id, "a register name")) {
6183     return false;
6184   }
6185 
6186   if (trySkipToken(AsmToken::RParen))
6187     return true;
6188 
6189   // parse optional params
6190   if (!skipToken(AsmToken::Comma, "expected a comma or a closing parenthesis"))
6191     return false;
6192 
6193   Offset.Loc = getLoc();
6194   if (!parseExpr(Offset.Id))
6195     return false;
6196 
6197   if (!skipToken(AsmToken::Comma, "expected a comma"))
6198     return false;
6199 
6200   Width.Loc = getLoc();
6201   return parseExpr(Width.Id) &&
6202          skipToken(AsmToken::RParen, "expected a closing parenthesis");
6203 }
6204 
6205 bool
6206 AMDGPUAsmParser::validateHwreg(const OperandInfoTy &HwReg,
6207                                const OperandInfoTy &Offset,
6208                                const OperandInfoTy &Width) {
6209 
6210   using namespace llvm::AMDGPU::Hwreg;
6211 
6212   if (HwReg.IsSymbolic && !isValidHwreg(HwReg.Id, getSTI())) {
6213     Error(HwReg.Loc,
6214           "specified hardware register is not supported on this GPU");
6215     return false;
6216   }
6217   if (!isValidHwreg(HwReg.Id)) {
6218     Error(HwReg.Loc,
6219           "invalid code of hardware register: only 6-bit values are legal");
6220     return false;
6221   }
6222   if (!isValidHwregOffset(Offset.Id)) {
6223     Error(Offset.Loc, "invalid bit offset: only 5-bit values are legal");
6224     return false;
6225   }
6226   if (!isValidHwregWidth(Width.Id)) {
6227     Error(Width.Loc,
6228           "invalid bitfield width: only values from 1 to 32 are legal");
6229     return false;
6230   }
6231   return true;
6232 }
6233 
6234 OperandMatchResultTy
6235 AMDGPUAsmParser::parseHwreg(OperandVector &Operands) {
6236   using namespace llvm::AMDGPU::Hwreg;
6237 
6238   int64_t ImmVal = 0;
6239   SMLoc Loc = getLoc();
6240 
6241   if (trySkipId("hwreg", AsmToken::LParen)) {
6242     OperandInfoTy HwReg(ID_UNKNOWN_);
6243     OperandInfoTy Offset(OFFSET_DEFAULT_);
6244     OperandInfoTy Width(WIDTH_DEFAULT_);
6245     if (parseHwregBody(HwReg, Offset, Width) &&
6246         validateHwreg(HwReg, Offset, Width)) {
6247       ImmVal = encodeHwreg(HwReg.Id, Offset.Id, Width.Id);
6248     } else {
6249       return MatchOperand_ParseFail;
6250     }
6251   } else if (parseExpr(ImmVal, "a hwreg macro")) {
6252     if (ImmVal < 0 || !isUInt<16>(ImmVal)) {
6253       Error(Loc, "invalid immediate: only 16-bit values are legal");
6254       return MatchOperand_ParseFail;
6255     }
6256   } else {
6257     return MatchOperand_ParseFail;
6258   }
6259 
6260   Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTyHwreg));
6261   return MatchOperand_Success;
6262 }
6263 
6264 bool AMDGPUOperand::isHwreg() const {
6265   return isImmTy(ImmTyHwreg);
6266 }
6267 
6268 //===----------------------------------------------------------------------===//
6269 // sendmsg
6270 //===----------------------------------------------------------------------===//
6271 
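// Illustrative sendmsg operand forms handled below (assumed typical usage):
//   sendmsg(MSG_INTERRUPT)
//   sendmsg(MSG_GS, GS_OP_EMIT, 0)
//   0x22                             // raw 16-bit immediate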
6272 bool
6273 AMDGPUAsmParser::parseSendMsgBody(OperandInfoTy &Msg,
6274                                   OperandInfoTy &Op,
6275                                   OperandInfoTy &Stream) {
6276   using namespace llvm::AMDGPU::SendMsg;
6277 
6278   Msg.Loc = getLoc();
6279   if (isToken(AsmToken::Identifier) && (Msg.Id = getMsgId(getTokenStr())) >= 0) {
6280     Msg.IsSymbolic = true;
6281     lex(); // skip message name
6282   } else if (!parseExpr(Msg.Id, "a message name")) {
6283     return false;
6284   }
6285 
6286   if (trySkipToken(AsmToken::Comma)) {
6287     Op.IsDefined = true;
6288     Op.Loc = getLoc();
6289     if (isToken(AsmToken::Identifier) &&
6290         (Op.Id = getMsgOpId(Msg.Id, getTokenStr())) >= 0) {
6291       lex(); // skip operation name
6292     } else if (!parseExpr(Op.Id, "an operation name")) {
6293       return false;
6294     }
6295 
6296     if (trySkipToken(AsmToken::Comma)) {
6297       Stream.IsDefined = true;
6298       Stream.Loc = getLoc();
6299       if (!parseExpr(Stream.Id))
6300         return false;
6301     }
6302   }
6303 
6304   return skipToken(AsmToken::RParen, "expected a closing parenthesis");
6305 }
6306 
6307 bool
6308 AMDGPUAsmParser::validateSendMsg(const OperandInfoTy &Msg,
6309                                  const OperandInfoTy &Op,
6310                                  const OperandInfoTy &Stream) {
6311   using namespace llvm::AMDGPU::SendMsg;
6312 
6313   // Validation strictness depends on whether the message is specified
6314   // in a symbolic or in a numeric form. In the latter case
6315   // only the encoding possibility is checked.
6316   bool Strict = Msg.IsSymbolic;
6317 
6318   if (!isValidMsgId(Msg.Id, getSTI(), Strict)) {
6319     Error(Msg.Loc, "invalid message id");
6320     return false;
6321   }
6322   if (Strict && (msgRequiresOp(Msg.Id) != Op.IsDefined)) {
6323     if (Op.IsDefined) {
6324       Error(Op.Loc, "message does not support operations");
6325     } else {
6326       Error(Msg.Loc, "missing message operation");
6327     }
6328     return false;
6329   }
6330   if (!isValidMsgOp(Msg.Id, Op.Id, getSTI(), Strict)) {
6331     Error(Op.Loc, "invalid operation id");
6332     return false;
6333   }
6334   if (Strict && !msgSupportsStream(Msg.Id, Op.Id) && Stream.IsDefined) {
6335     Error(Stream.Loc, "message operation does not support streams");
6336     return false;
6337   }
6338   if (!isValidMsgStream(Msg.Id, Op.Id, Stream.Id, getSTI(), Strict)) {
6339     Error(Stream.Loc, "invalid message stream id");
6340     return false;
6341   }
6342   return true;
6343 }
6344 
6345 OperandMatchResultTy
6346 AMDGPUAsmParser::parseSendMsgOp(OperandVector &Operands) {
6347   using namespace llvm::AMDGPU::SendMsg;
6348 
6349   int64_t ImmVal = 0;
6350   SMLoc Loc = getLoc();
6351 
6352   if (trySkipId("sendmsg", AsmToken::LParen)) {
6353     OperandInfoTy Msg(ID_UNKNOWN_);
6354     OperandInfoTy Op(OP_NONE_);
6355     OperandInfoTy Stream(STREAM_ID_NONE_);
6356     if (parseSendMsgBody(Msg, Op, Stream) &&
6357         validateSendMsg(Msg, Op, Stream)) {
6358       ImmVal = encodeMsg(Msg.Id, Op.Id, Stream.Id);
6359     } else {
6360       return MatchOperand_ParseFail;
6361     }
6362   } else if (parseExpr(ImmVal, "a sendmsg macro")) {
6363     if (ImmVal < 0 || !isUInt<16>(ImmVal)) {
6364       Error(Loc, "invalid immediate: only 16-bit values are legal");
6365       return MatchOperand_ParseFail;
6366     }
6367   } else {
6368     return MatchOperand_ParseFail;
6369   }
6370 
6371   Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTySendMsg));
6372   return MatchOperand_Success;
6373 }
6374 
6375 bool AMDGPUOperand::isSendMsg() const {
6376   return isImmTy(ImmTySendMsg);
6377 }
6378 
6379 //===----------------------------------------------------------------------===//
6380 // v_interp
6381 //===----------------------------------------------------------------------===//
6382 
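// Illustrative v_interp operands handled below (assumed typical usage):
//   v_interp_mov_f32 v0, p10, attr0.x      // slot: p10, p20 or p0
//   v_interp_p1_f32  v1, v2, attr31.w      // attribute 0..63, channel .x/.y/.z/.w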
6383 OperandMatchResultTy AMDGPUAsmParser::parseInterpSlot(OperandVector &Operands) {
6384   StringRef Str;
6385   SMLoc S = getLoc();
6386 
6387   if (!parseId(Str))
6388     return MatchOperand_NoMatch;
6389 
6390   int Slot = StringSwitch<int>(Str)
6391     .Case("p10", 0)
6392     .Case("p20", 1)
6393     .Case("p0", 2)
6394     .Default(-1);
6395 
6396   if (Slot == -1) {
6397     Error(S, "invalid interpolation slot");
6398     return MatchOperand_ParseFail;
6399   }
6400 
6401   Operands.push_back(AMDGPUOperand::CreateImm(this, Slot, S,
6402                                               AMDGPUOperand::ImmTyInterpSlot));
6403   return MatchOperand_Success;
6404 }
6405 
6406 OperandMatchResultTy AMDGPUAsmParser::parseInterpAttr(OperandVector &Operands) {
6407   StringRef Str;
6408   SMLoc S = getLoc();
6409 
6410   if (!parseId(Str))
6411     return MatchOperand_NoMatch;
6412 
6413   if (!Str.startswith("attr")) {
6414     Error(S, "invalid interpolation attribute");
6415     return MatchOperand_ParseFail;
6416   }
6417 
6418   StringRef Chan = Str.take_back(2);
6419   int AttrChan = StringSwitch<int>(Chan)
6420     .Case(".x", 0)
6421     .Case(".y", 1)
6422     .Case(".z", 2)
6423     .Case(".w", 3)
6424     .Default(-1);
6425   if (AttrChan == -1) {
6426     Error(S, "invalid or missing interpolation attribute channel");
6427     return MatchOperand_ParseFail;
6428   }
6429 
6430   Str = Str.drop_back(2).drop_front(4);
6431 
6432   uint8_t Attr;
6433   if (Str.getAsInteger(10, Attr)) {
6434     Error(S, "invalid or missing interpolation attribute number");
6435     return MatchOperand_ParseFail;
6436   }
6437 
6438   if (Attr > 63) {
6439     Error(S, "out of bounds interpolation attribute number");
6440     return MatchOperand_ParseFail;
6441   }
6442 
6443   SMLoc SChan = SMLoc::getFromPointer(Chan.data());
6444 
6445   Operands.push_back(AMDGPUOperand::CreateImm(this, Attr, S,
6446                                               AMDGPUOperand::ImmTyInterpAttr));
6447   Operands.push_back(AMDGPUOperand::CreateImm(this, AttrChan, SChan,
6448                                               AMDGPUOperand::ImmTyAttrChan));
6449   return MatchOperand_Success;
6450 }
6451 
6452 //===----------------------------------------------------------------------===//
6453 // exp
6454 //===----------------------------------------------------------------------===//
6455 
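// Illustrative exp targets handled below (assumed typical usage):
//   exp mrt0 v0, v0, v0, v0 done vm
//   exp pos0 v1, v2, v3, v4
//   exp null off, off, off, off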
6456 OperandMatchResultTy AMDGPUAsmParser::parseExpTgt(OperandVector &Operands) {
6457   using namespace llvm::AMDGPU::Exp;
6458 
6459   StringRef Str;
6460   SMLoc S = getLoc();
6461 
6462   if (!parseId(Str))
6463     return MatchOperand_NoMatch;
6464 
6465   unsigned Id = getTgtId(Str);
6466   if (Id == ET_INVALID || !isSupportedTgtId(Id, getSTI())) {
6467     Error(S, (Id == ET_INVALID) ?
6468                 "invalid exp target" :
6469                 "exp target is not supported on this GPU");
6470     return MatchOperand_ParseFail;
6471   }
6472 
6473   Operands.push_back(AMDGPUOperand::CreateImm(this, Id, S,
6474                                               AMDGPUOperand::ImmTyExpTgt));
6475   return MatchOperand_Success;
6476 }
6477 
6478 //===----------------------------------------------------------------------===//
6479 // parser helpers
6480 //===----------------------------------------------------------------------===//
6481 
6482 bool
6483 AMDGPUAsmParser::isId(const AsmToken &Token, const StringRef Id) const {
6484   return Token.is(AsmToken::Identifier) && Token.getString() == Id;
6485 }
6486 
6487 bool
6488 AMDGPUAsmParser::isId(const StringRef Id) const {
6489   return isId(getToken(), Id);
6490 }
6491 
6492 bool
6493 AMDGPUAsmParser::isToken(const AsmToken::TokenKind Kind) const {
6494   return getTokenKind() == Kind;
6495 }
6496 
6497 bool
6498 AMDGPUAsmParser::trySkipId(const StringRef Id) {
6499   if (isId(Id)) {
6500     lex();
6501     return true;
6502   }
6503   return false;
6504 }
6505 
6506 bool
6507 AMDGPUAsmParser::trySkipId(const StringRef Pref, const StringRef Id) {
6508   if (isToken(AsmToken::Identifier)) {
6509     StringRef Tok = getTokenStr();
6510     if (Tok.startswith(Pref) && Tok.drop_front(Pref.size()) == Id) {
6511       lex();
6512       return true;
6513     }
6514   }
6515   return false;
6516 }
6517 
6518 bool
6519 AMDGPUAsmParser::trySkipId(const StringRef Id, const AsmToken::TokenKind Kind) {
6520   if (isId(Id) && peekToken().is(Kind)) {
6521     lex();
6522     lex();
6523     return true;
6524   }
6525   return false;
6526 }
6527 
6528 bool
6529 AMDGPUAsmParser::trySkipToken(const AsmToken::TokenKind Kind) {
6530   if (isToken(Kind)) {
6531     lex();
6532     return true;
6533   }
6534   return false;
6535 }
6536 
6537 bool
6538 AMDGPUAsmParser::skipToken(const AsmToken::TokenKind Kind,
6539                            const StringRef ErrMsg) {
6540   if (!trySkipToken(Kind)) {
6541     Error(getLoc(), ErrMsg);
6542     return false;
6543   }
6544   return true;
6545 }
6546 
6547 bool
6548 AMDGPUAsmParser::parseExpr(int64_t &Imm, StringRef Expected) {
6549   SMLoc S = getLoc();
6550 
6551   const MCExpr *Expr;
6552   if (Parser.parseExpression(Expr))
6553     return false;
6554 
6555   if (Expr->evaluateAsAbsolute(Imm))
6556     return true;
6557 
6558   if (Expected.empty()) {
6559     Error(S, "expected absolute expression");
6560   } else {
6561     Error(S, Twine("expected ", Expected) +
6562              Twine(" or an absolute expression"));
6563   }
6564   return false;
6565 }
6566 
6567 bool
6568 AMDGPUAsmParser::parseExpr(OperandVector &Operands) {
6569   SMLoc S = getLoc();
6570 
6571   const MCExpr *Expr;
6572   if (Parser.parseExpression(Expr))
6573     return false;
6574 
6575   int64_t IntVal;
6576   if (Expr->evaluateAsAbsolute(IntVal)) {
6577     Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S));
6578   } else {
6579     Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S));
6580   }
6581   return true;
6582 }
6583 
6584 bool
6585 AMDGPUAsmParser::parseString(StringRef &Val, const StringRef ErrMsg) {
6586   if (isToken(AsmToken::String)) {
6587     Val = getToken().getStringContents();
6588     lex();
6589     return true;
6590   } else {
6591     Error(getLoc(), ErrMsg);
6592     return false;
6593   }
6594 }
6595 
6596 bool
6597 AMDGPUAsmParser::parseId(StringRef &Val, const StringRef ErrMsg) {
6598   if (isToken(AsmToken::Identifier)) {
6599     Val = getTokenStr();
6600     lex();
6601     return true;
6602   } else {
6603     if (!ErrMsg.empty())
6604       Error(getLoc(), ErrMsg);
6605     return false;
6606   }
6607 }
6608 
6609 AsmToken
6610 AMDGPUAsmParser::getToken() const {
6611   return Parser.getTok();
6612 }
6613 
6614 AsmToken
6615 AMDGPUAsmParser::peekToken() {
6616   return isToken(AsmToken::EndOfStatement) ? getToken() : getLexer().peekTok();
6617 }
6618 
6619 void
6620 AMDGPUAsmParser::peekTokens(MutableArrayRef<AsmToken> Tokens) {
6621   auto TokCount = getLexer().peekTokens(Tokens);
6622 
6623   for (auto Idx = TokCount; Idx < Tokens.size(); ++Idx)
6624     Tokens[Idx] = AsmToken(AsmToken::Error, "");
6625 }
6626 
6627 AsmToken::TokenKind
6628 AMDGPUAsmParser::getTokenKind() const {
6629   return getLexer().getKind();
6630 }
6631 
6632 SMLoc
6633 AMDGPUAsmParser::getLoc() const {
6634   return getToken().getLoc();
6635 }
6636 
6637 StringRef
6638 AMDGPUAsmParser::getTokenStr() const {
6639   return getToken().getString();
6640 }
6641 
6642 void
6643 AMDGPUAsmParser::lex() {
6644   Parser.Lex();
6645 }
6646 
6647 SMLoc
6648 AMDGPUAsmParser::getOperandLoc(std::function<bool(const AMDGPUOperand&)> Test,
6649                                const OperandVector &Operands) const {
6650   for (unsigned i = Operands.size() - 1; i > 0; --i) {
6651     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
6652     if (Test(Op))
6653       return Op.getStartLoc();
6654   }
6655   return ((AMDGPUOperand &)*Operands[0]).getStartLoc();
6656 }
6657 
6658 SMLoc
6659 AMDGPUAsmParser::getImmLoc(AMDGPUOperand::ImmTy Type,
6660                            const OperandVector &Operands) const {
6661   auto Test = [=](const AMDGPUOperand& Op) { return Op.isImmTy(Type); };
6662   return getOperandLoc(Test, Operands);
6663 }
6664 
6665 SMLoc
6666 AMDGPUAsmParser::getRegLoc(unsigned Reg,
6667                            const OperandVector &Operands) const {
6668   auto Test = [=](const AMDGPUOperand& Op) {
6669     return Op.isRegKind() && Op.getReg() == Reg;
6670   };
6671   return getOperandLoc(Test, Operands);
6672 }
6673 
6674 SMLoc
6675 AMDGPUAsmParser::getLitLoc(const OperandVector &Operands) const {
6676   auto Test = [](const AMDGPUOperand& Op) {
6677     return Op.IsImmKindLiteral() || Op.isExpr();
6678   };
6679   return getOperandLoc(Test, Operands);
6680 }
6681 
6682 SMLoc
6683 AMDGPUAsmParser::getConstLoc(const OperandVector &Operands) const {
6684   auto Test = [](const AMDGPUOperand& Op) {
6685     return Op.isImmKindConst();
6686   };
6687   return getOperandLoc(Test, Operands);
6688 }
6689 
6690 //===----------------------------------------------------------------------===//
6691 // swizzle
6692 //===----------------------------------------------------------------------===//
6693 
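// Illustrative ds_swizzle_b32 offset forms handled below (assumed typical
// usage of the swizzle macro syntax):
//   ds_swizzle_b32 v8, v2 offset:swizzle(QUAD_PERM, 0, 1, 2, 3)
//   ds_swizzle_b32 v8, v2 offset:swizzle(BITMASK_PERM, "01pi0")
//   ds_swizzle_b32 v8, v2 offset:swizzle(BROADCAST, 8, 0)
//   ds_swizzle_b32 v8, v2 offset:swizzle(SWAP, 16)
//   ds_swizzle_b32 v8, v2 offset:swizzle(REVERSE, 4)
//   ds_swizzle_b32 v8, v2 offset:0xffff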
6694 LLVM_READNONE
6695 static unsigned
6696 encodeBitmaskPerm(const unsigned AndMask,
6697                   const unsigned OrMask,
6698                   const unsigned XorMask) {
6699   using namespace llvm::AMDGPU::Swizzle;
6700 
6701   return BITMASK_PERM_ENC |
6702          (AndMask << BITMASK_AND_SHIFT) |
6703          (OrMask  << BITMASK_OR_SHIFT)  |
6704          (XorMask << BITMASK_XOR_SHIFT);
6705 }
6706 
6707 bool
6708 AMDGPUAsmParser::parseSwizzleOperand(int64_t &Op,
6709                                      const unsigned MinVal,
6710                                      const unsigned MaxVal,
6711                                      const StringRef ErrMsg,
6712                                      SMLoc &Loc) {
6713   if (!skipToken(AsmToken::Comma, "expected a comma")) {
6714     return false;
6715   }
6716   Loc = getLoc();
6717   if (!parseExpr(Op)) {
6718     return false;
6719   }
6720   if (Op < MinVal || Op > MaxVal) {
6721     Error(Loc, ErrMsg);
6722     return false;
6723   }
6724 
6725   return true;
6726 }
6727 
6728 bool
6729 AMDGPUAsmParser::parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
6730                                       const unsigned MinVal,
6731                                       const unsigned MaxVal,
6732                                       const StringRef ErrMsg) {
6733   SMLoc Loc;
6734   for (unsigned i = 0; i < OpNum; ++i) {
6735     if (!parseSwizzleOperand(Op[i], MinVal, MaxVal, ErrMsg, Loc))
6736       return false;
6737   }
6738 
6739   return true;
6740 }
6741 
6742 bool
6743 AMDGPUAsmParser::parseSwizzleQuadPerm(int64_t &Imm) {
6744   using namespace llvm::AMDGPU::Swizzle;
6745 
6746   int64_t Lane[LANE_NUM];
6747   if (parseSwizzleOperands(LANE_NUM, Lane, 0, LANE_MAX,
6748                            "expected a 2-bit lane id")) {
6749     Imm = QUAD_PERM_ENC;
6750     for (unsigned I = 0; I < LANE_NUM; ++I) {
6751       Imm |= Lane[I] << (LANE_SHIFT * I);
6752     }
6753     return true;
6754   }
6755   return false;
6756 }
6757 
6758 bool
6759 AMDGPUAsmParser::parseSwizzleBroadcast(int64_t &Imm) {
6760   using namespace llvm::AMDGPU::Swizzle;
6761 
6762   SMLoc Loc;
6763   int64_t GroupSize;
6764   int64_t LaneIdx;
6765 
6766   if (!parseSwizzleOperand(GroupSize,
6767                            2, 32,
6768                            "group size must be in the interval [2,32]",
6769                            Loc)) {
6770     return false;
6771   }
6772   if (!isPowerOf2_64(GroupSize)) {
6773     Error(Loc, "group size must be a power of two");
6774     return false;
6775   }
6776   if (parseSwizzleOperand(LaneIdx,
6777                           0, GroupSize - 1,
6778                           "lane id must be in the interval [0,group size - 1]",
6779                           Loc)) {
6780     Imm = encodeBitmaskPerm(BITMASK_MAX - GroupSize + 1, LaneIdx, 0);
6781     return true;
6782   }
6783   return false;
6784 }
6785 
6786 bool
6787 AMDGPUAsmParser::parseSwizzleReverse(int64_t &Imm) {
6788   using namespace llvm::AMDGPU::Swizzle;
6789 
6790   SMLoc Loc;
6791   int64_t GroupSize;
6792 
6793   if (!parseSwizzleOperand(GroupSize,
6794                            2, 32,
6795                            "group size must be in the interval [2,32]",
6796                            Loc)) {
6797     return false;
6798   }
6799   if (!isPowerOf2_64(GroupSize)) {
6800     Error(Loc, "group size must be a power of two");
6801     return false;
6802   }
6803 
6804   Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize - 1);
6805   return true;
6806 }
6807 
6808 bool
6809 AMDGPUAsmParser::parseSwizzleSwap(int64_t &Imm) {
6810   using namespace llvm::AMDGPU::Swizzle;
6811 
6812   SMLoc Loc;
6813   int64_t GroupSize;
6814 
6815   if (!parseSwizzleOperand(GroupSize,
6816                            1, 16,
6817                            "group size must be in the interval [1,16]",
6818                            Loc)) {
6819     return false;
6820   }
6821   if (!isPowerOf2_64(GroupSize)) {
6822     Error(Loc, "group size must be a power of two");
6823     return false;
6824   }
6825 
6826   Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize);
6827   return true;
6828 }
6829 
6830 bool
6831 AMDGPUAsmParser::parseSwizzleBitmaskPerm(int64_t &Imm) {
6832   using namespace llvm::AMDGPU::Swizzle;
6833 
6834   if (!skipToken(AsmToken::Comma, "expected a comma")) {
6835     return false;
6836   }
6837 
6838   StringRef Ctl;
6839   SMLoc StrLoc = getLoc();
6840   if (!parseString(Ctl)) {
6841     return false;
6842   }
6843   if (Ctl.size() != BITMASK_WIDTH) {
6844     Error(StrLoc, "expected a 5-character mask");
6845     return false;
6846   }
6847 
6848   unsigned AndMask = 0;
6849   unsigned OrMask = 0;
6850   unsigned XorMask = 0;
6851 
6852   for (size_t i = 0; i < Ctl.size(); ++i) {
6853     unsigned Mask = 1 << (BITMASK_WIDTH - 1 - i);
6854     switch(Ctl[i]) {
6855     default:
6856       Error(StrLoc, "invalid mask");
6857       return false;
6858     case '0':
6859       break;
6860     case '1':
6861       OrMask |= Mask;
6862       break;
6863     case 'p':
6864       AndMask |= Mask;
6865       break;
6866     case 'i':
6867       AndMask |= Mask;
6868       XorMask |= Mask;
6869       break;
6870     }
6871   }
6872 
6873   Imm = encodeBitmaskPerm(AndMask, OrMask, XorMask);
6874   return true;
6875 }
6876 
6877 bool
6878 AMDGPUAsmParser::parseSwizzleOffset(int64_t &Imm) {
6879 
6880   SMLoc OffsetLoc = getLoc();
6881 
6882   if (!parseExpr(Imm, "a swizzle macro")) {
6883     return false;
6884   }
6885   if (!isUInt<16>(Imm)) {
6886     Error(OffsetLoc, "expected a 16-bit offset");
6887     return false;
6888   }
6889   return true;
6890 }
6891 
6892 bool
6893 AMDGPUAsmParser::parseSwizzleMacro(int64_t &Imm) {
6894   using namespace llvm::AMDGPU::Swizzle;
6895 
6896   if (skipToken(AsmToken::LParen, "expected a left parenthesis")) {
6897 
6898     SMLoc ModeLoc = getLoc();
6899     bool Ok = false;
6900 
6901     if (trySkipId(IdSymbolic[ID_QUAD_PERM])) {
6902       Ok = parseSwizzleQuadPerm(Imm);
6903     } else if (trySkipId(IdSymbolic[ID_BITMASK_PERM])) {
6904       Ok = parseSwizzleBitmaskPerm(Imm);
6905     } else if (trySkipId(IdSymbolic[ID_BROADCAST])) {
6906       Ok = parseSwizzleBroadcast(Imm);
6907     } else if (trySkipId(IdSymbolic[ID_SWAP])) {
6908       Ok = parseSwizzleSwap(Imm);
6909     } else if (trySkipId(IdSymbolic[ID_REVERSE])) {
6910       Ok = parseSwizzleReverse(Imm);
6911     } else {
6912       Error(ModeLoc, "expected a swizzle mode");
6913     }
6914 
6915     return Ok && skipToken(AsmToken::RParen, "expected a closing parenthesis");
6916   }
6917 
6918   return false;
6919 }
6920 
6921 OperandMatchResultTy
6922 AMDGPUAsmParser::parseSwizzleOp(OperandVector &Operands) {
6923   SMLoc S = getLoc();
6924   int64_t Imm = 0;
6925 
6926   if (trySkipId("offset")) {
6927 
6928     bool Ok = false;
6929     if (skipToken(AsmToken::Colon, "expected a colon")) {
6930       if (trySkipId("swizzle")) {
6931         Ok = parseSwizzleMacro(Imm);
6932       } else {
6933         Ok = parseSwizzleOffset(Imm);
6934       }
6935     }
6936 
6937     Operands.push_back(AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTySwizzle));
6938 
6939     return Ok? MatchOperand_Success : MatchOperand_ParseFail;
6940   } else {
6941     // Swizzle "offset" operand is optional.
6942     // If it is omitted, try parsing other optional operands.
6943     return parseOptionalOpr(Operands);
6944   }
6945 }
6946 
6947 bool
6948 AMDGPUOperand::isSwizzle() const {
6949   return isImmTy(ImmTySwizzle);
6950 }
6951 
6952 //===----------------------------------------------------------------------===//
6953 // VGPR Index Mode
6954 //===----------------------------------------------------------------------===//
6955 
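// Illustrative VGPR index mode operands handled below (assumed typical usage):
//   s_set_gpr_idx_on s0, gpr_idx(SRC0)
//   s_set_gpr_idx_on s0, gpr_idx(SRC0, SRC1, DST)
//   s_set_gpr_idx_on s0, 0x9              // raw 4-bit immediate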
6956 int64_t AMDGPUAsmParser::parseGPRIdxMacro() {
6957 
6958   using namespace llvm::AMDGPU::VGPRIndexMode;
6959 
6960   if (trySkipToken(AsmToken::RParen)) {
6961     return OFF;
6962   }
6963 
6964   int64_t Imm = 0;
6965 
6966   while (true) {
6967     unsigned Mode = 0;
6968     SMLoc S = getLoc();
6969 
6970     for (unsigned ModeId = ID_MIN; ModeId <= ID_MAX; ++ModeId) {
6971       if (trySkipId(IdSymbolic[ModeId])) {
6972         Mode = 1 << ModeId;
6973         break;
6974       }
6975     }
6976 
6977     if (Mode == 0) {
6978       Error(S, (Imm == 0)?
6979                "expected a VGPR index mode or a closing parenthesis" :
6980                "expected a VGPR index mode");
6981       return UNDEF;
6982     }
6983 
6984     if (Imm & Mode) {
6985       Error(S, "duplicate VGPR index mode");
6986       return UNDEF;
6987     }
6988     Imm |= Mode;
6989 
6990     if (trySkipToken(AsmToken::RParen))
6991       break;
6992     if (!skipToken(AsmToken::Comma,
6993                    "expected a comma or a closing parenthesis"))
6994       return UNDEF;
6995   }
6996 
6997   return Imm;
6998 }
6999 
7000 OperandMatchResultTy
7001 AMDGPUAsmParser::parseGPRIdxMode(OperandVector &Operands) {
7002 
7003   using namespace llvm::AMDGPU::VGPRIndexMode;
7004 
7005   int64_t Imm = 0;
7006   SMLoc S = getLoc();
7007 
7008   if (trySkipId("gpr_idx", AsmToken::LParen)) {
7009     Imm = parseGPRIdxMacro();
7010     if (Imm == UNDEF)
7011       return MatchOperand_ParseFail;
7012   } else {
7013     if (getParser().parseAbsoluteExpression(Imm))
7014       return MatchOperand_ParseFail;
7015     if (Imm < 0 || !isUInt<4>(Imm)) {
7016       Error(S, "invalid immediate: only 4-bit values are legal");
7017       return MatchOperand_ParseFail;
7018     }
7019   }
7020 
7021   Operands.push_back(
7022       AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyGprIdxMode));
7023   return MatchOperand_Success;
7024 }
7025 
7026 bool AMDGPUOperand::isGPRIdxMode() const {
7027   return isImmTy(ImmTyGprIdxMode);
7028 }
7029 
7030 //===----------------------------------------------------------------------===//
7031 // sopp branch targets
7032 //===----------------------------------------------------------------------===//
7033 
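// Illustrative branch targets handled below (assumed typical usage): either a
// label or an absolute expression that fits into a signed 16-bit offset, e.g.
//   s_branch loop_begin
//   s_cbranch_scc0 16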
7034 OperandMatchResultTy
7035 AMDGPUAsmParser::parseSOppBrTarget(OperandVector &Operands) {
7036 
7037   // Make sure we are not parsing something
7038   // that looks like a label or an expression but is not.
7039   // This will improve error messages.
7040   if (isRegister() || isModifier())
7041     return MatchOperand_NoMatch;
7042 
7043   if (!parseExpr(Operands))
7044     return MatchOperand_ParseFail;
7045 
7046   AMDGPUOperand &Opr = ((AMDGPUOperand &)*Operands[Operands.size() - 1]);
7047   assert(Opr.isImm() || Opr.isExpr());
7048   SMLoc Loc = Opr.getStartLoc();
7049 
7050   // Currently we do not support arbitrary expressions as branch targets.
7051   // Only labels and absolute expressions are accepted.
7052   if (Opr.isExpr() && !Opr.isSymbolRefExpr()) {
7053     Error(Loc, "expected an absolute expression or a label");
7054   } else if (Opr.isImm() && !Opr.isS16Imm()) {
7055     Error(Loc, "expected a 16-bit signed jump offset");
7056   }
7057 
7058   return MatchOperand_Success;
7059 }
7060 
7061 //===----------------------------------------------------------------------===//
7062 // Boolean holding registers
7063 //===----------------------------------------------------------------------===//
7064 
7065 OperandMatchResultTy
7066 AMDGPUAsmParser::parseBoolReg(OperandVector &Operands) {
7067   return parseReg(Operands);
7068 }
7069 
7070 //===----------------------------------------------------------------------===//
7071 // mubuf
7072 //===----------------------------------------------------------------------===//
7073 
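// Illustrative MUBUF syntax converted below (assumed typical usage):
//   buffer_load_dword v1, off, s[4:7], s1 offset:16 glc slc
//   buffer_atomic_add v1, v0, s[4:7], s1 idxen glc   // glc selects the "return" form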
7074 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultCPol() const {
7075   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyCPol);
7076 }
7077 
7078 void AMDGPUAsmParser::cvtMubufImpl(MCInst &Inst,
7079                                    const OperandVector &Operands,
7080                                    bool IsAtomic,
7081                                    bool IsLds) {
7082   bool IsLdsOpcode = IsLds;
7083   bool HasLdsModifier = false;
7084   OptionalImmIndexMap OptionalIdx;
7085   unsigned FirstOperandIdx = 1;
7086   bool IsAtomicReturn = false;
7087 
7088   if (IsAtomic) {
7089     for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) {
7090       AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
7091       if (!Op.isCPol())
7092         continue;
7093       IsAtomicReturn = Op.getImm() & AMDGPU::CPol::GLC;
7094       break;
7095     }
7096 
7097     if (!IsAtomicReturn) {
7098       int NewOpc = AMDGPU::getAtomicNoRetOp(Inst.getOpcode());
7099       if (NewOpc != -1)
7100         Inst.setOpcode(NewOpc);
7101     }
7102 
7103     IsAtomicReturn =  MII.get(Inst.getOpcode()).TSFlags &
7104                       SIInstrFlags::IsAtomicRet;
7105   }
7106 
7107   for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) {
7108     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
7109 
7110     // Add the register arguments
7111     if (Op.isReg()) {
7112       Op.addRegOperands(Inst, 1);
7113       // Insert a tied src for atomic return dst.
7114       // This cannot be postponed as subsequent calls to
7115       // addImmOperands rely on correct number of MC operands.
7116       if (IsAtomicReturn && i == FirstOperandIdx)
7117         Op.addRegOperands(Inst, 1);
7118       continue;
7119     }
7120 
7121     // Handle the case where soffset is an immediate
7122     if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
7123       Op.addImmOperands(Inst, 1);
7124       continue;
7125     }
7126 
7127     HasLdsModifier |= Op.isLDS();
7128 
7129     // Handle tokens like 'offen' which are sometimes hard-coded into the
7130     // asm string.  There are no MCInst operands for these.
7131     if (Op.isToken()) {
7132       continue;
7133     }
7134     assert(Op.isImm());
7135 
7136     // Handle optional arguments
7137     OptionalIdx[Op.getImmTy()] = i;
7138   }
7139 
7140   // This is a workaround for an llvm quirk which may result in an
7141   // incorrect instruction selection. Lds and non-lds versions of
7142   // MUBUF instructions are identical except that lds versions
7143   // have a mandatory 'lds' modifier. However, this modifier follows
7144   // optional modifiers, and the llvm asm matcher regards this 'lds'
7145   // modifier as an optional one. As a result, an lds version
7146   // of an opcode may be selected even if it has no 'lds' modifier.
7147   if (IsLdsOpcode && !HasLdsModifier) {
7148     int NoLdsOpcode = AMDGPU::getMUBUFNoLdsInst(Inst.getOpcode());
7149     if (NoLdsOpcode != -1) { // Got lds version - correct it.
7150       Inst.setOpcode(NoLdsOpcode);
7151       IsLdsOpcode = false;
7152     }
7153   }
7154 
7155   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset);
7156   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol, 0);
7157 
7158   if (!IsLdsOpcode) { // tfe is not legal with lds opcodes
7159     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
7160   }
7161   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySWZ);
7162 }
7163 
7164 void AMDGPUAsmParser::cvtMtbuf(MCInst &Inst, const OperandVector &Operands) {
7165   OptionalImmIndexMap OptionalIdx;
7166 
7167   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
7168     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
7169 
7170     // Add the register arguments
7171     if (Op.isReg()) {
7172       Op.addRegOperands(Inst, 1);
7173       continue;
7174     }
7175 
7176     // Handle the case where soffset is an immediate
7177     if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
7178       Op.addImmOperands(Inst, 1);
7179       continue;
7180     }
7181 
7182     // Handle tokens like 'offen' which are sometimes hard-coded into the
7183     // asm string.  There are no MCInst operands for these.
7184     if (Op.isToken()) {
7185       continue;
7186     }
7187     assert(Op.isImm());
7188 
7189     // Handle optional arguments
7190     OptionalIdx[Op.getImmTy()] = i;
7191   }
7192 
7193   addOptionalImmOperand(Inst, Operands, OptionalIdx,
7194                         AMDGPUOperand::ImmTyOffset);
7195   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyFORMAT);
7196   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol, 0);
7197   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
7198   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySWZ);
7199 }
7200 
7201 //===----------------------------------------------------------------------===//
7202 // mimg
7203 //===----------------------------------------------------------------------===//
7204 
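// Illustrative MIMG syntax converted below (assumed typical usage):
//   image_load v[0:3], v[4:5], s[0:7] dmask:0xf unorm            // pre-gfx10
//   image_load v[0:3], v0, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D   // gfx10+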
7205 void AMDGPUAsmParser::cvtMIMG(MCInst &Inst, const OperandVector &Operands,
7206                               bool IsAtomic) {
7207   unsigned I = 1;
7208   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
7209   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
7210     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
7211   }
7212 
7213   if (IsAtomic) {
7214     // Add src, same as dst
7215     assert(Desc.getNumDefs() == 1);
7216     ((AMDGPUOperand &)*Operands[I - 1]).addRegOperands(Inst, 1);
7217   }
7218 
7219   OptionalImmIndexMap OptionalIdx;
7220 
7221   for (unsigned E = Operands.size(); I != E; ++I) {
7222     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
7223 
7224     // Add the register arguments
7225     if (Op.isReg()) {
7226       Op.addRegOperands(Inst, 1);
7227     } else if (Op.isImmModifier()) {
7228       OptionalIdx[Op.getImmTy()] = I;
7229     } else if (!Op.isToken()) {
7230       llvm_unreachable("unexpected operand type");
7231     }
7232   }
7233 
7234   bool IsGFX10Plus = isGFX10Plus();
7235 
7236   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDMask);
7237   if (IsGFX10Plus)
7238     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDim, -1);
7239   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyUNorm);
7240   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol);
7241   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyR128A16);
7242   if (IsGFX10Plus)
7243     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyA16);
7244   if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::tfe) != -1)
7245     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
7246   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyLWE);
7247   if (!IsGFX10Plus)
7248     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDA);
7249   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyD16);
7250 }
7251 
7252 void AMDGPUAsmParser::cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands) {
7253   cvtMIMG(Inst, Operands, true);
7254 }
7255 
7256 void AMDGPUAsmParser::cvtSMEMAtomic(MCInst &Inst, const OperandVector &Operands) {
7257   OptionalImmIndexMap OptionalIdx;
7258   bool IsAtomicReturn = false;
7259 
7260   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
7261     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
7262     if (!Op.isCPol())
7263       continue;
7264     IsAtomicReturn = Op.getImm() & AMDGPU::CPol::GLC;
7265     break;
7266   }
7267 
7268   if (!IsAtomicReturn) {
7269     int NewOpc = AMDGPU::getAtomicNoRetOp(Inst.getOpcode());
7270     if (NewOpc != -1)
7271       Inst.setOpcode(NewOpc);
7272   }
7273 
7274   IsAtomicReturn =  MII.get(Inst.getOpcode()).TSFlags &
7275                     SIInstrFlags::IsAtomicRet;
7276 
7277   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
7278     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
7279 
7280     // Add the register arguments
7281     if (Op.isReg()) {
7282       Op.addRegOperands(Inst, 1);
7283       if (IsAtomicReturn && i == 1)
7284         Op.addRegOperands(Inst, 1);
7285       continue;
7286     }
7287 
7288     // Handle the case where soffset is an immediate
7289     if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
7290       Op.addImmOperands(Inst, 1);
7291       continue;
7292     }
7293 
7294     // Handle tokens like 'offen' which are sometimes hard-coded into the
7295     // asm string.  There are no MCInst operands for these.
7296     if (Op.isToken()) {
7297       continue;
7298     }
7299     assert(Op.isImm());
7300 
7301     // Handle optional arguments
7302     OptionalIdx[Op.getImmTy()] = i;
7303   }
7304 
7305   if ((int)Inst.getNumOperands() <=
7306       AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::offset))
7307     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset);
7308   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol, 0);
7309 }
7310 
7311 void AMDGPUAsmParser::cvtIntersectRay(MCInst &Inst,
7312                                       const OperandVector &Operands) {
7313   for (unsigned I = 1; I < Operands.size(); ++I) {
7314     auto &Operand = (AMDGPUOperand &)*Operands[I];
7315     if (Operand.isReg())
7316       Operand.addRegOperands(Inst, 1);
7317   }
7318 
7319   Inst.addOperand(MCOperand::createImm(1)); // a16
7320 }
7321 
7322 //===----------------------------------------------------------------------===//
7323 // smrd
7324 //===----------------------------------------------------------------------===//
7325 
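// Illustrative SMRD/SMEM offsets matched below (assumed typical usage):
//   s_load_dword s0, s[2:3], 0x10       // 8-bit immediate offset
//   s_load_dword s0, s[2:3], 0x1fff0    // 32-bit literal offset (CI)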
7326 bool AMDGPUOperand::isSMRDOffset8() const {
7327   return isImm() && isUInt<8>(getImm());
7328 }
7329 
7330 bool AMDGPUOperand::isSMEMOffset() const {
7331   return isImm(); // Offset range is checked later by validator.
7332 }
7333 
7334 bool AMDGPUOperand::isSMRDLiteralOffset() const {
7335   // 32-bit literals are only supported on CI and we only want to use them
7336   // when the offset is wider than 8 bits.
7337   return isImm() && !isUInt<8>(getImm()) && isUInt<32>(getImm());
7338 }
7339 
7340 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDOffset8() const {
7341   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
7342 }
7343 
7344 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMEMOffset() const {
7345   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
7346 }
7347 
7348 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDLiteralOffset() const {
7349   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
7350 }
7351 
7352 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultFlatOffset() const {
7353   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
7354 }
7355 
7356 //===----------------------------------------------------------------------===//
7357 // vop3
7358 //===----------------------------------------------------------------------===//
7359 
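// The output-modifier conversions below map assembler values onto their
// encodings: mul:1 -> 0, mul:2 -> 1, mul:4 -> 2 and div:1 -> 0, div:2 -> 3
// (derived from the shifts/assignments in the helpers).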
7360 static bool ConvertOmodMul(int64_t &Mul) {
7361   if (Mul != 1 && Mul != 2 && Mul != 4)
7362     return false;
7363 
7364   Mul >>= 1;
7365   return true;
7366 }
7367 
7368 static bool ConvertOmodDiv(int64_t &Div) {
7369   if (Div == 1) {
7370     Div = 0;
7371     return true;
7372   }
7373 
7374   if (Div == 2) {
7375     Div = 3;
7376     return true;
7377   }
7378 
7379   return false;
7380 }
7381 
7382 // Both bound_ctrl:0 and bound_ctrl:1 are encoded as 1.
7383 // This is intentional and ensures compatibility with sp3.
7384 // See bug 35397 for details.
7385 static bool ConvertBoundCtrl(int64_t &BoundCtrl) {
7386   if (BoundCtrl == 0 || BoundCtrl == 1) {
7387     BoundCtrl = 1;
7388     return true;
7389   }
7390   return false;
7391 }
7392 
7393 // Note: the order in this table matches the order of operands in AsmString.
7394 static const OptionalOperand AMDGPUOptionalOperandTable[] = {
7395   {"offen",   AMDGPUOperand::ImmTyOffen, true, nullptr},
7396   {"idxen",   AMDGPUOperand::ImmTyIdxen, true, nullptr},
7397   {"addr64",  AMDGPUOperand::ImmTyAddr64, true, nullptr},
7398   {"offset0", AMDGPUOperand::ImmTyOffset0, false, nullptr},
7399   {"offset1", AMDGPUOperand::ImmTyOffset1, false, nullptr},
7400   {"gds",     AMDGPUOperand::ImmTyGDS, true, nullptr},
7401   {"lds",     AMDGPUOperand::ImmTyLDS, true, nullptr},
7402   {"offset",  AMDGPUOperand::ImmTyOffset, false, nullptr},
7403   {"inst_offset", AMDGPUOperand::ImmTyInstOffset, false, nullptr},
7404   {"",        AMDGPUOperand::ImmTyCPol, false, nullptr},
7405   {"swz",     AMDGPUOperand::ImmTySWZ, true, nullptr},
7406   {"tfe",     AMDGPUOperand::ImmTyTFE, true, nullptr},
7407   {"d16",     AMDGPUOperand::ImmTyD16, true, nullptr},
7408   {"high",    AMDGPUOperand::ImmTyHigh, true, nullptr},
7409   {"clamp",   AMDGPUOperand::ImmTyClampSI, true, nullptr},
7410   {"omod",    AMDGPUOperand::ImmTyOModSI, false, ConvertOmodMul},
7411   {"unorm",   AMDGPUOperand::ImmTyUNorm, true, nullptr},
7412   {"da",      AMDGPUOperand::ImmTyDA,    true, nullptr},
7413   {"r128",    AMDGPUOperand::ImmTyR128A16,  true, nullptr},
7414   {"a16",     AMDGPUOperand::ImmTyA16,  true, nullptr},
7415   {"lwe",     AMDGPUOperand::ImmTyLWE,   true, nullptr},
7416   {"d16",     AMDGPUOperand::ImmTyD16,   true, nullptr},
7417   {"dmask",   AMDGPUOperand::ImmTyDMask, false, nullptr},
7418   {"dim",     AMDGPUOperand::ImmTyDim,   false, nullptr},
7419   {"row_mask",   AMDGPUOperand::ImmTyDppRowMask, false, nullptr},
7420   {"bank_mask",  AMDGPUOperand::ImmTyDppBankMask, false, nullptr},
7421   {"bound_ctrl", AMDGPUOperand::ImmTyDppBoundCtrl, false, ConvertBoundCtrl},
7422   {"fi",         AMDGPUOperand::ImmTyDppFi, false, nullptr},
7423   {"dst_sel",    AMDGPUOperand::ImmTySdwaDstSel, false, nullptr},
7424   {"src0_sel",   AMDGPUOperand::ImmTySdwaSrc0Sel, false, nullptr},
7425   {"src1_sel",   AMDGPUOperand::ImmTySdwaSrc1Sel, false, nullptr},
7426   {"dst_unused", AMDGPUOperand::ImmTySdwaDstUnused, false, nullptr},
7427   {"compr", AMDGPUOperand::ImmTyExpCompr, true, nullptr },
7428   {"vm", AMDGPUOperand::ImmTyExpVM, true, nullptr},
7429   {"op_sel", AMDGPUOperand::ImmTyOpSel, false, nullptr},
7430   {"op_sel_hi", AMDGPUOperand::ImmTyOpSelHi, false, nullptr},
7431   {"neg_lo", AMDGPUOperand::ImmTyNegLo, false, nullptr},
7432   {"neg_hi", AMDGPUOperand::ImmTyNegHi, false, nullptr},
7433   {"blgp", AMDGPUOperand::ImmTyBLGP, false, nullptr},
7434   {"cbsz", AMDGPUOperand::ImmTyCBSZ, false, nullptr},
7435   {"abid", AMDGPUOperand::ImmTyABID, false, nullptr}
7436 };
7437 
7438 void AMDGPUAsmParser::onBeginOfFile() {
7439   if (!getParser().getStreamer().getTargetStreamer() ||
7440       getSTI().getTargetTriple().getArch() == Triple::r600)
7441     return;
7442 
7443   if (!getTargetStreamer().getTargetID())
7444     getTargetStreamer().initializeTargetID(getSTI(), getSTI().getFeatureString());
7445 
7446   if (isHsaAbiVersion3AndAbove(&getSTI()))
7447     getTargetStreamer().EmitDirectiveAMDGCNTarget();
7448 }
7449 
7450 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOperand(OperandVector &Operands) {
7451 
7452   OperandMatchResultTy res = parseOptionalOpr(Operands);
7453 
7454   // This is a hack to enable hardcoded mandatory operands which follow
7455   // optional operands.
7456   //
7457   // The current design assumes that all operands after the first optional operand
7458   // are also optional. However, the implementation of some instructions violates
7459   // this rule (see e.g. flat/global atomics which have hardcoded 'glc' operands).
7460   //
7461   // To alleviate this problem, we have to (implicitly) parse extra operands
7462   // to make sure the autogenerated parser of custom operands never hits hardcoded
7463   // mandatory operands.
7464 
7465   for (unsigned i = 0; i < MAX_OPR_LOOKAHEAD; ++i) {
7466     if (res != MatchOperand_Success ||
7467         isToken(AsmToken::EndOfStatement))
7468       break;
7469 
7470     trySkipToken(AsmToken::Comma);
7471     res = parseOptionalOpr(Operands);
7472   }
7473 
7474   return res;
7475 }
7476 
7477 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOpr(OperandVector &Operands) {
7478   OperandMatchResultTy res;
7479   for (const OptionalOperand &Op : AMDGPUOptionalOperandTable) {
7480     // try to parse any optional operand here
7481     if (Op.IsBit) {
7482       res = parseNamedBit(Op.Name, Operands, Op.Type);
7483     } else if (Op.Type == AMDGPUOperand::ImmTyOModSI) {
7484       res = parseOModOperand(Operands);
7485     } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstSel ||
7486                Op.Type == AMDGPUOperand::ImmTySdwaSrc0Sel ||
7487                Op.Type == AMDGPUOperand::ImmTySdwaSrc1Sel) {
7488       res = parseSDWASel(Operands, Op.Name, Op.Type);
7489     } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstUnused) {
7490       res = parseSDWADstUnused(Operands);
7491     } else if (Op.Type == AMDGPUOperand::ImmTyOpSel ||
7492                Op.Type == AMDGPUOperand::ImmTyOpSelHi ||
7493                Op.Type == AMDGPUOperand::ImmTyNegLo ||
7494                Op.Type == AMDGPUOperand::ImmTyNegHi) {
7495       res = parseOperandArrayWithPrefix(Op.Name, Operands, Op.Type,
7496                                         Op.ConvertResult);
7497     } else if (Op.Type == AMDGPUOperand::ImmTyDim) {
7498       res = parseDim(Operands);
7499     } else if (Op.Type == AMDGPUOperand::ImmTyCPol) {
7500       res = parseCPol(Operands);
7501     } else {
7502       res = parseIntWithPrefix(Op.Name, Operands, Op.Type, Op.ConvertResult);
7503     }
7504     if (res != MatchOperand_NoMatch) {
7505       return res;
7506     }
7507   }
7508   return MatchOperand_NoMatch;
7509 }
7510 
7511 OperandMatchResultTy AMDGPUAsmParser::parseOModOperand(OperandVector &Operands) {
7512   StringRef Name = getTokenStr();
7513   if (Name == "mul") {
7514     return parseIntWithPrefix("mul", Operands,
7515                               AMDGPUOperand::ImmTyOModSI, ConvertOmodMul);
7516   }
7517 
7518   if (Name == "div") {
7519     return parseIntWithPrefix("div", Operands,
7520                               AMDGPUOperand::ImmTyOModSI, ConvertOmodDiv);
7521   }
7522 
7523   return MatchOperand_NoMatch;
7524 }
7525 
7526 void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands) {
7527   cvtVOP3P(Inst, Operands);
7528 
7529   int Opc = Inst.getOpcode();
7530 
7531   int SrcNum;
7532   const int Ops[] = { AMDGPU::OpName::src0,
7533                       AMDGPU::OpName::src1,
7534                       AMDGPU::OpName::src2 };
7535   for (SrcNum = 0;
7536        SrcNum < 3 && AMDGPU::getNamedOperandIdx(Opc, Ops[SrcNum]) != -1;
7537        ++SrcNum);
7538   assert(SrcNum > 0);
7539 
7540   int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
7541   unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
7542 
7543   if ((OpSel & (1 << SrcNum)) != 0) {
7544     int ModIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers);
7545     uint32_t ModVal = Inst.getOperand(ModIdx).getImm();
7546     Inst.getOperand(ModIdx).setImm(ModVal | SISrcMods::DST_OP_SEL);
7547   }
7548 }
7549 
7550 static bool isRegOrImmWithInputMods(const MCInstrDesc &Desc, unsigned OpNum) {
7551       // 1. This operand is an input modifier
7552   return Desc.OpInfo[OpNum].OperandType == AMDGPU::OPERAND_INPUT_MODS
7553       // 2. This is not the last operand
7554       && Desc.NumOperands > (OpNum + 1)
7555       // 3. The next operand is a register class
7556       && Desc.OpInfo[OpNum + 1].RegClass != -1
7557       // 4. The next register is not tied to any other operand
7558       && Desc.getOperandConstraint(OpNum + 1, MCOI::OperandConstraint::TIED_TO) == -1;
7559 }
7560 
7561 void AMDGPUAsmParser::cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands)
7562 {
7563   OptionalImmIndexMap OptionalIdx;
7564   unsigned Opc = Inst.getOpcode();
7565 
7566   unsigned I = 1;
7567   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
7568   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
7569     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
7570   }
7571 
7572   for (unsigned E = Operands.size(); I != E; ++I) {
7573     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
7574     if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
7575       Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
7576     } else if (Op.isInterpSlot() ||
7577                Op.isInterpAttr() ||
7578                Op.isAttrChan()) {
7579       Inst.addOperand(MCOperand::createImm(Op.getImm()));
7580     } else if (Op.isImmModifier()) {
7581       OptionalIdx[Op.getImmTy()] = I;
7582     } else {
7583       llvm_unreachable("unhandled operand type");
7584     }
7585   }
7586 
7587   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::high) != -1) {
7588     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyHigh);
7589   }
7590 
7591   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) {
7592     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI);
7593   }
7594 
7595   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) {
7596     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI);
7597   }
7598 }
7599 
7600 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands,
7601                               OptionalImmIndexMap &OptionalIdx) {
7602   unsigned Opc = Inst.getOpcode();
7603 
7604   unsigned I = 1;
7605   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
7606   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
7607     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
7608   }
7609 
7610   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers) != -1) {
7611     // This instruction has src modifiers
7612     for (unsigned E = Operands.size(); I != E; ++I) {
7613       AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
7614       if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
7615         Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
7616       } else if (Op.isImmModifier()) {
7617         OptionalIdx[Op.getImmTy()] = I;
7618       } else if (Op.isRegOrImm()) {
7619         Op.addRegOrImmOperands(Inst, 1);
7620       } else {
7621         llvm_unreachable("unhandled operand type");
7622       }
7623     }
7624   } else {
7625     // No src modifiers
7626     for (unsigned E = Operands.size(); I != E; ++I) {
7627       AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
7628       if (Op.isMod()) {
7629         OptionalIdx[Op.getImmTy()] = I;
7630       } else {
7631         Op.addRegOrImmOperands(Inst, 1);
7632       }
7633     }
7634   }
7635 
7636   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) {
7637     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI);
7638   }
7639 
7640   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) {
7641     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI);
7642   }
7643 
7644   // Special case v_mac_{f16, f32} and v_fmac_{f16, f32} (gfx906/gfx10+):
7645   // they have a src2 register operand that is tied to the dst operand.
7646   // We don't allow modifiers for this operand in the assembler, so src2_modifiers
7647   // should be 0.
7648   if (Opc == AMDGPU::V_MAC_F32_e64_gfx6_gfx7 ||
7649       Opc == AMDGPU::V_MAC_F32_e64_gfx10 ||
7650       Opc == AMDGPU::V_MAC_F32_e64_vi ||
7651       Opc == AMDGPU::V_MAC_LEGACY_F32_e64_gfx6_gfx7 ||
7652       Opc == AMDGPU::V_MAC_LEGACY_F32_e64_gfx10 ||
7653       Opc == AMDGPU::V_MAC_F16_e64_vi ||
7654       Opc == AMDGPU::V_FMAC_F64_e64_gfx90a ||
7655       Opc == AMDGPU::V_FMAC_F32_e64_gfx10 ||
7656       Opc == AMDGPU::V_FMAC_F32_e64_vi ||
7657       Opc == AMDGPU::V_FMAC_LEGACY_F32_e64_gfx10 ||
7658       Opc == AMDGPU::V_FMAC_F16_e64_gfx10) {
7659     auto it = Inst.begin();
7660     std::advance(it, AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2_modifiers));
7661     it = Inst.insert(it, MCOperand::createImm(0)); // no modifiers for src2
7662     ++it;
7663     // Copy the operand to ensure it's not invalidated when Inst grows.
7664     Inst.insert(it, MCOperand(Inst.getOperand(0))); // src2 = dst
7665   }
7666 }
7667 
7668 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands) {
7669   OptionalImmIndexMap OptionalIdx;
7670   cvtVOP3(Inst, Operands, OptionalIdx);
7671 }
7672 
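// Illustrative VOP3P syntax converted below (assumed typical usage):
//   v_pk_add_f16 v0, v1, v2 op_sel:[0,1] op_sel_hi:[1,0] neg_lo:[0,1] neg_hi:[1,0]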
7673 void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands,
7674                                OptionalImmIndexMap &OptIdx) {
7675   const int Opc = Inst.getOpcode();
7676   const MCInstrDesc &Desc = MII.get(Opc);
7677 
7678   const bool IsPacked = (Desc.TSFlags & SIInstrFlags::IsPacked) != 0;
7679 
7680   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst_in) != -1) {
7681     assert(!IsPacked);
7682     Inst.addOperand(Inst.getOperand(0));
7683   }
7684 
7685   // FIXME: This is messy. Parse the modifiers as if it was a normal VOP3
7686   // instruction, and then figure out where to actually put the modifiers
7687 
7688   int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
7689   if (OpSelIdx != -1) {
7690     addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSel);
7691   }
7692 
7693   int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi);
7694   if (OpSelHiIdx != -1) {
7695     int DefaultVal = IsPacked ? -1 : 0;
7696     addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSelHi,
7697                           DefaultVal);
7698   }
7699 
7700   int NegLoIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_lo);
7701   if (NegLoIdx != -1) {
7702     addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegLo);
7703     addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegHi);
7704   }
7705 
7706   const int Ops[] = { AMDGPU::OpName::src0,
7707                       AMDGPU::OpName::src1,
7708                       AMDGPU::OpName::src2 };
7709   const int ModOps[] = { AMDGPU::OpName::src0_modifiers,
7710                          AMDGPU::OpName::src1_modifiers,
7711                          AMDGPU::OpName::src2_modifiers };
7712 
7713   unsigned OpSel = 0;
7714   unsigned OpSelHi = 0;
7715   unsigned NegLo = 0;
7716   unsigned NegHi = 0;
7717 
7718   if (OpSelIdx != -1)
7719     OpSel = Inst.getOperand(OpSelIdx).getImm();
7720 
7721   if (OpSelHiIdx != -1)
7722     OpSelHi = Inst.getOperand(OpSelHiIdx).getImm();
7723 
7724   if (NegLoIdx != -1) {
7725     int NegHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_hi);
7726     NegLo = Inst.getOperand(NegLoIdx).getImm();
7727     NegHi = Inst.getOperand(NegHiIdx).getImm();
7728   }
7729 
7730   for (int J = 0; J < 3; ++J) {
7731     int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]);
7732     if (OpIdx == -1)
7733       break;
7734 
7735     uint32_t ModVal = 0;
7736 
7737     if ((OpSel & (1 << J)) != 0)
7738       ModVal |= SISrcMods::OP_SEL_0;
7739 
7740     if ((OpSelHi & (1 << J)) != 0)
7741       ModVal |= SISrcMods::OP_SEL_1;
7742 
7743     if ((NegLo & (1 << J)) != 0)
7744       ModVal |= SISrcMods::NEG;
7745 
7746     if ((NegHi & (1 << J)) != 0)
7747       ModVal |= SISrcMods::NEG_HI;
7748 
7749     int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]);
7750 
7751     Inst.getOperand(ModIdx).setImm(Inst.getOperand(ModIdx).getImm() | ModVal);
7752   }
7753 }
7754 
7755 void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands) {
7756   OptionalImmIndexMap OptIdx;
7757   cvtVOP3(Inst, Operands, OptIdx);
7758   cvtVOP3P(Inst, Operands, OptIdx);
7759 }
7760 
7761 //===----------------------------------------------------------------------===//
7762 // dpp
7763 //===----------------------------------------------------------------------===//
7764 
7765 bool AMDGPUOperand::isDPP8() const {
7766   return isImmTy(ImmTyDPP8);
7767 }
7768 
7769 bool AMDGPUOperand::isDPPCtrl() const {
7770   using namespace AMDGPU::DPP;
7771 
7772   bool result = isImm() && getImmTy() == ImmTyDppCtrl && isUInt<9>(getImm());
7773   if (result) {
7774     int64_t Imm = getImm();
7775     return (Imm >= DppCtrl::QUAD_PERM_FIRST && Imm <= DppCtrl::QUAD_PERM_LAST) ||
7776            (Imm >= DppCtrl::ROW_SHL_FIRST && Imm <= DppCtrl::ROW_SHL_LAST) ||
7777            (Imm >= DppCtrl::ROW_SHR_FIRST && Imm <= DppCtrl::ROW_SHR_LAST) ||
7778            (Imm >= DppCtrl::ROW_ROR_FIRST && Imm <= DppCtrl::ROW_ROR_LAST) ||
7779            (Imm == DppCtrl::WAVE_SHL1) ||
7780            (Imm == DppCtrl::WAVE_ROL1) ||
7781            (Imm == DppCtrl::WAVE_SHR1) ||
7782            (Imm == DppCtrl::WAVE_ROR1) ||
7783            (Imm == DppCtrl::ROW_MIRROR) ||
7784            (Imm == DppCtrl::ROW_HALF_MIRROR) ||
7785            (Imm == DppCtrl::BCAST15) ||
7786            (Imm == DppCtrl::BCAST31) ||
7787            (Imm >= DppCtrl::ROW_SHARE_FIRST && Imm <= DppCtrl::ROW_SHARE_LAST) ||
7788            (Imm >= DppCtrl::ROW_XMASK_FIRST && Imm <= DppCtrl::ROW_XMASK_LAST);
7789   }
7790   return false;
7791 }
7792 
7793 //===----------------------------------------------------------------------===//
7794 // mAI
7795 //===----------------------------------------------------------------------===//
7796 
7797 bool AMDGPUOperand::isBLGP() const {
7798   return isImm() && getImmTy() == ImmTyBLGP && isUInt<3>(getImm());
7799 }
7800 
7801 bool AMDGPUOperand::isCBSZ() const {
7802   return isImm() && getImmTy() == ImmTyCBSZ && isUInt<3>(getImm());
7803 }
7804 
7805 bool AMDGPUOperand::isABID() const {
7806   return isImm() && getImmTy() == ImmTyABID && isUInt<4>(getImm());
7807 }
7808 
7809 bool AMDGPUOperand::isS16Imm() const {
7810   return isImm() && (isInt<16>(getImm()) || isUInt<16>(getImm()));
7811 }
7812 
7813 bool AMDGPUOperand::isU16Imm() const {
7814   return isImm() && isUInt<16>(getImm());
7815 }
7816 
7817 //===----------------------------------------------------------------------===//
7818 // dim
7819 //===----------------------------------------------------------------------===//
7820 
7821 bool AMDGPUAsmParser::parseDimId(unsigned &Encoding) {
7822   // We want to allow "dim:1D" etc.,
7823   // but the initial 1 is tokenized as an integer.
7824   std::string Token;
7825   if (isToken(AsmToken::Integer)) {
7826     SMLoc Loc = getToken().getEndLoc();
7827     Token = std::string(getTokenStr());
7828     lex();
7829     if (getLoc() != Loc)
7830       return false;
7831   }
7832 
7833   StringRef Suffix;
7834   if (!parseId(Suffix))
7835     return false;
7836   Token += Suffix;
7837 
7838   StringRef DimId = Token;
7839   if (DimId.startswith("SQ_RSRC_IMG_"))
7840     DimId = DimId.drop_front(12);
7841 
7842   const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByAsmSuffix(DimId);
7843   if (!DimInfo)
7844     return false;
7845 
7846   Encoding = DimInfo->Encoding;
7847   return true;
7848 }
7849 
7850 OperandMatchResultTy AMDGPUAsmParser::parseDim(OperandVector &Operands) {
7851   if (!isGFX10Plus())
7852     return MatchOperand_NoMatch;
7853 
7854   SMLoc S = getLoc();
7855 
7856   if (!trySkipId("dim", AsmToken::Colon))
7857     return MatchOperand_NoMatch;
7858 
7859   unsigned Encoding;
7860   SMLoc Loc = getLoc();
7861   if (!parseDimId(Encoding)) {
7862     Error(Loc, "invalid dim value");
7863     return MatchOperand_ParseFail;
7864   }
7865 
7866   Operands.push_back(AMDGPUOperand::CreateImm(this, Encoding, S,
7867                                               AMDGPUOperand::ImmTyDim));
7868   return MatchOperand_Success;
7869 }
7870 
7871 //===----------------------------------------------------------------------===//
7872 // dpp
7873 //===----------------------------------------------------------------------===//
7874 
7875 OperandMatchResultTy AMDGPUAsmParser::parseDPP8(OperandVector &Operands) {
7876   SMLoc S = getLoc();
7877 
7878   if (!isGFX10Plus() || !trySkipId("dpp8", AsmToken::Colon))
7879     return MatchOperand_NoMatch;
7880 
7881   // dpp8:[%d,%d,%d,%d,%d,%d,%d,%d]
7882 
7883   int64_t Sels[8];
7884 
7885   if (!skipToken(AsmToken::LBrac, "expected an opening square bracket"))
7886     return MatchOperand_ParseFail;
7887 
7888   for (size_t i = 0; i < 8; ++i) {
7889     if (i > 0 && !skipToken(AsmToken::Comma, "expected a comma"))
7890       return MatchOperand_ParseFail;
7891 
7892     SMLoc Loc = getLoc();
7893     if (getParser().parseAbsoluteExpression(Sels[i]))
7894       return MatchOperand_ParseFail;
7895     if (Sels[i] < 0 || Sels[i] > 7) {
7896       Error(Loc, "expected a 3-bit value");
7897       return MatchOperand_ParseFail;
7898     }
7899   }
7900 
7901   if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
7902     return MatchOperand_ParseFail;
7903 
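  // Pack the eight 3-bit lane selects into a single immediate; lane i
  // occupies bits [3*i+2 : 3*i].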
7904   unsigned DPP8 = 0;
7905   for (size_t i = 0; i < 8; ++i)
7906     DPP8 |= (Sels[i] << (i * 3));
7907 
7908   Operands.push_back(AMDGPUOperand::CreateImm(this, DPP8, S, AMDGPUOperand::ImmTyDPP8));
7909   return MatchOperand_Success;
7910 }
7911 
7912 bool
7913 AMDGPUAsmParser::isSupportedDPPCtrl(StringRef Ctrl,
7914                                     const OperandVector &Operands) {
7915   if (Ctrl == "row_newbcast")
7916     return isGFX90A();
7917 
7918   if (Ctrl == "row_share" ||
7919       Ctrl == "row_xmask")
7920     return isGFX10Plus();
7921 
7922   if (Ctrl == "wave_shl" ||
7923       Ctrl == "wave_shr" ||
7924       Ctrl == "wave_rol" ||
7925       Ctrl == "wave_ror" ||
7926       Ctrl == "row_bcast")
7927     return isVI() || isGFX9();
7928 
7929   return Ctrl == "row_mirror" ||
7930          Ctrl == "row_half_mirror" ||
7931          Ctrl == "quad_perm" ||
7932          Ctrl == "row_shl" ||
7933          Ctrl == "row_shr" ||
7934          Ctrl == "row_ror";
7935 }
7936 
7937 int64_t
7938 AMDGPUAsmParser::parseDPPCtrlPerm() {
7939   // quad_perm:[%d,%d,%d,%d]
7940 
7941   if (!skipToken(AsmToken::LBrac, "expected an opening square bracket"))
7942     return -1;
7943 
7944   int64_t Val = 0;
7945   for (int i = 0; i < 4; ++i) {
7946     if (i > 0 && !skipToken(AsmToken::Comma, "expected a comma"))
7947       return -1;
7948 
7949     int64_t Temp;
7950     SMLoc Loc = getLoc();
7951     if (getParser().parseAbsoluteExpression(Temp))
7952       return -1;
7953     if (Temp < 0 || Temp > 3) {
7954       Error(Loc, "expected a 2-bit value");
7955       return -1;
7956     }
7957 
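    // Each lane select is 2 bits wide; lane i lands in bits [2*i+1 : 2*i].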
7958     Val += (Temp << i * 2);
7959   }
7960 
7961   if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
7962     return -1;
7963 
7964   return Val;
7965 }
7966 
7967 int64_t
7968 AMDGPUAsmParser::parseDPPCtrlSel(StringRef Ctrl) {
7969   using namespace AMDGPU::DPP;
7970 
7971   // sel:%d
7972 
7973   int64_t Val;
7974   SMLoc Loc = getLoc();
7975 
7976   if (getParser().parseAbsoluteExpression(Val))
7977     return -1;
7978 
7979   struct DppCtrlCheck {
7980     int64_t Ctrl;
7981     int Lo;
7982     int Hi;
7983   };
7984 
7985   DppCtrlCheck Check = StringSwitch<DppCtrlCheck>(Ctrl)
7986     .Case("wave_shl",  {DppCtrl::WAVE_SHL1,       1,  1})
7987     .Case("wave_rol",  {DppCtrl::WAVE_ROL1,       1,  1})
7988     .Case("wave_shr",  {DppCtrl::WAVE_SHR1,       1,  1})
7989     .Case("wave_ror",  {DppCtrl::WAVE_ROR1,       1,  1})
7990     .Case("row_shl",   {DppCtrl::ROW_SHL0,        1, 15})
7991     .Case("row_shr",   {DppCtrl::ROW_SHR0,        1, 15})
7992     .Case("row_ror",   {DppCtrl::ROW_ROR0,        1, 15})
7993     .Case("row_share", {DppCtrl::ROW_SHARE_FIRST, 0, 15})
7994     .Case("row_xmask", {DppCtrl::ROW_XMASK_FIRST, 0, 15})
7995     .Case("row_newbcast", {DppCtrl::ROW_NEWBCAST_FIRST, 0, 15})
7996     .Default({-1, 0, 0});
7997 
7998   bool Valid;
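  // row_bcast is handled separately: its only legal values, 15 and 31, map to
  // distinct control codes rather than a contiguous [Lo, Hi] range.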
7999   if (Check.Ctrl == -1) {
8000     Valid = (Ctrl == "row_bcast" && (Val == 15 || Val == 31));
8001     Val = (Val == 15) ? DppCtrl::BCAST15 : DppCtrl::BCAST31;
8002   } else {
8003     Valid = Check.Lo <= Val && Val <= Check.Hi;
8004     Val = (Check.Lo == Check.Hi) ? Check.Ctrl : (Check.Ctrl | Val);
8005   }
8006 
8007   if (!Valid) {
8008     Error(Loc, Twine("invalid ", Ctrl) + Twine(" value"));
8009     return -1;
8010   }
8011 
8012   return Val;
8013 }
8014 
8015 OperandMatchResultTy
8016 AMDGPUAsmParser::parseDPPCtrl(OperandVector &Operands) {
8017   using namespace AMDGPU::DPP;
8018 
8019   if (!isToken(AsmToken::Identifier) ||
8020       !isSupportedDPPCtrl(getTokenStr(), Operands))
8021     return MatchOperand_NoMatch;
8022 
8023   SMLoc S = getLoc();
8024   int64_t Val = -1;
8025   StringRef Ctrl;
8026 
8027   parseId(Ctrl);
8028 
8029   if (Ctrl == "row_mirror") {
8030     Val = DppCtrl::ROW_MIRROR;
8031   } else if (Ctrl == "row_half_mirror") {
8032     Val = DppCtrl::ROW_HALF_MIRROR;
8033   } else {
8034     if (skipToken(AsmToken::Colon, "expected a colon")) {
8035       if (Ctrl == "quad_perm") {
8036         Val = parseDPPCtrlPerm();
8037       } else {
8038         Val = parseDPPCtrlSel(Ctrl);
8039       }
8040     }
8041   }
8042 
8043   if (Val == -1)
8044     return MatchOperand_ParseFail;
8045 
8046   Operands.push_back(
8047     AMDGPUOperand::CreateImm(this, Val, S, AMDGPUOperand::ImmTyDppCtrl));
8048   return MatchOperand_Success;
8049 }
8050 
8051 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultRowMask() const {
8052   return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppRowMask);
8053 }
8054 
8055 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultEndpgmImmOperands() const {
8056   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyEndpgm);
8057 }
8058 
8059 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBankMask() const {
8060   return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppBankMask);
8061 }
8062 
8063 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBoundCtrl() const {
8064   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppBoundCtrl);
8065 }
8066 
8067 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultFI() const {
8068   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppFi);
8069 }
8070 
8071 void AMDGPUAsmParser::cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8) {
8072   OptionalImmIndexMap OptionalIdx;
8073 
8074   unsigned Opc = Inst.getOpcode();
8075   bool HasModifiers =
8076       AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers) != -1;
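  // Operands[0] holds the instruction mnemonic; real operands start at index 1.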
8077   unsigned I = 1;
8078   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
8079   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
8080     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
8081   }
8082 
8083   int Fi = 0;
8084   for (unsigned E = Operands.size(); I != E; ++I) {
8085     auto TiedTo = Desc.getOperandConstraint(Inst.getNumOperands(),
8086                                             MCOI::TIED_TO);
8087     if (TiedTo != -1) {
8088       assert((unsigned)TiedTo < Inst.getNumOperands());
8089       // Handle the tied 'old' or 'src2' operand of MAC instructions.
8090       Inst.addOperand(Inst.getOperand(TiedTo));
8091     }
8092     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
8093     // Add the register arguments
8094     if (Op.isReg() && validateVccOperand(Op.getReg())) {
8095       // VOP2b (v_add_u32, v_sub_u32, ...) dpp uses the "vcc" token.
8096       // Skip it.
8097       continue;
8098     }
8099 
8100     if (IsDPP8) {
8101       if (Op.isDPP8()) {
8102         Op.addImmOperands(Inst, 1);
8103       } else if (HasModifiers &&
8104                  isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
8105         Op.addRegWithFPInputModsOperands(Inst, 2);
8106       } else if (Op.isFI()) {
8107         Fi = Op.getImm();
8108       } else if (Op.isReg()) {
8109         Op.addRegOperands(Inst, 1);
8110       } else {
8111         llvm_unreachable("Invalid operand type");
8112       }
8113     } else {
8114       if (HasModifiers &&
8115           isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
8116         Op.addRegWithFPInputModsOperands(Inst, 2);
8117       } else if (Op.isReg()) {
8118         Op.addRegOperands(Inst, 1);
8119       } else if (Op.isDPPCtrl()) {
8120         Op.addImmOperands(Inst, 1);
8121       } else if (Op.isImm()) {
8122         // Handle optional arguments
8123         OptionalIdx[Op.getImmTy()] = I;
8124       } else {
8125         llvm_unreachable("Invalid operand type");
8126       }
8127     }
8128   }
8129 
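  // dpp8 carries fi as a direct trailing operand; regular dpp picks up its
  // optional operands (row_mask, bank_mask, bound_ctrl, fi) from OptionalIdx.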
8130   if (IsDPP8) {
8131     using namespace llvm::AMDGPU::DPP;
8132     Inst.addOperand(MCOperand::createImm(Fi ? DPP8_FI_1 : DPP8_FI_0));
8133   } else {
8134     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppRowMask, 0xf);
8135     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBankMask, 0xf);
8136     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBoundCtrl);
8137     if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::fi) != -1) {
8138       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppFi);
8139     }
8140   }
8141 }
8142 
8143 //===----------------------------------------------------------------------===//
8144 // sdwa
8145 //===----------------------------------------------------------------------===//
8146 
8147 OperandMatchResultTy
8148 AMDGPUAsmParser::parseSDWASel(OperandVector &Operands, StringRef Prefix,
8149                               AMDGPUOperand::ImmTy Type) {
8150   using namespace llvm::AMDGPU::SDWA;
8151 
8152   SMLoc S = getLoc();
8153   StringRef Value;
8154   OperandMatchResultTy res;
8155 
8156   SMLoc StringLoc;
8157   res = parseStringWithPrefix(Prefix, Value, StringLoc);
8158   if (res != MatchOperand_Success) {
8159     return res;
8160   }
8161 
8162   int64_t Int;
8163   Int = StringSwitch<int64_t>(Value)
8164         .Case("BYTE_0", SdwaSel::BYTE_0)
8165         .Case("BYTE_1", SdwaSel::BYTE_1)
8166         .Case("BYTE_2", SdwaSel::BYTE_2)
8167         .Case("BYTE_3", SdwaSel::BYTE_3)
8168         .Case("WORD_0", SdwaSel::WORD_0)
8169         .Case("WORD_1", SdwaSel::WORD_1)
8170         .Case("DWORD", SdwaSel::DWORD)
8171         .Default(0xffffffff);
8172 
8173   if (Int == 0xffffffff) {
8174     Error(StringLoc, "invalid " + Twine(Prefix) + " value");
8175     return MatchOperand_ParseFail;
8176   }
8177 
8178   Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, Type));
8179   return MatchOperand_Success;
8180 }
8181 
8182 OperandMatchResultTy
8183 AMDGPUAsmParser::parseSDWADstUnused(OperandVector &Operands) {
8184   using namespace llvm::AMDGPU::SDWA;
8185 
8186   SMLoc S = getLoc();
8187   StringRef Value;
8188   OperandMatchResultTy res;
8189 
8190   SMLoc StringLoc;
8191   res = parseStringWithPrefix("dst_unused", Value, StringLoc);
8192   if (res != MatchOperand_Success) {
8193     return res;
8194   }
8195 
8196   int64_t Int;
8197   Int = StringSwitch<int64_t>(Value)
8198         .Case("UNUSED_PAD", DstUnused::UNUSED_PAD)
8199         .Case("UNUSED_SEXT", DstUnused::UNUSED_SEXT)
8200         .Case("UNUSED_PRESERVE", DstUnused::UNUSED_PRESERVE)
8201         .Default(0xffffffff);
8202 
8203   if (Int == 0xffffffff) {
8204     Error(StringLoc, "invalid dst_unused value");
8205     return MatchOperand_ParseFail;
8206   }
8207 
8208   Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, AMDGPUOperand::ImmTySdwaDstUnused));
8209   return MatchOperand_Success;
8210 }
8211 
8212 void AMDGPUAsmParser::cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands) {
8213   cvtSDWA(Inst, Operands, SIInstrFlags::VOP1);
8214 }
8215 
8216 void AMDGPUAsmParser::cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands) {
8217   cvtSDWA(Inst, Operands, SIInstrFlags::VOP2);
8218 }
8219 
8220 void AMDGPUAsmParser::cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands) {
8221   cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, true, true);
8222 }
8223 
8224 void AMDGPUAsmParser::cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands) {
8225   cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, false, true);
8226 }
8227 
8228 void AMDGPUAsmParser::cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands) {
8229   cvtSDWA(Inst, Operands, SIInstrFlags::VOPC, isVI());
8230 }
8231 
8232 void AMDGPUAsmParser::cvtSDWA(MCInst &Inst, const OperandVector &Operands,
8233                               uint64_t BasicInstType,
8234                               bool SkipDstVcc,
8235                               bool SkipSrcVcc) {
8236   using namespace llvm::AMDGPU::SDWA;
8237 
8238   OptionalImmIndexMap OptionalIdx;
8239   bool SkipVcc = SkipDstVcc || SkipSrcVcc;
8240   bool SkippedVcc = false;
8241 
8242   unsigned I = 1;
8243   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
8244   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
8245     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
8246   }
8247 
8248   for (unsigned E = Operands.size(); I != E; ++I) {
8249     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
8250     if (SkipVcc && !SkippedVcc && Op.isReg() &&
8251         (Op.getReg() == AMDGPU::VCC || Op.getReg() == AMDGPU::VCC_LO)) {
8252       // VOP2b (v_add_u32, v_sub_u32, ...) sdwa uses the "vcc" token as dst.
8253       // Skip it if it's the 2nd (e.g. v_add_i32_sdwa v1, vcc, v2, v3)
8254       // or the 4th (v_addc_u32_sdwa v1, vcc, v2, v3, vcc) operand.
8255       // Skip VCC only if we did not skip it on the previous iteration.
8256       // Note that src0 and src1 occupy 2 slots each because of modifiers.
8257       if (BasicInstType == SIInstrFlags::VOP2 &&
8258           ((SkipDstVcc && Inst.getNumOperands() == 1) ||
8259            (SkipSrcVcc && Inst.getNumOperands() == 5))) {
8260         SkippedVcc = true;
8261         continue;
8262       } else if (BasicInstType == SIInstrFlags::VOPC &&
8263                  Inst.getNumOperands() == 0) {
8264         SkippedVcc = true;
8265         continue;
8266       }
8267     }
8268     if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
8269       Op.addRegOrImmWithInputModsOperands(Inst, 2);
8270     } else if (Op.isImm()) {
8271       // Handle optional arguments
8272       OptionalIdx[Op.getImmTy()] = I;
8273     } else {
8274       llvm_unreachable("Invalid operand type");
8275     }
8276     SkippedVcc = false;
8277   }
8278 
8279   if (Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx10 &&
8280       Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx9 &&
8281       Inst.getOpcode() != AMDGPU::V_NOP_sdwa_vi) {
8282     // v_nop_sdwa_vi/gfx9/gfx10 have no optional sdwa arguments.
8283     switch (BasicInstType) {
8284     case SIInstrFlags::VOP1:
8285       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
8286       if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) {
8287         addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
8288       }
8289       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD);
8290       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE);
8291       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
8292       break;
8293 
8294     case SIInstrFlags::VOP2:
8295       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
8296       if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) {
8297         addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
8298       }
8299       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD);
8300       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE);
8301       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
8302       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD);
8303       break;
8304 
8305     case SIInstrFlags::VOPC:
8306       if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::clamp) != -1)
8307         addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
8308       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
8309       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD);
8310       break;
8311 
8312     default:
8313       llvm_unreachable("Invalid instruction type. Only VOP1, VOP2 and VOPC allowed");
8314     }
8315   }
8316 
8317   // Special case for v_mac_{f16, f32}:
8318   // they have a src2 register operand that is tied to the dst operand.
8319   if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
8320       Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi)  {
8321     auto it = Inst.begin();
8322     std::advance(
8323       it, AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::src2));
    // Copy the operand to ensure it's not invalidated when Inst grows.
8324     Inst.insert(it, MCOperand(Inst.getOperand(0))); // src2 = dst
8325   }
8326 }
8327 
8328 //===----------------------------------------------------------------------===//
8329 // mAI
8330 //===----------------------------------------------------------------------===//
8331 
8332 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBLGP() const {
8333   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyBLGP);
8334 }
8335 
8336 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultCBSZ() const {
8337   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyCBSZ);
8338 }
8339 
8340 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultABID() const {
8341   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyABID);
8342 }
8343 
8344 /// Force static initialization.
8345 extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUAsmParser() {
8346   RegisterMCAsmParser<AMDGPUAsmParser> A(getTheAMDGPUTarget());
8347   RegisterMCAsmParser<AMDGPUAsmParser> B(getTheGCNTarget());
8348 }
8349 
8350 #define GET_REGISTER_MATCHER
8351 #define GET_MATCHER_IMPLEMENTATION
8352 #define GET_MNEMONIC_SPELL_CHECKER
8353 #define GET_MNEMONIC_CHECKER
8354 #include "AMDGPUGenAsmMatcher.inc"
8355 
8356 // This function should be defined after the auto-generated include so that the
8357 // MatchClassKind enum is defined.
8358 unsigned AMDGPUAsmParser::validateTargetOperandClass(MCParsedAsmOperand &Op,
8359                                                      unsigned Kind) {
8360   // Tokens like "glc" would be parsed as immediate operands in ParseOperand().
8361   // But MatchInstructionImpl() expects to meet a token and fails to validate
8362   // the operand. This method checks whether we were given an immediate operand
8363   // but expected the corresponding token.
8364   AMDGPUOperand &Operand = (AMDGPUOperand&)Op;
8365   switch (Kind) {
8366   case MCK_addr64:
8367     return Operand.isAddr64() ? Match_Success : Match_InvalidOperand;
8368   case MCK_gds:
8369     return Operand.isGDS() ? Match_Success : Match_InvalidOperand;
8370   case MCK_lds:
8371     return Operand.isLDS() ? Match_Success : Match_InvalidOperand;
8372   case MCK_idxen:
8373     return Operand.isIdxen() ? Match_Success : Match_InvalidOperand;
8374   case MCK_offen:
8375     return Operand.isOffen() ? Match_Success : Match_InvalidOperand;
8376   case MCK_SSrcB32:
8377     // When operands have expression values, they will return true for isToken,
8378     // because it is not possible to distinguish between a token and an
8379     // expression at parse time. MatchInstructionImpl() will always try to
8380     // match an operand as a token, when isToken returns true, and when the
8381     // name of the expression is not a valid token, the match will fail,
8382     // so we need to handle it here.
8383     return Operand.isSSrcB32() ? Match_Success : Match_InvalidOperand;
8384   case MCK_SSrcF32:
8385     return Operand.isSSrcF32() ? Match_Success : Match_InvalidOperand;
8386   case MCK_SoppBrTarget:
8387     return Operand.isSoppBrTarget() ? Match_Success : Match_InvalidOperand;
8388   case MCK_VReg32OrOff:
8389     return Operand.isVReg32OrOff() ? Match_Success : Match_InvalidOperand;
8390   case MCK_InterpSlot:
8391     return Operand.isInterpSlot() ? Match_Success : Match_InvalidOperand;
8392   case MCK_Attr:
8393     return Operand.isInterpAttr() ? Match_Success : Match_InvalidOperand;
8394   case MCK_AttrChan:
8395     return Operand.isAttrChan() ? Match_Success : Match_InvalidOperand;
8396   case MCK_ImmSMEMOffset:
8397     return Operand.isSMEMOffset() ? Match_Success : Match_InvalidOperand;
8398   case MCK_SReg_64:
8399   case MCK_SReg_64_XEXEC:
8400     // Null is defined as a 32-bit register, but
8401     // it should also be accepted as a 64-bit operand.
8402     // The following code enables it for SReg_64 operands
8403     // used as source and destination. Remaining source
8404     // operands are handled in isInlinableImm.
8405     return Operand.isNull() ? Match_Success : Match_InvalidOperand;
8406   default:
8407     return Match_InvalidOperand;
8408   }
8409 }
8410 
8411 //===----------------------------------------------------------------------===//
8412 // endpgm
8413 //===----------------------------------------------------------------------===//
8414 
8415 OperandMatchResultTy AMDGPUAsmParser::parseEndpgmOp(OperandVector &Operands) {
8416   SMLoc S = getLoc();
8417   int64_t Imm = 0;
8418 
8419   if (!parseExpr(Imm)) {
8420     // The operand is optional, if not present default to 0
8421     Imm = 0;
8422   }
8423 
8424   if (!isUInt<16>(Imm)) {
8425     Error(S, "expected a 16-bit value");
8426     return MatchOperand_ParseFail;
8427   }
8428 
8429   Operands.push_back(
8430       AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyEndpgm));
8431   return MatchOperand_Success;
8432 }
8433 
8434 bool AMDGPUOperand::isEndpgm() const { return isImmTy(ImmTyEndpgm); }
8435