1 //===- AMDGPUAsmParser.cpp - Parse SI asm to MCInst instructions ----------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "AMDGPU.h"
10 #include "AMDKernelCodeT.h"
11 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
12 #include "MCTargetDesc/AMDGPUTargetStreamer.h"
13 #include "SIDefines.h"
14 #include "SIInstrInfo.h"
15 #include "TargetInfo/AMDGPUTargetInfo.h"
16 #include "Utils/AMDGPUAsmUtils.h"
17 #include "Utils/AMDGPUBaseInfo.h"
18 #include "Utils/AMDKernelCodeTUtils.h"
19 #include "llvm/ADT/APFloat.h"
20 #include "llvm/ADT/APInt.h"
21 #include "llvm/ADT/ArrayRef.h"
22 #include "llvm/ADT/STLExtras.h"
23 #include "llvm/ADT/SmallBitVector.h"
24 #include "llvm/ADT/SmallString.h"
25 #include "llvm/ADT/StringRef.h"
26 #include "llvm/ADT/StringSwitch.h"
27 #include "llvm/ADT/Twine.h"
28 #include "llvm/BinaryFormat/ELF.h"
29 #include "llvm/MC/MCAsmInfo.h"
30 #include "llvm/MC/MCContext.h"
31 #include "llvm/MC/MCExpr.h"
32 #include "llvm/MC/MCInst.h"
33 #include "llvm/MC/MCInstrDesc.h"
34 #include "llvm/MC/MCInstrInfo.h"
35 #include "llvm/MC/MCParser/MCAsmLexer.h"
36 #include "llvm/MC/MCParser/MCAsmParser.h"
37 #include "llvm/MC/MCParser/MCAsmParserExtension.h"
38 #include "llvm/MC/MCParser/MCParsedAsmOperand.h"
39 #include "llvm/MC/MCParser/MCTargetAsmParser.h"
40 #include "llvm/MC/MCRegisterInfo.h"
41 #include "llvm/MC/MCStreamer.h"
42 #include "llvm/MC/MCSubtargetInfo.h"
43 #include "llvm/MC/MCSymbol.h"
44 #include "llvm/Support/AMDGPUMetadata.h"
45 #include "llvm/Support/AMDHSAKernelDescriptor.h"
46 #include "llvm/Support/Casting.h"
47 #include "llvm/Support/Compiler.h"
48 #include "llvm/Support/Error.h"
49 #include "llvm/Support/MachineValueType.h"
50 #include "llvm/Support/MathExtras.h"
51 #include "llvm/Support/SMLoc.h"
52 #include "llvm/Support/TargetParser.h"
53 #include "llvm/Support/TargetRegistry.h"
54 #include "llvm/Support/raw_ostream.h"
55 #include <algorithm>
56 #include <cassert>
57 #include <cstdint>
58 #include <cstring>
59 #include <iterator>
60 #include <map>
61 #include <memory>
62 #include <string>
63 
64 using namespace llvm;
65 using namespace llvm::AMDGPU;
66 using namespace llvm::amdhsa;
67 
68 namespace {
69 
70 class AMDGPUAsmParser;
71 
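// Broad classes of registers the register parser can recognize: vector,
// scalar, and accumulation GPRs, trap-handler temporaries (TTMP), and
// special registers such as VCC, EXEC, and M0.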
72 enum RegisterKind { IS_UNKNOWN, IS_VGPR, IS_SGPR, IS_AGPR, IS_TTMP, IS_SPECIAL };
73 
74 //===----------------------------------------------------------------------===//
75 // Operand
76 //===----------------------------------------------------------------------===//
77 
78 class AMDGPUOperand : public MCParsedAsmOperand {
79   enum KindTy {
80     Token,
81     Immediate,
82     Register,
83     Expression
84   } Kind;
85 
86   SMLoc StartLoc, EndLoc;
87   const AMDGPUAsmParser *AsmParser;
88 
89 public:
90   AMDGPUOperand(KindTy Kind_, const AMDGPUAsmParser *AsmParser_)
91     : MCParsedAsmOperand(), Kind(Kind_), AsmParser(AsmParser_) {}
92 
93   using Ptr = std::unique_ptr<AMDGPUOperand>;
94 
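  // Modifiers that may be attached to a source operand: floating-point
  // absolute value and negation, and integer sign-extension (sext).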
95   struct Modifiers {
96     bool Abs = false;
97     bool Neg = false;
98     bool Sext = false;
99 
100     bool hasFPModifiers() const { return Abs || Neg; }
101     bool hasIntModifiers() const { return Sext; }
102     bool hasModifiers() const { return hasFPModifiers() || hasIntModifiers(); }
103 
104     int64_t getFPModifiersOperand() const {
105       int64_t Operand = 0;
106       Operand |= Abs ? SISrcMods::ABS : 0u;
107       Operand |= Neg ? SISrcMods::NEG : 0u;
108       return Operand;
109     }
110 
111     int64_t getIntModifiersOperand() const {
112       int64_t Operand = 0;
113       Operand |= Sext ? SISrcMods::SEXT : 0u;
114       return Operand;
115     }
116 
117     int64_t getModifiersOperand() const {
118       assert(!(hasFPModifiers() && hasIntModifiers())
119            && "fp and int modifiers should not be used simultaneously");
120       if (hasFPModifiers()) {
121         return getFPModifiersOperand();
122       } else if (hasIntModifiers()) {
123         return getIntModifiersOperand();
124       } else {
125         return 0;
126       }
127     }
128 
129     friend raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods);
130   };
131 
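  // Identifies the named and optional immediate operands the parser knows
  // about, so each parsed value can be validated and routed to the right
  // MCInst operand slot.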
132   enum ImmTy {
133     ImmTyNone,
134     ImmTyGDS,
135     ImmTyLDS,
136     ImmTyOffen,
137     ImmTyIdxen,
138     ImmTyAddr64,
139     ImmTyOffset,
140     ImmTyInstOffset,
141     ImmTyOffset0,
142     ImmTyOffset1,
143     ImmTyDLC,
144     ImmTyGLC,
145     ImmTySLC,
146     ImmTySWZ,
147     ImmTyTFE,
148     ImmTyD16,
149     ImmTyClampSI,
150     ImmTyOModSI,
151     ImmTyDPP8,
152     ImmTyDppCtrl,
153     ImmTyDppRowMask,
154     ImmTyDppBankMask,
155     ImmTyDppBoundCtrl,
156     ImmTyDppFi,
157     ImmTySdwaDstSel,
158     ImmTySdwaSrc0Sel,
159     ImmTySdwaSrc1Sel,
160     ImmTySdwaDstUnused,
161     ImmTyDMask,
162     ImmTyDim,
163     ImmTyUNorm,
164     ImmTyDA,
165     ImmTyR128A16,
166     ImmTyA16,
167     ImmTyLWE,
168     ImmTyExpTgt,
169     ImmTyExpCompr,
170     ImmTyExpVM,
171     ImmTyFORMAT,
172     ImmTyHwreg,
173     ImmTyOff,
174     ImmTySendMsg,
175     ImmTyInterpSlot,
176     ImmTyInterpAttr,
177     ImmTyAttrChan,
178     ImmTyOpSel,
179     ImmTyOpSelHi,
180     ImmTyNegLo,
181     ImmTyNegHi,
182     ImmTySwizzle,
183     ImmTyGprIdxMode,
184     ImmTyHigh,
185     ImmTyBLGP,
186     ImmTyCBSZ,
187     ImmTyABID,
188     ImmTyEndpgm,
189   };
190 
191 private:
192   struct TokOp {
193     const char *Data;
194     unsigned Length;
195   };
196 
197   struct ImmOp {
198     int64_t Val;
199     ImmTy Type;
200     bool IsFPImm;
201     Modifiers Mods;
202   };
203 
204   struct RegOp {
205     unsigned RegNo;
206     Modifiers Mods;
207   };
208 
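  // Operand payload; the active union member is selected by Kind.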
209   union {
210     TokOp Tok;
211     ImmOp Imm;
212     RegOp Reg;
213     const MCExpr *Expr;
214   };
215 
216 public:
217   bool isToken() const override {
218     if (Kind == Token)
219       return true;
220 
221     // When parsing operands, we can't always tell if something was meant to be
222     // a token, like 'gds', or an expression that references a global variable.
223     // In this case, we assume the string is an expression, and if we need to
224     // interpret it as a token, then we treat the symbol name as the token.
225     return isSymbolRefExpr();
226   }
227 
228   bool isSymbolRefExpr() const {
229     return isExpr() && Expr && isa<MCSymbolRefExpr>(Expr);
230   }
231 
232   bool isImm() const override {
233     return Kind == Immediate;
234   }
235 
236   bool isInlinableImm(MVT type) const;
237   bool isLiteralImm(MVT type) const;
238 
239   bool isRegKind() const {
240     return Kind == Register;
241   }
242 
243   bool isReg() const override {
244     return isRegKind() && !hasModifiers();
245   }
246 
247   bool isRegOrImmWithInputMods(unsigned RCID, MVT type) const {
248     return isRegClass(RCID) || isInlinableImm(type) || isLiteralImm(type);
249   }
250 
251   bool isRegOrImmWithInt16InputMods() const {
252     return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i16);
253   }
254 
255   bool isRegOrImmWithInt32InputMods() const {
256     return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i32);
257   }
258 
259   bool isRegOrImmWithInt64InputMods() const {
260     return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::i64);
261   }
262 
263   bool isRegOrImmWithFP16InputMods() const {
264     return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f16);
265   }
266 
267   bool isRegOrImmWithFP32InputMods() const {
268     return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f32);
269   }
270 
271   bool isRegOrImmWithFP64InputMods() const {
272     return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::f64);
273   }
274 
275   bool isVReg() const {
276     return isRegClass(AMDGPU::VGPR_32RegClassID) ||
277            isRegClass(AMDGPU::VReg_64RegClassID) ||
278            isRegClass(AMDGPU::VReg_96RegClassID) ||
279            isRegClass(AMDGPU::VReg_128RegClassID) ||
280            isRegClass(AMDGPU::VReg_160RegClassID) ||
281            isRegClass(AMDGPU::VReg_256RegClassID) ||
282            isRegClass(AMDGPU::VReg_512RegClassID) ||
283            isRegClass(AMDGPU::VReg_1024RegClassID);
284   }
285 
286   bool isVReg32() const {
287     return isRegClass(AMDGPU::VGPR_32RegClassID);
288   }
289 
290   bool isVReg32OrOff() const {
291     return isOff() || isVReg32();
292   }
293 
294   bool isNull() const {
295     return isRegKind() && getReg() == AMDGPU::SGPR_NULL;
296   }
297 
298   bool isSDWAOperand(MVT type) const;
299   bool isSDWAFP16Operand() const;
300   bool isSDWAFP32Operand() const;
301   bool isSDWAInt16Operand() const;
302   bool isSDWAInt32Operand() const;
303 
304   bool isImmTy(ImmTy ImmT) const {
305     return isImm() && Imm.Type == ImmT;
306   }
307 
308   bool isImmModifier() const {
309     return isImm() && Imm.Type != ImmTyNone;
310   }
311 
312   bool isClampSI() const { return isImmTy(ImmTyClampSI); }
313   bool isOModSI() const { return isImmTy(ImmTyOModSI); }
314   bool isDMask() const { return isImmTy(ImmTyDMask); }
315   bool isDim() const { return isImmTy(ImmTyDim); }
316   bool isUNorm() const { return isImmTy(ImmTyUNorm); }
317   bool isDA() const { return isImmTy(ImmTyDA); }
318   bool isR128A16() const { return isImmTy(ImmTyR128A16); }
319   bool isGFX10A16() const { return isImmTy(ImmTyA16); }
320   bool isLWE() const { return isImmTy(ImmTyLWE); }
321   bool isOff() const { return isImmTy(ImmTyOff); }
322   bool isExpTgt() const { return isImmTy(ImmTyExpTgt); }
323   bool isExpVM() const { return isImmTy(ImmTyExpVM); }
324   bool isExpCompr() const { return isImmTy(ImmTyExpCompr); }
325   bool isOffen() const { return isImmTy(ImmTyOffen); }
326   bool isIdxen() const { return isImmTy(ImmTyIdxen); }
327   bool isAddr64() const { return isImmTy(ImmTyAddr64); }
328   bool isOffset() const { return isImmTy(ImmTyOffset) && isUInt<16>(getImm()); }
329   bool isOffset0() const { return isImmTy(ImmTyOffset0) && isUInt<8>(getImm()); }
330   bool isOffset1() const { return isImmTy(ImmTyOffset1) && isUInt<8>(getImm()); }
331 
332   bool isFlatOffset() const { return isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset); }
333   bool isGDS() const { return isImmTy(ImmTyGDS); }
334   bool isLDS() const { return isImmTy(ImmTyLDS); }
335   bool isDLC() const { return isImmTy(ImmTyDLC); }
336   bool isGLC() const { return isImmTy(ImmTyGLC); }
337   bool isSLC() const { return isImmTy(ImmTySLC); }
338   bool isSWZ() const { return isImmTy(ImmTySWZ); }
339   bool isTFE() const { return isImmTy(ImmTyTFE); }
340   bool isD16() const { return isImmTy(ImmTyD16); }
341   bool isFORMAT() const { return isImmTy(ImmTyFORMAT) && isUInt<8>(getImm()); }
342   bool isBankMask() const { return isImmTy(ImmTyDppBankMask); }
343   bool isRowMask() const { return isImmTy(ImmTyDppRowMask); }
344   bool isBoundCtrl() const { return isImmTy(ImmTyDppBoundCtrl); }
345   bool isFI() const { return isImmTy(ImmTyDppFi); }
346   bool isSDWADstSel() const { return isImmTy(ImmTySdwaDstSel); }
347   bool isSDWASrc0Sel() const { return isImmTy(ImmTySdwaSrc0Sel); }
348   bool isSDWASrc1Sel() const { return isImmTy(ImmTySdwaSrc1Sel); }
349   bool isSDWADstUnused() const { return isImmTy(ImmTySdwaDstUnused); }
350   bool isInterpSlot() const { return isImmTy(ImmTyInterpSlot); }
351   bool isInterpAttr() const { return isImmTy(ImmTyInterpAttr); }
352   bool isAttrChan() const { return isImmTy(ImmTyAttrChan); }
353   bool isOpSel() const { return isImmTy(ImmTyOpSel); }
354   bool isOpSelHi() const { return isImmTy(ImmTyOpSelHi); }
355   bool isNegLo() const { return isImmTy(ImmTyNegLo); }
356   bool isNegHi() const { return isImmTy(ImmTyNegHi); }
357   bool isHigh() const { return isImmTy(ImmTyHigh); }
358 
359   bool isMod() const {
360     return isClampSI() || isOModSI();
361   }
362 
363   bool isRegOrImm() const {
364     return isReg() || isImm();
365   }
366 
367   bool isRegClass(unsigned RCID) const;
368 
369   bool isInlineValue() const;
370 
371   bool isRegOrInlineNoMods(unsigned RCID, MVT type) const {
372     return (isRegClass(RCID) || isInlinableImm(type)) && !hasModifiers();
373   }
374 
375   bool isSCSrcB16() const {
376     return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i16);
377   }
378 
379   bool isSCSrcV2B16() const {
380     return isSCSrcB16();
381   }
382 
383   bool isSCSrcB32() const {
384     return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i32);
385   }
386 
387   bool isSCSrcB64() const {
388     return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::i64);
389   }
390 
391   bool isBoolReg() const;
392 
393   bool isSCSrcF16() const {
394     return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f16);
395   }
396 
397   bool isSCSrcV2F16() const {
398     return isSCSrcF16();
399   }
400 
401   bool isSCSrcF32() const {
402     return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f32);
403   }
404 
405   bool isSCSrcF64() const {
406     return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::f64);
407   }
408 
409   bool isSSrcB32() const {
410     return isSCSrcB32() || isLiteralImm(MVT::i32) || isExpr();
411   }
412 
413   bool isSSrcB16() const {
414     return isSCSrcB16() || isLiteralImm(MVT::i16);
415   }
416 
417   bool isSSrcV2B16() const {
418     llvm_unreachable("cannot happen");
419     return isSSrcB16();
420   }
421 
422   bool isSSrcB64() const {
423     // TODO: Find out how SALU supports extension of 32-bit literals to 64 bits.
424     // See isVSrc64().
425     return isSCSrcB64() || isLiteralImm(MVT::i64);
426   }
427 
428   bool isSSrcF32() const {
429     return isSCSrcB32() || isLiteralImm(MVT::f32) || isExpr();
430   }
431 
432   bool isSSrcF64() const {
433     return isSCSrcB64() || isLiteralImm(MVT::f64);
434   }
435 
436   bool isSSrcF16() const {
437     return isSCSrcB16() || isLiteralImm(MVT::f16);
438   }
439 
440   bool isSSrcV2F16() const {
441     llvm_unreachable("cannot happen");
442     return isSSrcF16();
443   }
444 
445   bool isSSrcOrLdsB32() const {
446     return isRegOrInlineNoMods(AMDGPU::SRegOrLds_32RegClassID, MVT::i32) ||
447            isLiteralImm(MVT::i32) || isExpr();
448   }
449 
450   bool isVCSrcB32() const {
451     return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i32);
452   }
453 
454   bool isVCSrcB64() const {
455     return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::i64);
456   }
457 
458   bool isVCSrcB16() const {
459     return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i16);
460   }
461 
462   bool isVCSrcV2B16() const {
463     return isVCSrcB16();
464   }
465 
466   bool isVCSrcF32() const {
467     return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f32);
468   }
469 
470   bool isVCSrcF64() const {
471     return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::f64);
472   }
473 
474   bool isVCSrcF16() const {
475     return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f16);
476   }
477 
478   bool isVCSrcV2F16() const {
479     return isVCSrcF16();
480   }
481 
482   bool isVSrcB32() const {
483     return isVCSrcF32() || isLiteralImm(MVT::i32) || isExpr();
484   }
485 
486   bool isVSrcB64() const {
487     return isVCSrcF64() || isLiteralImm(MVT::i64);
488   }
489 
490   bool isVSrcB16() const {
491     return isVCSrcF16() || isLiteralImm(MVT::i16);
492   }
493 
494   bool isVSrcV2B16() const {
495     return isVSrcB16() || isLiteralImm(MVT::v2i16);
496   }
497 
498   bool isVSrcF32() const {
499     return isVCSrcF32() || isLiteralImm(MVT::f32) || isExpr();
500   }
501 
502   bool isVSrcF64() const {
503     return isVCSrcF64() || isLiteralImm(MVT::f64);
504   }
505 
506   bool isVSrcF16() const {
507     return isVCSrcF16() || isLiteralImm(MVT::f16);
508   }
509 
510   bool isVSrcV2F16() const {
511     return isVSrcF16() || isLiteralImm(MVT::v2f16);
512   }
513 
514   bool isVISrcB32() const {
515     return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i32);
516   }
517 
518   bool isVISrcB16() const {
519     return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i16);
520   }
521 
522   bool isVISrcV2B16() const {
523     return isVISrcB16();
524   }
525 
526   bool isVISrcF32() const {
527     return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f32);
528   }
529 
530   bool isVISrcF16() const {
531     return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f16);
532   }
533 
534   bool isVISrcV2F16() const {
535     return isVISrcF16() || isVISrcB32();
536   }
537 
538   bool isAISrcB32() const {
539     return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i32);
540   }
541 
542   bool isAISrcB16() const {
543     return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i16);
544   }
545 
546   bool isAISrcV2B16() const {
547     return isAISrcB16();
548   }
549 
550   bool isAISrcF32() const {
551     return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f32);
552   }
553 
554   bool isAISrcF16() const {
555     return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f16);
556   }
557 
558   bool isAISrcV2F16() const {
559     return isAISrcF16() || isAISrcB32();
560   }
561 
562   bool isAISrc_128B32() const {
563     return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i32);
564   }
565 
566   bool isAISrc_128B16() const {
567     return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i16);
568   }
569 
570   bool isAISrc_128V2B16() const {
571     return isAISrc_128B16();
572   }
573 
574   bool isAISrc_128F32() const {
575     return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f32);
576   }
577 
578   bool isAISrc_128F16() const {
579     return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f16);
580   }
581 
582   bool isAISrc_128V2F16() const {
583     return isAISrc_128F16() || isAISrc_128B32();
584   }
585 
586   bool isAISrc_512B32() const {
587     return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i32);
588   }
589 
590   bool isAISrc_512B16() const {
591     return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i16);
592   }
593 
594   bool isAISrc_512V2B16() const {
595     return isAISrc_512B16();
596   }
597 
598   bool isAISrc_512F32() const {
599     return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f32);
600   }
601 
602   bool isAISrc_512F16() const {
603     return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f16);
604   }
605 
606   bool isAISrc_512V2F16() const {
607     return isAISrc_512F16() || isAISrc_512B32();
608   }
609 
610   bool isAISrc_1024B32() const {
611     return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i32);
612   }
613 
614   bool isAISrc_1024B16() const {
615     return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i16);
616   }
617 
618   bool isAISrc_1024V2B16() const {
619     return isAISrc_1024B16();
620   }
621 
622   bool isAISrc_1024F32() const {
623     return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f32);
624   }
625 
626   bool isAISrc_1024F16() const {
627     return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f16);
628   }
629 
630   bool isAISrc_1024V2F16() const {
631     return isAISrc_1024F16() || isAISrc_1024B32();
632   }
633 
634   bool isKImmFP32() const {
635     return isLiteralImm(MVT::f32);
636   }
637 
638   bool isKImmFP16() const {
639     return isLiteralImm(MVT::f16);
640   }
641 
642   bool isMem() const override {
643     return false;
644   }
645 
646   bool isExpr() const {
647     return Kind == Expression;
648   }
649 
650   bool isSoppBrTarget() const {
651     return isExpr() || isImm();
652   }
653 
654   bool isSWaitCnt() const;
655   bool isHwreg() const;
656   bool isSendMsg() const;
657   bool isSwizzle() const;
658   bool isSMRDOffset8() const;
659   bool isSMRDOffset20() const;
660   bool isSMRDLiteralOffset() const;
661   bool isDPP8() const;
662   bool isDPPCtrl() const;
663   bool isBLGP() const;
664   bool isCBSZ() const;
665   bool isABID() const;
666   bool isGPRIdxMode() const;
667   bool isS16Imm() const;
668   bool isU16Imm() const;
669   bool isEndpgm() const;
670 
671   StringRef getExpressionAsToken() const {
672     assert(isExpr());
673     const MCSymbolRefExpr *S = cast<MCSymbolRefExpr>(Expr);
674     return S->getSymbol().getName();
675   }
676 
677   StringRef getToken() const {
678     assert(isToken());
679 
680     if (Kind == Expression)
681       return getExpressionAsToken();
682 
683     return StringRef(Tok.Data, Tok.Length);
684   }
685 
686   int64_t getImm() const {
687     assert(isImm());
688     return Imm.Val;
689   }
690 
691   ImmTy getImmTy() const {
692     assert(isImm());
693     return Imm.Type;
694   }
695 
696   unsigned getReg() const override {
697     assert(isRegKind());
698     return Reg.RegNo;
699   }
700 
701   SMLoc getStartLoc() const override {
702     return StartLoc;
703   }
704 
705   SMLoc getEndLoc() const override {
706     return EndLoc;
707   }
708 
709   SMRange getLocRange() const {
710     return SMRange(StartLoc, EndLoc);
711   }
712 
713   Modifiers getModifiers() const {
714     assert(isRegKind() || isImmTy(ImmTyNone));
715     return isRegKind() ? Reg.Mods : Imm.Mods;
716   }
717 
718   void setModifiers(Modifiers Mods) {
719     assert(isRegKind() || isImmTy(ImmTyNone));
720     if (isRegKind())
721       Reg.Mods = Mods;
722     else
723       Imm.Mods = Mods;
724   }
725 
726   bool hasModifiers() const {
727     return getModifiers().hasModifiers();
728   }
729 
730   bool hasFPModifiers() const {
731     return getModifiers().hasFPModifiers();
732   }
733 
734   bool hasIntModifiers() const {
735     return getModifiers().hasIntModifiers();
736   }
737 
738   uint64_t applyInputFPModifiers(uint64_t Val, unsigned Size) const;
739 
740   void addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers = true) const;
741 
742   void addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const;
743 
744   template <unsigned Bitwidth>
745   void addKImmFPOperands(MCInst &Inst, unsigned N) const;
746 
747   void addKImmFP16Operands(MCInst &Inst, unsigned N) const {
748     addKImmFPOperands<16>(Inst, N);
749   }
750 
751   void addKImmFP32Operands(MCInst &Inst, unsigned N) const {
752     addKImmFPOperands<32>(Inst, N);
753   }
754 
755   void addRegOperands(MCInst &Inst, unsigned N) const;
756 
757   void addBoolRegOperands(MCInst &Inst, unsigned N) const {
758     addRegOperands(Inst, N);
759   }
760 
761   void addRegOrImmOperands(MCInst &Inst, unsigned N) const {
762     if (isRegKind())
763       addRegOperands(Inst, N);
764     else if (isExpr())
765       Inst.addOperand(MCOperand::createExpr(Expr));
766     else
767       addImmOperands(Inst, N);
768   }
769 
770   void addRegOrImmWithInputModsOperands(MCInst &Inst, unsigned N) const {
771     Modifiers Mods = getModifiers();
772     Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
773     if (isRegKind()) {
774       addRegOperands(Inst, N);
775     } else {
776       addImmOperands(Inst, N, false);
777     }
778   }
779 
780   void addRegOrImmWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
781     assert(!hasIntModifiers());
782     addRegOrImmWithInputModsOperands(Inst, N);
783   }
784 
785   void addRegOrImmWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
786     assert(!hasFPModifiers());
787     addRegOrImmWithInputModsOperands(Inst, N);
788   }
789 
790   void addRegWithInputModsOperands(MCInst &Inst, unsigned N) const {
791     Modifiers Mods = getModifiers();
792     Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
793     assert(isRegKind());
794     addRegOperands(Inst, N);
795   }
796 
797   void addRegWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
798     assert(!hasIntModifiers());
799     addRegWithInputModsOperands(Inst, N);
800   }
801 
802   void addRegWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
803     assert(!hasFPModifiers());
804     addRegWithInputModsOperands(Inst, N);
805   }
806 
807   void addSoppBrTargetOperands(MCInst &Inst, unsigned N) const {
808     if (isImm())
809       addImmOperands(Inst, N);
810     else {
811       assert(isExpr());
812       Inst.addOperand(MCOperand::createExpr(Expr));
813     }
814   }
815 
816   static void printImmTy(raw_ostream& OS, ImmTy Type) {
817     switch (Type) {
818     case ImmTyNone: OS << "None"; break;
819     case ImmTyGDS: OS << "GDS"; break;
820     case ImmTyLDS: OS << "LDS"; break;
821     case ImmTyOffen: OS << "Offen"; break;
822     case ImmTyIdxen: OS << "Idxen"; break;
823     case ImmTyAddr64: OS << "Addr64"; break;
824     case ImmTyOffset: OS << "Offset"; break;
825     case ImmTyInstOffset: OS << "InstOffset"; break;
826     case ImmTyOffset0: OS << "Offset0"; break;
827     case ImmTyOffset1: OS << "Offset1"; break;
828     case ImmTyDLC: OS << "DLC"; break;
829     case ImmTyGLC: OS << "GLC"; break;
830     case ImmTySLC: OS << "SLC"; break;
831     case ImmTySWZ: OS << "SWZ"; break;
832     case ImmTyTFE: OS << "TFE"; break;
833     case ImmTyD16: OS << "D16"; break;
834     case ImmTyFORMAT: OS << "FORMAT"; break;
835     case ImmTyClampSI: OS << "ClampSI"; break;
836     case ImmTyOModSI: OS << "OModSI"; break;
837     case ImmTyDPP8: OS << "DPP8"; break;
838     case ImmTyDppCtrl: OS << "DppCtrl"; break;
839     case ImmTyDppRowMask: OS << "DppRowMask"; break;
840     case ImmTyDppBankMask: OS << "DppBankMask"; break;
841     case ImmTyDppBoundCtrl: OS << "DppBoundCtrl"; break;
842     case ImmTyDppFi: OS << "FI"; break;
843     case ImmTySdwaDstSel: OS << "SdwaDstSel"; break;
844     case ImmTySdwaSrc0Sel: OS << "SdwaSrc0Sel"; break;
845     case ImmTySdwaSrc1Sel: OS << "SdwaSrc1Sel"; break;
846     case ImmTySdwaDstUnused: OS << "SdwaDstUnused"; break;
847     case ImmTyDMask: OS << "DMask"; break;
848     case ImmTyDim: OS << "Dim"; break;
849     case ImmTyUNorm: OS << "UNorm"; break;
850     case ImmTyDA: OS << "DA"; break;
851     case ImmTyR128A16: OS << "R128A16"; break;
852     case ImmTyA16: OS << "A16"; break;
853     case ImmTyLWE: OS << "LWE"; break;
854     case ImmTyOff: OS << "Off"; break;
855     case ImmTyExpTgt: OS << "ExpTgt"; break;
856     case ImmTyExpCompr: OS << "ExpCompr"; break;
857     case ImmTyExpVM: OS << "ExpVM"; break;
858     case ImmTyHwreg: OS << "Hwreg"; break;
859     case ImmTySendMsg: OS << "SendMsg"; break;
860     case ImmTyInterpSlot: OS << "InterpSlot"; break;
861     case ImmTyInterpAttr: OS << "InterpAttr"; break;
862     case ImmTyAttrChan: OS << "AttrChan"; break;
863     case ImmTyOpSel: OS << "OpSel"; break;
864     case ImmTyOpSelHi: OS << "OpSelHi"; break;
865     case ImmTyNegLo: OS << "NegLo"; break;
866     case ImmTyNegHi: OS << "NegHi"; break;
867     case ImmTySwizzle: OS << "Swizzle"; break;
868     case ImmTyGprIdxMode: OS << "GprIdxMode"; break;
869     case ImmTyHigh: OS << "High"; break;
870     case ImmTyBLGP: OS << "BLGP"; break;
871     case ImmTyCBSZ: OS << "CBSZ"; break;
872     case ImmTyABID: OS << "ABID"; break;
873     case ImmTyEndpgm: OS << "Endpgm"; break;
874     }
875   }
876 
877   void print(raw_ostream &OS) const override {
878     switch (Kind) {
879     case Register:
880       OS << "<register " << getReg() << " mods: " << Reg.Mods << '>';
881       break;
882     case Immediate:
883       OS << '<' << getImm();
884       if (getImmTy() != ImmTyNone) {
885         OS << " type: "; printImmTy(OS, getImmTy());
886       }
887       OS << " mods: " << Imm.Mods << '>';
888       break;
889     case Token:
890       OS << '\'' << getToken() << '\'';
891       break;
892     case Expression:
893       OS << "<expr " << *Expr << '>';
894       break;
895     }
896   }
897 
898   static AMDGPUOperand::Ptr CreateImm(const AMDGPUAsmParser *AsmParser,
899                                       int64_t Val, SMLoc Loc,
900                                       ImmTy Type = ImmTyNone,
901                                       bool IsFPImm = false) {
902     auto Op = std::make_unique<AMDGPUOperand>(Immediate, AsmParser);
903     Op->Imm.Val = Val;
904     Op->Imm.IsFPImm = IsFPImm;
905     Op->Imm.Type = Type;
906     Op->Imm.Mods = Modifiers();
907     Op->StartLoc = Loc;
908     Op->EndLoc = Loc;
909     return Op;
910   }
911 
912   static AMDGPUOperand::Ptr CreateToken(const AMDGPUAsmParser *AsmParser,
913                                         StringRef Str, SMLoc Loc,
914                                         bool HasExplicitEncodingSize = true) {
915     auto Res = std::make_unique<AMDGPUOperand>(Token, AsmParser);
916     Res->Tok.Data = Str.data();
917     Res->Tok.Length = Str.size();
918     Res->StartLoc = Loc;
919     Res->EndLoc = Loc;
920     return Res;
921   }
922 
923   static AMDGPUOperand::Ptr CreateReg(const AMDGPUAsmParser *AsmParser,
924                                       unsigned RegNo, SMLoc S,
925                                       SMLoc E) {
926     auto Op = std::make_unique<AMDGPUOperand>(Register, AsmParser);
927     Op->Reg.RegNo = RegNo;
928     Op->Reg.Mods = Modifiers();
929     Op->StartLoc = S;
930     Op->EndLoc = E;
931     return Op;
932   }
933 
934   static AMDGPUOperand::Ptr CreateExpr(const AMDGPUAsmParser *AsmParser,
935                                        const class MCExpr *Expr, SMLoc S) {
936     auto Op = std::make_unique<AMDGPUOperand>(Expression, AsmParser);
937     Op->Expr = Expr;
938     Op->StartLoc = S;
939     Op->EndLoc = S;
940     return Op;
941   }
942 };
943 
944 raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods) {
945   OS << "abs:" << Mods.Abs << " neg: " << Mods.Neg << " sext:" << Mods.Sext;
946   return OS;
947 }
948 
949 //===----------------------------------------------------------------------===//
950 // AsmParser
951 //===----------------------------------------------------------------------===//
952 
953 // Holds info related to the current kernel, e.g. count of SGPRs used.
954 // Kernel scope begins at .amdgpu_hsa_kernel directive, ends at next
955 // .amdgpu_hsa_kernel or at EOF.
956 class KernelScopeInfo {
957   int SgprIndexUnusedMin = -1;
958   int VgprIndexUnusedMin = -1;
959   MCContext *Ctx = nullptr;
960 
961   void usesSgprAt(int i) {
962     if (i >= SgprIndexUnusedMin) {
963       SgprIndexUnusedMin = ++i;
964       if (Ctx) {
965         MCSymbol * const Sym = Ctx->getOrCreateSymbol(Twine(".kernel.sgpr_count"));
966         Sym->setVariableValue(MCConstantExpr::create(SgprIndexUnusedMin, *Ctx));
967       }
968     }
969   }
970 
971   void usesVgprAt(int i) {
972     if (i >= VgprIndexUnusedMin) {
973       VgprIndexUnusedMin = ++i;
974       if (Ctx) {
975         MCSymbol * const Sym = Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count"));
976         Sym->setVariableValue(MCConstantExpr::create(VgprIndexUnusedMin, *Ctx));
977       }
978     }
979   }
980 
981 public:
982   KernelScopeInfo() = default;
983 
984   void initialize(MCContext &Context) {
985     Ctx = &Context;
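    // Reset the unused-register trackers and (re)define the count symbols
    // with an initial value of 0.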
986     usesSgprAt(SgprIndexUnusedMin = -1);
987     usesVgprAt(VgprIndexUnusedMin = -1);
988   }
989 
990   void usesRegister(RegisterKind RegKind, unsigned DwordRegIndex, unsigned RegWidth) {
991     switch (RegKind) {
992       case IS_SGPR: usesSgprAt(DwordRegIndex + RegWidth - 1); break;
993       case IS_AGPR: // fall through
994       case IS_VGPR: usesVgprAt(DwordRegIndex + RegWidth - 1); break;
995       default: break;
996     }
997   }
998 };
999 
1000 class AMDGPUAsmParser : public MCTargetAsmParser {
1001   MCAsmParser &Parser;
1002 
1003   // Number of extra operands parsed after the first optional operand.
1004   // This may be necessary to skip hardcoded mandatory operands.
1005   static const unsigned MAX_OPR_LOOKAHEAD = 8;
1006 
1007   unsigned ForcedEncodingSize = 0;
1008   bool ForcedDPP = false;
1009   bool ForcedSDWA = false;
1010   KernelScopeInfo KernelScope;
1011 
1012   /// @name Auto-generated Match Functions
1013   /// {
1014 
1015 #define GET_ASSEMBLER_HEADER
1016 #include "AMDGPUGenAsmMatcher.inc"
1017 
1018   /// }
1019 
1020 private:
1021   bool ParseAsAbsoluteExpression(uint32_t &Ret);
1022   bool OutOfRangeError(SMRange Range);
1023   /// Calculate VGPR/SGPR blocks required for given target, reserved
1024   /// registers, and user-specified NextFreeXGPR values.
1025   ///
1026   /// \param Features [in] Target features, used for bug corrections.
1027   /// \param VCCUsed [in] Whether VCC special SGPR is reserved.
1028   /// \param FlatScrUsed [in] Whether FLAT_SCRATCH special SGPR is reserved.
1029   /// \param XNACKUsed [in] Whether XNACK_MASK special SGPR is reserved.
1030   /// \param EnableWavefrontSize32 [in] Value of ENABLE_WAVEFRONT_SIZE32 kernel
1031   /// descriptor field, if valid.
1032   /// \param NextFreeVGPR [in] Max VGPR number referenced, plus one.
1033   /// \param VGPRRange [in] Token range, used for VGPR diagnostics.
1034   /// \param NextFreeSGPR [in] Max SGPR number referenced, plus one.
1035   /// \param SGPRRange [in] Token range, used for SGPR diagnostics.
1036   /// \param VGPRBlocks [out] Result VGPR block count.
1037   /// \param SGPRBlocks [out] Result SGPR block count.
1038   bool calculateGPRBlocks(const FeatureBitset &Features, bool VCCUsed,
1039                           bool FlatScrUsed, bool XNACKUsed,
1040                           Optional<bool> EnableWavefrontSize32, unsigned NextFreeVGPR,
1041                           SMRange VGPRRange, unsigned NextFreeSGPR,
1042                           SMRange SGPRRange, unsigned &VGPRBlocks,
1043                           unsigned &SGPRBlocks);
1044   bool ParseDirectiveAMDGCNTarget();
1045   bool ParseDirectiveAMDHSAKernel();
1046   bool ParseDirectiveMajorMinor(uint32_t &Major, uint32_t &Minor);
1047   bool ParseDirectiveHSACodeObjectVersion();
1048   bool ParseDirectiveHSACodeObjectISA();
1049   bool ParseAMDKernelCodeTValue(StringRef ID, amd_kernel_code_t &Header);
1050   bool ParseDirectiveAMDKernelCodeT();
1051   bool subtargetHasRegister(const MCRegisterInfo &MRI, unsigned RegNo) const;
1052   bool ParseDirectiveAMDGPUHsaKernel();
1053 
1054   bool ParseDirectiveISAVersion();
1055   bool ParseDirectiveHSAMetadata();
1056   bool ParseDirectivePALMetadataBegin();
1057   bool ParseDirectivePALMetadata();
1058   bool ParseDirectiveAMDGPULDS();
1059 
1060   /// Common code to parse out a block of text (typically YAML) between start and
1061   /// end directives.
1062   bool ParseToEndDirective(const char *AssemblerDirectiveBegin,
1063                            const char *AssemblerDirectiveEnd,
1064                            std::string &CollectString);
1065 
1066   bool AddNextRegisterToList(unsigned& Reg, unsigned& RegWidth,
1067                              RegisterKind RegKind, unsigned Reg1);
1068   bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
1069                            unsigned &RegNum, unsigned &RegWidth,
1070                            bool RestoreOnFailure = false);
1071   bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
1072                            unsigned &RegNum, unsigned &RegWidth,
1073                            SmallVectorImpl<AsmToken> &Tokens);
1074   unsigned ParseRegularReg(RegisterKind &RegKind, unsigned &RegNum,
1075                            unsigned &RegWidth,
1076                            SmallVectorImpl<AsmToken> &Tokens);
1077   unsigned ParseSpecialReg(RegisterKind &RegKind, unsigned &RegNum,
1078                            unsigned &RegWidth,
1079                            SmallVectorImpl<AsmToken> &Tokens);
1080   unsigned ParseRegList(RegisterKind &RegKind, unsigned &RegNum,
1081                         unsigned &RegWidth, SmallVectorImpl<AsmToken> &Tokens);
1082   bool ParseRegRange(unsigned& Num, unsigned& Width);
1083   unsigned getRegularReg(RegisterKind RegKind,
1084                          unsigned RegNum,
1085                          unsigned RegWidth);
1086 
1087   bool isRegister();
1088   bool isRegister(const AsmToken &Token, const AsmToken &NextToken) const;
1089   Optional<StringRef> getGprCountSymbolName(RegisterKind RegKind);
1090   void initializeGprCountSymbol(RegisterKind RegKind);
1091   bool updateGprCountSymbols(RegisterKind RegKind, unsigned DwordRegIndex,
1092                              unsigned RegWidth);
1093   void cvtMubufImpl(MCInst &Inst, const OperandVector &Operands,
1094                     bool IsAtomic, bool IsAtomicReturn, bool IsLds = false);
1095   void cvtDSImpl(MCInst &Inst, const OperandVector &Operands,
1096                  bool IsGdsHardcoded);
1097 
1098 public:
1099   enum AMDGPUMatchResultTy {
1100     Match_PreferE32 = FIRST_TARGET_MATCH_RESULT_TY
1101   };
1102   enum OperandMode {
1103     OperandMode_Default,
1104     OperandMode_NSA,
1105   };
1106 
1107   using OptionalImmIndexMap = std::map<AMDGPUOperand::ImmTy, unsigned>;
1108 
1109   AMDGPUAsmParser(const MCSubtargetInfo &STI, MCAsmParser &_Parser,
1110                const MCInstrInfo &MII,
1111                const MCTargetOptions &Options)
1112       : MCTargetAsmParser(Options, STI, MII), Parser(_Parser) {
1113     MCAsmParserExtension::Initialize(Parser);
1114 
1115     if (getFeatureBits().none()) {
1116       // Set default features.
1117       copySTI().ToggleFeature("southern-islands");
1118     }
1119 
1120     setAvailableFeatures(ComputeAvailableFeatures(getFeatureBits()));
1121 
1122     {
1123       // TODO: make these pre-defined variables read-only.
1124       // Currently there is no suitable machinery in core llvm-mc for this.
1125       // MCSymbol::isRedefinable is intended for another purpose, and
1126       // AsmParser::parseDirectiveSet() cannot be specialized for a specific target.
1127       AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
1128       MCContext &Ctx = getContext();
1129       if (ISA.Major >= 6 && AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) {
1130         MCSymbol *Sym =
1131             Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_number"));
1132         Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
1133         Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_minor"));
1134         Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
1135         Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_stepping"));
1136         Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
1137       } else {
1138         MCSymbol *Sym =
1139             Ctx.getOrCreateSymbol(Twine(".option.machine_version_major"));
1140         Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
1141         Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_minor"));
1142         Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
1143         Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_stepping"));
1144         Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
1145       }
1146       if (ISA.Major >= 6 && AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) {
1147         initializeGprCountSymbol(IS_VGPR);
1148         initializeGprCountSymbol(IS_SGPR);
1149       } else
1150         KernelScope.initialize(getContext());
1151     }
1152   }
1153 
1154   bool hasXNACK() const {
1155     return AMDGPU::hasXNACK(getSTI());
1156   }
1157 
1158   bool hasMIMG_R128() const {
1159     return AMDGPU::hasMIMG_R128(getSTI());
1160   }
1161 
1162   bool hasPackedD16() const {
1163     return AMDGPU::hasPackedD16(getSTI());
1164   }
1165 
1166   bool hasGFX10A16() const {
1167     return AMDGPU::hasGFX10A16(getSTI());
1168   }
1169 
1170   bool isSI() const {
1171     return AMDGPU::isSI(getSTI());
1172   }
1173 
1174   bool isCI() const {
1175     return AMDGPU::isCI(getSTI());
1176   }
1177 
1178   bool isVI() const {
1179     return AMDGPU::isVI(getSTI());
1180   }
1181 
1182   bool isGFX9() const {
1183     return AMDGPU::isGFX9(getSTI());
1184   }
1185 
1186   bool isGFX10() const {
1187     return AMDGPU::isGFX10(getSTI());
1188   }
1189 
1190   bool hasInv2PiInlineImm() const {
1191     return getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm];
1192   }
1193 
1194   bool hasFlatOffsets() const {
1195     return getFeatureBits()[AMDGPU::FeatureFlatInstOffsets];
1196   }
1197 
1198   bool hasSGPR102_SGPR103() const {
1199     return !isVI() && !isGFX9();
1200   }
1201 
1202   bool hasSGPR104_SGPR105() const {
1203     return isGFX10();
1204   }
1205 
1206   bool hasIntClamp() const {
1207     return getFeatureBits()[AMDGPU::FeatureIntClamp];
1208   }
1209 
1210   AMDGPUTargetStreamer &getTargetStreamer() {
1211     MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer();
1212     return static_cast<AMDGPUTargetStreamer &>(TS);
1213   }
1214 
1215   const MCRegisterInfo *getMRI() const {
1216     // We need this const_cast because for some reason getContext() is not const
1217     // in MCAsmParser.
1218     return const_cast<AMDGPUAsmParser*>(this)->getContext().getRegisterInfo();
1219   }
1220 
1221   const MCInstrInfo *getMII() const {
1222     return &MII;
1223   }
1224 
1225   const FeatureBitset &getFeatureBits() const {
1226     return getSTI().getFeatureBits();
1227   }
1228 
1229   void setForcedEncodingSize(unsigned Size) { ForcedEncodingSize = Size; }
1230   void setForcedDPP(bool ForceDPP_) { ForcedDPP = ForceDPP_; }
1231   void setForcedSDWA(bool ForceSDWA_) { ForcedSDWA = ForceSDWA_; }
1232 
1233   unsigned getForcedEncodingSize() const { return ForcedEncodingSize; }
1234   bool isForcedVOP3() const { return ForcedEncodingSize == 64; }
1235   bool isForcedDPP() const { return ForcedDPP; }
1236   bool isForcedSDWA() const { return ForcedSDWA; }
1237   ArrayRef<unsigned> getMatchedVariants() const;
1238 
1239   std::unique_ptr<AMDGPUOperand> parseRegister(bool RestoreOnFailure = false);
1240   bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc,
1241                      bool RestoreOnFailure);
1242   bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override;
1243   OperandMatchResultTy tryParseRegister(unsigned &RegNo, SMLoc &StartLoc,
1244                                         SMLoc &EndLoc) override;
1245   unsigned checkTargetMatchPredicate(MCInst &Inst) override;
1246   unsigned validateTargetOperandClass(MCParsedAsmOperand &Op,
1247                                       unsigned Kind) override;
1248   bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
1249                                OperandVector &Operands, MCStreamer &Out,
1250                                uint64_t &ErrorInfo,
1251                                bool MatchingInlineAsm) override;
1252   bool ParseDirective(AsmToken DirectiveID) override;
1253   OperandMatchResultTy parseOperand(OperandVector &Operands, StringRef Mnemonic,
1254                                     OperandMode Mode = OperandMode_Default);
1255   StringRef parseMnemonicSuffix(StringRef Name);
1256   bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
1257                         SMLoc NameLoc, OperandVector &Operands) override;
1258   //bool ProcessInstruction(MCInst &Inst);
1259 
1260   OperandMatchResultTy parseIntWithPrefix(const char *Prefix, int64_t &Int);
1261 
1262   OperandMatchResultTy
1263   parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
1264                      AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
1265                      bool (*ConvertResult)(int64_t &) = nullptr);
1266 
1267   OperandMatchResultTy
1268   parseOperandArrayWithPrefix(const char *Prefix,
1269                               OperandVector &Operands,
1270                               AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
1271                               bool (*ConvertResult)(int64_t&) = nullptr);
1272 
1273   OperandMatchResultTy
1274   parseNamedBit(const char *Name, OperandVector &Operands,
1275                 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone);
1276   OperandMatchResultTy parseStringWithPrefix(StringRef Prefix,
1277                                              StringRef &Value);
1278 
1279   bool isModifier();
1280   bool isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1281   bool isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1282   bool isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1283   bool isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const;
1284   bool parseSP3NegModifier();
1285   OperandMatchResultTy parseImm(OperandVector &Operands, bool HasSP3AbsModifier = false);
1286   OperandMatchResultTy parseReg(OperandVector &Operands);
1287   OperandMatchResultTy parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod = false);
1288   OperandMatchResultTy parseRegOrImmWithFPInputMods(OperandVector &Operands, bool AllowImm = true);
1289   OperandMatchResultTy parseRegOrImmWithIntInputMods(OperandVector &Operands, bool AllowImm = true);
1290   OperandMatchResultTy parseRegWithFPInputMods(OperandVector &Operands);
1291   OperandMatchResultTy parseRegWithIntInputMods(OperandVector &Operands);
1292   OperandMatchResultTy parseVReg32OrOff(OperandVector &Operands);
1293   OperandMatchResultTy parseDfmtNfmt(OperandVector &Operands);
1294 
1295   void cvtDSOffset01(MCInst &Inst, const OperandVector &Operands);
1296   void cvtDS(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, false); }
1297   void cvtDSGds(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, true); }
1298   void cvtExp(MCInst &Inst, const OperandVector &Operands);
1299 
1300   bool parseCnt(int64_t &IntVal);
1301   OperandMatchResultTy parseSWaitCntOps(OperandVector &Operands);
1302   OperandMatchResultTy parseHwreg(OperandVector &Operands);
1303 
1304 private:
1305   struct OperandInfoTy {
1306     int64_t Id;
1307     bool IsSymbolic = false;
1308     bool IsDefined = false;
1309 
1310     OperandInfoTy(int64_t Id_) : Id(Id_) {}
1311   };
1312 
1313   bool parseSendMsgBody(OperandInfoTy &Msg, OperandInfoTy &Op, OperandInfoTy &Stream);
1314   bool validateSendMsg(const OperandInfoTy &Msg,
1315                        const OperandInfoTy &Op,
1316                        const OperandInfoTy &Stream,
1317                        const SMLoc Loc);
1318 
1319   bool parseHwregBody(OperandInfoTy &HwReg, int64_t &Offset, int64_t &Width);
1320   bool validateHwreg(const OperandInfoTy &HwReg,
1321                      const int64_t Offset,
1322                      const int64_t Width,
1323                      const SMLoc Loc);
1324 
1325   void errorExpTgt();
1326   OperandMatchResultTy parseExpTgtImpl(StringRef Str, uint8_t &Val);
1327   SMLoc getFlatOffsetLoc(const OperandVector &Operands) const;
1328 
1329   bool validateInstruction(const MCInst &Inst, const SMLoc &IDLoc, const OperandVector &Operands);
1330   bool validateFlatOffset(const MCInst &Inst, const OperandVector &Operands);
1331   bool validateSOPLiteral(const MCInst &Inst) const;
1332   bool validateConstantBusLimitations(const MCInst &Inst);
1333   bool validateEarlyClobberLimitations(const MCInst &Inst);
1334   bool validateIntClampSupported(const MCInst &Inst);
1335   bool validateMIMGAtomicDMask(const MCInst &Inst);
1336   bool validateMIMGGatherDMask(const MCInst &Inst);
1337   bool validateMovrels(const MCInst &Inst);
1338   bool validateMIMGDataSize(const MCInst &Inst);
1339   bool validateMIMGAddrSize(const MCInst &Inst);
1340   bool validateMIMGD16(const MCInst &Inst);
1341   bool validateMIMGDim(const MCInst &Inst);
1342   bool validateLdsDirect(const MCInst &Inst);
1343   bool validateOpSel(const MCInst &Inst);
1344   bool validateVccOperand(unsigned Reg) const;
1345   bool validateVOP3Literal(const MCInst &Inst) const;
1346   unsigned getConstantBusLimit(unsigned Opcode) const;
1347   bool usesConstantBus(const MCInst &Inst, unsigned OpIdx);
1348   bool isInlineConstant(const MCInst &Inst, unsigned OpIdx) const;
1349   unsigned findImplicitSGPRReadInVOP(const MCInst &Inst) const;
1350 
1351   bool isId(const StringRef Id) const;
1352   bool isId(const AsmToken &Token, const StringRef Id) const;
1353   bool isToken(const AsmToken::TokenKind Kind) const;
1354   bool trySkipId(const StringRef Id);
1355   bool trySkipId(const StringRef Id, const AsmToken::TokenKind Kind);
1356   bool trySkipToken(const AsmToken::TokenKind Kind);
1357   bool skipToken(const AsmToken::TokenKind Kind, const StringRef ErrMsg);
1358   bool parseString(StringRef &Val, const StringRef ErrMsg = "expected a string");
1359   void peekTokens(MutableArrayRef<AsmToken> Tokens);
1360   AsmToken::TokenKind getTokenKind() const;
1361   bool parseExpr(int64_t &Imm);
1362   bool parseExpr(OperandVector &Operands);
1363   StringRef getTokenStr() const;
1364   AsmToken peekToken();
1365   AsmToken getToken() const;
1366   SMLoc getLoc() const;
1367   void lex();
1368 
1369 public:
1370   OperandMatchResultTy parseOptionalOperand(OperandVector &Operands);
1371   OperandMatchResultTy parseOptionalOpr(OperandVector &Operands);
1372 
1373   OperandMatchResultTy parseExpTgt(OperandVector &Operands);
1374   OperandMatchResultTy parseSendMsgOp(OperandVector &Operands);
1375   OperandMatchResultTy parseInterpSlot(OperandVector &Operands);
1376   OperandMatchResultTy parseInterpAttr(OperandVector &Operands);
1377   OperandMatchResultTy parseSOppBrTarget(OperandVector &Operands);
1378   OperandMatchResultTy parseBoolReg(OperandVector &Operands);
1379 
1380   bool parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
1381                             const unsigned MinVal,
1382                             const unsigned MaxVal,
1383                             const StringRef ErrMsg);
1384   OperandMatchResultTy parseSwizzleOp(OperandVector &Operands);
1385   bool parseSwizzleOffset(int64_t &Imm);
1386   bool parseSwizzleMacro(int64_t &Imm);
1387   bool parseSwizzleQuadPerm(int64_t &Imm);
1388   bool parseSwizzleBitmaskPerm(int64_t &Imm);
1389   bool parseSwizzleBroadcast(int64_t &Imm);
1390   bool parseSwizzleSwap(int64_t &Imm);
1391   bool parseSwizzleReverse(int64_t &Imm);
1392 
1393   OperandMatchResultTy parseGPRIdxMode(OperandVector &Operands);
1394   int64_t parseGPRIdxMacro();
1395 
1396   void cvtMubuf(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, false); }
1397   void cvtMubufAtomic(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true, false); }
1398   void cvtMubufAtomicReturn(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true, true); }
1399   void cvtMubufLds(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, false, true); }
1400   void cvtMtbuf(MCInst &Inst, const OperandVector &Operands);
1401 
1402   AMDGPUOperand::Ptr defaultDLC() const;
1403   AMDGPUOperand::Ptr defaultGLC() const;
1404   AMDGPUOperand::Ptr defaultSLC() const;
1405 
1406   AMDGPUOperand::Ptr defaultSMRDOffset8() const;
1407   AMDGPUOperand::Ptr defaultSMRDOffset20() const;
1408   AMDGPUOperand::Ptr defaultSMRDLiteralOffset() const;
1409   AMDGPUOperand::Ptr defaultFlatOffset() const;
1410 
1411   OperandMatchResultTy parseOModOperand(OperandVector &Operands);
1412 
1413   void cvtVOP3(MCInst &Inst, const OperandVector &Operands,
1414                OptionalImmIndexMap &OptionalIdx);
1415   void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands);
1416   void cvtVOP3(MCInst &Inst, const OperandVector &Operands);
1417   void cvtVOP3P(MCInst &Inst, const OperandVector &Operands);
1418 
1419   void cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands);
1420 
1421   void cvtMIMG(MCInst &Inst, const OperandVector &Operands,
1422                bool IsAtomic = false);
1423   void cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands);
1424 
1425   OperandMatchResultTy parseDim(OperandVector &Operands);
1426   OperandMatchResultTy parseDPP8(OperandVector &Operands);
1427   OperandMatchResultTy parseDPPCtrl(OperandVector &Operands);
1428   AMDGPUOperand::Ptr defaultRowMask() const;
1429   AMDGPUOperand::Ptr defaultBankMask() const;
1430   AMDGPUOperand::Ptr defaultBoundCtrl() const;
1431   AMDGPUOperand::Ptr defaultFI() const;
1432   void cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8 = false);
1433   void cvtDPP8(MCInst &Inst, const OperandVector &Operands) { cvtDPP(Inst, Operands, true); }
1434 
1435   OperandMatchResultTy parseSDWASel(OperandVector &Operands, StringRef Prefix,
1436                                     AMDGPUOperand::ImmTy Type);
1437   OperandMatchResultTy parseSDWADstUnused(OperandVector &Operands);
1438   void cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands);
1439   void cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands);
1440   void cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands);
1441   void cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands);
1442   void cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands);
1443   void cvtSDWA(MCInst &Inst, const OperandVector &Operands,
1444                uint64_t BasicInstType,
1445                bool SkipDstVcc = false,
1446                bool SkipSrcVcc = false);
1447 
1448   AMDGPUOperand::Ptr defaultBLGP() const;
1449   AMDGPUOperand::Ptr defaultCBSZ() const;
1450   AMDGPUOperand::Ptr defaultABID() const;
1451 
1452   OperandMatchResultTy parseEndpgmOp(OperandVector &Operands);
1453   AMDGPUOperand::Ptr defaultEndpgmImmOperands() const;
1454 };
1455 
1456 struct OptionalOperand {
1457   const char *Name;
1458   AMDGPUOperand::ImmTy Type;
1459   bool IsBit;
1460   bool (*ConvertResult)(int64_t&);
1461 };
1462 
1463 } // end anonymous namespace
1464 
1465 // May be called with an integer type of equivalent bitwidth.
1466 static const fltSemantics *getFltSemantics(unsigned Size) {
1467   switch (Size) {
1468   case 4:
1469     return &APFloat::IEEEsingle();
1470   case 8:
1471     return &APFloat::IEEEdouble();
1472   case 2:
1473     return &APFloat::IEEEhalf();
1474   default:
1475     llvm_unreachable("unsupported fp type");
1476   }
1477 }
1478 
1479 static const fltSemantics *getFltSemantics(MVT VT) {
1480   return getFltSemantics(VT.getSizeInBits() / 8);
1481 }
1482 
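// Map an operand type (OPERAND_* value from SIDefines.h) to the FP semantics
// used when checking or encoding a literal for that operand.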
1483 static const fltSemantics *getOpFltSemantics(uint8_t OperandType) {
1484   switch (OperandType) {
1485   case AMDGPU::OPERAND_REG_IMM_INT32:
1486   case AMDGPU::OPERAND_REG_IMM_FP32:
1487   case AMDGPU::OPERAND_REG_INLINE_C_INT32:
1488   case AMDGPU::OPERAND_REG_INLINE_C_FP32:
1489   case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
1490   case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
1491     return &APFloat::IEEEsingle();
1492   case AMDGPU::OPERAND_REG_IMM_INT64:
1493   case AMDGPU::OPERAND_REG_IMM_FP64:
1494   case AMDGPU::OPERAND_REG_INLINE_C_INT64:
1495   case AMDGPU::OPERAND_REG_INLINE_C_FP64:
1496     return &APFloat::IEEEdouble();
1497   case AMDGPU::OPERAND_REG_IMM_INT16:
1498   case AMDGPU::OPERAND_REG_IMM_FP16:
1499   case AMDGPU::OPERAND_REG_INLINE_C_INT16:
1500   case AMDGPU::OPERAND_REG_INLINE_C_FP16:
1501   case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
1502   case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
1503   case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
1504   case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
1505   case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
1506   case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
1507   case AMDGPU::OPERAND_REG_IMM_V2INT16:
1508   case AMDGPU::OPERAND_REG_IMM_V2FP16:
1509     return &APFloat::IEEEhalf();
1510   default:
1511     llvm_unreachable("unsupported fp type");
1512   }
1513 }
1514 
1515 //===----------------------------------------------------------------------===//
1516 // Operand
1517 //===----------------------------------------------------------------------===//
1518 
1519 static bool canLosslesslyConvertToFPType(APFloat &FPLiteral, MVT VT) {
1520   bool Lost;
1521 
1522   // Convert the literal to the floating-point type of the operand (VT).
1523   APFloat::opStatus Status = FPLiteral.convert(*getFltSemantics(VT),
1524                                                APFloat::rmNearestTiesToEven,
1525                                                &Lost);
1526   // We allow precision loss, but not overflow or underflow.
1527   if (Status != APFloat::opOK &&
1528       Lost &&
1529       ((Status & APFloat::opOverflow)  != 0 ||
1530        (Status & APFloat::opUnderflow) != 0)) {
1531     return false;
1532   }
1533 
1534   return true;
1535 }
1536 
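// Returns true if Val fits in Size bits as either an unsigned or a signed
// integer, i.e. truncating it to Size bits loses no information.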
1537 static bool isSafeTruncation(int64_t Val, unsigned Size) {
1538   return isUIntN(Size, Val) || isIntN(Size, Val);
1539 }
1540 
1541 bool AMDGPUOperand::isInlinableImm(MVT type) const {
1542 
1543   // This is a hack to enable named inline values like
1544   // shared_base with both 32-bit and 64-bit operands.
1545   // Note that these values are defined as
1546   // 32-bit operands only.
1547   if (isInlineValue()) {
1548     return true;
1549   }
1550 
1551   if (!isImmTy(ImmTyNone)) {
1552     // Only plain immediates are inlinable (e.g. "clamp" attribute is not)
1553     return false;
1554   }
1555   // TODO: We should avoid using host float here. It would be better to
1556   // check the float bit values which is what a few other places do.
1557   // We've had bot failures before due to weird NaN support on mips hosts.
1558 
1559   APInt Literal(64, Imm.Val);
1560 
1561   if (Imm.IsFPImm) { // We got fp literal token
1562     if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
1563       return AMDGPU::isInlinableLiteral64(Imm.Val,
1564                                           AsmParser->hasInv2PiInlineImm());
1565     }
1566 
1567     APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
1568     if (!canLosslesslyConvertToFPType(FPLiteral, type))
1569       return false;
1570 
1571     if (type.getScalarSizeInBits() == 16) {
1572       return AMDGPU::isInlinableLiteral16(
1573         static_cast<int16_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
1574         AsmParser->hasInv2PiInlineImm());
1575     }
1576 
1577     // Check if single precision literal is inlinable
1578     return AMDGPU::isInlinableLiteral32(
1579       static_cast<int32_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
1580       AsmParser->hasInv2PiInlineImm());
1581   }
1582 
1583   // We got int literal token.
1584   if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
1585     return AMDGPU::isInlinableLiteral64(Imm.Val,
1586                                         AsmParser->hasInv2PiInlineImm());
1587   }
1588 
1589   if (!isSafeTruncation(Imm.Val, type.getScalarSizeInBits())) {
1590     return false;
1591   }
1592 
1593   if (type.getScalarSizeInBits() == 16) {
1594     return AMDGPU::isInlinableLiteral16(
1595       static_cast<int16_t>(Literal.getLoBits(16).getSExtValue()),
1596       AsmParser->hasInv2PiInlineImm());
1597   }
1598 
1599   return AMDGPU::isInlinableLiteral32(
1600     static_cast<int32_t>(Literal.getLoBits(32).getZExtValue()),
1601     AsmParser->hasInv2PiInlineImm());
1602 }
1603 
1604 bool AMDGPUOperand::isLiteralImm(MVT type) const {
1605   // Check that this immediate can be added as literal
1606   if (!isImmTy(ImmTyNone)) {
1607     return false;
1608   }
1609 
1610   if (!Imm.IsFPImm) {
1611     // We got int literal token.
1612 
1613     if (type == MVT::f64 && hasFPModifiers()) {
1614       // FP modifiers cannot be applied to int literals while preserving the same
1615       // semantics for VOP1/2/C and VOP3 because of integer truncation. To avoid
1616       // ambiguity, disallow these cases.
1617       return false;
1618     }
1619 
1620     unsigned Size = type.getSizeInBits();
1621     if (Size == 64)
1622       Size = 32;
1623 
1624     // FIXME: 64-bit operands can zero extend, sign extend, or pad zeroes for FP
1625     // types.
1626     return isSafeTruncation(Imm.Val, Size);
1627   }
1628 
1629   // We got fp literal token
1630   if (type == MVT::f64) { // Expected 64-bit fp operand
1631     // The low 32 bits of the literal will be set to zeroes, but such literals are accepted
1632     return true;
1633   }
1634 
1635   if (type == MVT::i64) { // Expected 64-bit int operand
1636     // We don't allow fp literals in 64-bit integer instructions. It is
1637     // unclear how we should encode them.
1638     return false;
1639   }
1640 
1641   // We allow fp literals with f16x2 operands assuming that the specified
1642   // literal goes into the lower half and the upper half is zero. We also
1643   // require that the literal may be losslessly converted to f16.
1644   MVT ExpectedType = (type == MVT::v2f16)? MVT::f16 :
1645                      (type == MVT::v2i16)? MVT::i16 : type;
1646 
1647   APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
1648   return canLosslesslyConvertToFPType(FPLiteral, ExpectedType);
1649 }
1650 
1651 bool AMDGPUOperand::isRegClass(unsigned RCID) const {
1652   return isRegKind() && AsmParser->getMRI()->getRegClass(RCID).contains(getReg());
1653 }
1654 
1655 bool AMDGPUOperand::isSDWAOperand(MVT type) const {
1656   if (AsmParser->isVI())
1657     return isVReg32();
1658   else if (AsmParser->isGFX9() || AsmParser->isGFX10())
1659     return isRegClass(AMDGPU::VS_32RegClassID) || isInlinableImm(type);
1660   else
1661     return false;
1662 }
1663 
1664 bool AMDGPUOperand::isSDWAFP16Operand() const {
1665   return isSDWAOperand(MVT::f16);
1666 }
1667 
1668 bool AMDGPUOperand::isSDWAFP32Operand() const {
1669   return isSDWAOperand(MVT::f32);
1670 }
1671 
1672 bool AMDGPUOperand::isSDWAInt16Operand() const {
1673   return isSDWAOperand(MVT::i16);
1674 }
1675 
1676 bool AMDGPUOperand::isSDWAInt32Operand() const {
1677   return isSDWAOperand(MVT::i32);
1678 }
1679 
1680 bool AMDGPUOperand::isBoolReg() const {
1681   return (AsmParser->getFeatureBits()[AMDGPU::FeatureWavefrontSize64] && isSCSrcB64()) ||
1682          (AsmParser->getFeatureBits()[AMDGPU::FeatureWavefrontSize32] && isSCSrcB32());
1683 }
1684 
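// Apply "abs"/"neg" input modifiers directly to the raw bits of an FP
// immediate: "abs" clears the sign bit, "neg" flips it.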
1685 uint64_t AMDGPUOperand::applyInputFPModifiers(uint64_t Val, unsigned Size) const
1686 {
1687   assert(isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
1688   assert(Size == 2 || Size == 4 || Size == 8);
1689 
1690   const uint64_t FpSignMask = (1ULL << (Size * 8 - 1));
1691 
1692   if (Imm.Mods.Abs) {
1693     Val &= ~FpSignMask;
1694   }
1695   if (Imm.Mods.Neg) {
1696     Val ^= FpSignMask;
1697   }
1698 
1699   return Val;
1700 }
1701 
1702 void AMDGPUOperand::addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers) const {
1703   if (AMDGPU::isSISrcOperand(AsmParser->getMII()->get(Inst.getOpcode()),
1704                              Inst.getNumOperands())) {
1705     addLiteralImmOperand(Inst, Imm.Val,
1706                          ApplyModifiers &&
1707                          isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
1708   } else {
1709     assert(!isImmTy(ImmTyNone) || !hasModifiers());
1710     Inst.addOperand(MCOperand::createImm(Imm.Val));
1711   }
1712 }
1713 
1714 void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const {
1715   const auto& InstDesc = AsmParser->getMII()->get(Inst.getOpcode());
1716   auto OpNum = Inst.getNumOperands();
1717   // Check that this operand accepts literals
1718   assert(AMDGPU::isSISrcOperand(InstDesc, OpNum));
1719 
1720   if (ApplyModifiers) {
1721     assert(AMDGPU::isSISrcFPOperand(InstDesc, OpNum));
1722     const unsigned Size = Imm.IsFPImm ? sizeof(double) : getOperandSize(InstDesc, OpNum);
1723     Val = applyInputFPModifiers(Val, Size);
1724   }
1725 
1726   APInt Literal(64, Val);
1727   uint8_t OpTy = InstDesc.OpInfo[OpNum].OperandType;
1728 
1729   if (Imm.IsFPImm) { // We got fp literal token
1730     switch (OpTy) {
1731     case AMDGPU::OPERAND_REG_IMM_INT64:
1732     case AMDGPU::OPERAND_REG_IMM_FP64:
1733     case AMDGPU::OPERAND_REG_INLINE_C_INT64:
1734     case AMDGPU::OPERAND_REG_INLINE_C_FP64:
1735       if (AMDGPU::isInlinableLiteral64(Literal.getZExtValue(),
1736                                        AsmParser->hasInv2PiInlineImm())) {
1737         Inst.addOperand(MCOperand::createImm(Literal.getZExtValue()));
1738         return;
1739       }
1740 
1741       // Non-inlineable
1742       if (AMDGPU::isSISrcFPOperand(InstDesc, OpNum)) { // Expected 64-bit fp operand
1743         // For fp operands we check if low 32 bits are zeros
1744         if (Literal.getLoBits(32) != 0) {
1745           const_cast<AMDGPUAsmParser *>(AsmParser)->Warning(Inst.getLoc(),
1746           "Can't encode literal as exact 64-bit floating-point operand. "
1747           "Low 32-bits will be set to zero");
1748         }
1749 
1750         Inst.addOperand(MCOperand::createImm(Literal.lshr(32).getZExtValue()));
1751         return;
1752       }
1753 
1754       // We don't allow fp literals in 64-bit integer instructions. It is
1755       // unclear how we should encode them. This case should be checked earlier
1756       // in predicate methods (isLiteralImm())
1757       llvm_unreachable("fp literal in 64-bit integer instruction.");
1758 
1759     case AMDGPU::OPERAND_REG_IMM_INT32:
1760     case AMDGPU::OPERAND_REG_IMM_FP32:
1761     case AMDGPU::OPERAND_REG_INLINE_C_INT32:
1762     case AMDGPU::OPERAND_REG_INLINE_C_FP32:
1763     case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
1764     case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
1765     case AMDGPU::OPERAND_REG_IMM_INT16:
1766     case AMDGPU::OPERAND_REG_IMM_FP16:
1767     case AMDGPU::OPERAND_REG_INLINE_C_INT16:
1768     case AMDGPU::OPERAND_REG_INLINE_C_FP16:
1769     case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
1770     case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
1771     case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
1772     case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
1773     case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
1774     case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
1775     case AMDGPU::OPERAND_REG_IMM_V2INT16:
1776     case AMDGPU::OPERAND_REG_IMM_V2FP16: {
1777       bool lost;
1778       APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
1779       // Convert literal to single precision
1780       FPLiteral.convert(*getOpFltSemantics(OpTy),
1781                         APFloat::rmNearestTiesToEven, &lost);
1782       // We allow precision loss but not overflow or underflow. This should be
1783       // checked earlier in isLiteralImm()
1784 
1785       uint64_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue();
1786       Inst.addOperand(MCOperand::createImm(ImmVal));
1787       return;
1788     }
1789     default:
1790       llvm_unreachable("invalid operand size");
1791     }
1792 
1793     return;
1794   }
1795 
1796   // We got int literal token.
1797   // Only sign extend inline immediates.
1798   switch (OpTy) {
1799   case AMDGPU::OPERAND_REG_IMM_INT32:
1800   case AMDGPU::OPERAND_REG_IMM_FP32:
1801   case AMDGPU::OPERAND_REG_INLINE_C_INT32:
1802   case AMDGPU::OPERAND_REG_INLINE_C_FP32:
1803   case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
1804   case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
1805   case AMDGPU::OPERAND_REG_IMM_V2INT16:
1806   case AMDGPU::OPERAND_REG_IMM_V2FP16:
1807     if (isSafeTruncation(Val, 32) &&
1808         AMDGPU::isInlinableLiteral32(static_cast<int32_t>(Val),
1809                                      AsmParser->hasInv2PiInlineImm())) {
1810       Inst.addOperand(MCOperand::createImm(Val));
1811       return;
1812     }
1813 
1814     Inst.addOperand(MCOperand::createImm(Val & 0xffffffff));
1815     return;
1816 
1817   case AMDGPU::OPERAND_REG_IMM_INT64:
1818   case AMDGPU::OPERAND_REG_IMM_FP64:
1819   case AMDGPU::OPERAND_REG_INLINE_C_INT64:
1820   case AMDGPU::OPERAND_REG_INLINE_C_FP64:
1821     if (AMDGPU::isInlinableLiteral64(Val, AsmParser->hasInv2PiInlineImm())) {
1822       Inst.addOperand(MCOperand::createImm(Val));
1823       return;
1824     }
1825 
1826     Inst.addOperand(MCOperand::createImm(Lo_32(Val)));
1827     return;
1828 
1829   case AMDGPU::OPERAND_REG_IMM_INT16:
1830   case AMDGPU::OPERAND_REG_IMM_FP16:
1831   case AMDGPU::OPERAND_REG_INLINE_C_INT16:
1832   case AMDGPU::OPERAND_REG_INLINE_C_FP16:
1833   case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
1834   case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
1835     if (isSafeTruncation(Val, 16) &&
1836         AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val),
1837                                      AsmParser->hasInv2PiInlineImm())) {
1838       Inst.addOperand(MCOperand::createImm(Val));
1839       return;
1840     }
1841 
1842     Inst.addOperand(MCOperand::createImm(Val & 0xffff));
1843     return;
1844 
1845   case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
1846   case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
1847   case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
1848   case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: {
1849     assert(isSafeTruncation(Val, 16));
1850     assert(AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val),
1851                                         AsmParser->hasInv2PiInlineImm()));
1852 
1853     Inst.addOperand(MCOperand::createImm(Val));
1854     return;
1855   }
1856   default:
1857     llvm_unreachable("invalid operand size");
1858   }
1859 }
1860 
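// Add a KImm (32/16-bit literal constant) operand. Integer tokens are
// truncated to the requested bit width; FP tokens are converted to the
// matching FP format and encoded as raw bits.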
1861 template <unsigned Bitwidth>
1862 void AMDGPUOperand::addKImmFPOperands(MCInst &Inst, unsigned N) const {
1863   APInt Literal(64, Imm.Val);
1864 
1865   if (!Imm.IsFPImm) {
1866     // We got int literal token.
1867     Inst.addOperand(MCOperand::createImm(Literal.getLoBits(Bitwidth).getZExtValue()));
1868     return;
1869   }
1870 
1871   bool Lost;
1872   APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
1873   FPLiteral.convert(*getFltSemantics(Bitwidth / 8),
1874                     APFloat::rmNearestTiesToEven, &Lost);
1875   Inst.addOperand(MCOperand::createImm(FPLiteral.bitcastToAPInt().getZExtValue()));
1876 }
1877 
1878 void AMDGPUOperand::addRegOperands(MCInst &Inst, unsigned N) const {
1879   Inst.addOperand(MCOperand::createReg(AMDGPU::getMCReg(getReg(), AsmParser->getSTI())));
1880 }
1881 
1882 static bool isInlineValue(unsigned Reg) {
1883   switch (Reg) {
1884   case AMDGPU::SRC_SHARED_BASE:
1885   case AMDGPU::SRC_SHARED_LIMIT:
1886   case AMDGPU::SRC_PRIVATE_BASE:
1887   case AMDGPU::SRC_PRIVATE_LIMIT:
1888   case AMDGPU::SRC_POPS_EXITING_WAVE_ID:
1889     return true;
1890   case AMDGPU::SRC_VCCZ:
1891   case AMDGPU::SRC_EXECZ:
1892   case AMDGPU::SRC_SCC:
1893     return true;
1894   case AMDGPU::SGPR_NULL:
1895     return true;
1896   default:
1897     return false;
1898   }
1899 }
1900 
1901 bool AMDGPUOperand::isInlineValue() const {
1902   return isRegKind() && ::isInlineValue(getReg());
1903 }
1904 
1905 //===----------------------------------------------------------------------===//
1906 // AsmParser
1907 //===----------------------------------------------------------------------===//
1908 
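// Map a register kind and width (in 32-bit registers) to the corresponding
// register class ID, or -1 if no such class exists.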
1909 static int getRegClass(RegisterKind Is, unsigned RegWidth) {
1910   if (Is == IS_VGPR) {
1911     switch (RegWidth) {
1912       default: return -1;
1913       case 1: return AMDGPU::VGPR_32RegClassID;
1914       case 2: return AMDGPU::VReg_64RegClassID;
1915       case 3: return AMDGPU::VReg_96RegClassID;
1916       case 4: return AMDGPU::VReg_128RegClassID;
1917       case 5: return AMDGPU::VReg_160RegClassID;
1918       case 8: return AMDGPU::VReg_256RegClassID;
1919       case 16: return AMDGPU::VReg_512RegClassID;
1920       case 32: return AMDGPU::VReg_1024RegClassID;
1921     }
1922   } else if (Is == IS_TTMP) {
1923     switch (RegWidth) {
1924       default: return -1;
1925       case 1: return AMDGPU::TTMP_32RegClassID;
1926       case 2: return AMDGPU::TTMP_64RegClassID;
1927       case 4: return AMDGPU::TTMP_128RegClassID;
1928       case 8: return AMDGPU::TTMP_256RegClassID;
1929       case 16: return AMDGPU::TTMP_512RegClassID;
1930     }
1931   } else if (Is == IS_SGPR) {
1932     switch (RegWidth) {
1933       default: return -1;
1934       case 1: return AMDGPU::SGPR_32RegClassID;
1935       case 2: return AMDGPU::SGPR_64RegClassID;
1936       case 4: return AMDGPU::SGPR_128RegClassID;
1937       case 8: return AMDGPU::SGPR_256RegClassID;
1938       case 16: return AMDGPU::SGPR_512RegClassID;
1939     }
1940   } else if (Is == IS_AGPR) {
1941     switch (RegWidth) {
1942       default: return -1;
1943       case 1: return AMDGPU::AGPR_32RegClassID;
1944       case 2: return AMDGPU::AReg_64RegClassID;
1945       case 4: return AMDGPU::AReg_128RegClassID;
1946       case 16: return AMDGPU::AReg_512RegClassID;
1947       case 32: return AMDGPU::AReg_1024RegClassID;
1948     }
1949   }
1950   return -1;
1951 }
1952 
1953 static unsigned getSpecialRegForName(StringRef RegName) {
1954   return StringSwitch<unsigned>(RegName)
1955     .Case("exec", AMDGPU::EXEC)
1956     .Case("vcc", AMDGPU::VCC)
1957     .Case("flat_scratch", AMDGPU::FLAT_SCR)
1958     .Case("xnack_mask", AMDGPU::XNACK_MASK)
1959     .Case("shared_base", AMDGPU::SRC_SHARED_BASE)
1960     .Case("src_shared_base", AMDGPU::SRC_SHARED_BASE)
1961     .Case("shared_limit", AMDGPU::SRC_SHARED_LIMIT)
1962     .Case("src_shared_limit", AMDGPU::SRC_SHARED_LIMIT)
1963     .Case("private_base", AMDGPU::SRC_PRIVATE_BASE)
1964     .Case("src_private_base", AMDGPU::SRC_PRIVATE_BASE)
1965     .Case("private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
1966     .Case("src_private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
1967     .Case("pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
1968     .Case("src_pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
1969     .Case("lds_direct", AMDGPU::LDS_DIRECT)
1970     .Case("src_lds_direct", AMDGPU::LDS_DIRECT)
1971     .Case("m0", AMDGPU::M0)
1972     .Case("vccz", AMDGPU::SRC_VCCZ)
1973     .Case("src_vccz", AMDGPU::SRC_VCCZ)
1974     .Case("execz", AMDGPU::SRC_EXECZ)
1975     .Case("src_execz", AMDGPU::SRC_EXECZ)
1976     .Case("scc", AMDGPU::SRC_SCC)
1977     .Case("src_scc", AMDGPU::SRC_SCC)
1978     .Case("tba", AMDGPU::TBA)
1979     .Case("tma", AMDGPU::TMA)
1980     .Case("flat_scratch_lo", AMDGPU::FLAT_SCR_LO)
1981     .Case("flat_scratch_hi", AMDGPU::FLAT_SCR_HI)
1982     .Case("xnack_mask_lo", AMDGPU::XNACK_MASK_LO)
1983     .Case("xnack_mask_hi", AMDGPU::XNACK_MASK_HI)
1984     .Case("vcc_lo", AMDGPU::VCC_LO)
1985     .Case("vcc_hi", AMDGPU::VCC_HI)
1986     .Case("exec_lo", AMDGPU::EXEC_LO)
1987     .Case("exec_hi", AMDGPU::EXEC_HI)
1988     .Case("tma_lo", AMDGPU::TMA_LO)
1989     .Case("tma_hi", AMDGPU::TMA_HI)
1990     .Case("tba_lo", AMDGPU::TBA_LO)
1991     .Case("tba_hi", AMDGPU::TBA_HI)
1992     .Case("pc", AMDGPU::PC_REG)
1993     .Case("null", AMDGPU::SGPR_NULL)
1994     .Default(AMDGPU::NoRegister);
1995 }
1996 
1997 bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
1998                                     SMLoc &EndLoc, bool RestoreOnFailure) {
1999   auto R = parseRegister();
2000   if (!R) return true;
2001   assert(R->isReg());
2002   RegNo = R->getReg();
2003   StartLoc = R->getStartLoc();
2004   EndLoc = R->getEndLoc();
2005   return false;
2006 }
2007 
2008 bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
2009                                     SMLoc &EndLoc) {
2010   return ParseRegister(RegNo, StartLoc, EndLoc, /*RestoreOnFailure=*/false);
2011 }
2012 
2013 OperandMatchResultTy AMDGPUAsmParser::tryParseRegister(unsigned &RegNo,
2014                                                        SMLoc &StartLoc,
2015                                                        SMLoc &EndLoc) {
2016   bool Result =
2017       ParseRegister(RegNo, StartLoc, EndLoc, /*RestoreOnFailure=*/true);
2018   bool PendingErrors = getParser().hasPendingError();
2019   getParser().clearPendingErrors();
2020   if (PendingErrors)
2021     return MatchOperand_ParseFail;
2022   if (Result)
2023     return MatchOperand_NoMatch;
2024   return MatchOperand_Success;
2025 }
2026 
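// Try to extend a register list ending with (Reg, RegWidth) by Reg1.
// Special registers are merged from known LO/HI pairs; regular registers
// must be consecutive.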
2027 bool AMDGPUAsmParser::AddNextRegisterToList(unsigned &Reg, unsigned &RegWidth,
2028                                             RegisterKind RegKind, unsigned Reg1) {
2029   switch (RegKind) {
2030   case IS_SPECIAL:
2031     if (Reg == AMDGPU::EXEC_LO && Reg1 == AMDGPU::EXEC_HI) {
2032       Reg = AMDGPU::EXEC;
2033       RegWidth = 2;
2034       return true;
2035     }
2036     if (Reg == AMDGPU::FLAT_SCR_LO && Reg1 == AMDGPU::FLAT_SCR_HI) {
2037       Reg = AMDGPU::FLAT_SCR;
2038       RegWidth = 2;
2039       return true;
2040     }
2041     if (Reg == AMDGPU::XNACK_MASK_LO && Reg1 == AMDGPU::XNACK_MASK_HI) {
2042       Reg = AMDGPU::XNACK_MASK;
2043       RegWidth = 2;
2044       return true;
2045     }
2046     if (Reg == AMDGPU::VCC_LO && Reg1 == AMDGPU::VCC_HI) {
2047       Reg = AMDGPU::VCC;
2048       RegWidth = 2;
2049       return true;
2050     }
2051     if (Reg == AMDGPU::TBA_LO && Reg1 == AMDGPU::TBA_HI) {
2052       Reg = AMDGPU::TBA;
2053       RegWidth = 2;
2054       return true;
2055     }
2056     if (Reg == AMDGPU::TMA_LO && Reg1 == AMDGPU::TMA_HI) {
2057       Reg = AMDGPU::TMA;
2058       RegWidth = 2;
2059       return true;
2060     }
2061     return false;
2062   case IS_VGPR:
2063   case IS_SGPR:
2064   case IS_AGPR:
2065   case IS_TTMP:
2066     if (Reg1 != Reg + RegWidth) {
2067       return false;
2068     }
2069     RegWidth++;
2070     return true;
2071   default:
2072     llvm_unreachable("unexpected register kind");
2073   }
2074 }
2075 
2076 struct RegInfo {
2077   StringLiteral Name;
2078   RegisterKind Kind;
2079 };
2080 
2081 static constexpr RegInfo RegularRegisters[] = {
2082   {{"v"},    IS_VGPR},
2083   {{"s"},    IS_SGPR},
2084   {{"ttmp"}, IS_TTMP},
2085   {{"acc"},  IS_AGPR},
2086   {{"a"},    IS_AGPR},
2087 };
2088 
2089 static bool isRegularReg(RegisterKind Kind) {
2090   return Kind == IS_VGPR ||
2091          Kind == IS_SGPR ||
2092          Kind == IS_TTMP ||
2093          Kind == IS_AGPR;
2094 }
2095 
2096 static const RegInfo* getRegularRegInfo(StringRef Str) {
2097   for (const RegInfo &Reg : RegularRegisters)
2098     if (Str.startswith(Reg.Name))
2099       return &Reg;
2100   return nullptr;
2101 }
2102 
2103 static bool getRegNum(StringRef Str, unsigned& Num) {
2104   return !Str.getAsInteger(10, Num);
2105 }
2106 
2107 bool
2108 AMDGPUAsmParser::isRegister(const AsmToken &Token,
2109                             const AsmToken &NextToken) const {
2110 
2111   // A list of consecutive registers: [s0,s1,s2,s3]
2112   if (Token.is(AsmToken::LBrac))
2113     return true;
2114 
2115   if (!Token.is(AsmToken::Identifier))
2116     return false;
2117 
2118   // A single register like s0 or a range of registers like s[0:1]
2119 
2120   StringRef Str = Token.getString();
2121   const RegInfo *Reg = getRegularRegInfo(Str);
2122   if (Reg) {
2123     StringRef RegName = Reg->Name;
2124     StringRef RegSuffix = Str.substr(RegName.size());
2125     if (!RegSuffix.empty()) {
2126       unsigned Num;
2127       // A single register with an index: rXX
2128       if (getRegNum(RegSuffix, Num))
2129         return true;
2130     } else {
2131       // A range of registers: r[XX:YY].
2132       if (NextToken.is(AsmToken::LBrac))
2133         return true;
2134     }
2135   }
2136 
2137   return getSpecialRegForName(Str) != AMDGPU::NoRegister;
2138 }
2139 
2140 bool
2141 AMDGPUAsmParser::isRegister()
2142 {
2143   return isRegister(getToken(), peekToken());
2144 }
2145 
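// Convert a regular register kind, starting index and width into an MC
// register, or NoRegister if the combination is invalid (e.g. misaligned
// SGPR/TTMP tuples or an out-of-range index).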
2146 unsigned
2147 AMDGPUAsmParser::getRegularReg(RegisterKind RegKind,
2148                                unsigned RegNum,
2149                                unsigned RegWidth) {
2150 
2151   assert(isRegularReg(RegKind));
2152 
2153   unsigned AlignSize = 1;
2154   if (RegKind == IS_SGPR || RegKind == IS_TTMP) {
2155     // SGPR and TTMP registers must be aligned.
2156     // Max required alignment is 4 dwords.
2157     AlignSize = std::min(RegWidth, 4u);
2158   }
2159 
2160   if (RegNum % AlignSize != 0)
2161     return AMDGPU::NoRegister;
2162 
2163   unsigned RegIdx = RegNum / AlignSize;
2164   int RCID = getRegClass(RegKind, RegWidth);
2165   if (RCID == -1)
2166     return AMDGPU::NoRegister;
2167 
2168   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
2169   const MCRegisterClass RC = TRI->getRegClass(RCID);
2170   if (RegIdx >= RC.getNumRegs())
2171     return AMDGPU::NoRegister;
2172 
2173   return RC.getRegister(RegIdx);
2174 }
2175 
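// Parse a bracketed register index or range: "[XX]" or "[XX:YY]".
// On success, Num holds the first index and Width the number of registers.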
2176 bool
2177 AMDGPUAsmParser::ParseRegRange(unsigned& Num, unsigned& Width) {
2178   int64_t RegLo, RegHi;
2179   if (!trySkipToken(AsmToken::LBrac))
2180     return false;
2181 
2182   if (!parseExpr(RegLo))
2183     return false;
2184 
2185   if (trySkipToken(AsmToken::Colon)) {
2186     if (!parseExpr(RegHi))
2187       return false;
2188   } else {
2189     RegHi = RegLo;
2190   }
2191 
2192   if (!trySkipToken(AsmToken::RBrac))
2193     return false;
2194 
2195   if (!isUInt<32>(RegLo) || !isUInt<32>(RegHi) || RegLo > RegHi)
2196     return false;
2197 
2198   Num = static_cast<unsigned>(RegLo);
2199   Width = (RegHi - RegLo) + 1;
2200   return true;
2201 }
2202 
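// Parse a named special register such as "vcc" or "exec". The consumed
// token is recorded in Tokens so it can be unlexed on failure.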
2203 unsigned AMDGPUAsmParser::ParseSpecialReg(RegisterKind &RegKind,
2204                                           unsigned &RegNum, unsigned &RegWidth,
2205                                           SmallVectorImpl<AsmToken> &Tokens) {
2206   assert(isToken(AsmToken::Identifier));
2207   unsigned Reg = getSpecialRegForName(getTokenStr());
2208   if (Reg) {
2209     RegNum = 0;
2210     RegWidth = 1;
2211     RegKind = IS_SPECIAL;
2212     Tokens.push_back(getToken());
2213     lex(); // skip register name
2214   }
2215   return Reg;
2216 }
2217 
2218 unsigned AMDGPUAsmParser::ParseRegularReg(RegisterKind &RegKind,
2219                                           unsigned &RegNum, unsigned &RegWidth,
2220                                           SmallVectorImpl<AsmToken> &Tokens) {
2221   assert(isToken(AsmToken::Identifier));
2222   StringRef RegName = getTokenStr();
2223 
2224   const RegInfo *RI = getRegularRegInfo(RegName);
2225   if (!RI)
2226     return AMDGPU::NoRegister;
2227   Tokens.push_back(getToken());
2228   lex(); // skip register name
2229 
2230   RegKind = RI->Kind;
2231   StringRef RegSuffix = RegName.substr(RI->Name.size());
2232   if (!RegSuffix.empty()) {
2233     // Single 32-bit register: vXX.
2234     if (!getRegNum(RegSuffix, RegNum))
2235       return AMDGPU::NoRegister;
2236     RegWidth = 1;
2237   } else {
2238     // Range of registers: v[XX:YY]. ":YY" is optional.
2239     if (!ParseRegRange(RegNum, RegWidth))
2240       return AMDGPU::NoRegister;
2241   }
2242 
2243   return getRegularReg(RegKind, RegNum, RegWidth);
2244 }
2245 
2246 unsigned AMDGPUAsmParser::ParseRegList(RegisterKind &RegKind, unsigned &RegNum,
2247                                        unsigned &RegWidth,
2248                                        SmallVectorImpl<AsmToken> &Tokens) {
2249   unsigned Reg = AMDGPU::NoRegister;
2250 
2251   if (!trySkipToken(AsmToken::LBrac))
2252     return AMDGPU::NoRegister;
2253 
2254   // List of consecutive registers, e.g.: [s0,s1,s2,s3]
2255 
2256   if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth))
2257     return AMDGPU::NoRegister;
2258   if (RegWidth != 1)
2259     return AMDGPU::NoRegister;
2260 
2261   for (; trySkipToken(AsmToken::Comma); ) {
2262     RegisterKind NextRegKind;
2263     unsigned NextReg, NextRegNum, NextRegWidth;
2264 
2265     if (!ParseAMDGPURegister(NextRegKind, NextReg, NextRegNum, NextRegWidth,
2266                              Tokens))
2267       return AMDGPU::NoRegister;
2268     if (NextRegWidth != 1)
2269       return AMDGPU::NoRegister;
2270     if (NextRegKind != RegKind)
2271       return AMDGPU::NoRegister;
2272     if (!AddNextRegisterToList(Reg, RegWidth, RegKind, NextReg))
2273       return AMDGPU::NoRegister;
2274   }
2275 
2276   if (!trySkipToken(AsmToken::RBrac))
2277     return AMDGPU::NoRegister;
2278 
2279   if (isRegularReg(RegKind))
2280     Reg = getRegularReg(RegKind, RegNum, RegWidth);
2281 
2282   return Reg;
2283 }
2284 
2285 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
2286                                           unsigned &RegNum, unsigned &RegWidth,
2287                                           SmallVectorImpl<AsmToken> &Tokens) {
2288   Reg = AMDGPU::NoRegister;
2289 
2290   if (isToken(AsmToken::Identifier)) {
2291     Reg = ParseSpecialReg(RegKind, RegNum, RegWidth, Tokens);
2292     if (Reg == AMDGPU::NoRegister)
2293       Reg = ParseRegularReg(RegKind, RegNum, RegWidth, Tokens);
2294   } else {
2295     Reg = ParseRegList(RegKind, RegNum, RegWidth, Tokens);
2296   }
2297 
2298   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
2299   return Reg != AMDGPU::NoRegister && subtargetHasRegister(*TRI, Reg);
2300 }
2301 
2302 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
2303                                           unsigned &RegNum, unsigned &RegWidth,
2304                                           bool RestoreOnFailure) {
2305   Reg = AMDGPU::NoRegister;
2306 
2307   SmallVector<AsmToken, 1> Tokens;
2308   if (ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, Tokens)) {
2309     if (RestoreOnFailure) {
2310       while (!Tokens.empty()) {
2311         getLexer().UnLex(Tokens.pop_back_val());
2312       }
2313     }
2314     return true;
2315   }
2316   return false;
2317 }
2318 
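// Return the name of the assembler symbol used to track the next free
// register index for the given register kind, if any.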
2319 Optional<StringRef>
2320 AMDGPUAsmParser::getGprCountSymbolName(RegisterKind RegKind) {
2321   switch (RegKind) {
2322   case IS_VGPR:
2323     return StringRef(".amdgcn.next_free_vgpr");
2324   case IS_SGPR:
2325     return StringRef(".amdgcn.next_free_sgpr");
2326   default:
2327     return None;
2328   }
2329 }
2330 
2331 void AMDGPUAsmParser::initializeGprCountSymbol(RegisterKind RegKind) {
2332   auto SymbolName = getGprCountSymbolName(RegKind);
2333   assert(SymbolName && "initializing invalid register kind");
2334   MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);
2335   Sym->setVariableValue(MCConstantExpr::create(0, getContext()));
2336 }
2337 
2338 bool AMDGPUAsmParser::updateGprCountSymbols(RegisterKind RegKind,
2339                                             unsigned DwordRegIndex,
2340                                             unsigned RegWidth) {
2341   // Symbols are only defined for GCN targets
2342   if (AMDGPU::getIsaVersion(getSTI().getCPU()).Major < 6)
2343     return true;
2344 
2345   auto SymbolName = getGprCountSymbolName(RegKind);
2346   if (!SymbolName)
2347     return true;
2348   MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);
2349 
2350   int64_t NewMax = DwordRegIndex + RegWidth - 1;
2351   int64_t OldCount;
2352 
2353   if (!Sym->isVariable())
2354     return !Error(getParser().getTok().getLoc(),
2355                   ".amdgcn.next_free_{v,s}gpr symbols must be variable");
2356   if (!Sym->getVariableValue(false)->evaluateAsAbsolute(OldCount))
2357     return !Error(
2358         getParser().getTok().getLoc(),
2359         ".amdgcn.next_free_{v,s}gpr symbols must be absolute expressions");
2360 
2361   if (OldCount <= NewMax)
2362     Sym->setVariableValue(MCConstantExpr::create(NewMax + 1, getContext()));
2363 
2364   return true;
2365 }
2366 
2367 std::unique_ptr<AMDGPUOperand>
2368 AMDGPUAsmParser::parseRegister(bool RestoreOnFailure) {
2369   const auto &Tok = Parser.getTok();
2370   SMLoc StartLoc = Tok.getLoc();
2371   SMLoc EndLoc = Tok.getEndLoc();
2372   RegisterKind RegKind;
2373   unsigned Reg, RegNum, RegWidth;
2374 
2375   if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth)) {
2376     //FIXME: improve error messages (bug 41303).
2377     Error(StartLoc, "not a valid operand.");
2378     return nullptr;
2379   }
2380   if (AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) {
2381     if (!updateGprCountSymbols(RegKind, RegNum, RegWidth))
2382       return nullptr;
2383   } else
2384     KernelScope.usesRegister(RegKind, RegNum, RegWidth);
2385   return AMDGPUOperand::CreateReg(this, Reg, StartLoc, EndLoc);
2386 }
2387 
2388 OperandMatchResultTy
2389 AMDGPUAsmParser::parseImm(OperandVector &Operands, bool HasSP3AbsModifier) {
2390   // TODO: add syntactic sugar for 1/(2*PI)
2391 
2392   assert(!isRegister());
2393   assert(!isModifier());
2394 
2395   const auto& Tok = getToken();
2396   const auto& NextTok = peekToken();
2397   bool IsReal = Tok.is(AsmToken::Real);
2398   SMLoc S = getLoc();
2399   bool Negate = false;
2400 
2401   if (!IsReal && Tok.is(AsmToken::Minus) && NextTok.is(AsmToken::Real)) {
2402     lex();
2403     IsReal = true;
2404     Negate = true;
2405   }
2406 
2407   if (IsReal) {
2408     // Floating-point expressions are not supported.
2409     // Can only allow floating-point literals with an
2410     // optional sign.
2411 
2412     StringRef Num = getTokenStr();
2413     lex();
2414 
2415     APFloat RealVal(APFloat::IEEEdouble());
2416     auto roundMode = APFloat::rmNearestTiesToEven;
2417     if (errorToBool(RealVal.convertFromString(Num, roundMode).takeError())) {
2418       return MatchOperand_ParseFail;
2419     }
2420     if (Negate)
2421       RealVal.changeSign();
2422 
2423     Operands.push_back(
2424       AMDGPUOperand::CreateImm(this, RealVal.bitcastToAPInt().getZExtValue(), S,
2425                                AMDGPUOperand::ImmTyNone, true));
2426 
2427     return MatchOperand_Success;
2428 
2429   } else {
2430     int64_t IntVal;
2431     const MCExpr *Expr;
2432     SMLoc S = getLoc();
2433 
2434     if (HasSP3AbsModifier) {
2435       // This is a workaround for handling expressions
2436       // as arguments of SP3 'abs' modifier, for example:
2437       //     |1.0|
2438       //     |-1|
2439       //     |1+x|
2440       // This syntax is not compatible with the syntax of standard
2441       // MC expressions (due to the trailing '|').
2442       SMLoc EndLoc;
2443       if (getParser().parsePrimaryExpr(Expr, EndLoc))
2444         return MatchOperand_ParseFail;
2445     } else {
2446       if (Parser.parseExpression(Expr))
2447         return MatchOperand_ParseFail;
2448     }
2449 
2450     if (Expr->evaluateAsAbsolute(IntVal)) {
2451       Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S));
2452     } else {
2453       Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S));
2454     }
2455 
2456     return MatchOperand_Success;
2457   }
2458 
2459   return MatchOperand_NoMatch;
2460 }
2461 
2462 OperandMatchResultTy
2463 AMDGPUAsmParser::parseReg(OperandVector &Operands) {
2464   if (!isRegister())
2465     return MatchOperand_NoMatch;
2466 
2467   if (auto R = parseRegister()) {
2468     assert(R->isReg());
2469     Operands.push_back(std::move(R));
2470     return MatchOperand_Success;
2471   }
2472   return MatchOperand_ParseFail;
2473 }
2474 
2475 OperandMatchResultTy
2476 AMDGPUAsmParser::parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod) {
2477   auto res = parseReg(Operands);
2478   if (res != MatchOperand_NoMatch) {
2479     return res;
2480   } else if (isModifier()) {
2481     return MatchOperand_NoMatch;
2482   } else {
2483     return parseImm(Operands, HasSP3AbsMod);
2484   }
2485 }
2486 
2487 bool
2488 AMDGPUAsmParser::isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
2489   if (Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::LParen)) {
2490     const auto &str = Token.getString();
2491     return str == "abs" || str == "neg" || str == "sext";
2492   }
2493   return false;
2494 }
2495 
2496 bool
2497 AMDGPUAsmParser::isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const {
2498   return Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::Colon);
2499 }
2500 
2501 bool
2502 AMDGPUAsmParser::isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
2503   return isNamedOperandModifier(Token, NextToken) || Token.is(AsmToken::Pipe);
2504 }
2505 
2506 bool
2507 AMDGPUAsmParser::isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
2508   return isRegister(Token, NextToken) || isOperandModifier(Token, NextToken);
2509 }
2510 
2511 // Check if this is an operand modifier or an opcode modifier
2512 // which may look like an expression but is not. We should
2513 // avoid parsing these modifiers as expressions. Currently
2514 // recognized sequences are:
2515 //   |...|
2516 //   abs(...)
2517 //   neg(...)
2518 //   sext(...)
2519 //   -reg
2520 //   -|...|
2521 //   -abs(...)
2522 //   name:...
2523 // Note that simple opcode modifiers like 'gds' may be parsed as
2524 // expressions; this is a special case. See getExpressionAsToken.
2525 //
2526 bool
2527 AMDGPUAsmParser::isModifier() {
2528 
2529   AsmToken Tok = getToken();
2530   AsmToken NextToken[2];
2531   peekTokens(NextToken);
2532 
2533   return isOperandModifier(Tok, NextToken[0]) ||
2534          (Tok.is(AsmToken::Minus) && isRegOrOperandModifier(NextToken[0], NextToken[1])) ||
2535          isOpcodeModifierWithVal(Tok, NextToken[0]);
2536 }
2537 
2538 // Check if the current token is an SP3 'neg' modifier.
2539 // Currently this modifier is allowed in the following contexts:
2540 //
2541 // 1. Before a register, e.g. "-v0", "-v[...]" or "-[v0,v1]".
2542 // 2. Before an 'abs' modifier: -abs(...)
2543 // 3. Before an SP3 'abs' modifier: -|...|
2544 //
2545 // In all other cases "-" is handled as a part
2546 // of an expression that follows the sign.
2547 //
2548 // Note: When "-" is followed by an integer literal N,
2549 // this is interpreted as integer negation rather
2550 // than a floating-point NEG modifier applied to N.
2551 // Besides being counter-intuitive, such use of a floating-point
2552 // NEG modifier would have resulted in different meanings
2553 // of integer literals used with VOP1/2/C and VOP3,
2554 // for example:
2555 //    v_exp_f32_e32 v5, -1 // VOP1: src0 = 0xFFFFFFFF
2556 //    v_exp_f32_e64 v5, -1 // VOP3: src0 = 0x80000001
2557 // Negative fp literals with a preceding "-" are
2558 // handled likewise for uniformity.
2559 //
2560 bool
2561 AMDGPUAsmParser::parseSP3NegModifier() {
2562 
2563   AsmToken NextToken[2];
2564   peekTokens(NextToken);
2565 
2566   if (isToken(AsmToken::Minus) &&
2567       (isRegister(NextToken[0], NextToken[1]) ||
2568        NextToken[0].is(AsmToken::Pipe) ||
2569        isId(NextToken[0], "abs"))) {
2570     lex();
2571     return true;
2572   }
2573 
2574   return false;
2575 }
2576 
2577 OperandMatchResultTy
2578 AMDGPUAsmParser::parseRegOrImmWithFPInputMods(OperandVector &Operands,
2579                                               bool AllowImm) {
2580   bool Neg, SP3Neg;
2581   bool Abs, SP3Abs;
2582   SMLoc Loc;
2583 
2584   // Disable ambiguous constructs like '--1' etc. Should use neg(-1) instead.
2585   if (isToken(AsmToken::Minus) && peekToken().is(AsmToken::Minus)) {
2586     Error(getLoc(), "invalid syntax, expected 'neg' modifier");
2587     return MatchOperand_ParseFail;
2588   }
2589 
2590   SP3Neg = parseSP3NegModifier();
2591 
2592   Loc = getLoc();
2593   Neg = trySkipId("neg");
2594   if (Neg && SP3Neg) {
2595     Error(Loc, "expected register or immediate");
2596     return MatchOperand_ParseFail;
2597   }
2598   if (Neg && !skipToken(AsmToken::LParen, "expected left paren after neg"))
2599     return MatchOperand_ParseFail;
2600 
2601   Abs = trySkipId("abs");
2602   if (Abs && !skipToken(AsmToken::LParen, "expected left paren after abs"))
2603     return MatchOperand_ParseFail;
2604 
2605   Loc = getLoc();
2606   SP3Abs = trySkipToken(AsmToken::Pipe);
2607   if (Abs && SP3Abs) {
2608     Error(Loc, "expected register or immediate");
2609     return MatchOperand_ParseFail;
2610   }
2611 
2612   OperandMatchResultTy Res;
2613   if (AllowImm) {
2614     Res = parseRegOrImm(Operands, SP3Abs);
2615   } else {
2616     Res = parseReg(Operands);
2617   }
2618   if (Res != MatchOperand_Success) {
2619     return (SP3Neg || Neg || SP3Abs || Abs)? MatchOperand_ParseFail : Res;
2620   }
2621 
2622   if (SP3Abs && !skipToken(AsmToken::Pipe, "expected vertical bar"))
2623     return MatchOperand_ParseFail;
2624   if (Abs && !skipToken(AsmToken::RParen, "expected closing parentheses"))
2625     return MatchOperand_ParseFail;
2626   if (Neg && !skipToken(AsmToken::RParen, "expected closing parentheses"))
2627     return MatchOperand_ParseFail;
2628 
2629   AMDGPUOperand::Modifiers Mods;
2630   Mods.Abs = Abs || SP3Abs;
2631   Mods.Neg = Neg || SP3Neg;
2632 
2633   if (Mods.hasFPModifiers()) {
2634     AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
2635     if (Op.isExpr()) {
2636       Error(Op.getStartLoc(), "expected an absolute expression");
2637       return MatchOperand_ParseFail;
2638     }
2639     Op.setModifiers(Mods);
2640   }
2641   return MatchOperand_Success;
2642 }
2643 
2644 OperandMatchResultTy
2645 AMDGPUAsmParser::parseRegOrImmWithIntInputMods(OperandVector &Operands,
2646                                                bool AllowImm) {
2647   bool Sext = trySkipId("sext");
2648   if (Sext && !skipToken(AsmToken::LParen, "expected left paren after sext"))
2649     return MatchOperand_ParseFail;
2650 
2651   OperandMatchResultTy Res;
2652   if (AllowImm) {
2653     Res = parseRegOrImm(Operands);
2654   } else {
2655     Res = parseReg(Operands);
2656   }
2657   if (Res != MatchOperand_Success) {
2658     return Sext? MatchOperand_ParseFail : Res;
2659   }
2660 
2661   if (Sext && !skipToken(AsmToken::RParen, "expected closing parentheses"))
2662     return MatchOperand_ParseFail;
2663 
2664   AMDGPUOperand::Modifiers Mods;
2665   Mods.Sext = Sext;
2666 
2667   if (Mods.hasIntModifiers()) {
2668     AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
2669     if (Op.isExpr()) {
2670       Error(Op.getStartLoc(), "expected an absolute expression");
2671       return MatchOperand_ParseFail;
2672     }
2673     Op.setModifiers(Mods);
2674   }
2675 
2676   return MatchOperand_Success;
2677 }
2678 
2679 OperandMatchResultTy
2680 AMDGPUAsmParser::parseRegWithFPInputMods(OperandVector &Operands) {
2681   return parseRegOrImmWithFPInputMods(Operands, false);
2682 }
2683 
2684 OperandMatchResultTy
2685 AMDGPUAsmParser::parseRegWithIntInputMods(OperandVector &Operands) {
2686   return parseRegOrImmWithIntInputMods(Operands, false);
2687 }
2688 
2689 OperandMatchResultTy AMDGPUAsmParser::parseVReg32OrOff(OperandVector &Operands) {
2690   auto Loc = getLoc();
2691   if (trySkipId("off")) {
2692     Operands.push_back(AMDGPUOperand::CreateImm(this, 0, Loc,
2693                                                 AMDGPUOperand::ImmTyOff, false));
2694     return MatchOperand_Success;
2695   }
2696 
2697   if (!isRegister())
2698     return MatchOperand_NoMatch;
2699 
2700   std::unique_ptr<AMDGPUOperand> Reg = parseRegister();
2701   if (Reg) {
2702     Operands.push_back(std::move(Reg));
2703     return MatchOperand_Success;
2704   }
2705 
2706   return MatchOperand_ParseFail;
2707 
2708 }
2709 
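// Reject candidate matches that conflict with a forced encoding
// (e32/e64/SDWA/DPP), prefer the e32 form for VOPAsmPrefer32Bit opcodes,
// and require dst_sel == DWORD for SDWA v_mac_f32/f16.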
2710 unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) {
2711   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
2712 
2713   if ((getForcedEncodingSize() == 32 && (TSFlags & SIInstrFlags::VOP3)) ||
2714       (getForcedEncodingSize() == 64 && !(TSFlags & SIInstrFlags::VOP3)) ||
2715       (isForcedDPP() && !(TSFlags & SIInstrFlags::DPP)) ||
2716       (isForcedSDWA() && !(TSFlags & SIInstrFlags::SDWA)) )
2717     return Match_InvalidOperand;
2718 
2719   if ((TSFlags & SIInstrFlags::VOP3) &&
2720       (TSFlags & SIInstrFlags::VOPAsmPrefer32Bit) &&
2721       getForcedEncodingSize() != 64)
2722     return Match_PreferE32;
2723 
2724   if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
2725       Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) {
2726     // v_mac_f32/16 allow only dst_sel == DWORD;
2727     auto OpNum =
2728         AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::dst_sel);
2729     const auto &Op = Inst.getOperand(OpNum);
2730     if (!Op.isImm() || Op.getImm() != AMDGPU::SDWA::SdwaSel::DWORD) {
2731       return Match_InvalidOperand;
2732     }
2733   }
2734 
2735   return Match_Success;
2736 }
2737 
2738 // What asm variants we should check
2739 ArrayRef<unsigned> AMDGPUAsmParser::getMatchedVariants() const {
2740   if (getForcedEncodingSize() == 32) {
2741     static const unsigned Variants[] = {AMDGPUAsmVariants::DEFAULT};
2742     return makeArrayRef(Variants);
2743   }
2744 
2745   if (isForcedVOP3()) {
2746     static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3};
2747     return makeArrayRef(Variants);
2748   }
2749 
2750   if (isForcedSDWA()) {
2751     static const unsigned Variants[] = {AMDGPUAsmVariants::SDWA,
2752                                         AMDGPUAsmVariants::SDWA9};
2753     return makeArrayRef(Variants);
2754   }
2755 
2756   if (isForcedDPP()) {
2757     static const unsigned Variants[] = {AMDGPUAsmVariants::DPP};
2758     return makeArrayRef(Variants);
2759   }
2760 
2761   static const unsigned Variants[] = {
2762     AMDGPUAsmVariants::DEFAULT, AMDGPUAsmVariants::VOP3,
2763     AMDGPUAsmVariants::SDWA, AMDGPUAsmVariants::SDWA9, AMDGPUAsmVariants::DPP
2764   };
2765 
2766   return makeArrayRef(Variants);
2767 }
2768 
2769 unsigned AMDGPUAsmParser::findImplicitSGPRReadInVOP(const MCInst &Inst) const {
2770   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
2771   const unsigned Num = Desc.getNumImplicitUses();
2772   for (unsigned i = 0; i < Num; ++i) {
2773     unsigned Reg = Desc.ImplicitUses[i];
2774     switch (Reg) {
2775     case AMDGPU::FLAT_SCR:
2776     case AMDGPU::VCC:
2777     case AMDGPU::VCC_LO:
2778     case AMDGPU::VCC_HI:
2779     case AMDGPU::M0:
2780       return Reg;
2781     default:
2782       break;
2783     }
2784   }
2785   return AMDGPU::NoRegister;
2786 }
2787 
2788 // NB: This code is correct only when used to check constant
2789 // bus limitations because GFX7 supports no f16 inline constants.
2790 // Note that there are no cases when a GFX7 opcode violates
2791 // constant bus limitations due to the use of an f16 constant.
2792 bool AMDGPUAsmParser::isInlineConstant(const MCInst &Inst,
2793                                        unsigned OpIdx) const {
2794   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
2795 
2796   if (!AMDGPU::isSISrcOperand(Desc, OpIdx)) {
2797     return false;
2798   }
2799 
2800   const MCOperand &MO = Inst.getOperand(OpIdx);
2801 
2802   int64_t Val = MO.getImm();
2803   auto OpSize = AMDGPU::getOperandSize(Desc, OpIdx);
2804 
2805   switch (OpSize) { // expected operand size
2806   case 8:
2807     return AMDGPU::isInlinableLiteral64(Val, hasInv2PiInlineImm());
2808   case 4:
2809     return AMDGPU::isInlinableLiteral32(Val, hasInv2PiInlineImm());
2810   case 2: {
2811     const unsigned OperandType = Desc.OpInfo[OpIdx].OperandType;
2812     if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2INT16 ||
2813         OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2FP16 ||
2814         OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2INT16 ||
2815         OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2FP16 ||
2816         OperandType == AMDGPU::OPERAND_REG_IMM_V2INT16 ||
2817         OperandType == AMDGPU::OPERAND_REG_IMM_V2FP16) {
2818       return AMDGPU::isInlinableLiteralV216(Val, hasInv2PiInlineImm());
2819     } else {
2820       return AMDGPU::isInlinableLiteral16(Val, hasInv2PiInlineImm());
2821     }
2822   }
2823   default:
2824     llvm_unreachable("invalid operand size");
2825   }
2826 }
2827 
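// Return how many scalar values this opcode may read over the constant bus:
// one before GFX10 and for 64-bit shifts, two otherwise on GFX10.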
2828 unsigned AMDGPUAsmParser::getConstantBusLimit(unsigned Opcode) const {
2829   if (!isGFX10())
2830     return 1;
2831 
2832   switch (Opcode) {
2833   // 64-bit shift instructions can use only one scalar value input
2834   case AMDGPU::V_LSHLREV_B64:
2835   case AMDGPU::V_LSHLREV_B64_gfx10:
2836   case AMDGPU::V_LSHL_B64:
2837   case AMDGPU::V_LSHRREV_B64:
2838   case AMDGPU::V_LSHRREV_B64_gfx10:
2839   case AMDGPU::V_LSHR_B64:
2840   case AMDGPU::V_ASHRREV_I64:
2841   case AMDGPU::V_ASHRREV_I64_gfx10:
2842   case AMDGPU::V_ASHR_I64:
2843     return 1;
2844   default:
2845     return 2;
2846   }
2847 }
2848 
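// An operand uses the constant bus if it is a non-inline immediate, an
// expression, or an SGPR other than null.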
2849 bool AMDGPUAsmParser::usesConstantBus(const MCInst &Inst, unsigned OpIdx) {
2850   const MCOperand &MO = Inst.getOperand(OpIdx);
2851   if (MO.isImm()) {
2852     return !isInlineConstant(Inst, OpIdx);
2853   } else if (MO.isReg()) {
2854     auto Reg = MO.getReg();
2855     const MCRegisterInfo *TRI = getContext().getRegisterInfo();
2856     return isSGPR(mc2PseudoReg(Reg), TRI) && Reg != SGPR_NULL;
2857   } else {
2858     return true;
2859   }
2860 }
2861 
2862 bool AMDGPUAsmParser::validateConstantBusLimitations(const MCInst &Inst) {
2863   const unsigned Opcode = Inst.getOpcode();
2864   const MCInstrDesc &Desc = MII.get(Opcode);
2865   unsigned ConstantBusUseCount = 0;
2866   unsigned NumLiterals = 0;
2867   unsigned LiteralSize;
2868 
2869   if (Desc.TSFlags &
2870       (SIInstrFlags::VOPC |
2871        SIInstrFlags::VOP1 | SIInstrFlags::VOP2 |
2872        SIInstrFlags::VOP3 | SIInstrFlags::VOP3P |
2873        SIInstrFlags::SDWA)) {
2874     // Check special imm operands (used by madmk, etc)
2875     if (AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm) != -1) {
2876       ++ConstantBusUseCount;
2877     }
2878 
2879     SmallDenseSet<unsigned> SGPRsUsed;
2880     unsigned SGPRUsed = findImplicitSGPRReadInVOP(Inst);
2881     if (SGPRUsed != AMDGPU::NoRegister) {
2882       SGPRsUsed.insert(SGPRUsed);
2883       ++ConstantBusUseCount;
2884     }
2885 
2886     const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
2887     const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
2888     const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
2889 
2890     const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx };
2891 
2892     for (int OpIdx : OpIndices) {
2893       if (OpIdx == -1) break;
2894 
2895       const MCOperand &MO = Inst.getOperand(OpIdx);
2896       if (usesConstantBus(Inst, OpIdx)) {
2897         if (MO.isReg()) {
2898           const unsigned Reg = mc2PseudoReg(MO.getReg());
2899           // Pairs of registers with partial intersections like these
2900           //   s0, s[0:1]
2901           //   flat_scratch_lo, flat_scratch
2902           //   flat_scratch_lo, flat_scratch_hi
2903           // are theoretically valid but they are disabled anyway.
2904           // Note that this code mimics SIInstrInfo::verifyInstruction
2905           if (!SGPRsUsed.count(Reg)) {
2906             SGPRsUsed.insert(Reg);
2907             ++ConstantBusUseCount;
2908           }
2909         } else { // Expression or a literal
2910 
2911           if (Desc.OpInfo[OpIdx].OperandType == MCOI::OPERAND_IMMEDIATE)
2912             continue; // special operand like VINTERP attr_chan
2913 
2914           // An instruction may use only one literal.
2915           // This has been validated on the previous step.
2916           // See validateVOP3Literal.
2917           // This literal may be used as more than one operand.
2918           // If all these operands are of the same size,
2919           // this literal counts as one scalar value.
2920           // Otherwise it counts as 2 scalar values.
2921           // See "GFX10 Shader Programming", section 3.6.2.3.
2922 
2923           unsigned Size = AMDGPU::getOperandSize(Desc, OpIdx);
2924           if (Size < 4) Size = 4;
2925 
2926           if (NumLiterals == 0) {
2927             NumLiterals = 1;
2928             LiteralSize = Size;
2929           } else if (LiteralSize != Size) {
2930             NumLiterals = 2;
2931           }
2932         }
2933       }
2934     }
2935   }
2936   ConstantBusUseCount += NumLiterals;
2937 
2938   return ConstantBusUseCount <= getConstantBusLimit(Opcode);
2939 }
2940 
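// For instructions with an early-clobber destination, the destination
// register must not overlap any source register.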
2941 bool AMDGPUAsmParser::validateEarlyClobberLimitations(const MCInst &Inst) {
2942   const unsigned Opcode = Inst.getOpcode();
2943   const MCInstrDesc &Desc = MII.get(Opcode);
2944 
2945   const int DstIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vdst);
2946   if (DstIdx == -1 ||
2947       Desc.getOperandConstraint(DstIdx, MCOI::EARLY_CLOBBER) == -1) {
2948     return true;
2949   }
2950 
2951   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
2952 
2953   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
2954   const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
2955   const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
2956 
2957   assert(DstIdx != -1);
2958   const MCOperand &Dst = Inst.getOperand(DstIdx);
2959   assert(Dst.isReg());
2960   const unsigned DstReg = mc2PseudoReg(Dst.getReg());
2961 
2962   const int SrcIndices[] = { Src0Idx, Src1Idx, Src2Idx };
2963 
2964   for (int SrcIdx : SrcIndices) {
2965     if (SrcIdx == -1) break;
2966     const MCOperand &Src = Inst.getOperand(SrcIdx);
2967     if (Src.isReg()) {
2968       const unsigned SrcReg = mc2PseudoReg(Src.getReg());
2969       if (isRegIntersect(DstReg, SrcReg, TRI)) {
2970         return false;
2971       }
2972     }
2973   }
2974 
2975   return true;
2976 }
2977 
2978 bool AMDGPUAsmParser::validateIntClampSupported(const MCInst &Inst) {
2979 
2980   const unsigned Opc = Inst.getOpcode();
2981   const MCInstrDesc &Desc = MII.get(Opc);
2982 
2983   if ((Desc.TSFlags & SIInstrFlags::IntClamp) != 0 && !hasIntClamp()) {
2984     int ClampIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp);
2985     assert(ClampIdx != -1);
2986     return Inst.getOperand(ClampIdx).getImm() == 0;
2987   }
2988 
2989   return true;
2990 }
2991 
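// Check that the vdata register width of a MIMG instruction matches the
// data size implied by dmask (plus an optional TFE dword), accounting for
// gather4 and packed D16.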
2992 bool AMDGPUAsmParser::validateMIMGDataSize(const MCInst &Inst) {
2993 
2994   const unsigned Opc = Inst.getOpcode();
2995   const MCInstrDesc &Desc = MII.get(Opc);
2996 
2997   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
2998     return true;
2999 
3000   int VDataIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdata);
3001   int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
3002   int TFEIdx   = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::tfe);
3003 
3004   assert(VDataIdx != -1);
3005   assert(DMaskIdx != -1);
3006   assert(TFEIdx != -1);
3007 
3008   unsigned VDataSize = AMDGPU::getRegOperandSize(getMRI(), Desc, VDataIdx);
3009   unsigned TFESize = Inst.getOperand(TFEIdx).getImm()? 1 : 0;
3010   unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
3011   if (DMask == 0)
3012     DMask = 1;
3013 
3014   unsigned DataSize =
3015     (Desc.TSFlags & SIInstrFlags::Gather4) ? 4 : countPopulation(DMask);
3016   if (hasPackedD16()) {
3017     int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
3018     if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm())
3019       DataSize = (DataSize + 1) / 2;
3020   }
3021 
3022   return (VDataSize / 4) == DataSize + TFESize;
3023 }
3024 
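// On GFX10, check that the number of address registers (or NSA address
// operands) matches what the opcode's dim, gradient, coordinate and
// LOD/clamp/mip arguments require.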
3025 bool AMDGPUAsmParser::validateMIMGAddrSize(const MCInst &Inst) {
3026   const unsigned Opc = Inst.getOpcode();
3027   const MCInstrDesc &Desc = MII.get(Opc);
3028 
3029   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0 || !isGFX10())
3030     return true;
3031 
3032   const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc);
3033   const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
3034       AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode);
3035   int VAddr0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr0);
3036   int SrsrcIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::srsrc);
3037   int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
3038 
3039   assert(VAddr0Idx != -1);
3040   assert(SrsrcIdx != -1);
3041   assert(DimIdx != -1);
3042   assert(SrsrcIdx > VAddr0Idx);
3043 
3044   unsigned Dim = Inst.getOperand(DimIdx).getImm();
3045   const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim);
3046   bool IsNSA = SrsrcIdx - VAddr0Idx > 1;
3047   unsigned VAddrSize =
3048       IsNSA ? SrsrcIdx - VAddr0Idx
3049             : AMDGPU::getRegOperandSize(getMRI(), Desc, VAddr0Idx) / 4;
3050 
3051   unsigned AddrSize = BaseOpcode->NumExtraArgs +
3052                       (BaseOpcode->Gradients ? DimInfo->NumGradients : 0) +
3053                       (BaseOpcode->Coordinates ? DimInfo->NumCoords : 0) +
3054                       (BaseOpcode->LodOrClampOrMip ? 1 : 0);
3055   if (!IsNSA) {
3056     if (AddrSize > 8)
3057       AddrSize = 16;
3058     else if (AddrSize > 4)
3059       AddrSize = 8;
3060   }
3061 
3062   return VAddrSize == AddrSize;
3063 }
3064 
3065 bool AMDGPUAsmParser::validateMIMGAtomicDMask(const MCInst &Inst) {
3066 
3067   const unsigned Opc = Inst.getOpcode();
3068   const MCInstrDesc &Desc = MII.get(Opc);
3069 
3070   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3071     return true;
3072   if (!Desc.mayLoad() || !Desc.mayStore())
3073     return true; // Not atomic
3074 
3075   int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
3076   unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
3077 
3078   // This is an incomplete check because image_atomic_cmpswap
3079   // may only use 0x3 and 0xf while other atomic operations
3080   // may use 0x1 and 0x3. However these limitations are
3081   // verified when we check that dmask matches dst size.
3082   return DMask == 0x1 || DMask == 0x3 || DMask == 0xf;
3083 }
3084 
3085 bool AMDGPUAsmParser::validateMIMGGatherDMask(const MCInst &Inst) {
3086 
3087   const unsigned Opc = Inst.getOpcode();
3088   const MCInstrDesc &Desc = MII.get(Opc);
3089 
3090   if ((Desc.TSFlags & SIInstrFlags::Gather4) == 0)
3091     return true;
3092 
3093   int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
3094   unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
3095 
3096   // GATHER4 instructions use dmask differently from other MIMG
3097   // instructions. The only useful DMASK values are
3098   // 1=red, 2=green, 4=blue, 8=alpha; e.g. a dmask of 1 returns
3099   // (red,red,red,red). The ISA document doesn't mention
3100   // this.
3101   return DMask == 0x1 || DMask == 0x2 || DMask == 0x4 || DMask == 0x8;
3102 }
3103 
3104 static bool IsMovrelsSDWAOpcode(const unsigned Opcode) {
3106   switch (Opcode) {
3107   case AMDGPU::V_MOVRELS_B32_sdwa_gfx10:
3108   case AMDGPU::V_MOVRELSD_B32_sdwa_gfx10:
3109   case AMDGPU::V_MOVRELSD_2_B32_sdwa_gfx10:
3110     return true;
3111   default:
3112     return false;
3113   }
3114 }
3115 
3116 // movrels* opcodes should only allow VGPRs as src0.
3117 // This is specified in the .td description for vop1/vop3,
3118 // but SDWA is handled differently. See isSDWAOperand.
3119 bool AMDGPUAsmParser::validateMovrels(const MCInst &Inst) {
3120 
3121   const unsigned Opc = Inst.getOpcode();
3122   const MCInstrDesc &Desc = MII.get(Opc);
3123 
3124   if ((Desc.TSFlags & SIInstrFlags::SDWA) == 0 || !IsMovrelsSDWAOpcode(Opc))
3125     return true;
3126 
3127   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
3128   assert(Src0Idx != -1);
3129 
3130   const MCOperand &Src0 = Inst.getOperand(Src0Idx);
3131   if (!Src0.isReg())
3132     return false;
3133 
3134   auto Reg = Src0.getReg();
3135   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3136   return !isSGPR(mc2PseudoReg(Reg), TRI);
3137 }
3138 
3139 bool AMDGPUAsmParser::validateMIMGD16(const MCInst &Inst) {
3140 
3141   const unsigned Opc = Inst.getOpcode();
3142   const MCInstrDesc &Desc = MII.get(Opc);
3143 
3144   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3145     return true;
3146 
3147   int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
3148   if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm()) {
3149     if (isCI() || isSI())
3150       return false;
3151   }
3152 
3153   return true;
3154 }
3155 
3156 bool AMDGPUAsmParser::validateMIMGDim(const MCInst &Inst) {
3157   const unsigned Opc = Inst.getOpcode();
3158   const MCInstrDesc &Desc = MII.get(Opc);
3159 
3160   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3161     return true;
3162 
3163   int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
3164   if (DimIdx < 0)
3165     return true;
3166 
3167   int64_t Imm = Inst.getOperand(DimIdx).getImm();
3168   if (Imm < 0 || Imm >= 8)
3169     return false;
3170 
3171   return true;
3172 }
3173 
3174 static bool IsRevOpcode(const unsigned Opcode) {
3176   switch (Opcode) {
3177   case AMDGPU::V_SUBREV_F32_e32:
3178   case AMDGPU::V_SUBREV_F32_e64:
3179   case AMDGPU::V_SUBREV_F32_e32_gfx10:
3180   case AMDGPU::V_SUBREV_F32_e32_gfx6_gfx7:
3181   case AMDGPU::V_SUBREV_F32_e32_vi:
3182   case AMDGPU::V_SUBREV_F32_e64_gfx10:
3183   case AMDGPU::V_SUBREV_F32_e64_gfx6_gfx7:
3184   case AMDGPU::V_SUBREV_F32_e64_vi:
3185 
3186   case AMDGPU::V_SUBREV_I32_e32:
3187   case AMDGPU::V_SUBREV_I32_e64:
3188   case AMDGPU::V_SUBREV_I32_e32_gfx6_gfx7:
3189   case AMDGPU::V_SUBREV_I32_e64_gfx6_gfx7:
3190 
3191   case AMDGPU::V_SUBBREV_U32_e32:
3192   case AMDGPU::V_SUBBREV_U32_e64:
3193   case AMDGPU::V_SUBBREV_U32_e32_gfx6_gfx7:
3194   case AMDGPU::V_SUBBREV_U32_e32_vi:
3195   case AMDGPU::V_SUBBREV_U32_e64_gfx6_gfx7:
3196   case AMDGPU::V_SUBBREV_U32_e64_vi:
3197 
3198   case AMDGPU::V_SUBREV_U32_e32:
3199   case AMDGPU::V_SUBREV_U32_e64:
3200   case AMDGPU::V_SUBREV_U32_e32_gfx9:
3201   case AMDGPU::V_SUBREV_U32_e32_vi:
3202   case AMDGPU::V_SUBREV_U32_e64_gfx9:
3203   case AMDGPU::V_SUBREV_U32_e64_vi:
3204 
3205   case AMDGPU::V_SUBREV_F16_e32:
3206   case AMDGPU::V_SUBREV_F16_e64:
3207   case AMDGPU::V_SUBREV_F16_e32_gfx10:
3208   case AMDGPU::V_SUBREV_F16_e32_vi:
3209   case AMDGPU::V_SUBREV_F16_e64_gfx10:
3210   case AMDGPU::V_SUBREV_F16_e64_vi:
3211 
3212   case AMDGPU::V_SUBREV_U16_e32:
3213   case AMDGPU::V_SUBREV_U16_e64:
3214   case AMDGPU::V_SUBREV_U16_e32_vi:
3215   case AMDGPU::V_SUBREV_U16_e64_vi:
3216 
3217   case AMDGPU::V_SUBREV_CO_U32_e32_gfx9:
3218   case AMDGPU::V_SUBREV_CO_U32_e64_gfx10:
3219   case AMDGPU::V_SUBREV_CO_U32_e64_gfx9:
3220 
3221   case AMDGPU::V_SUBBREV_CO_U32_e32_gfx9:
3222   case AMDGPU::V_SUBBREV_CO_U32_e64_gfx9:
3223 
3224   case AMDGPU::V_SUBREV_NC_U32_e32_gfx10:
3225   case AMDGPU::V_SUBREV_NC_U32_e64_gfx10:
3226 
3227   case AMDGPU::V_SUBREV_CO_CI_U32_e32_gfx10:
3228   case AMDGPU::V_SUBREV_CO_CI_U32_e64_gfx10:
3229 
3230   case AMDGPU::V_LSHRREV_B32_e32:
3231   case AMDGPU::V_LSHRREV_B32_e64:
3232   case AMDGPU::V_LSHRREV_B32_e32_gfx6_gfx7:
3233   case AMDGPU::V_LSHRREV_B32_e64_gfx6_gfx7:
3234   case AMDGPU::V_LSHRREV_B32_e32_vi:
3235   case AMDGPU::V_LSHRREV_B32_e64_vi:
3236   case AMDGPU::V_LSHRREV_B32_e32_gfx10:
3237   case AMDGPU::V_LSHRREV_B32_e64_gfx10:
3238 
3239   case AMDGPU::V_ASHRREV_I32_e32:
3240   case AMDGPU::V_ASHRREV_I32_e64:
3241   case AMDGPU::V_ASHRREV_I32_e32_gfx10:
3242   case AMDGPU::V_ASHRREV_I32_e32_gfx6_gfx7:
3243   case AMDGPU::V_ASHRREV_I32_e32_vi:
3244   case AMDGPU::V_ASHRREV_I32_e64_gfx10:
3245   case AMDGPU::V_ASHRREV_I32_e64_gfx6_gfx7:
3246   case AMDGPU::V_ASHRREV_I32_e64_vi:
3247 
3248   case AMDGPU::V_LSHLREV_B32_e32:
3249   case AMDGPU::V_LSHLREV_B32_e64:
3250   case AMDGPU::V_LSHLREV_B32_e32_gfx10:
3251   case AMDGPU::V_LSHLREV_B32_e32_gfx6_gfx7:
3252   case AMDGPU::V_LSHLREV_B32_e32_vi:
3253   case AMDGPU::V_LSHLREV_B32_e64_gfx10:
3254   case AMDGPU::V_LSHLREV_B32_e64_gfx6_gfx7:
3255   case AMDGPU::V_LSHLREV_B32_e64_vi:
3256 
3257   case AMDGPU::V_LSHLREV_B16_e32:
3258   case AMDGPU::V_LSHLREV_B16_e64:
3259   case AMDGPU::V_LSHLREV_B16_e32_vi:
3260   case AMDGPU::V_LSHLREV_B16_e64_vi:
3261   case AMDGPU::V_LSHLREV_B16_gfx10:
3262 
3263   case AMDGPU::V_LSHRREV_B16_e32:
3264   case AMDGPU::V_LSHRREV_B16_e64:
3265   case AMDGPU::V_LSHRREV_B16_e32_vi:
3266   case AMDGPU::V_LSHRREV_B16_e64_vi:
3267   case AMDGPU::V_LSHRREV_B16_gfx10:
3268 
3269   case AMDGPU::V_ASHRREV_I16_e32:
3270   case AMDGPU::V_ASHRREV_I16_e64:
3271   case AMDGPU::V_ASHRREV_I16_e32_vi:
3272   case AMDGPU::V_ASHRREV_I16_e64_vi:
3273   case AMDGPU::V_ASHRREV_I16_gfx10:
3274 
3275   case AMDGPU::V_LSHLREV_B64:
3276   case AMDGPU::V_LSHLREV_B64_gfx10:
3277   case AMDGPU::V_LSHLREV_B64_vi:
3278 
3279   case AMDGPU::V_LSHRREV_B64:
3280   case AMDGPU::V_LSHRREV_B64_gfx10:
3281   case AMDGPU::V_LSHRREV_B64_vi:
3282 
3283   case AMDGPU::V_ASHRREV_I64:
3284   case AMDGPU::V_ASHRREV_I64_gfx10:
3285   case AMDGPU::V_ASHRREV_I64_vi:
3286 
3287   case AMDGPU::V_PK_LSHLREV_B16:
3288   case AMDGPU::V_PK_LSHLREV_B16_gfx10:
3289   case AMDGPU::V_PK_LSHLREV_B16_vi:
3290 
3291   case AMDGPU::V_PK_LSHRREV_B16:
3292   case AMDGPU::V_PK_LSHRREV_B16_gfx10:
3293   case AMDGPU::V_PK_LSHRREV_B16_vi:
3294   case AMDGPU::V_PK_ASHRREV_I16:
3295   case AMDGPU::V_PK_ASHRREV_I16_gfx10:
3296   case AMDGPU::V_PK_ASHRREV_I16_vi:
3297     return true;
3298   default:
3299     return false;
3300   }
3301 }
3302 
3303 bool AMDGPUAsmParser::validateLdsDirect(const MCInst &Inst) {
3304 
3305   using namespace SIInstrFlags;
3306   const unsigned Opcode = Inst.getOpcode();
3307   const MCInstrDesc &Desc = MII.get(Opcode);
3308 
3309   // The lds_direct register is defined so that it can only be used
3310   // with 9-bit source operands. Ignore encodings which do not accept these.
3311   if ((Desc.TSFlags & (VOP1 | VOP2 | VOP3 | VOPC | VOP3P | SIInstrFlags::SDWA)) == 0)
3312     return true;
3313 
3314   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
3315   const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
3316   const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
3317 
3318   const int SrcIndices[] = { Src1Idx, Src2Idx };
3319 
3320   // lds_direct cannot be specified as either src1 or src2.
3321   for (int SrcIdx : SrcIndices) {
3322     if (SrcIdx == -1) break;
3323     const MCOperand &Src = Inst.getOperand(SrcIdx);
3324     if (Src.isReg() && Src.getReg() == LDS_DIRECT) {
3325       return false;
3326     }
3327   }
3328 
3329   if (Src0Idx == -1)
3330     return true;
3331 
3332   const MCOperand &Src = Inst.getOperand(Src0Idx);
3333   if (!Src.isReg() || Src.getReg() != LDS_DIRECT)
3334     return true;
3335 
3336   // lds_direct is specified as src0. Check additional limitations.
3337   return (Desc.TSFlags & SIInstrFlags::SDWA) == 0 && !IsRevOpcode(Opcode);
3338 }
3339 
3340 SMLoc AMDGPUAsmParser::getFlatOffsetLoc(const OperandVector &Operands) const {
3341   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
3342     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
3343     if (Op.isFlatOffset())
3344       return Op.getStartLoc();
3345   }
3346   return getLoc();
3347 }
3348 
3349 bool AMDGPUAsmParser::validateFlatOffset(const MCInst &Inst,
3350                                          const OperandVector &Operands) {
3351   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
3352   if ((TSFlags & SIInstrFlags::FLAT) == 0)
3353     return true;
3354 
3355   auto Opcode = Inst.getOpcode();
3356   auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
3357   assert(OpNum != -1);
3358 
3359   const auto &Op = Inst.getOperand(OpNum);
3360   if (!hasFlatOffsets() && Op.getImm() != 0) {
3361     Error(getFlatOffsetLoc(Operands),
3362           "flat offset modifier is not supported on this GPU");
3363     return false;
3364   }
3365 
3366   // Address offset is 12-bit signed for GFX10, 13-bit for GFX9.
3367   // For FLAT segment the offset must be positive;
3368   // MSB is ignored and forced to zero.
3369   unsigned OffsetSize = isGFX9() ? 13 : 12;
3370   if (TSFlags & SIInstrFlags::IsNonFlatSeg) {
3371     if (!isIntN(OffsetSize, Op.getImm())) {
3372       Error(getFlatOffsetLoc(Operands),
3373             isGFX9() ? "expected a 13-bit signed offset" :
3374                        "expected a 12-bit signed offset");
3375       return false;
3376     }
3377   } else {
3378     if (!isUIntN(OffsetSize - 1, Op.getImm())) {
3379       Error(getFlatOffsetLoc(Operands),
3380             isGFX9() ? "expected a 12-bit unsigned offset" :
3381                        "expected an 11-bit unsigned offset");
3382       return false;
3383     }
3384   }
3385 
3386   return true;
3387 }
3388 
3389 bool AMDGPUAsmParser::validateSOPLiteral(const MCInst &Inst) const {
3390   unsigned Opcode = Inst.getOpcode();
3391   const MCInstrDesc &Desc = MII.get(Opcode);
3392   if (!(Desc.TSFlags & (SIInstrFlags::SOP2 | SIInstrFlags::SOPC)))
3393     return true;
3394 
3395   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
3396   const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
3397 
3398   const int OpIndices[] = { Src0Idx, Src1Idx };
3399 
3400   unsigned NumExprs = 0;
3401   unsigned NumLiterals = 0;
3402   uint32_t LiteralValue = 0;
3403 
3404   for (int OpIdx : OpIndices) {
3405     if (OpIdx == -1) break;
3406 
3407     const MCOperand &MO = Inst.getOperand(OpIdx);
3408     // Exclude special imm operands (like that used by s_set_gpr_idx_on)
3409     if (AMDGPU::isSISrcOperand(Desc, OpIdx)) {
3410       if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) {
3411         uint32_t Value = static_cast<uint32_t>(MO.getImm());
3412         if (NumLiterals == 0 || LiteralValue != Value) {
3413           LiteralValue = Value;
3414           ++NumLiterals;
3415         }
3416       } else if (MO.isExpr()) {
3417         ++NumExprs;
3418       }
3419     }
3420   }
3421 
3422   return NumLiterals + NumExprs <= 1;
3423 }
3424 
3425 bool AMDGPUAsmParser::validateOpSel(const MCInst &Inst) {
3426   const unsigned Opc = Inst.getOpcode();
3427   if (Opc == AMDGPU::V_PERMLANE16_B32_gfx10 ||
3428       Opc == AMDGPU::V_PERMLANEX16_B32_gfx10) {
3429     int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
3430     unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
3431 
3432     if (OpSel & ~3)
3433       return false;
3434   }
3435   return true;
3436 }
3437 
3438 // Check if VCC register matches wavefront size
3439 bool AMDGPUAsmParser::validateVccOperand(unsigned Reg) const {
3440   auto FB = getFeatureBits();
3441   return (FB[AMDGPU::FeatureWavefrontSize64] && Reg == AMDGPU::VCC) ||
3442     (FB[AMDGPU::FeatureWavefrontSize32] && Reg == AMDGPU::VCC_LO);
3443 }
3444 
3445 // VOP3 literal is only allowed in GFX10+ and only one can be used
3446 bool AMDGPUAsmParser::validateVOP3Literal(const MCInst &Inst) const {
3447   unsigned Opcode = Inst.getOpcode();
3448   const MCInstrDesc &Desc = MII.get(Opcode);
3449   if (!(Desc.TSFlags & (SIInstrFlags::VOP3 | SIInstrFlags::VOP3P)))
3450     return true;
3451 
3452   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
3453   const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
3454   const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
3455 
3456   const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx };
3457 
3458   unsigned NumExprs = 0;
3459   unsigned NumLiterals = 0;
3460   uint32_t LiteralValue = 0;
3461 
3462   for (int OpIdx : OpIndices) {
3463     if (OpIdx == -1) break;
3464 
3465     const MCOperand &MO = Inst.getOperand(OpIdx);
3466     if (!MO.isImm() && !MO.isExpr())
3467       continue;
3468     if (!AMDGPU::isSISrcOperand(Desc, OpIdx))
3469       continue;
3470 
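    // On subtargets with the MFMA inline literal bug, src2 of an MAI
    // instruction may not take any immediate or expression operand.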
3471     if (OpIdx == Src2Idx && (Desc.TSFlags & SIInstrFlags::IsMAI) &&
3472         getFeatureBits()[AMDGPU::FeatureMFMAInlineLiteralBug])
3473       return false;
3474 
3475     if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) {
3476       uint32_t Value = static_cast<uint32_t>(MO.getImm());
3477       if (NumLiterals == 0 || LiteralValue != Value) {
3478         LiteralValue = Value;
3479         ++NumLiterals;
3480       }
3481     } else if (MO.isExpr()) {
3482       ++NumExprs;
3483     }
3484   }
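  // Relocatable expressions are also encoded as literals.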
3485   NumLiterals += NumExprs;
3486 
3487   return !NumLiterals ||
3488          (NumLiterals == 1 && getFeatureBits()[AMDGPU::FeatureVOP3Literal]);
3489 }
3490 
3491 bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst,
3492                                           const SMLoc &IDLoc,
3493                                           const OperandVector &Operands) {
3494   if (!validateLdsDirect(Inst)) {
3495     Error(IDLoc,
3496       "invalid use of lds_direct");
3497     return false;
3498   }
3499   if (!validateSOPLiteral(Inst)) {
3500     Error(IDLoc,
3501       "only one literal operand is allowed");
3502     return false;
3503   }
3504   if (!validateVOP3Literal(Inst)) {
3505     Error(IDLoc,
3506       "invalid literal operand");
3507     return false;
3508   }
3509   if (!validateConstantBusLimitations(Inst)) {
3510     Error(IDLoc,
3511       "invalid operand (violates constant bus restrictions)");
3512     return false;
3513   }
3514   if (!validateEarlyClobberLimitations(Inst)) {
3515     Error(IDLoc,
3516       "destination must be different than all sources");
3517     return false;
3518   }
3519   if (!validateIntClampSupported(Inst)) {
3520     Error(IDLoc,
3521       "integer clamping is not supported on this GPU");
3522     return false;
3523   }
3524   if (!validateOpSel(Inst)) {
3525     Error(IDLoc,
3526       "invalid op_sel operand");
3527     return false;
3528   }
3529   // For MUBUF/MTBUF d16 is a part of opcode, so there is nothing to validate.
3530   if (!validateMIMGD16(Inst)) {
3531     Error(IDLoc,
3532       "d16 modifier is not supported on this GPU");
3533     return false;
3534   }
3535   if (!validateMIMGDim(Inst)) {
3536     Error(IDLoc, "dim modifier is required on this GPU");
3537     return false;
3538   }
3539   if (!validateMIMGDataSize(Inst)) {
3540     Error(IDLoc,
3541       "image data size does not match dmask and tfe");
3542     return false;
3543   }
3544   if (!validateMIMGAddrSize(Inst)) {
3545     Error(IDLoc,
3546       "image address size does not match dim and a16");
3547     return false;
3548   }
3549   if (!validateMIMGAtomicDMask(Inst)) {
3550     Error(IDLoc,
3551       "invalid atomic image dmask");
3552     return false;
3553   }
3554   if (!validateMIMGGatherDMask(Inst)) {
3555     Error(IDLoc,
3556       "invalid image_gather dmask: only one bit must be set");
3557     return false;
3558   }
3559   if (!validateMovrels(Inst)) {
3560     Error(IDLoc, "source operand must be a VGPR");
3561     return false;
3562   }
3563   if (!validateFlatOffset(Inst, Operands)) {
3564     return false;
3565   }
3566 
3567   return true;
3568 }
3569 
3570 static std::string AMDGPUMnemonicSpellCheck(StringRef S,
3571                                             const FeatureBitset &FBS,
3572                                             unsigned VariantID = 0);
3573 
3574 bool AMDGPUAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
3575                                               OperandVector &Operands,
3576                                               MCStreamer &Out,
3577                                               uint64_t &ErrorInfo,
3578                                               bool MatchingInlineAsm) {
3579   MCInst Inst;
3580   unsigned Result = Match_Success;
3581   for (auto Variant : getMatchedVariants()) {
3582     uint64_t EI;
3583     auto R = MatchInstructionImpl(Operands, Inst, EI, MatchingInlineAsm,
3584                                   Variant);
3585     // We order match statuses from least to most specific and use the most
3586     // specific status as the result:
3587     // Match_MnemonicFail < Match_InvalidOperand < Match_MissingFeature < Match_PreferE32
3588     if ((R == Match_Success) ||
3589         (R == Match_PreferE32) ||
3590         (R == Match_MissingFeature && Result != Match_PreferE32) ||
3591         (R == Match_InvalidOperand && Result != Match_MissingFeature
3592                                    && Result != Match_PreferE32) ||
3593         (R == Match_MnemonicFail   && Result != Match_InvalidOperand
3594                                    && Result != Match_MissingFeature
3595                                    && Result != Match_PreferE32)) {
3596       Result = R;
3597       ErrorInfo = EI;
3598     }
3599     if (R == Match_Success)
3600       break;
3601   }
3602 
3603   switch (Result) {
3604   default: break;
3605   case Match_Success:
3606     if (!validateInstruction(Inst, IDLoc, Operands)) {
3607       return true;
3608     }
3609     Inst.setLoc(IDLoc);
3610     Out.emitInstruction(Inst, getSTI());
3611     return false;
3612 
3613   case Match_MissingFeature:
3614     return Error(IDLoc, "instruction not supported on this GPU");
3615 
3616   case Match_MnemonicFail: {
3617     FeatureBitset FBS = ComputeAvailableFeatures(getSTI().getFeatureBits());
3618     std::string Suggestion = AMDGPUMnemonicSpellCheck(
3619         ((AMDGPUOperand &)*Operands[0]).getToken(), FBS);
3620     return Error(IDLoc, "invalid instruction" + Suggestion,
3621                  ((AMDGPUOperand &)*Operands[0]).getLocRange());
3622   }
3623 
3624   case Match_InvalidOperand: {
3625     SMLoc ErrorLoc = IDLoc;
3626     if (ErrorInfo != ~0ULL) {
3627       if (ErrorInfo >= Operands.size()) {
3628         return Error(IDLoc, "too few operands for instruction");
3629       }
3630       ErrorLoc = ((AMDGPUOperand &)*Operands[ErrorInfo]).getStartLoc();
3631       if (ErrorLoc == SMLoc())
3632         ErrorLoc = IDLoc;
3633     }
3634     return Error(ErrorLoc, "invalid operand for instruction");
3635   }
3636 
3637   case Match_PreferE32:
3638     return Error(IDLoc, "internal error: instruction without _e64 suffix "
3639                         "should be encoded as e32");
3640   }
3641   llvm_unreachable("Implement any new match types added!");
3642 }
3643 
3644 bool AMDGPUAsmParser::ParseAsAbsoluteExpression(uint32_t &Ret) {
3645   int64_t Tmp = -1;
3646   if (getLexer().isNot(AsmToken::Integer) && getLexer().isNot(AsmToken::Identifier)) {
3647     return true;
3648   }
3649   if (getParser().parseAbsoluteExpression(Tmp)) {
3650     return true;
3651   }
3652   Ret = static_cast<uint32_t>(Tmp);
3653   return false;
3654 }
3655 
3656 bool AMDGPUAsmParser::ParseDirectiveMajorMinor(uint32_t &Major,
3657                                                uint32_t &Minor) {
3658   if (ParseAsAbsoluteExpression(Major))
3659     return TokError("invalid major version");
3660 
3661   if (getLexer().isNot(AsmToken::Comma))
3662     return TokError("minor version number required, comma expected");
3663   Lex();
3664 
3665   if (ParseAsAbsoluteExpression(Minor))
3666     return TokError("invalid minor version");
3667 
3668   return false;
3669 }
3670 
3671 bool AMDGPUAsmParser::ParseDirectiveAMDGCNTarget() {
3672   if (getSTI().getTargetTriple().getArch() != Triple::amdgcn)
3673     return TokError("directive only supported for amdgcn architecture");
3674 
3675   std::string Target;
3676 
3677   SMLoc TargetStart = getTok().getLoc();
3678   if (getParser().parseEscapedString(Target))
3679     return true;
3680   SMRange TargetRange = SMRange(TargetStart, getTok().getLoc());
3681 
3682   std::string ExpectedTarget;
3683   raw_string_ostream ExpectedTargetOS(ExpectedTarget);
3684   IsaInfo::streamIsaVersion(&getSTI(), ExpectedTargetOS);
3685 
3686   if (Target != ExpectedTargetOS.str())
3687     return getParser().Error(TargetRange.Start, "target must match options",
3688                              TargetRange);
3689 
3690   getTargetStreamer().EmitDirectiveAMDGCNTarget(Target);
3691   return false;
3692 }
3693 
3694 bool AMDGPUAsmParser::OutOfRangeError(SMRange Range) {
3695   return getParser().Error(Range.Start, "value out of range", Range);
3696 }
3697 
3698 bool AMDGPUAsmParser::calculateGPRBlocks(
3699     const FeatureBitset &Features, bool VCCUsed, bool FlatScrUsed,
3700     bool XNACKUsed, Optional<bool> EnableWavefrontSize32, unsigned NextFreeVGPR,
3701     SMRange VGPRRange, unsigned NextFreeSGPR, SMRange SGPRRange,
3702     unsigned &VGPRBlocks, unsigned &SGPRBlocks) {
3703   // TODO(scott.linder): These calculations are duplicated from
3704   // AMDGPUAsmPrinter::getSIProgramInfo and could be unified.
3705   IsaVersion Version = getIsaVersion(getSTI().getCPU());
3706 
3707   unsigned NumVGPRs = NextFreeVGPR;
3708   unsigned NumSGPRs = NextFreeSGPR;
3709 
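  // On GFX10+ SGPRs are no longer allocated per wave, so no SGPR blocks
  // are reserved.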
3710   if (Version.Major >= 10)
3711     NumSGPRs = 0;
3712   else {
3713     unsigned MaxAddressableNumSGPRs =
3714         IsaInfo::getAddressableNumSGPRs(&getSTI());
3715 
3716     if (Version.Major >= 8 && !Features.test(FeatureSGPRInitBug) &&
3717         NumSGPRs > MaxAddressableNumSGPRs)
3718       return OutOfRangeError(SGPRRange);
3719 
3720     NumSGPRs +=
3721         IsaInfo::getNumExtraSGPRs(&getSTI(), VCCUsed, FlatScrUsed, XNACKUsed);
3722 
3723     if ((Version.Major <= 7 || Features.test(FeatureSGPRInitBug)) &&
3724         NumSGPRs > MaxAddressableNumSGPRs)
3725       return OutOfRangeError(SGPRRange);
3726 
3727     if (Features.test(FeatureSGPRInitBug))
3728       NumSGPRs = IsaInfo::FIXED_NUM_SGPRS_FOR_INIT_BUG;
3729   }
3730 
3731   VGPRBlocks =
3732       IsaInfo::getNumVGPRBlocks(&getSTI(), NumVGPRs, EnableWavefrontSize32);
3733   SGPRBlocks = IsaInfo::getNumSGPRBlocks(&getSTI(), NumSGPRs);
3734 
3735   return false;
3736 }
3737 
3738 bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
3739   if (getSTI().getTargetTriple().getArch() != Triple::amdgcn)
3740     return TokError("directive only supported for amdgcn architecture");
3741 
3742   if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA)
3743     return TokError("directive only supported for amdhsa OS");
3744 
3745   StringRef KernelName;
3746   if (getParser().parseIdentifier(KernelName))
3747     return true;
3748 
3749   kernel_descriptor_t KD = getDefaultAmdhsaKernelDescriptor(&getSTI());
3750 
3751   StringSet<> Seen;
3752 
3753   IsaVersion IVersion = getIsaVersion(getSTI().getCPU());
3754 
3755   SMRange VGPRRange;
3756   uint64_t NextFreeVGPR = 0;
3757   SMRange SGPRRange;
3758   uint64_t NextFreeSGPR = 0;
3759   unsigned UserSGPRCount = 0;
3760   bool ReserveVCC = true;
3761   bool ReserveFlatScr = true;
3762   bool ReserveXNACK = hasXNACK();
3763   Optional<bool> EnableWavefrontSize32;
3764 
3765   while (true) {
3766     while (getLexer().is(AsmToken::EndOfStatement))
3767       Lex();
3768 
3769     if (getLexer().isNot(AsmToken::Identifier))
3770       return TokError("expected .amdhsa_ directive or .end_amdhsa_kernel");
3771 
3772     StringRef ID = getTok().getIdentifier();
3773     SMRange IDRange = getTok().getLocRange();
3774     Lex();
3775 
3776     if (ID == ".end_amdhsa_kernel")
3777       break;
3778 
3779     if (Seen.find(ID) != Seen.end())
3780       return TokError(".amdhsa_ directives cannot be repeated");
3781     Seen.insert(ID);
3782 
3783     SMLoc ValStart = getTok().getLoc();
3784     int64_t IVal;
3785     if (getParser().parseAbsoluteExpression(IVal))
3786       return true;
3787     SMLoc ValEnd = getTok().getLoc();
3788     SMRange ValRange = SMRange(ValStart, ValEnd);
3789 
3790     if (IVal < 0)
3791       return OutOfRangeError(ValRange);
3792 
3793     uint64_t Val = IVal;
3794 
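// Range-check VALUE against the width of the ENTRY bit field and, if it fits,
// set the field in FIELD.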
3795 #define PARSE_BITS_ENTRY(FIELD, ENTRY, VALUE, RANGE)                           \
3796   if (!isUInt<ENTRY##_WIDTH>(VALUE))                                           \
3797     return OutOfRangeError(RANGE);                                             \
3798   AMDHSA_BITS_SET(FIELD, ENTRY, VALUE);
3799 
3800     if (ID == ".amdhsa_group_segment_fixed_size") {
3801       if (!isUInt<sizeof(KD.group_segment_fixed_size) * CHAR_BIT>(Val))
3802         return OutOfRangeError(ValRange);
3803       KD.group_segment_fixed_size = Val;
3804     } else if (ID == ".amdhsa_private_segment_fixed_size") {
3805       if (!isUInt<sizeof(KD.private_segment_fixed_size) * CHAR_BIT>(Val))
3806         return OutOfRangeError(ValRange);
3807       KD.private_segment_fixed_size = Val;
3808     } else if (ID == ".amdhsa_user_sgpr_private_segment_buffer") {
3809       PARSE_BITS_ENTRY(KD.kernel_code_properties,
3810                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER,
3811                        Val, ValRange);
3812       if (Val)
3813         UserSGPRCount += 4;
3814     } else if (ID == ".amdhsa_user_sgpr_dispatch_ptr") {
3815       PARSE_BITS_ENTRY(KD.kernel_code_properties,
3816                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR, Val,
3817                        ValRange);
3818       if (Val)
3819         UserSGPRCount += 2;
3820     } else if (ID == ".amdhsa_user_sgpr_queue_ptr") {
3821       PARSE_BITS_ENTRY(KD.kernel_code_properties,
3822                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR, Val,
3823                        ValRange);
3824       if (Val)
3825         UserSGPRCount += 2;
3826     } else if (ID == ".amdhsa_user_sgpr_kernarg_segment_ptr") {
3827       PARSE_BITS_ENTRY(KD.kernel_code_properties,
3828                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR,
3829                        Val, ValRange);
3830       if (Val)
3831         UserSGPRCount += 2;
3832     } else if (ID == ".amdhsa_user_sgpr_dispatch_id") {
3833       PARSE_BITS_ENTRY(KD.kernel_code_properties,
3834                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID, Val,
3835                        ValRange);
3836       if (Val)
3837         UserSGPRCount += 2;
3838     } else if (ID == ".amdhsa_user_sgpr_flat_scratch_init") {
3839       PARSE_BITS_ENTRY(KD.kernel_code_properties,
3840                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT, Val,
3841                        ValRange);
3842       if (Val)
3843         UserSGPRCount += 2;
3844     } else if (ID == ".amdhsa_user_sgpr_private_segment_size") {
3845       PARSE_BITS_ENTRY(KD.kernel_code_properties,
3846                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE,
3847                        Val, ValRange);
3848       if (Val)
3849         UserSGPRCount += 1;
3850     } else if (ID == ".amdhsa_wavefront_size32") {
3851       if (IVersion.Major < 10)
3852         return getParser().Error(IDRange.Start, "directive requires gfx10+",
3853                                  IDRange);
3854       EnableWavefrontSize32 = Val;
3855       PARSE_BITS_ENTRY(KD.kernel_code_properties,
3856                        KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32,
3857                        Val, ValRange);
3858     } else if (ID == ".amdhsa_system_sgpr_private_segment_wavefront_offset") {
3859       PARSE_BITS_ENTRY(
3860           KD.compute_pgm_rsrc2,
3861           COMPUTE_PGM_RSRC2_ENABLE_SGPR_PRIVATE_SEGMENT_WAVEFRONT_OFFSET, Val,
3862           ValRange);
3863     } else if (ID == ".amdhsa_system_sgpr_workgroup_id_x") {
3864       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
3865                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, Val,
3866                        ValRange);
3867     } else if (ID == ".amdhsa_system_sgpr_workgroup_id_y") {
3868       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
3869                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y, Val,
3870                        ValRange);
3871     } else if (ID == ".amdhsa_system_sgpr_workgroup_id_z") {
3872       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
3873                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z, Val,
3874                        ValRange);
3875     } else if (ID == ".amdhsa_system_sgpr_workgroup_info") {
3876       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
3877                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO, Val,
3878                        ValRange);
3879     } else if (ID == ".amdhsa_system_vgpr_workitem_id") {
3880       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
3881                        COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID, Val,
3882                        ValRange);
3883     } else if (ID == ".amdhsa_next_free_vgpr") {
3884       VGPRRange = ValRange;
3885       NextFreeVGPR = Val;
3886     } else if (ID == ".amdhsa_next_free_sgpr") {
3887       SGPRRange = ValRange;
3888       NextFreeSGPR = Val;
3889     } else if (ID == ".amdhsa_reserve_vcc") {
3890       if (!isUInt<1>(Val))
3891         return OutOfRangeError(ValRange);
3892       ReserveVCC = Val;
3893     } else if (ID == ".amdhsa_reserve_flat_scratch") {
3894       if (IVersion.Major < 7)
3895         return getParser().Error(IDRange.Start, "directive requires gfx7+",
3896                                  IDRange);
3897       if (!isUInt<1>(Val))
3898         return OutOfRangeError(ValRange);
3899       ReserveFlatScr = Val;
3900     } else if (ID == ".amdhsa_reserve_xnack_mask") {
3901       if (IVersion.Major < 8)
3902         return getParser().Error(IDRange.Start, "directive requires gfx8+",
3903                                  IDRange);
3904       if (!isUInt<1>(Val))
3905         return OutOfRangeError(ValRange);
3906       ReserveXNACK = Val;
3907     } else if (ID == ".amdhsa_float_round_mode_32") {
3908       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
3909                        COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32, Val, ValRange);
3910     } else if (ID == ".amdhsa_float_round_mode_16_64") {
3911       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
3912                        COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64, Val, ValRange);
3913     } else if (ID == ".amdhsa_float_denorm_mode_32") {
3914       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
3915                        COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32, Val, ValRange);
3916     } else if (ID == ".amdhsa_float_denorm_mode_16_64") {
3917       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
3918                        COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64, Val,
3919                        ValRange);
3920     } else if (ID == ".amdhsa_dx10_clamp") {
3921       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
3922                        COMPUTE_PGM_RSRC1_ENABLE_DX10_CLAMP, Val, ValRange);
3923     } else if (ID == ".amdhsa_ieee_mode") {
3924       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_ENABLE_IEEE_MODE,
3925                        Val, ValRange);
3926     } else if (ID == ".amdhsa_fp16_overflow") {
3927       if (IVersion.Major < 9)
3928         return getParser().Error(IDRange.Start, "directive requires gfx9+",
3929                                  IDRange);
3930       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FP16_OVFL, Val,
3931                        ValRange);
3932     } else if (ID == ".amdhsa_workgroup_processor_mode") {
3933       if (IVersion.Major < 10)
3934         return getParser().Error(IDRange.Start, "directive requires gfx10+",
3935                                  IDRange);
3936       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_WGP_MODE, Val,
3937                        ValRange);
3938     } else if (ID == ".amdhsa_memory_ordered") {
3939       if (IVersion.Major < 10)
3940         return getParser().Error(IDRange.Start, "directive requires gfx10+",
3941                                  IDRange);
3942       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_MEM_ORDERED, Val,
3943                        ValRange);
3944     } else if (ID == ".amdhsa_forward_progress") {
3945       if (IVersion.Major < 10)
3946         return getParser().Error(IDRange.Start, "directive requires gfx10+",
3947                                  IDRange);
3948       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FWD_PROGRESS, Val,
3949                        ValRange);
3950     } else if (ID == ".amdhsa_exception_fp_ieee_invalid_op") {
3951       PARSE_BITS_ENTRY(
3952           KD.compute_pgm_rsrc2,
3953           COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION, Val,
3954           ValRange);
3955     } else if (ID == ".amdhsa_exception_fp_denorm_src") {
3956       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
3957                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE,
3958                        Val, ValRange);
3959     } else if (ID == ".amdhsa_exception_fp_ieee_div_zero") {
3960       PARSE_BITS_ENTRY(
3961           KD.compute_pgm_rsrc2,
3962           COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO, Val,
3963           ValRange);
3964     } else if (ID == ".amdhsa_exception_fp_ieee_overflow") {
3965       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
3966                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW,
3967                        Val, ValRange);
3968     } else if (ID == ".amdhsa_exception_fp_ieee_underflow") {
3969       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
3970                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW,
3971                        Val, ValRange);
3972     } else if (ID == ".amdhsa_exception_fp_ieee_inexact") {
3973       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
3974                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT,
3975                        Val, ValRange);
3976     } else if (ID == ".amdhsa_exception_int_div_zero") {
3977       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
3978                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO,
3979                        Val, ValRange);
3980     } else {
3981       return getParser().Error(IDRange.Start,
3982                                "unknown .amdhsa_kernel directive", IDRange);
3983     }
3984 
3985 #undef PARSE_BITS_ENTRY
3986   }
3987 
3988   if (Seen.find(".amdhsa_next_free_vgpr") == Seen.end())
3989     return TokError(".amdhsa_next_free_vgpr directive is required");
3990 
3991   if (Seen.find(".amdhsa_next_free_sgpr") == Seen.end())
3992     return TokError(".amdhsa_next_free_sgpr directive is required");
3993 
3994   unsigned VGPRBlocks;
3995   unsigned SGPRBlocks;
3996   if (calculateGPRBlocks(getFeatureBits(), ReserveVCC, ReserveFlatScr,
3997                          ReserveXNACK, EnableWavefrontSize32, NextFreeVGPR,
3998                          VGPRRange, NextFreeSGPR, SGPRRange, VGPRBlocks,
3999                          SGPRBlocks))
4000     return true;
4001 
4002   if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_WIDTH>(
4003           VGPRBlocks))
4004     return OutOfRangeError(VGPRRange);
4005   AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
4006                   COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT, VGPRBlocks);
4007 
4008   if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_WIDTH>(
4009           SGPRBlocks))
4010     return OutOfRangeError(SGPRRange);
4011   AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
4012                   COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT,
4013                   SGPRBlocks);
4014 
4015   if (!isUInt<COMPUTE_PGM_RSRC2_USER_SGPR_COUNT_WIDTH>(UserSGPRCount))
4016     return TokError("too many user SGPRs enabled");
4017   AMDHSA_BITS_SET(KD.compute_pgm_rsrc2, COMPUTE_PGM_RSRC2_USER_SGPR_COUNT,
4018                   UserSGPRCount);
4019 
4020   getTargetStreamer().EmitAmdhsaKernelDescriptor(
4021       getSTI(), KernelName, KD, NextFreeVGPR, NextFreeSGPR, ReserveVCC,
4022       ReserveFlatScr, ReserveXNACK);
4023   return false;
4024 }
4025 
4026 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectVersion() {
4027   uint32_t Major;
4028   uint32_t Minor;
4029 
4030   if (ParseDirectiveMajorMinor(Major, Minor))
4031     return true;
4032 
4033   getTargetStreamer().EmitDirectiveHSACodeObjectVersion(Major, Minor);
4034   return false;
4035 }
4036 
4037 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectISA() {
4038   uint32_t Major;
4039   uint32_t Minor;
4040   uint32_t Stepping;
4041   StringRef VendorName;
4042   StringRef ArchName;
4043 
4044   // If this directive has no arguments, then use the ISA version for the
4045   // targeted GPU.
4046   if (getLexer().is(AsmToken::EndOfStatement)) {
4047     AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
4048     getTargetStreamer().EmitDirectiveHSACodeObjectISA(ISA.Major, ISA.Minor,
4049                                                       ISA.Stepping,
4050                                                       "AMD", "AMDGPU");
4051     return false;
4052   }
4053 
4054   if (ParseDirectiveMajorMinor(Major, Minor))
4055     return true;
4056 
4057   if (getLexer().isNot(AsmToken::Comma))
4058     return TokError("stepping version number required, comma expected");
4059   Lex();
4060 
4061   if (ParseAsAbsoluteExpression(Stepping))
4062     return TokError("invalid stepping version");
4063 
4064   if (getLexer().isNot(AsmToken::Comma))
4065     return TokError("vendor name required, comma expected");
4066   Lex();
4067 
4068   if (getLexer().isNot(AsmToken::String))
4069     return TokError("invalid vendor name");
4070 
4071   VendorName = getLexer().getTok().getStringContents();
4072   Lex();
4073 
4074   if (getLexer().isNot(AsmToken::Comma))
4075     return TokError("arch name required, comma expected");
4076   Lex();
4077 
4078   if (getLexer().isNot(AsmToken::String))
4079     return TokError("invalid arch name");
4080 
4081   ArchName = getLexer().getTok().getStringContents();
4082   Lex();
4083 
4084   getTargetStreamer().EmitDirectiveHSACodeObjectISA(Major, Minor, Stepping,
4085                                                     VendorName, ArchName);
4086   return false;
4087 }
4088 
4089 bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID,
4090                                                amd_kernel_code_t &Header) {
4091   // max_scratch_backing_memory_byte_size is deprecated. Ignore it while parsing
4092   // assembly for backwards compatibility.
4093   if (ID == "max_scratch_backing_memory_byte_size") {
4094     Parser.eatToEndOfStatement();
4095     return false;
4096   }
4097 
4098   SmallString<40> ErrStr;
4099   raw_svector_ostream Err(ErrStr);
4100   if (!parseAmdKernelCodeField(ID, getParser(), Header, Err)) {
4101     return TokError(Err.str());
4102   }
4103   Lex();
4104 
4105   if (ID == "enable_wavefront_size32") {
4106     if (Header.code_properties & AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32) {
4107       if (!isGFX10())
4108         return TokError("enable_wavefront_size32=1 is only allowed on GFX10+");
4109       if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32])
4110         return TokError("enable_wavefront_size32=1 requires +WavefrontSize32");
4111     } else {
4112       if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64])
4113         return TokError("enable_wavefront_size32=0 requires +WavefrontSize64");
4114     }
4115   }
4116 
4117   if (ID == "wavefront_size") {
4118     if (Header.wavefront_size == 5) {
4119       if (!isGFX10())
4120         return TokError("wavefront_size=5 is only allowed on GFX10+");
4121       if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32])
4122         return TokError("wavefront_size=5 requires +WavefrontSize32");
4123     } else if (Header.wavefront_size == 6) {
4124       if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64])
4125         return TokError("wavefront_size=6 requires +WavefrontSize64");
4126     }
4127   }
4128 
4129   if (ID == "enable_wgp_mode") {
4130     if (G_00B848_WGP_MODE(Header.compute_pgm_resource_registers) && !isGFX10())
4131       return TokError("enable_wgp_mode=1 is only allowed on GFX10+");
4132   }
4133 
4134   if (ID == "enable_mem_ordered") {
4135     if (G_00B848_MEM_ORDERED(Header.compute_pgm_resource_registers) && !isGFX10())
4136       return TokError("enable_mem_ordered=1 is only allowed on GFX10+");
4137   }
4138 
4139   if (ID == "enable_fwd_progress") {
4140     if (G_00B848_FWD_PROGRESS(Header.compute_pgm_resource_registers) && !isGFX10())
4141       return TokError("enable_fwd_progress=1 is only allowed on GFX10+");
4142   }
4143 
4144   return false;
4145 }
4146 
4147 bool AMDGPUAsmParser::ParseDirectiveAMDKernelCodeT() {
4148   amd_kernel_code_t Header;
4149   AMDGPU::initDefaultAMDKernelCodeT(Header, &getSTI());
4150 
4151   while (true) {
4152     // Lex EndOfStatement. This is in a while loop because lexing a comment
4153     // will set the current token to EndOfStatement.
4154     while(getLexer().is(AsmToken::EndOfStatement))
4155       Lex();
4156 
4157     if (getLexer().isNot(AsmToken::Identifier))
4158       return TokError("expected value identifier or .end_amd_kernel_code_t");
4159 
4160     StringRef ID = getLexer().getTok().getIdentifier();
4161     Lex();
4162 
4163     if (ID == ".end_amd_kernel_code_t")
4164       break;
4165 
4166     if (ParseAMDKernelCodeTValue(ID, Header))
4167       return true;
4168   }
4169 
4170   getTargetStreamer().EmitAMDKernelCodeT(Header);
4171 
4172   return false;
4173 }
4174 
4175 bool AMDGPUAsmParser::ParseDirectiveAMDGPUHsaKernel() {
4176   if (getLexer().isNot(AsmToken::Identifier))
4177     return TokError("expected symbol name");
4178 
4179   StringRef KernelName = Parser.getTok().getString();
4180 
4181   getTargetStreamer().EmitAMDGPUSymbolType(KernelName,
4182                                            ELF::STT_AMDGPU_HSA_KERNEL);
4183   Lex();
4184   if (!AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI()))
4185     KernelScope.initialize(getContext());
4186   return false;
4187 }
4188 
4189 bool AMDGPUAsmParser::ParseDirectiveISAVersion() {
4190   if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) {
4191     return Error(getParser().getTok().getLoc(),
4192                  ".amd_amdgpu_isa directive is not available on non-amdgcn "
4193                  "architectures");
4194   }
4195 
4196   auto ISAVersionStringFromASM = getLexer().getTok().getStringContents();
4197 
4198   std::string ISAVersionStringFromSTI;
4199   raw_string_ostream ISAVersionStreamFromSTI(ISAVersionStringFromSTI);
4200   IsaInfo::streamIsaVersion(&getSTI(), ISAVersionStreamFromSTI);
4201 
4202   if (ISAVersionStringFromASM != ISAVersionStreamFromSTI.str()) {
4203     return Error(getParser().getTok().getLoc(),
4204                  ".amd_amdgpu_isa directive does not match triple and/or mcpu "
4205                  "arguments specified through the command line");
4206   }
4207 
4208   getTargetStreamer().EmitISAVersion(ISAVersionStreamFromSTI.str());
4209   Lex();
4210 
4211   return false;
4212 }
4213 
4214 bool AMDGPUAsmParser::ParseDirectiveHSAMetadata() {
4215   const char *AssemblerDirectiveBegin;
4216   const char *AssemblerDirectiveEnd;
4217   std::tie(AssemblerDirectiveBegin, AssemblerDirectiveEnd) =
4218       AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())
4219           ? std::make_tuple(HSAMD::V3::AssemblerDirectiveBegin,
4220                             HSAMD::V3::AssemblerDirectiveEnd)
4221           : std::make_tuple(HSAMD::AssemblerDirectiveBegin,
4222                             HSAMD::AssemblerDirectiveEnd);
4223 
4224   if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA) {
4225     return Error(getParser().getTok().getLoc(),
4226                  (Twine(AssemblerDirectiveBegin) + Twine(" directive is "
4227                  "not available on non-amdhsa OSes")).str());
4228   }
4229 
4230   std::string HSAMetadataString;
4231   if (ParseToEndDirective(AssemblerDirectiveBegin, AssemblerDirectiveEnd,
4232                           HSAMetadataString))
4233     return true;
4234 
4235   if (IsaInfo::hasCodeObjectV3(&getSTI())) {
4236     if (!getTargetStreamer().EmitHSAMetadataV3(HSAMetadataString))
4237       return Error(getParser().getTok().getLoc(), "invalid HSA metadata");
4238   } else {
4239     if (!getTargetStreamer().EmitHSAMetadataV2(HSAMetadataString))
4240       return Error(getParser().getTok().getLoc(), "invalid HSA metadata");
4241   }
4242 
4243   return false;
4244 }
4245 
4246 /// Common code to parse out a block of text (typically YAML) between start and
4247 /// end directives.
4248 bool AMDGPUAsmParser::ParseToEndDirective(const char *AssemblerDirectiveBegin,
4249                                           const char *AssemblerDirectiveEnd,
4250                                           std::string &CollectString) {
4251 
4252   raw_string_ostream CollectStream(CollectString);
4253 
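  // Stop skipping whitespace so that the collected text is reproduced
  // verbatim, including its original spacing.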
4254   getLexer().setSkipSpace(false);
4255 
4256   bool FoundEnd = false;
4257   while (!getLexer().is(AsmToken::Eof)) {
4258     while (getLexer().is(AsmToken::Space)) {
4259       CollectStream << getLexer().getTok().getString();
4260       Lex();
4261     }
4262 
4263     if (getLexer().is(AsmToken::Identifier)) {
4264       StringRef ID = getLexer().getTok().getIdentifier();
4265       if (ID == AssemblerDirectiveEnd) {
4266         Lex();
4267         FoundEnd = true;
4268         break;
4269       }
4270     }
4271 
4272     CollectStream << Parser.parseStringToEndOfStatement()
4273                   << getContext().getAsmInfo()->getSeparatorString();
4274 
4275     Parser.eatToEndOfStatement();
4276   }
4277 
4278   getLexer().setSkipSpace(true);
4279 
4280   if (getLexer().is(AsmToken::Eof) && !FoundEnd) {
4281     return TokError(Twine("expected directive ") +
4282                     Twine(AssemblerDirectiveEnd) + Twine(" not found"));
4283   }
4284 
4285   CollectStream.flush();
4286   return false;
4287 }
4288 
4289 /// Parse the assembler directive for new MsgPack-format PAL metadata.
4290 bool AMDGPUAsmParser::ParseDirectivePALMetadataBegin() {
4291   std::string String;
4292   if (ParseToEndDirective(AMDGPU::PALMD::AssemblerDirectiveBegin,
4293                           AMDGPU::PALMD::AssemblerDirectiveEnd, String))
4294     return true;
4295 
4296   auto PALMetadata = getTargetStreamer().getPALMetadata();
4297   if (!PALMetadata->setFromString(String))
4298     return Error(getParser().getTok().getLoc(), "invalid PAL metadata");
4299   return false;
4300 }
4301 
4302 /// Parse the assembler directive for old linear-format PAL metadata.
4303 bool AMDGPUAsmParser::ParseDirectivePALMetadata() {
4304   if (getSTI().getTargetTriple().getOS() != Triple::AMDPAL) {
4305     return Error(getParser().getTok().getLoc(),
4306                  (Twine(PALMD::AssemblerDirective) + Twine(" directive is "
4307                  "not available on non-amdpal OSes")).str());
4308   }
4309 
4310   auto PALMetadata = getTargetStreamer().getPALMetadata();
4311   PALMetadata->setLegacy();
4312   for (;;) {
4313     uint32_t Key, Value;
4314     if (ParseAsAbsoluteExpression(Key)) {
4315       return TokError(Twine("invalid value in ") +
4316                       Twine(PALMD::AssemblerDirective));
4317     }
4318     if (getLexer().isNot(AsmToken::Comma)) {
4319       return TokError(Twine("expected an even number of values in ") +
4320                       Twine(PALMD::AssemblerDirective));
4321     }
4322     Lex();
4323     if (ParseAsAbsoluteExpression(Value)) {
4324       return TokError(Twine("invalid value in ") +
4325                       Twine(PALMD::AssemblerDirective));
4326     }
4327     PALMetadata->setRegister(Key, Value);
4328     if (getLexer().isNot(AsmToken::Comma))
4329       break;
4330     Lex();
4331   }
4332   return false;
4333 }
4334 
4335 /// ParseDirectiveAMDGPULDS
4336 ///  ::= .amdgpu_lds identifier ',' size_expression [',' align_expression]
4337 bool AMDGPUAsmParser::ParseDirectiveAMDGPULDS() {
4338   if (getParser().checkForValidSection())
4339     return true;
4340 
4341   StringRef Name;
4342   SMLoc NameLoc = getLexer().getLoc();
4343   if (getParser().parseIdentifier(Name))
4344     return TokError("expected identifier in directive");
4345 
4346   MCSymbol *Symbol = getContext().getOrCreateSymbol(Name);
4347   if (parseToken(AsmToken::Comma, "expected ','"))
4348     return true;
4349 
4350   unsigned LocalMemorySize = AMDGPU::IsaInfo::getLocalMemorySize(&getSTI());
4351 
4352   int64_t Size;
4353   SMLoc SizeLoc = getLexer().getLoc();
4354   if (getParser().parseAbsoluteExpression(Size))
4355     return true;
4356   if (Size < 0)
4357     return Error(SizeLoc, "size must be non-negative");
4358   if (Size > LocalMemorySize)
4359     return Error(SizeLoc, "size is too large");
4360 
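  // Alignment defaults to 4 bytes if no alignment expression is given.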
4361   int64_t Align = 4;
4362   if (getLexer().is(AsmToken::Comma)) {
4363     Lex();
4364     SMLoc AlignLoc = getLexer().getLoc();
4365     if (getParser().parseAbsoluteExpression(Align))
4366       return true;
4367     if (Align < 0 || !isPowerOf2_64(Align))
4368       return Error(AlignLoc, "alignment must be a power of two");
4369 
4370     // Alignment larger than the size of LDS is possible in theory, as long
4371     // as the linker manages to place the symbol at address 0, but we do want
4372     // to make sure the alignment fits nicely into a 32-bit integer.
4373     if (Align >= 1u << 31)
4374       return Error(AlignLoc, "alignment is too large");
4375   }
4376 
4377   if (parseToken(AsmToken::EndOfStatement,
4378                  "unexpected token in '.amdgpu_lds' directive"))
4379     return true;
4380 
4381   Symbol->redefineIfPossible();
4382   if (!Symbol->isUndefined())
4383     return Error(NameLoc, "invalid symbol redefinition");
4384 
4385   getTargetStreamer().emitAMDGPULDS(Symbol, Size, Align);
4386   return false;
4387 }
4388 
4389 bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) {
4390   StringRef IDVal = DirectiveID.getString();
4391 
4392   if (AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) {
4393     if (IDVal == ".amdgcn_target")
4394       return ParseDirectiveAMDGCNTarget();
4395 
4396     if (IDVal == ".amdhsa_kernel")
4397       return ParseDirectiveAMDHSAKernel();
4398 
4399     // TODO: Restructure/combine with PAL metadata directive.
4400     if (IDVal == AMDGPU::HSAMD::V3::AssemblerDirectiveBegin)
4401       return ParseDirectiveHSAMetadata();
4402   } else {
4403     if (IDVal == ".hsa_code_object_version")
4404       return ParseDirectiveHSACodeObjectVersion();
4405 
4406     if (IDVal == ".hsa_code_object_isa")
4407       return ParseDirectiveHSACodeObjectISA();
4408 
4409     if (IDVal == ".amd_kernel_code_t")
4410       return ParseDirectiveAMDKernelCodeT();
4411 
4412     if (IDVal == ".amdgpu_hsa_kernel")
4413       return ParseDirectiveAMDGPUHsaKernel();
4414 
4415     if (IDVal == ".amd_amdgpu_isa")
4416       return ParseDirectiveISAVersion();
4417 
4418     if (IDVal == AMDGPU::HSAMD::AssemblerDirectiveBegin)
4419       return ParseDirectiveHSAMetadata();
4420   }
4421 
4422   if (IDVal == ".amdgpu_lds")
4423     return ParseDirectiveAMDGPULDS();
4424 
4425   if (IDVal == PALMD::AssemblerDirectiveBegin)
4426     return ParseDirectivePALMetadataBegin();
4427 
4428   if (IDVal == PALMD::AssemblerDirective)
4429     return ParseDirectivePALMetadata();
4430 
4431   return true;
4432 }
4433 
4434 bool AMDGPUAsmParser::subtargetHasRegister(const MCRegisterInfo &MRI,
4435                                            unsigned RegNo) const {
4436 
4437   for (MCRegAliasIterator R(AMDGPU::TTMP12_TTMP13_TTMP14_TTMP15, &MRI, true);
4438        R.isValid(); ++R) {
4439     if (*R == RegNo)
4440       return isGFX9() || isGFX10();
4441   }
4442 
4443   // GFX10 has 2 more SGPRs, 104 and 105.
4444   for (MCRegAliasIterator R(AMDGPU::SGPR104_SGPR105, &MRI, true);
4445        R.isValid(); ++R) {
4446     if (*R == RegNo)
4447       return hasSGPR104_SGPR105();
4448   }
4449 
4450   switch (RegNo) {
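  // Aperture registers and the POPS exiting wave ID register only exist on
  // GFX9 and later.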
4451   case AMDGPU::SRC_SHARED_BASE:
4452   case AMDGPU::SRC_SHARED_LIMIT:
4453   case AMDGPU::SRC_PRIVATE_BASE:
4454   case AMDGPU::SRC_PRIVATE_LIMIT:
4455   case AMDGPU::SRC_POPS_EXITING_WAVE_ID:
4456     return !isCI() && !isSI() && !isVI();
4457   case AMDGPU::TBA:
4458   case AMDGPU::TBA_LO:
4459   case AMDGPU::TBA_HI:
4460   case AMDGPU::TMA:
4461   case AMDGPU::TMA_LO:
4462   case AMDGPU::TMA_HI:
4463     return !isGFX9() && !isGFX10();
4464   case AMDGPU::XNACK_MASK:
4465   case AMDGPU::XNACK_MASK_LO:
4466   case AMDGPU::XNACK_MASK_HI:
4467     return !isCI() && !isSI() && !isGFX10() && hasXNACK();
4468   case AMDGPU::SGPR_NULL:
4469     return isGFX10();
4470   default:
4471     break;
4472   }
4473 
4474   if (isCI())
4475     return true;
4476 
4477   if (isSI() || isGFX10()) {
4478     // No flat_scr on SI.
4479     // On GFX10 flat scratch is not a valid register operand and can only be
4480     // accessed with s_setreg/s_getreg.
4481     switch (RegNo) {
4482     case AMDGPU::FLAT_SCR:
4483     case AMDGPU::FLAT_SCR_LO:
4484     case AMDGPU::FLAT_SCR_HI:
4485       return false;
4486     default:
4487       return true;
4488     }
4489   }
4490 
4491   // VI only has 102 SGPRs, so make sure we aren't trying to use the 2 more that
4492   // SI/CI have.
4493   for (MCRegAliasIterator R(AMDGPU::SGPR102_SGPR103, &MRI, true);
4494        R.isValid(); ++R) {
4495     if (*R == RegNo)
4496       return hasSGPR102_SGPR103();
4497   }
4498 
4499   return true;
4500 }
4501 
4502 OperandMatchResultTy
4503 AMDGPUAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic,
4504                               OperandMode Mode) {
4505   // Try to parse with a custom parser
4506   OperandMatchResultTy ResTy = MatchOperandParserImpl(Operands, Mnemonic);
4507 
4508   // If we successfully parsed the operand or if there was an error parsing,
4509   // we are done.
4510   //
4511   // If we are parsing after we reach EndOfStatement then this means we
4512   // are appending default values to the Operands list. This is only done
4513   // by custom parsers, so we shouldn't continue on to the generic parsing.
4514   if (ResTy == MatchOperand_Success || ResTy == MatchOperand_ParseFail ||
4515       getLexer().is(AsmToken::EndOfStatement))
4516     return ResTy;
4517 
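  // In the NSA form, MIMG address registers are written as a bracketed list,
  // e.g. [v4, v9, v16].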
4518   if (Mode == OperandMode_NSA && getLexer().is(AsmToken::LBrac)) {
4519     unsigned Prefix = Operands.size();
4520     SMLoc LBraceLoc = getTok().getLoc();
4521     Parser.Lex(); // eat the '['
4522 
4523     for (;;) {
4524       ResTy = parseReg(Operands);
4525       if (ResTy != MatchOperand_Success)
4526         return ResTy;
4527 
4528       if (getLexer().is(AsmToken::RBrac))
4529         break;
4530 
4531       if (getLexer().isNot(AsmToken::Comma))
4532         return MatchOperand_ParseFail;
4533       Parser.Lex();
4534     }
4535 
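    // Only keep the surrounding brackets if more than one address register was
    // parsed; a single register is treated as an ordinary operand.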
4536     if (Operands.size() - Prefix > 1) {
4537       Operands.insert(Operands.begin() + Prefix,
4538                       AMDGPUOperand::CreateToken(this, "[", LBraceLoc));
4539       Operands.push_back(AMDGPUOperand::CreateToken(this, "]",
4540                                                     getTok().getLoc()));
4541     }
4542 
4543     Parser.Lex(); // eat the ']'
4544     return MatchOperand_Success;
4545   }
4546 
4547   return parseRegOrImm(Operands);
4548 }
4549 
4550 StringRef AMDGPUAsmParser::parseMnemonicSuffix(StringRef Name) {
4551   // Clear any forced encodings from the previous instruction.
4552   setForcedEncodingSize(0);
4553   setForcedDPP(false);
4554   setForcedSDWA(false);
4555 
4556   if (Name.endswith("_e64")) {
4557     setForcedEncodingSize(64);
4558     return Name.substr(0, Name.size() - 4);
4559   } else if (Name.endswith("_e32")) {
4560     setForcedEncodingSize(32);
4561     return Name.substr(0, Name.size() - 4);
4562   } else if (Name.endswith("_dpp")) {
4563     setForcedDPP(true);
4564     return Name.substr(0, Name.size() - 4);
4565   } else if (Name.endswith("_sdwa")) {
4566     setForcedSDWA(true);
4567     return Name.substr(0, Name.size() - 5);
4568   }
4569   return Name;
4570 }
4571 
4572 bool AMDGPUAsmParser::ParseInstruction(ParseInstructionInfo &Info,
4573                                        StringRef Name,
4574                                        SMLoc NameLoc, OperandVector &Operands) {
4575   // Add the instruction mnemonic
4576   Name = parseMnemonicSuffix(Name);
4577   Operands.push_back(AMDGPUOperand::CreateToken(this, Name, NameLoc));
4578 
4579   bool IsMIMG = Name.startswith("image_");
4580 
4581   while (!getLexer().is(AsmToken::EndOfStatement)) {
4582     OperandMode Mode = OperandMode_Default;
4583     if (IsMIMG && isGFX10() && Operands.size() == 2)
4584       Mode = OperandMode_NSA;
4585     OperandMatchResultTy Res = parseOperand(Operands, Name, Mode);
4586 
4587     // Eat the comma or space if there is one.
4588     if (getLexer().is(AsmToken::Comma))
4589       Parser.Lex();
4590 
4591     switch (Res) {
4592       case MatchOperand_Success: break;
4593       case MatchOperand_ParseFail:
4594         // FIXME: use real operand location rather than the current location.
4595         Error(getLexer().getLoc(), "failed parsing operand.");
4596         while (!getLexer().is(AsmToken::EndOfStatement)) {
4597           Parser.Lex();
4598         }
4599         return true;
4600       case MatchOperand_NoMatch:
4601         // FIXME: use real operand location rather than the current location.
4602         Error(getLexer().getLoc(), "not a valid operand.");
4603         while (!getLexer().is(AsmToken::EndOfStatement)) {
4604           Parser.Lex();
4605         }
4606         return true;
4607     }
4608   }
4609 
4610   return false;
4611 }
4612 
4613 //===----------------------------------------------------------------------===//
4614 // Utility functions
4615 //===----------------------------------------------------------------------===//
4616 
4617 OperandMatchResultTy
4618 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, int64_t &IntVal) {
4619 
4620   if (!trySkipId(Prefix, AsmToken::Colon))
4621     return MatchOperand_NoMatch;
4622 
4623   return parseExpr(IntVal) ? MatchOperand_Success : MatchOperand_ParseFail;
4624 }
4625 
4626 OperandMatchResultTy
4627 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
4628                                     AMDGPUOperand::ImmTy ImmTy,
4629                                     bool (*ConvertResult)(int64_t&)) {
4630   SMLoc S = getLoc();
4631   int64_t Value = 0;
4632 
4633   OperandMatchResultTy Res = parseIntWithPrefix(Prefix, Value);
4634   if (Res != MatchOperand_Success)
4635     return Res;
4636 
4637   if (ConvertResult && !ConvertResult(Value)) {
4638     Error(S, "invalid " + StringRef(Prefix) + " value.");
4639   }
4640 
4641   Operands.push_back(AMDGPUOperand::CreateImm(this, Value, S, ImmTy));
4642   return MatchOperand_Success;
4643 }
4644 
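     // Parse an operand of the form '<prefix>:[0,1,...]' with up to four 0/1
     // elements packed into a bitmask, e.g. 'op_sel:[0,1]'. Used for op_sel,
     // op_sel_hi, neg_lo and neg_hi.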
4645 OperandMatchResultTy
4646 AMDGPUAsmParser::parseOperandArrayWithPrefix(const char *Prefix,
4647                                              OperandVector &Operands,
4648                                              AMDGPUOperand::ImmTy ImmTy,
4649                                              bool (*ConvertResult)(int64_t&)) {
4650   SMLoc S = getLoc();
4651   if (!trySkipId(Prefix, AsmToken::Colon))
4652     return MatchOperand_NoMatch;
4653 
4654   if (!skipToken(AsmToken::LBrac, "expected a left square bracket"))
4655     return MatchOperand_ParseFail;
4656 
4657   unsigned Val = 0;
4658   const unsigned MaxSize = 4;
4659 
4660   // FIXME: How to verify the number of elements matches the number of src
4661   // operands?
4662   for (int I = 0; ; ++I) {
4663     int64_t Op;
4664     SMLoc Loc = getLoc();
4665     if (!parseExpr(Op))
4666       return MatchOperand_ParseFail;
4667 
4668     if (Op != 0 && Op != 1) {
4669       Error(Loc, "invalid " + StringRef(Prefix) + " value.");
4670       return MatchOperand_ParseFail;
4671     }
4672 
4673     Val |= (Op << I);
4674 
4675     if (trySkipToken(AsmToken::RBrac))
4676       break;
4677 
4678     if (I + 1 == MaxSize) {
4679       Error(getLoc(), "expected a closing square bracket");
4680       return MatchOperand_ParseFail;
4681     }
4682 
4683     if (!skipToken(AsmToken::Comma, "expected a comma"))
4684       return MatchOperand_ParseFail;
4685   }
4686 
4687   Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S, ImmTy));
4688   return MatchOperand_Success;
4689 }
4690 
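     // Parse a named single-bit modifier. The bare name sets the bit to 1 and a
     // 'no'-prefixed form sets it to 0, e.g. 'glc' vs 'noglc'; if the operand is
     // omitted, the default value 0 is used.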
4691 OperandMatchResultTy
4692 AMDGPUAsmParser::parseNamedBit(const char *Name, OperandVector &Operands,
4693                                AMDGPUOperand::ImmTy ImmTy) {
4694   int64_t Bit = 0;
4695   SMLoc S = Parser.getTok().getLoc();
4696 
4697   // If we are at the end of the statement, this is a default argument and we
4698   // keep the default value.
4699   if (getLexer().isNot(AsmToken::EndOfStatement)) {
4700     switch(getLexer().getKind()) {
4701       case AsmToken::Identifier: {
4702         StringRef Tok = Parser.getTok().getString();
4703         if (Tok == Name) {
4704           if (Tok == "r128" && !hasMIMG_R128())
4705             Error(S, "r128 modifier is not supported on this GPU");
4706           if (Tok == "a16" && !isGFX9() && !hasGFX10A16())
4707             Error(S, "a16 modifier is not supported on this GPU");
4708           Bit = 1;
4709           Parser.Lex();
4710         } else if (Tok.startswith("no") && Tok.endswith(Name)) {
4711           Bit = 0;
4712           Parser.Lex();
4713         } else {
4714           return MatchOperand_NoMatch;
4715         }
4716         break;
4717       }
4718       default:
4719         return MatchOperand_NoMatch;
4720     }
4721   }
4722 
4723   if (!isGFX10() && ImmTy == AMDGPUOperand::ImmTyDLC)
4724     return MatchOperand_ParseFail;
4725 
4726   if (isGFX9() && ImmTy == AMDGPUOperand::ImmTyA16)
4727     ImmTy = AMDGPUOperand::ImmTyR128A16;
4728 
4729   Operands.push_back(AMDGPUOperand::CreateImm(this, Bit, S, ImmTy));
4730   return MatchOperand_Success;
4731 }
4732 
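     // If the optional operand was parsed, append its immediate to the MCInst;
     // otherwise append the given default so the instruction always gets a
     // complete operand list.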
4733 static void addOptionalImmOperand(
4734   MCInst& Inst, const OperandVector& Operands,
4735   AMDGPUAsmParser::OptionalImmIndexMap& OptionalIdx,
4736   AMDGPUOperand::ImmTy ImmT,
4737   int64_t Default = 0) {
4738   auto i = OptionalIdx.find(ImmT);
4739   if (i != OptionalIdx.end()) {
4740     unsigned Idx = i->second;
4741     ((AMDGPUOperand &)*Operands[Idx]).addImmOperands(Inst, 1);
4742   } else {
4743     Inst.addOperand(MCOperand::createImm(Default));
4744   }
4745 }
4746 
4747 OperandMatchResultTy
4748 AMDGPUAsmParser::parseStringWithPrefix(StringRef Prefix, StringRef &Value) {
4749   if (getLexer().isNot(AsmToken::Identifier)) {
4750     return MatchOperand_NoMatch;
4751   }
4752   StringRef Tok = Parser.getTok().getString();
4753   if (Tok != Prefix) {
4754     return MatchOperand_NoMatch;
4755   }
4756 
4757   Parser.Lex();
4758   if (getLexer().isNot(AsmToken::Colon)) {
4759     return MatchOperand_ParseFail;
4760   }
4761 
4762   Parser.Lex();
4763   if (getLexer().isNot(AsmToken::Identifier)) {
4764     return MatchOperand_ParseFail;
4765   }
4766 
4767   Value = Parser.getTok().getString();
4768   return MatchOperand_Success;
4769 }
4770 
4771 // dfmt and nfmt (in a tbuffer instruction) are parsed as one to allow their
4772 // values to live in a joint format operand in the MCInst encoding.
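     // E.g. 'dfmt:1, nfmt:2' is folded into a single operand whose value is
     // (dfmt | (nfmt << 4)). The two specifiers may appear in either order and
     // each is optional.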
4773 OperandMatchResultTy
4774 AMDGPUAsmParser::parseDfmtNfmt(OperandVector &Operands) {
4775   SMLoc S = Parser.getTok().getLoc();
4776   int64_t Dfmt = 0, Nfmt = 0;
4777   // dfmt and nfmt can appear in either order, and each is optional.
4778   bool GotDfmt = false, GotNfmt = false;
4779   while (!GotDfmt || !GotNfmt) {
4780     if (!GotDfmt) {
4781       auto Res = parseIntWithPrefix("dfmt", Dfmt);
4782       if (Res != MatchOperand_NoMatch) {
4783         if (Res != MatchOperand_Success)
4784           return Res;
4785         if (Dfmt >= 16) {
4786           Error(Parser.getTok().getLoc(), "out of range dfmt");
4787           return MatchOperand_ParseFail;
4788         }
4789         GotDfmt = true;
4790         Parser.Lex();
4791         continue;
4792       }
4793     }
4794     if (!GotNfmt) {
4795       auto Res = parseIntWithPrefix("nfmt", Nfmt);
4796       if (Res != MatchOperand_NoMatch) {
4797         if (Res != MatchOperand_Success)
4798           return Res;
4799         if (Nfmt >= 8) {
4800           Error(Parser.getTok().getLoc(), "out of range nfmt");
4801           return MatchOperand_ParseFail;
4802         }
4803         GotNfmt = true;
4804         Parser.Lex();
4805         continue;
4806       }
4807     }
4808     break;
4809   }
4810   if (!GotDfmt && !GotNfmt)
4811     return MatchOperand_NoMatch;
4812   auto Format = Dfmt | Nfmt << 4;
4813   Operands.push_back(
4814       AMDGPUOperand::CreateImm(this, Format, S, AMDGPUOperand::ImmTyFORMAT));
4815   return MatchOperand_Success;
4816 }
4817 
4818 //===----------------------------------------------------------------------===//
4819 // ds
4820 //===----------------------------------------------------------------------===//
4821 
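     // cvtDSOffset01 converts DS instructions that take two separate 8-bit
     // offsets, e.g. 'ds_write2_b32 v1, v2, v3 offset0:4 offset1:8'.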
4822 void AMDGPUAsmParser::cvtDSOffset01(MCInst &Inst,
4823                                     const OperandVector &Operands) {
4824   OptionalImmIndexMap OptionalIdx;
4825 
4826   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
4827     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
4828 
4829     // Add the register arguments
4830     if (Op.isReg()) {
4831       Op.addRegOperands(Inst, 1);
4832       continue;
4833     }
4834 
4835     // Handle optional arguments
4836     OptionalIdx[Op.getImmTy()] = i;
4837   }
4838 
4839   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset0);
4840   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset1);
4841   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS);
4842 
4843   Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0
4844 }
4845 
4846 void AMDGPUAsmParser::cvtDSImpl(MCInst &Inst, const OperandVector &Operands,
4847                                 bool IsGdsHardcoded) {
4848   OptionalImmIndexMap OptionalIdx;
4849 
4850   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
4851     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
4852 
4853     // Add the register arguments
4854     if (Op.isReg()) {
4855       Op.addRegOperands(Inst, 1);
4856       continue;
4857     }
4858 
4859     if (Op.isToken() && Op.getToken() == "gds") {
4860       IsGdsHardcoded = true;
4861       continue;
4862     }
4863 
4864     // Handle optional arguments
4865     OptionalIdx[Op.getImmTy()] = i;
4866   }
4867 
4868   AMDGPUOperand::ImmTy OffsetType =
4869     (Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_gfx10 ||
4870      Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_gfx6_gfx7 ||
4871      Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_vi) ? AMDGPUOperand::ImmTySwizzle :
4872                                                       AMDGPUOperand::ImmTyOffset;
4873 
4874   addOptionalImmOperand(Inst, Operands, OptionalIdx, OffsetType);
4875 
4876   if (!IsGdsHardcoded) {
4877     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS);
4878   }
4879   Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0
4880 }
4881 
4882 void AMDGPUAsmParser::cvtExp(MCInst &Inst, const OperandVector &Operands) {
4883   OptionalImmIndexMap OptionalIdx;
4884 
4885   unsigned OperandIdx[4];
4886   unsigned EnMask = 0;
4887   int SrcIdx = 0;
4888 
4889   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
4890     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
4891 
4892     // Add the register arguments
4893     if (Op.isReg()) {
4894       assert(SrcIdx < 4);
4895       OperandIdx[SrcIdx] = Inst.size();
4896       Op.addRegOperands(Inst, 1);
4897       ++SrcIdx;
4898       continue;
4899     }
4900 
4901     if (Op.isOff()) {
4902       assert(SrcIdx < 4);
4903       OperandIdx[SrcIdx] = Inst.size();
4904       Inst.addOperand(MCOperand::createReg(AMDGPU::NoRegister));
4905       ++SrcIdx;
4906       continue;
4907     }
4908 
4909     if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyExpTgt) {
4910       Op.addImmOperands(Inst, 1);
4911       continue;
4912     }
4913 
4914     if (Op.isToken() && Op.getToken() == "done")
4915       continue;
4916 
4917     // Handle optional arguments
4918     OptionalIdx[Op.getImmTy()] = i;
4919   }
4920 
4921   assert(SrcIdx == 4);
4922 
4923   bool Compr = false;
4924   if (OptionalIdx.find(AMDGPUOperand::ImmTyExpCompr) != OptionalIdx.end()) {
4925     Compr = true;
4926     Inst.getOperand(OperandIdx[1]) = Inst.getOperand(OperandIdx[2]);
4927     Inst.getOperand(OperandIdx[2]).setReg(AMDGPU::NoRegister);
4928     Inst.getOperand(OperandIdx[3]).setReg(AMDGPU::NoRegister);
4929   }
4930 
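       // Compute the 'en' operand: one enable bit per source register in the
       // normal form, or two bits per 16-bit pair when 'compr' is set.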
4931   for (auto i = 0; i < SrcIdx; ++i) {
4932     if (Inst.getOperand(OperandIdx[i]).getReg() != AMDGPU::NoRegister) {
4933       EnMask |= Compr? (0x3 << i * 2) : (0x1 << i);
4934     }
4935   }
4936 
4937   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpVM);
4938   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpCompr);
4939 
4940   Inst.addOperand(MCOperand::createImm(EnMask));
4941 }
4942 
4943 //===----------------------------------------------------------------------===//
4944 // s_waitcnt
4945 //===----------------------------------------------------------------------===//
4946 
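     // s_waitcnt accepts either a raw immediate, e.g. 's_waitcnt 0', or a list of
     // named counters such as 's_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)'. Counters
     // may also be separated by '&' or ','; a '_sat' suffix (e.g. 'vmcnt_sat')
     // clamps an oversized value to the field maximum instead of reporting an
     // error.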
4947 static bool
4948 encodeCnt(
4949   const AMDGPU::IsaVersion ISA,
4950   int64_t &IntVal,
4951   int64_t CntVal,
4952   bool Saturate,
4953   unsigned (*encode)(const IsaVersion &Version, unsigned, unsigned),
4954   unsigned (*decode)(const IsaVersion &Version, unsigned))
4955 {
4956   bool Failed = false;
4957 
4958   IntVal = encode(ISA, IntVal, CntVal);
4959   if (CntVal != decode(ISA, IntVal)) {
4960     if (Saturate) {
4961       IntVal = encode(ISA, IntVal, -1);
4962     } else {
4963       Failed = true;
4964     }
4965   }
4966   return Failed;
4967 }
4968 
4969 bool AMDGPUAsmParser::parseCnt(int64_t &IntVal) {
4970 
4971   SMLoc CntLoc = getLoc();
4972   StringRef CntName = getTokenStr();
4973 
4974   if (!skipToken(AsmToken::Identifier, "expected a counter name") ||
4975       !skipToken(AsmToken::LParen, "expected a left parenthesis"))
4976     return false;
4977 
4978   int64_t CntVal;
4979   SMLoc ValLoc = getLoc();
4980   if (!parseExpr(CntVal))
4981     return false;
4982 
4983   AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
4984 
4985   bool Failed = true;
4986   bool Sat = CntName.endswith("_sat");
4987 
4988   if (CntName == "vmcnt" || CntName == "vmcnt_sat") {
4989     Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeVmcnt, decodeVmcnt);
4990   } else if (CntName == "expcnt" || CntName == "expcnt_sat") {
4991     Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeExpcnt, decodeExpcnt);
4992   } else if (CntName == "lgkmcnt" || CntName == "lgkmcnt_sat") {
4993     Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeLgkmcnt, decodeLgkmcnt);
4994   } else {
4995     Error(CntLoc, "invalid counter name " + CntName);
4996     return false;
4997   }
4998 
4999   if (Failed) {
5000     Error(ValLoc, "too large value for " + CntName);
5001     return false;
5002   }
5003 
5004   if (!skipToken(AsmToken::RParen, "expected a closing parenthesis"))
5005     return false;
5006 
5007   if (trySkipToken(AsmToken::Amp) || trySkipToken(AsmToken::Comma)) {
5008     if (isToken(AsmToken::EndOfStatement)) {
5009       Error(getLoc(), "expected a counter name");
5010       return false;
5011     }
5012   }
5013 
5014   return true;
5015 }
5016 
5017 OperandMatchResultTy
5018 AMDGPUAsmParser::parseSWaitCntOps(OperandVector &Operands) {
5019   AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
5020   int64_t Waitcnt = getWaitcntBitMask(ISA);
5021   SMLoc S = getLoc();
5022 
5023   // If parse failed, do not return error code
5024   // to avoid excessive error messages.
5025   if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) {
5026     while (parseCnt(Waitcnt) && !isToken(AsmToken::EndOfStatement));
5027   } else {
5028     parseExpr(Waitcnt);
5029   }
5030 
5031   Operands.push_back(AMDGPUOperand::CreateImm(this, Waitcnt, S));
5032   return MatchOperand_Success;
5033 }
5034 
5035 bool
5036 AMDGPUOperand::isSWaitCnt() const {
5037   return isImm();
5038 }
5039 
5040 //===----------------------------------------------------------------------===//
5041 // hwreg
5042 //===----------------------------------------------------------------------===//
5043 
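     // The hwreg operand has the form 'hwreg(<name or code> [, <offset>, <width>])',
     // e.g. 's_getreg_b32 s2, hwreg(HW_REG_TRAPSTS)'. A raw 16-bit immediate is
     // accepted as well.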
5044 bool
5045 AMDGPUAsmParser::parseHwregBody(OperandInfoTy &HwReg,
5046                                 int64_t &Offset,
5047                                 int64_t &Width) {
5048   using namespace llvm::AMDGPU::Hwreg;
5049 
5050   // The register may be specified by name or using a numeric code
5051   if (isToken(AsmToken::Identifier) &&
5052       (HwReg.Id = getHwregId(getTokenStr())) >= 0) {
5053     HwReg.IsSymbolic = true;
5054     lex(); // skip register name
5055   } else if (!parseExpr(HwReg.Id)) {
5056     return false;
5057   }
5058 
5059   if (trySkipToken(AsmToken::RParen))
5060     return true;
5061 
5062   // parse optional params
5063   return
5064     skipToken(AsmToken::Comma, "expected a comma or a closing parenthesis") &&
5065     parseExpr(Offset) &&
5066     skipToken(AsmToken::Comma, "expected a comma") &&
5067     parseExpr(Width) &&
5068     skipToken(AsmToken::RParen, "expected a closing parenthesis");
5069 }
5070 
5071 bool
5072 AMDGPUAsmParser::validateHwreg(const OperandInfoTy &HwReg,
5073                                const int64_t Offset,
5074                                const int64_t Width,
5075                                const SMLoc Loc) {
5076 
5077   using namespace llvm::AMDGPU::Hwreg;
5078 
5079   if (HwReg.IsSymbolic && !isValidHwreg(HwReg.Id, getSTI())) {
5080     Error(Loc, "specified hardware register is not supported on this GPU");
5081     return false;
5082   } else if (!isValidHwreg(HwReg.Id)) {
5083     Error(Loc, "invalid code of hardware register: only 6-bit values are legal");
5084     return false;
5085   } else if (!isValidHwregOffset(Offset)) {
5086     Error(Loc, "invalid bit offset: only 5-bit values are legal");
5087     return false;
5088   } else if (!isValidHwregWidth(Width)) {
5089     Error(Loc, "invalid bitfield width: only values from 1 to 32 are legal");
5090     return false;
5091   }
5092   return true;
5093 }
5094 
5095 OperandMatchResultTy
5096 AMDGPUAsmParser::parseHwreg(OperandVector &Operands) {
5097   using namespace llvm::AMDGPU::Hwreg;
5098 
5099   int64_t ImmVal = 0;
5100   SMLoc Loc = getLoc();
5101 
5102   // If parse failed, do not return error code
5103   // to avoid excessive error messages.
5104   if (trySkipId("hwreg", AsmToken::LParen)) {
5105     OperandInfoTy HwReg(ID_UNKNOWN_);
5106     int64_t Offset = OFFSET_DEFAULT_;
5107     int64_t Width = WIDTH_DEFAULT_;
5108     if (parseHwregBody(HwReg, Offset, Width) &&
5109         validateHwreg(HwReg, Offset, Width, Loc)) {
5110       ImmVal = encodeHwreg(HwReg.Id, Offset, Width);
5111     }
5112   } else if (parseExpr(ImmVal)) {
5113     if (ImmVal < 0 || !isUInt<16>(ImmVal))
5114       Error(Loc, "invalid immediate: only 16-bit values are legal");
5115   }
5116 
5117   Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTyHwreg));
5118   return MatchOperand_Success;
5119 }
5120 
5121 bool AMDGPUOperand::isHwreg() const {
5122   return isImmTy(ImmTyHwreg);
5123 }
5124 
5125 //===----------------------------------------------------------------------===//
5126 // sendmsg
5127 //===----------------------------------------------------------------------===//
5128 
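     // The sendmsg operand has the form 'sendmsg(<msg> [, <op> [, <stream>]])',
     // e.g. 's_sendmsg sendmsg(MSG_GS, GS_OP_EMIT, 0)' or
     // 's_sendmsg sendmsg(MSG_INTERRUPT)'. A raw 16-bit immediate is accepted as
     // well.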
5129 bool
5130 AMDGPUAsmParser::parseSendMsgBody(OperandInfoTy &Msg,
5131                                   OperandInfoTy &Op,
5132                                   OperandInfoTy &Stream) {
5133   using namespace llvm::AMDGPU::SendMsg;
5134 
5135   if (isToken(AsmToken::Identifier) && (Msg.Id = getMsgId(getTokenStr())) >= 0) {
5136     Msg.IsSymbolic = true;
5137     lex(); // skip message name
5138   } else if (!parseExpr(Msg.Id)) {
5139     return false;
5140   }
5141 
5142   if (trySkipToken(AsmToken::Comma)) {
5143     Op.IsDefined = true;
5144     if (isToken(AsmToken::Identifier) &&
5145         (Op.Id = getMsgOpId(Msg.Id, getTokenStr())) >= 0) {
5146       lex(); // skip operation name
5147     } else if (!parseExpr(Op.Id)) {
5148       return false;
5149     }
5150 
5151     if (trySkipToken(AsmToken::Comma)) {
5152       Stream.IsDefined = true;
5153       if (!parseExpr(Stream.Id))
5154         return false;
5155     }
5156   }
5157 
5158   return skipToken(AsmToken::RParen, "expected a closing parenthesis");
5159 }
5160 
5161 bool
5162 AMDGPUAsmParser::validateSendMsg(const OperandInfoTy &Msg,
5163                                  const OperandInfoTy &Op,
5164                                  const OperandInfoTy &Stream,
5165                                  const SMLoc S) {
5166   using namespace llvm::AMDGPU::SendMsg;
5167 
5168   // Validation strictness depends on whether the message is specified
5169   // in a symbolic or in a numeric form. In the latter case
5170   // only the encoding possibility is checked.
5171   bool Strict = Msg.IsSymbolic;
5172 
5173   if (!isValidMsgId(Msg.Id, getSTI(), Strict)) {
5174     Error(S, "invalid message id");
5175     return false;
5176   } else if (Strict && (msgRequiresOp(Msg.Id) != Op.IsDefined)) {
5177     Error(S, Op.IsDefined ?
5178              "message does not support operations" :
5179              "missing message operation");
5180     return false;
5181   } else if (!isValidMsgOp(Msg.Id, Op.Id, Strict)) {
5182     Error(S, "invalid operation id");
5183     return false;
5184   } else if (Strict && !msgSupportsStream(Msg.Id, Op.Id) && Stream.IsDefined) {
5185     Error(S, "message operation does not support streams");
5186     return false;
5187   } else if (!isValidMsgStream(Msg.Id, Op.Id, Stream.Id, Strict)) {
5188     Error(S, "invalid message stream id");
5189     return false;
5190   }
5191   return true;
5192 }
5193 
5194 OperandMatchResultTy
5195 AMDGPUAsmParser::parseSendMsgOp(OperandVector &Operands) {
5196   using namespace llvm::AMDGPU::SendMsg;
5197 
5198   int64_t ImmVal = 0;
5199   SMLoc Loc = getLoc();
5200 
5201   // If parse failed, do not return error code
5202   // to avoid excessive error messages.
5203   if (trySkipId("sendmsg", AsmToken::LParen)) {
5204     OperandInfoTy Msg(ID_UNKNOWN_);
5205     OperandInfoTy Op(OP_NONE_);
5206     OperandInfoTy Stream(STREAM_ID_NONE_);
5207     if (parseSendMsgBody(Msg, Op, Stream) &&
5208         validateSendMsg(Msg, Op, Stream, Loc)) {
5209       ImmVal = encodeMsg(Msg.Id, Op.Id, Stream.Id);
5210     }
5211   } else if (parseExpr(ImmVal)) {
5212     if (ImmVal < 0 || !isUInt<16>(ImmVal))
5213       Error(Loc, "invalid immediate: only 16-bit values are legal");
5214   }
5215 
5216   Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTySendMsg));
5217   return MatchOperand_Success;
5218 }
5219 
5220 bool AMDGPUOperand::isSendMsg() const {
5221   return isImmTy(ImmTySendMsg);
5222 }
5223 
5224 //===----------------------------------------------------------------------===//
5225 // v_interp
5226 //===----------------------------------------------------------------------===//
5227 
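     // Interpolation instructions take a slot operand ('p10', 'p20' or 'p0') and
     // an attribute operand of the form 'attr<N>.<chan>', e.g.
     // 'v_interp_p1_f32 v0, v1, attr0.x' or 'v_interp_mov_f32 v0, p10, attr1.y'.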
5228 OperandMatchResultTy AMDGPUAsmParser::parseInterpSlot(OperandVector &Operands) {
5229   if (getLexer().getKind() != AsmToken::Identifier)
5230     return MatchOperand_NoMatch;
5231 
5232   StringRef Str = Parser.getTok().getString();
5233   int Slot = StringSwitch<int>(Str)
5234     .Case("p10", 0)
5235     .Case("p20", 1)
5236     .Case("p0", 2)
5237     .Default(-1);
5238 
5239   SMLoc S = Parser.getTok().getLoc();
5240   if (Slot == -1)
5241     return MatchOperand_ParseFail;
5242 
5243   Parser.Lex();
5244   Operands.push_back(AMDGPUOperand::CreateImm(this, Slot, S,
5245                                               AMDGPUOperand::ImmTyInterpSlot));
5246   return MatchOperand_Success;
5247 }
5248 
5249 OperandMatchResultTy AMDGPUAsmParser::parseInterpAttr(OperandVector &Operands) {
5250   if (getLexer().getKind() != AsmToken::Identifier)
5251     return MatchOperand_NoMatch;
5252 
5253   StringRef Str = Parser.getTok().getString();
5254   if (!Str.startswith("attr"))
5255     return MatchOperand_NoMatch;
5256 
5257   StringRef Chan = Str.take_back(2);
5258   int AttrChan = StringSwitch<int>(Chan)
5259     .Case(".x", 0)
5260     .Case(".y", 1)
5261     .Case(".z", 2)
5262     .Case(".w", 3)
5263     .Default(-1);
5264   if (AttrChan == -1)
5265     return MatchOperand_ParseFail;
5266 
5267   Str = Str.drop_back(2).drop_front(4);
5268 
5269   uint8_t Attr;
5270   if (Str.getAsInteger(10, Attr))
5271     return MatchOperand_ParseFail;
5272 
5273   SMLoc S = Parser.getTok().getLoc();
5274   Parser.Lex();
5275   if (Attr > 63) {
5276     Error(S, "out of bounds attr");
5277     return MatchOperand_Success;
5278   }
5279 
5280   SMLoc SChan = SMLoc::getFromPointer(Chan.data());
5281 
5282   Operands.push_back(AMDGPUOperand::CreateImm(this, Attr, S,
5283                                               AMDGPUOperand::ImmTyInterpAttr));
5284   Operands.push_back(AMDGPUOperand::CreateImm(this, AttrChan, SChan,
5285                                               AMDGPUOperand::ImmTyAttrChan));
5286   return MatchOperand_Success;
5287 }
5288 
5289 //===----------------------------------------------------------------------===//
5290 // exp
5291 //===----------------------------------------------------------------------===//
5292 
5293 void AMDGPUAsmParser::errorExpTgt() {
5294   Error(Parser.getTok().getLoc(), "invalid exp target");
5295 }
5296 
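     // Recognized export targets: 'mrt0'..'mrt7', 'mrtz', 'null', 'pos0'..'pos3'
     // ('pos4' and 'prim' on GFX10), and 'param0'..'param31',
     // e.g. 'exp mrt0 v0, v1, v2, v3 done vm'.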
5297 OperandMatchResultTy AMDGPUAsmParser::parseExpTgtImpl(StringRef Str,
5298                                                       uint8_t &Val) {
5299   if (Str == "null") {
5300     Val = 9;
5301     return MatchOperand_Success;
5302   }
5303 
5304   if (Str.startswith("mrt")) {
5305     Str = Str.drop_front(3);
5306     if (Str == "z") { // == mrtz
5307       Val = 8;
5308       return MatchOperand_Success;
5309     }
5310 
5311     if (Str.getAsInteger(10, Val))
5312       return MatchOperand_ParseFail;
5313 
5314     if (Val > 7)
5315       errorExpTgt();
5316 
5317     return MatchOperand_Success;
5318   }
5319 
5320   if (Str.startswith("pos")) {
5321     Str = Str.drop_front(3);
5322     if (Str.getAsInteger(10, Val))
5323       return MatchOperand_ParseFail;
5324 
5325     if (Val > 4 || (Val == 4 && !isGFX10()))
5326       errorExpTgt();
5327 
5328     Val += 12;
5329     return MatchOperand_Success;
5330   }
5331 
5332   if (isGFX10() && Str == "prim") {
5333     Val = 20;
5334     return MatchOperand_Success;
5335   }
5336 
5337   if (Str.startswith("param")) {
5338     Str = Str.drop_front(5);
5339     if (Str.getAsInteger(10, Val))
5340       return MatchOperand_ParseFail;
5341 
5342     if (Val >= 32)
5343       errorExpTgt();
5344 
5345     Val += 32;
5346     return MatchOperand_Success;
5347   }
5348 
5349   if (Str.startswith("invalid_target_")) {
5350     Str = Str.drop_front(15);
5351     if (Str.getAsInteger(10, Val))
5352       return MatchOperand_ParseFail;
5353 
5354     errorExpTgt();
5355     return MatchOperand_Success;
5356   }
5357 
5358   return MatchOperand_NoMatch;
5359 }
5360 
5361 OperandMatchResultTy AMDGPUAsmParser::parseExpTgt(OperandVector &Operands) {
5362   uint8_t Val;
5363   StringRef Str = Parser.getTok().getString();
5364 
5365   auto Res = parseExpTgtImpl(Str, Val);
5366   if (Res != MatchOperand_Success)
5367     return Res;
5368 
5369   SMLoc S = Parser.getTok().getLoc();
5370   Parser.Lex();
5371 
5372   Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S,
5373                                               AMDGPUOperand::ImmTyExpTgt));
5374   return MatchOperand_Success;
5375 }
5376 
5377 //===----------------------------------------------------------------------===//
5378 // parser helpers
5379 //===----------------------------------------------------------------------===//
5380 
5381 bool
5382 AMDGPUAsmParser::isId(const AsmToken &Token, const StringRef Id) const {
5383   return Token.is(AsmToken::Identifier) && Token.getString() == Id;
5384 }
5385 
5386 bool
5387 AMDGPUAsmParser::isId(const StringRef Id) const {
5388   return isId(getToken(), Id);
5389 }
5390 
5391 bool
5392 AMDGPUAsmParser::isToken(const AsmToken::TokenKind Kind) const {
5393   return getTokenKind() == Kind;
5394 }
5395 
5396 bool
5397 AMDGPUAsmParser::trySkipId(const StringRef Id) {
5398   if (isId(Id)) {
5399     lex();
5400     return true;
5401   }
5402   return false;
5403 }
5404 
5405 bool
5406 AMDGPUAsmParser::trySkipId(const StringRef Id, const AsmToken::TokenKind Kind) {
5407   if (isId(Id) && peekToken().is(Kind)) {
5408     lex();
5409     lex();
5410     return true;
5411   }
5412   return false;
5413 }
5414 
5415 bool
5416 AMDGPUAsmParser::trySkipToken(const AsmToken::TokenKind Kind) {
5417   if (isToken(Kind)) {
5418     lex();
5419     return true;
5420   }
5421   return false;
5422 }
5423 
5424 bool
5425 AMDGPUAsmParser::skipToken(const AsmToken::TokenKind Kind,
5426                            const StringRef ErrMsg) {
5427   if (!trySkipToken(Kind)) {
5428     Error(getLoc(), ErrMsg);
5429     return false;
5430   }
5431   return true;
5432 }
5433 
5434 bool
5435 AMDGPUAsmParser::parseExpr(int64_t &Imm) {
5436   return !getParser().parseAbsoluteExpression(Imm);
5437 }
5438 
5439 bool
5440 AMDGPUAsmParser::parseExpr(OperandVector &Operands) {
5441   SMLoc S = getLoc();
5442 
5443   const MCExpr *Expr;
5444   if (Parser.parseExpression(Expr))
5445     return false;
5446 
5447   int64_t IntVal;
5448   if (Expr->evaluateAsAbsolute(IntVal)) {
5449     Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S));
5450   } else {
5451     Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S));
5452   }
5453   return true;
5454 }
5455 
5456 bool
5457 AMDGPUAsmParser::parseString(StringRef &Val, const StringRef ErrMsg) {
5458   if (isToken(AsmToken::String)) {
5459     Val = getToken().getStringContents();
5460     lex();
5461     return true;
5462   } else {
5463     Error(getLoc(), ErrMsg);
5464     return false;
5465   }
5466 }
5467 
5468 AsmToken
5469 AMDGPUAsmParser::getToken() const {
5470   return Parser.getTok();
5471 }
5472 
5473 AsmToken
5474 AMDGPUAsmParser::peekToken() {
5475   return getLexer().peekTok();
5476 }
5477 
5478 void
5479 AMDGPUAsmParser::peekTokens(MutableArrayRef<AsmToken> Tokens) {
5480   auto TokCount = getLexer().peekTokens(Tokens);
5481 
5482   for (auto Idx = TokCount; Idx < Tokens.size(); ++Idx)
5483     Tokens[Idx] = AsmToken(AsmToken::Error, "");
5484 }
5485 
5486 AsmToken::TokenKind
5487 AMDGPUAsmParser::getTokenKind() const {
5488   return getLexer().getKind();
5489 }
5490 
5491 SMLoc
5492 AMDGPUAsmParser::getLoc() const {
5493   return getToken().getLoc();
5494 }
5495 
5496 StringRef
5497 AMDGPUAsmParser::getTokenStr() const {
5498   return getToken().getString();
5499 }
5500 
5501 void
5502 AMDGPUAsmParser::lex() {
5503   Parser.Lex();
5504 }
5505 
5506 //===----------------------------------------------------------------------===//
5507 // swizzle
5508 //===----------------------------------------------------------------------===//
5509 
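     // The swizzle operand is either a raw 16-bit offset or a macro, e.g.
     //   ds_swizzle_b32 v8, v2 offset:swizzle(QUAD_PERM, 0, 1, 2, 3)
     //   ds_swizzle_b32 v8, v2 offset:swizzle(BITMASK_PERM, "01pi0")
     // Supported modes are QUAD_PERM, BITMASK_PERM, BROADCAST, SWAP and REVERSE;
     // all but QUAD_PERM are encoded as a bitmask permutation below.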
5510 LLVM_READNONE
5511 static unsigned
5512 encodeBitmaskPerm(const unsigned AndMask,
5513                   const unsigned OrMask,
5514                   const unsigned XorMask) {
5515   using namespace llvm::AMDGPU::Swizzle;
5516 
5517   return BITMASK_PERM_ENC |
5518          (AndMask << BITMASK_AND_SHIFT) |
5519          (OrMask  << BITMASK_OR_SHIFT)  |
5520          (XorMask << BITMASK_XOR_SHIFT);
5521 }
5522 
5523 bool
5524 AMDGPUAsmParser::parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
5525                                       const unsigned MinVal,
5526                                       const unsigned MaxVal,
5527                                       const StringRef ErrMsg) {
5528   for (unsigned i = 0; i < OpNum; ++i) {
5529     if (!skipToken(AsmToken::Comma, "expected a comma")){
5530       return false;
5531     }
5532     SMLoc ExprLoc = Parser.getTok().getLoc();
5533     if (!parseExpr(Op[i])) {
5534       return false;
5535     }
5536     if (Op[i] < MinVal || Op[i] > MaxVal) {
5537       Error(ExprLoc, ErrMsg);
5538       return false;
5539     }
5540   }
5541 
5542   return true;
5543 }
5544 
5545 bool
5546 AMDGPUAsmParser::parseSwizzleQuadPerm(int64_t &Imm) {
5547   using namespace llvm::AMDGPU::Swizzle;
5548 
5549   int64_t Lane[LANE_NUM];
5550   if (parseSwizzleOperands(LANE_NUM, Lane, 0, LANE_MAX,
5551                            "expected a 2-bit lane id")) {
5552     Imm = QUAD_PERM_ENC;
5553     for (unsigned I = 0; I < LANE_NUM; ++I) {
5554       Imm |= Lane[I] << (LANE_SHIFT * I);
5555     }
5556     return true;
5557   }
5558   return false;
5559 }
5560 
5561 bool
5562 AMDGPUAsmParser::parseSwizzleBroadcast(int64_t &Imm) {
5563   using namespace llvm::AMDGPU::Swizzle;
5564 
5565   SMLoc S = Parser.getTok().getLoc();
5566   int64_t GroupSize;
5567   int64_t LaneIdx;
5568 
5569   if (!parseSwizzleOperands(1, &GroupSize,
5570                             2, 32,
5571                             "group size must be in the interval [2,32]")) {
5572     return false;
5573   }
5574   if (!isPowerOf2_64(GroupSize)) {
5575     Error(S, "group size must be a power of two");
5576     return false;
5577   }
5578   if (parseSwizzleOperands(1, &LaneIdx,
5579                            0, GroupSize - 1,
5580                            "lane id must be in the interval [0,group size - 1]")) {
5581     Imm = encodeBitmaskPerm(BITMASK_MAX - GroupSize + 1, LaneIdx, 0);
5582     return true;
5583   }
5584   return false;
5585 }
5586 
5587 bool
5588 AMDGPUAsmParser::parseSwizzleReverse(int64_t &Imm) {
5589   using namespace llvm::AMDGPU::Swizzle;
5590 
5591   SMLoc S = Parser.getTok().getLoc();
5592   int64_t GroupSize;
5593 
5594   if (!parseSwizzleOperands(1, &GroupSize,
5595       2, 32, "group size must be in the interval [2,32]")) {
5596     return false;
5597   }
5598   if (!isPowerOf2_64(GroupSize)) {
5599     Error(S, "group size must be a power of two");
5600     return false;
5601   }
5602 
5603   Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize - 1);
5604   return true;
5605 }
5606 
5607 bool
5608 AMDGPUAsmParser::parseSwizzleSwap(int64_t &Imm) {
5609   using namespace llvm::AMDGPU::Swizzle;
5610 
5611   SMLoc S = Parser.getTok().getLoc();
5612   int64_t GroupSize;
5613 
5614   if (!parseSwizzleOperands(1, &GroupSize,
5615       1, 16, "group size must be in the interval [1,16]")) {
5616     return false;
5617   }
5618   if (!isPowerOf2_64(GroupSize)) {
5619     Error(S, "group size must be a power of two");
5620     return false;
5621   }
5622 
5623   Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize);
5624   return true;
5625 }
5626 
5627 bool
5628 AMDGPUAsmParser::parseSwizzleBitmaskPerm(int64_t &Imm) {
5629   using namespace llvm::AMDGPU::Swizzle;
5630 
5631   if (!skipToken(AsmToken::Comma, "expected a comma")) {
5632     return false;
5633   }
5634 
5635   StringRef Ctl;
5636   SMLoc StrLoc = Parser.getTok().getLoc();
5637   if (!parseString(Ctl)) {
5638     return false;
5639   }
5640   if (Ctl.size() != BITMASK_WIDTH) {
5641     Error(StrLoc, "expected a 5-character mask");
5642     return false;
5643   }
5644 
5645   unsigned AndMask = 0;
5646   unsigned OrMask = 0;
5647   unsigned XorMask = 0;
5648 
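       // Each character of the mask controls one of the 5 lane id bits, most
       // significant bit first: '0' forces the bit to 0, '1' forces it to 1,
       // 'p' preserves it, and 'i' inverts it.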
5649   for (size_t i = 0; i < Ctl.size(); ++i) {
5650     unsigned Mask = 1 << (BITMASK_WIDTH - 1 - i);
5651     switch(Ctl[i]) {
5652     default:
5653       Error(StrLoc, "invalid mask");
5654       return false;
5655     case '0':
5656       break;
5657     case '1':
5658       OrMask |= Mask;
5659       break;
5660     case 'p':
5661       AndMask |= Mask;
5662       break;
5663     case 'i':
5664       AndMask |= Mask;
5665       XorMask |= Mask;
5666       break;
5667     }
5668   }
5669 
5670   Imm = encodeBitmaskPerm(AndMask, OrMask, XorMask);
5671   return true;
5672 }
5673 
5674 bool
5675 AMDGPUAsmParser::parseSwizzleOffset(int64_t &Imm) {
5676 
5677   SMLoc OffsetLoc = Parser.getTok().getLoc();
5678 
5679   if (!parseExpr(Imm)) {
5680     return false;
5681   }
5682   if (!isUInt<16>(Imm)) {
5683     Error(OffsetLoc, "expected a 16-bit offset");
5684     return false;
5685   }
5686   return true;
5687 }
5688 
5689 bool
5690 AMDGPUAsmParser::parseSwizzleMacro(int64_t &Imm) {
5691   using namespace llvm::AMDGPU::Swizzle;
5692 
5693   if (skipToken(AsmToken::LParen, "expected a left parenthesis")) {
5694 
5695     SMLoc ModeLoc = Parser.getTok().getLoc();
5696     bool Ok = false;
5697 
5698     if (trySkipId(IdSymbolic[ID_QUAD_PERM])) {
5699       Ok = parseSwizzleQuadPerm(Imm);
5700     } else if (trySkipId(IdSymbolic[ID_BITMASK_PERM])) {
5701       Ok = parseSwizzleBitmaskPerm(Imm);
5702     } else if (trySkipId(IdSymbolic[ID_BROADCAST])) {
5703       Ok = parseSwizzleBroadcast(Imm);
5704     } else if (trySkipId(IdSymbolic[ID_SWAP])) {
5705       Ok = parseSwizzleSwap(Imm);
5706     } else if (trySkipId(IdSymbolic[ID_REVERSE])) {
5707       Ok = parseSwizzleReverse(Imm);
5708     } else {
5709       Error(ModeLoc, "expected a swizzle mode");
5710     }
5711 
5712     return Ok && skipToken(AsmToken::RParen, "expected a closing parenthesis");
5713   }
5714 
5715   return false;
5716 }
5717 
5718 OperandMatchResultTy
5719 AMDGPUAsmParser::parseSwizzleOp(OperandVector &Operands) {
5720   SMLoc S = Parser.getTok().getLoc();
5721   int64_t Imm = 0;
5722 
5723   if (trySkipId("offset")) {
5724 
5725     bool Ok = false;
5726     if (skipToken(AsmToken::Colon, "expected a colon")) {
5727       if (trySkipId("swizzle")) {
5728         Ok = parseSwizzleMacro(Imm);
5729       } else {
5730         Ok = parseSwizzleOffset(Imm);
5731       }
5732     }
5733 
5734     Operands.push_back(AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTySwizzle));
5735 
5736     return Ok? MatchOperand_Success : MatchOperand_ParseFail;
5737   } else {
5738     // Swizzle "offset" operand is optional.
5739     // If it is omitted, try parsing other optional operands.
5740     return parseOptionalOpr(Operands);
5741   }
5742 }
5743 
5744 bool
5745 AMDGPUOperand::isSwizzle() const {
5746   return isImmTy(ImmTySwizzle);
5747 }
5748 
5749 //===----------------------------------------------------------------------===//
5750 // VGPR Index Mode
5751 //===----------------------------------------------------------------------===//
5752 
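     // The VGPR index mode operand is written as 'gpr_idx(<mode>[,<mode>...])'
     // where each mode is one of SRC0, SRC1, SRC2 or DST,
     // e.g. 's_set_gpr_idx_on s2, gpr_idx(SRC0,DST)'. A raw 4-bit immediate is
     // accepted as well.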
5753 int64_t AMDGPUAsmParser::parseGPRIdxMacro() {
5754 
5755   using namespace llvm::AMDGPU::VGPRIndexMode;
5756 
5757   if (trySkipToken(AsmToken::RParen)) {
5758     return OFF;
5759   }
5760 
5761   int64_t Imm = 0;
5762 
5763   while (true) {
5764     unsigned Mode = 0;
5765     SMLoc S = Parser.getTok().getLoc();
5766 
5767     for (unsigned ModeId = ID_MIN; ModeId <= ID_MAX; ++ModeId) {
5768       if (trySkipId(IdSymbolic[ModeId])) {
5769         Mode = 1 << ModeId;
5770         break;
5771       }
5772     }
5773 
5774     if (Mode == 0) {
5775       Error(S, (Imm == 0)?
5776                "expected a VGPR index mode or a closing parenthesis" :
5777                "expected a VGPR index mode");
5778       break;
5779     }
5780 
5781     if (Imm & Mode) {
5782       Error(S, "duplicate VGPR index mode");
5783       break;
5784     }
5785     Imm |= Mode;
5786 
5787     if (trySkipToken(AsmToken::RParen))
5788       break;
5789     if (!skipToken(AsmToken::Comma,
5790                    "expected a comma or a closing parenthesis"))
5791       break;
5792   }
5793 
5794   return Imm;
5795 }
5796 
5797 OperandMatchResultTy
5798 AMDGPUAsmParser::parseGPRIdxMode(OperandVector &Operands) {
5799 
5800   int64_t Imm = 0;
5801   SMLoc S = Parser.getTok().getLoc();
5802 
5803   if (getLexer().getKind() == AsmToken::Identifier &&
5804       Parser.getTok().getString() == "gpr_idx" &&
5805       getLexer().peekTok().is(AsmToken::LParen)) {
5806 
5807     Parser.Lex();
5808     Parser.Lex();
5809 
5810     // If parse failed, trigger an error but do not return error code
5811     // to avoid excessive error messages.
5812     Imm = parseGPRIdxMacro();
5813 
5814   } else {
5815     if (getParser().parseAbsoluteExpression(Imm))
5816       return MatchOperand_NoMatch;
5817     if (Imm < 0 || !isUInt<4>(Imm)) {
5818       Error(S, "invalid immediate: only 4-bit values are legal");
5819     }
5820   }
5821 
5822   Operands.push_back(
5823       AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyGprIdxMode));
5824   return MatchOperand_Success;
5825 }
5826 
5827 bool AMDGPUOperand::isGPRIdxMode() const {
5828   return isImmTy(ImmTyGprIdxMode);
5829 }
5830 
5831 //===----------------------------------------------------------------------===//
5832 // sopp branch targets
5833 //===----------------------------------------------------------------------===//
5834 
5835 OperandMatchResultTy
5836 AMDGPUAsmParser::parseSOppBrTarget(OperandVector &Operands) {
5837 
5838   // Make sure we are not parsing something
5839   // that looks like a label or an expression but is not.
5840   // This will improve error messages.
5841   if (isRegister() || isModifier())
5842     return MatchOperand_NoMatch;
5843 
5844   if (parseExpr(Operands)) {
5845 
5846     AMDGPUOperand &Opr = ((AMDGPUOperand &)*Operands[Operands.size() - 1]);
5847     assert(Opr.isImm() || Opr.isExpr());
5848     SMLoc Loc = Opr.getStartLoc();
5849 
5850     // Currently we do not support arbitrary expressions as branch targets.
5851     // Only labels and absolute expressions are accepted.
5852     if (Opr.isExpr() && !Opr.isSymbolRefExpr()) {
5853       Error(Loc, "expected an absolute expression or a label");
5854     } else if (Opr.isImm() && !Opr.isS16Imm()) {
5855       Error(Loc, "expected a 16-bit signed jump offset");
5856     }
5857   }
5858 
5859   return MatchOperand_Success; // avoid excessive error messages
5860 }
5861 
5862 //===----------------------------------------------------------------------===//
5863 // Boolean holding registers
5864 //===----------------------------------------------------------------------===//
5865 
5866 OperandMatchResultTy
5867 AMDGPUAsmParser::parseBoolReg(OperandVector &Operands) {
5868   return parseReg(Operands);
5869 }
5870 
5871 //===----------------------------------------------------------------------===//
5872 // mubuf
5873 //===----------------------------------------------------------------------===//
5874 
5875 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultDLC() const {
5876   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDLC);
5877 }
5878 
5879 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultGLC() const {
5880   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyGLC);
5881 }
5882 
5883 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSLC() const {
5884   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTySLC);
5885 }
5886 
5887 void AMDGPUAsmParser::cvtMubufImpl(MCInst &Inst,
5888                                const OperandVector &Operands,
5889                                bool IsAtomic,
5890                                bool IsAtomicReturn,
5891                                bool IsLds) {
5892   bool IsLdsOpcode = IsLds;
5893   bool HasLdsModifier = false;
5894   OptionalImmIndexMap OptionalIdx;
5895   assert(IsAtomicReturn ? IsAtomic : true);
5896   unsigned FirstOperandIdx = 1;
5897 
5898   for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) {
5899     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
5900 
5901     // Add the register arguments
5902     if (Op.isReg()) {
5903       Op.addRegOperands(Inst, 1);
5904       // Insert a tied src for atomic return dst.
5905       // This cannot be postponed as subsequent calls to
5906       // addImmOperands rely on correct number of MC operands.
5907       if (IsAtomicReturn && i == FirstOperandIdx)
5908         Op.addRegOperands(Inst, 1);
5909       continue;
5910     }
5911 
5912     // Handle the case where soffset is an immediate
5913     if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
5914       Op.addImmOperands(Inst, 1);
5915       continue;
5916     }
5917 
5918     HasLdsModifier |= Op.isLDS();
5919 
5920     // Handle tokens like 'offen' which are sometimes hard-coded into the
5921     // asm string.  There are no MCInst operands for these.
5922     if (Op.isToken()) {
5923       continue;
5924     }
5925     assert(Op.isImm());
5926 
5927     // Handle optional arguments
5928     OptionalIdx[Op.getImmTy()] = i;
5929   }
5930 
5931   // This is a workaround for an llvm quirk which may result in an
5932   // incorrect instruction selection. Lds and non-lds versions of
5933   // MUBUF instructions are identical except that lds versions
5934   // have a mandatory 'lds' modifier. However, this modifier follows
5935   // optional modifiers, and the llvm asm matcher regards this 'lds'
5936   // modifier as an optional one. As a result, an lds version
5937   // of an opcode may be selected even if it has no 'lds' modifier.
5938   if (IsLdsOpcode && !HasLdsModifier) {
5939     int NoLdsOpcode = AMDGPU::getMUBUFNoLdsInst(Inst.getOpcode());
5940     if (NoLdsOpcode != -1) { // Got lds version - correct it.
5941       Inst.setOpcode(NoLdsOpcode);
5942       IsLdsOpcode = false;
5943     }
5944   }
5945 
5946   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset);
5947   if (!IsAtomic) { // glc is hard-coded.
5948     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC);
5949   }
5950   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC);
5951 
5952   if (!IsLdsOpcode) { // tfe is not legal with lds opcodes
5953     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
5954   }
5955 
5956   if (isGFX10())
5957     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDLC);
5958 }
5959 
5960 void AMDGPUAsmParser::cvtMtbuf(MCInst &Inst, const OperandVector &Operands) {
5961   OptionalImmIndexMap OptionalIdx;
5962 
5963   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
5964     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
5965 
5966     // Add the register arguments
5967     if (Op.isReg()) {
5968       Op.addRegOperands(Inst, 1);
5969       continue;
5970     }
5971 
5972     // Handle the case where soffset is an immediate
5973     if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
5974       Op.addImmOperands(Inst, 1);
5975       continue;
5976     }
5977 
5978     // Handle tokens like 'offen' which are sometimes hard-coded into the
5979     // asm string.  There are no MCInst operands for these.
5980     if (Op.isToken()) {
5981       continue;
5982     }
5983     assert(Op.isImm());
5984 
5985     // Handle optional arguments
5986     OptionalIdx[Op.getImmTy()] = i;
5987   }
5988 
5989   addOptionalImmOperand(Inst, Operands, OptionalIdx,
5990                         AMDGPUOperand::ImmTyOffset);
5991   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyFORMAT);
5992   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC);
5993   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC);
5994   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
5995 
5996   if (isGFX10())
5997     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDLC);
5998 }
5999 
6000 //===----------------------------------------------------------------------===//
6001 // mimg
6002 //===----------------------------------------------------------------------===//
6003 
6004 void AMDGPUAsmParser::cvtMIMG(MCInst &Inst, const OperandVector &Operands,
6005                               bool IsAtomic) {
6006   unsigned I = 1;
6007   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
6008   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
6009     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
6010   }
6011 
6012   if (IsAtomic) {
6013     // Add src, same as dst
6014     assert(Desc.getNumDefs() == 1);
6015     ((AMDGPUOperand &)*Operands[I - 1]).addRegOperands(Inst, 1);
6016   }
6017 
6018   OptionalImmIndexMap OptionalIdx;
6019 
6020   for (unsigned E = Operands.size(); I != E; ++I) {
6021     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
6022 
6023     // Add the register arguments
6024     if (Op.isReg()) {
6025       Op.addRegOperands(Inst, 1);
6026     } else if (Op.isImmModifier()) {
6027       OptionalIdx[Op.getImmTy()] = I;
6028     } else if (!Op.isToken()) {
6029       llvm_unreachable("unexpected operand type");
6030     }
6031   }
6032 
6033   bool IsGFX10 = isGFX10();
6034 
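       // Append the optional immediate operands in the order the MCInst expects;
       // dim, dlc and a16 exist only in the GFX10 operand layout, while da is
       // absent there.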
6035   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDMask);
6036   if (IsGFX10)
6037     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDim, -1);
6038   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyUNorm);
6039   if (IsGFX10)
6040     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDLC);
6041   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC);
6042   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC);
6043   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyR128A16);
6044   if (IsGFX10)
6045     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyA16);
6046   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
6047   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyLWE);
6048   if (!IsGFX10)
6049     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDA);
6050   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyD16);
6051 }
6052 
6053 void AMDGPUAsmParser::cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands) {
6054   cvtMIMG(Inst, Operands, true);
6055 }
6056 
6057 //===----------------------------------------------------------------------===//
6058 // smrd
6059 //===----------------------------------------------------------------------===//
6060 
6061 bool AMDGPUOperand::isSMRDOffset8() const {
6062   return isImm() && isUInt<8>(getImm());
6063 }
6064 
6065 bool AMDGPUOperand::isSMRDOffset20() const {
6066   return isImm() && isUInt<20>(getImm());
6067 }
6068 
6069 bool AMDGPUOperand::isSMRDLiteralOffset() const {
6070   // 32-bit literals are only supported on CI and we only want to use them
6071   // when the offset does not fit in 8 bits.
6072   return isImm() && !isUInt<8>(getImm()) && isUInt<32>(getImm());
6073 }
6074 
6075 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDOffset8() const {
6076   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
6077 }
6078 
6079 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDOffset20() const {
6080   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
6081 }
6082 
6083 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDLiteralOffset() const {
6084   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
6085 }
6086 
6087 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultFlatOffset() const {
6088   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
6089 }
6090 
6091 //===----------------------------------------------------------------------===//
6092 // vop3
6093 //===----------------------------------------------------------------------===//
6094 
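     // The VOP3 omod field encodes an output modifier: 1 = multiply by 2,
     // 2 = multiply by 4, 3 = divide by 2 (0 = none). The converters below map
     // the assembly forms 'mul:2', 'mul:4' and 'div:2' onto this encoding.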
6095 static bool ConvertOmodMul(int64_t &Mul) {
6096   if (Mul != 1 && Mul != 2 && Mul != 4)
6097     return false;
6098 
6099   Mul >>= 1;
6100   return true;
6101 }
6102 
6103 static bool ConvertOmodDiv(int64_t &Div) {
6104   if (Div == 1) {
6105     Div = 0;
6106     return true;
6107   }
6108 
6109   if (Div == 2) {
6110     Div = 3;
6111     return true;
6112   }
6113 
6114   return false;
6115 }
6116 
6117 static bool ConvertBoundCtrl(int64_t &BoundCtrl) {
6118   if (BoundCtrl == 0) {
6119     BoundCtrl = 1;
6120     return true;
6121   }
6122 
6123   if (BoundCtrl == -1) {
6124     BoundCtrl = 0;
6125     return true;
6126   }
6127 
6128   return false;
6129 }
6130 
6131 // Note: the order in this table matches the order of operands in AsmString.
6132 static const OptionalOperand AMDGPUOptionalOperandTable[] = {
6133   {"offen",   AMDGPUOperand::ImmTyOffen, true, nullptr},
6134   {"idxen",   AMDGPUOperand::ImmTyIdxen, true, nullptr},
6135   {"addr64",  AMDGPUOperand::ImmTyAddr64, true, nullptr},
6136   {"offset0", AMDGPUOperand::ImmTyOffset0, false, nullptr},
6137   {"offset1", AMDGPUOperand::ImmTyOffset1, false, nullptr},
6138   {"gds",     AMDGPUOperand::ImmTyGDS, true, nullptr},
6139   {"lds",     AMDGPUOperand::ImmTyLDS, true, nullptr},
6140   {"offset",  AMDGPUOperand::ImmTyOffset, false, nullptr},
6141   {"inst_offset", AMDGPUOperand::ImmTyInstOffset, false, nullptr},
6142   {"dlc",     AMDGPUOperand::ImmTyDLC, true, nullptr},
6143   {"format",  AMDGPUOperand::ImmTyFORMAT, false, nullptr},
6144   {"glc",     AMDGPUOperand::ImmTyGLC, true, nullptr},
6145   {"slc",     AMDGPUOperand::ImmTySLC, true, nullptr},
6146   {"swz",     AMDGPUOperand::ImmTySWZ, true, nullptr},
6147   {"tfe",     AMDGPUOperand::ImmTyTFE, true, nullptr},
6148   {"d16",     AMDGPUOperand::ImmTyD16, true, nullptr},
6149   {"high",    AMDGPUOperand::ImmTyHigh, true, nullptr},
6150   {"clamp",   AMDGPUOperand::ImmTyClampSI, true, nullptr},
6151   {"omod",    AMDGPUOperand::ImmTyOModSI, false, ConvertOmodMul},
6152   {"unorm",   AMDGPUOperand::ImmTyUNorm, true, nullptr},
6153   {"da",      AMDGPUOperand::ImmTyDA,    true, nullptr},
6154   {"r128",    AMDGPUOperand::ImmTyR128A16,  true, nullptr},
6155   {"a16",     AMDGPUOperand::ImmTyA16,  true, nullptr},
6156   {"lwe",     AMDGPUOperand::ImmTyLWE,   true, nullptr},
6157   {"d16",     AMDGPUOperand::ImmTyD16,   true, nullptr},
6158   {"dmask",   AMDGPUOperand::ImmTyDMask, false, nullptr},
6159   {"dim",     AMDGPUOperand::ImmTyDim,   false, nullptr},
6160   {"row_mask",   AMDGPUOperand::ImmTyDppRowMask, false, nullptr},
6161   {"bank_mask",  AMDGPUOperand::ImmTyDppBankMask, false, nullptr},
6162   {"bound_ctrl", AMDGPUOperand::ImmTyDppBoundCtrl, false, ConvertBoundCtrl},
6163   {"fi",         AMDGPUOperand::ImmTyDppFi, false, nullptr},
6164   {"dst_sel",    AMDGPUOperand::ImmTySdwaDstSel, false, nullptr},
6165   {"src0_sel",   AMDGPUOperand::ImmTySdwaSrc0Sel, false, nullptr},
6166   {"src1_sel",   AMDGPUOperand::ImmTySdwaSrc1Sel, false, nullptr},
6167   {"dst_unused", AMDGPUOperand::ImmTySdwaDstUnused, false, nullptr},
6168   {"compr", AMDGPUOperand::ImmTyExpCompr, true, nullptr },
6169   {"vm", AMDGPUOperand::ImmTyExpVM, true, nullptr},
6170   {"op_sel", AMDGPUOperand::ImmTyOpSel, false, nullptr},
6171   {"op_sel_hi", AMDGPUOperand::ImmTyOpSelHi, false, nullptr},
6172   {"neg_lo", AMDGPUOperand::ImmTyNegLo, false, nullptr},
6173   {"neg_hi", AMDGPUOperand::ImmTyNegHi, false, nullptr},
6174   {"blgp", AMDGPUOperand::ImmTyBLGP, false, nullptr},
6175   {"cbsz", AMDGPUOperand::ImmTyCBSZ, false, nullptr},
6176   {"abid", AMDGPUOperand::ImmTyABID, false, nullptr}
6177 };
6178 
6179 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOperand(OperandVector &Operands) {
6180 
6181   OperandMatchResultTy res = parseOptionalOpr(Operands);
6182 
6183   // This is a hack to enable hardcoded mandatory operands which follow
6184   // optional operands.
6185   //
6186   // Current design assumes that all operands after the first optional operand
6187   // are also optional. However, the implementation of some instructions violates
6188   // this rule (see e.g. flat/global atomics, which have hardcoded 'glc' operands).
6189   //
6190   // To alleviate this problem, we have to (implicitly) parse extra operands
6191   // to make sure autogenerated parser of custom operands never hit hardcoded
6192   // mandatory operands.
6193 
6194   for (unsigned i = 0; i < MAX_OPR_LOOKAHEAD; ++i) {
6195     if (res != MatchOperand_Success ||
6196         isToken(AsmToken::EndOfStatement))
6197       break;
6198 
6199     trySkipToken(AsmToken::Comma);
6200     res = parseOptionalOpr(Operands);
6201   }
6202 
6203   return res;
6204 }
6205 
6206 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOpr(OperandVector &Operands) {
6207   OperandMatchResultTy res;
6208   for (const OptionalOperand &Op : AMDGPUOptionalOperandTable) {
6209     // try to parse any optional operand here
6210     if (Op.IsBit) {
6211       res = parseNamedBit(Op.Name, Operands, Op.Type);
6212     } else if (Op.Type == AMDGPUOperand::ImmTyOModSI) {
6213       res = parseOModOperand(Operands);
6214     } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstSel ||
6215                Op.Type == AMDGPUOperand::ImmTySdwaSrc0Sel ||
6216                Op.Type == AMDGPUOperand::ImmTySdwaSrc1Sel) {
6217       res = parseSDWASel(Operands, Op.Name, Op.Type);
6218     } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstUnused) {
6219       res = parseSDWADstUnused(Operands);
6220     } else if (Op.Type == AMDGPUOperand::ImmTyOpSel ||
6221                Op.Type == AMDGPUOperand::ImmTyOpSelHi ||
6222                Op.Type == AMDGPUOperand::ImmTyNegLo ||
6223                Op.Type == AMDGPUOperand::ImmTyNegHi) {
6224       res = parseOperandArrayWithPrefix(Op.Name, Operands, Op.Type,
6225                                         Op.ConvertResult);
6226     } else if (Op.Type == AMDGPUOperand::ImmTyDim) {
6227       res = parseDim(Operands);
6228     } else if (Op.Type == AMDGPUOperand::ImmTyFORMAT && !isGFX10()) {
6229       res = parseDfmtNfmt(Operands);
6230     } else {
6231       res = parseIntWithPrefix(Op.Name, Operands, Op.Type, Op.ConvertResult);
6232     }
6233     if (res != MatchOperand_NoMatch) {
6234       return res;
6235     }
6236   }
6237   return MatchOperand_NoMatch;
6238 }
6239 
6240 OperandMatchResultTy AMDGPUAsmParser::parseOModOperand(OperandVector &Operands) {
6241   StringRef Name = Parser.getTok().getString();
6242   if (Name == "mul") {
6243     return parseIntWithPrefix("mul", Operands,
6244                               AMDGPUOperand::ImmTyOModSI, ConvertOmodMul);
6245   }
6246 
6247   if (Name == "div") {
6248     return parseIntWithPrefix("div", Operands,
6249                               AMDGPUOperand::ImmTyOModSI, ConvertOmodDiv);
6250   }
6251 
6252   return MatchOperand_NoMatch;
6253 }
6254 
6255 void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands) {
6256   cvtVOP3P(Inst, Operands);
6257 
6258   int Opc = Inst.getOpcode();
6259 
6260   int SrcNum;
6261   const int Ops[] = { AMDGPU::OpName::src0,
6262                       AMDGPU::OpName::src1,
6263                       AMDGPU::OpName::src2 };
6264   for (SrcNum = 0;
6265        SrcNum < 3 && AMDGPU::getNamedOperandIdx(Opc, Ops[SrcNum]) != -1;
6266        ++SrcNum);
6267   assert(SrcNum > 0);
6268 
6269   int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
6270   unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
6271 
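  // The op_sel bit that follows the last source operand selects the
  // destination; record it in src0_modifiers as DST_OP_SEL.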
6272   if ((OpSel & (1 << SrcNum)) != 0) {
6273     int ModIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers);
6274     uint32_t ModVal = Inst.getOperand(ModIdx).getImm();
6275     Inst.getOperand(ModIdx).setImm(ModVal | SISrcMods::DST_OP_SEL);
6276   }
6277 }
6278 
6279 static bool isRegOrImmWithInputMods(const MCInstrDesc &Desc, unsigned OpNum) {
6280       // 1. This operand is an input modifier operand.
6281   return Desc.OpInfo[OpNum].OperandType == AMDGPU::OPERAND_INPUT_MODS
6282       // 2. This is not the last operand.
6283       && Desc.NumOperands > (OpNum + 1)
6284       // 3. The next operand has a register class.
6285       && Desc.OpInfo[OpNum + 1].RegClass != -1
6286       // 4. The next operand is not tied to any other operand.
6287       && Desc.getOperandConstraint(OpNum + 1, MCOI::OperandConstraint::TIED_TO) == -1;
6288 }
6289 
6290 void AMDGPUAsmParser::cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands)
6291 {
6292   OptionalImmIndexMap OptionalIdx;
6293   unsigned Opc = Inst.getOpcode();
6294 
6295   unsigned I = 1;
6296   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
6297   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
6298     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
6299   }
6300 
6301   for (unsigned E = Operands.size(); I != E; ++I) {
6302     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
6303     if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
6304       Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
6305     } else if (Op.isInterpSlot() ||
6306                Op.isInterpAttr() ||
6307                Op.isAttrChan()) {
6308       Inst.addOperand(MCOperand::createImm(Op.getImm()));
6309     } else if (Op.isImmModifier()) {
6310       OptionalIdx[Op.getImmTy()] = I;
6311     } else {
6312       llvm_unreachable("unhandled operand type");
6313     }
6314   }
6315 
6316   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::high) != -1) {
6317     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyHigh);
6318   }
6319 
6320   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) {
6321     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI);
6322   }
6323 
6324   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) {
6325     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI);
6326   }
6327 }
6328 
6329 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands,
6330                               OptionalImmIndexMap &OptionalIdx) {
6331   unsigned Opc = Inst.getOpcode();
6332 
6333   unsigned I = 1;
6334   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
6335   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
6336     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
6337   }
6338 
6339   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers) != -1) {
6340     // This instruction has src modifiers
6341     for (unsigned E = Operands.size(); I != E; ++I) {
6342       AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
6343       if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
6344         Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
6345       } else if (Op.isImmModifier()) {
6346         OptionalIdx[Op.getImmTy()] = I;
6347       } else if (Op.isRegOrImm()) {
6348         Op.addRegOrImmOperands(Inst, 1);
6349       } else {
6350         llvm_unreachable("unhandled operand type");
6351       }
6352     }
6353   } else {
6354     // No src modifiers
6355     for (unsigned E = Operands.size(); I != E; ++I) {
6356       AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
6357       if (Op.isMod()) {
6358         OptionalIdx[Op.getImmTy()] = I;
6359       } else {
6360         Op.addRegOrImmOperands(Inst, 1);
6361       }
6362     }
6363   }
6364 
6365   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) {
6366     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI);
6367   }
6368 
6369   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) {
6370     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI);
6371   }
6372 
6373   // Special case v_mac_{f16, f32} and v_fmac_{f16, f32} (gfx906/gfx10+):
6374   // they have a src2 register operand that is tied to the dst operand.
6375   // We do not allow modifiers for this operand in the assembler, so
6376   // src2_modifiers should be 0.
6377   if (Opc == AMDGPU::V_MAC_F32_e64_gfx6_gfx7 ||
6378       Opc == AMDGPU::V_MAC_F32_e64_gfx10 ||
6379       Opc == AMDGPU::V_MAC_F32_e64_vi ||
6380       Opc == AMDGPU::V_MAC_F16_e64_vi ||
6381       Opc == AMDGPU::V_FMAC_F32_e64_gfx10 ||
6382       Opc == AMDGPU::V_FMAC_F32_e64_vi ||
6383       Opc == AMDGPU::V_FMAC_F16_e64_gfx10) {
6384     auto it = Inst.begin();
6385     std::advance(it, AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2_modifiers));
6386     it = Inst.insert(it, MCOperand::createImm(0)); // no modifiers for src2
6387     ++it;
6388     Inst.insert(it, Inst.getOperand(0)); // src2 = dst
6389   }
6390 }
6391 
6392 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands) {
6393   OptionalImmIndexMap OptionalIdx;
6394   cvtVOP3(Inst, Operands, OptionalIdx);
6395 }
6396 
6397 void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst,
6398                                const OperandVector &Operands) {
6399   OptionalImmIndexMap OptIdx;
6400   const int Opc = Inst.getOpcode();
6401   const MCInstrDesc &Desc = MII.get(Opc);
6402 
6403   const bool IsPacked = (Desc.TSFlags & SIInstrFlags::IsPacked) != 0;
6404 
6405   cvtVOP3(Inst, Operands, OptIdx);
6406 
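  // If this opcode has a vdst_in operand, reuse the parsed dst register for it
  // (these forms are not packed).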
6407   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst_in) != -1) {
6408     assert(!IsPacked);
6409     Inst.addOperand(Inst.getOperand(0));
6410   }
6411 
6412   // FIXME: This is messy. Parse the modifiers as if this were a normal VOP3
6413   // instruction, and then figure out where to actually put the modifiers.
6414 
6415   addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSel);
6416 
6417   int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi);
6418   if (OpSelHiIdx != -1) {
6419     int DefaultVal = IsPacked ? -1 : 0;
6420     addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSelHi,
6421                           DefaultVal);
6422   }
6423 
6424   int NegLoIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_lo);
6425   if (NegLoIdx != -1) {
6426     assert(IsPacked);
6427     addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegLo);
6428     addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegHi);
6429   }
6430 
6431   const int Ops[] = { AMDGPU::OpName::src0,
6432                       AMDGPU::OpName::src1,
6433                       AMDGPU::OpName::src2 };
6434   const int ModOps[] = { AMDGPU::OpName::src0_modifiers,
6435                          AMDGPU::OpName::src1_modifiers,
6436                          AMDGPU::OpName::src2_modifiers };
6437 
6438   int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
6439 
6440   unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
6441   unsigned OpSelHi = 0;
6442   unsigned NegLo = 0;
6443   unsigned NegHi = 0;
6444 
6445   if (OpSelHiIdx != -1) {
6446     OpSelHi = Inst.getOperand(OpSelHiIdx).getImm();
6447   }
6448 
6449   if (NegLoIdx != -1) {
6450     int NegHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_hi);
6451     NegLo = Inst.getOperand(NegLoIdx).getImm();
6452     NegHi = Inst.getOperand(NegHiIdx).getImm();
6453   }
6454 
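  // Fold the packed op_sel/op_sel_hi/neg_lo/neg_hi values into the per-source
  // modifier operands: bit J of each packed value applies to source J.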
6455   for (int J = 0; J < 3; ++J) {
6456     int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]);
6457     if (OpIdx == -1)
6458       break;
6459 
6460     uint32_t ModVal = 0;
6461 
6462     if ((OpSel & (1 << J)) != 0)
6463       ModVal |= SISrcMods::OP_SEL_0;
6464 
6465     if ((OpSelHi & (1 << J)) != 0)
6466       ModVal |= SISrcMods::OP_SEL_1;
6467 
6468     if ((NegLo & (1 << J)) != 0)
6469       ModVal |= SISrcMods::NEG;
6470 
6471     if ((NegHi & (1 << J)) != 0)
6472       ModVal |= SISrcMods::NEG_HI;
6473 
6474     int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]);
6475 
6476     Inst.getOperand(ModIdx).setImm(Inst.getOperand(ModIdx).getImm() | ModVal);
6477   }
6478 }
6479 
6480 //===----------------------------------------------------------------------===//
6481 // dpp
6482 //===----------------------------------------------------------------------===//
6483 
6484 bool AMDGPUOperand::isDPP8() const {
6485   return isImmTy(ImmTyDPP8);
6486 }
6487 
6488 bool AMDGPUOperand::isDPPCtrl() const {
6489   using namespace AMDGPU::DPP;
6490 
6491   bool result = isImm() && getImmTy() == ImmTyDppCtrl && isUInt<9>(getImm());
6492   if (result) {
6493     int64_t Imm = getImm();
6494     return (Imm >= DppCtrl::QUAD_PERM_FIRST && Imm <= DppCtrl::QUAD_PERM_LAST) ||
6495            (Imm >= DppCtrl::ROW_SHL_FIRST && Imm <= DppCtrl::ROW_SHL_LAST) ||
6496            (Imm >= DppCtrl::ROW_SHR_FIRST && Imm <= DppCtrl::ROW_SHR_LAST) ||
6497            (Imm >= DppCtrl::ROW_ROR_FIRST && Imm <= DppCtrl::ROW_ROR_LAST) ||
6498            (Imm == DppCtrl::WAVE_SHL1) ||
6499            (Imm == DppCtrl::WAVE_ROL1) ||
6500            (Imm == DppCtrl::WAVE_SHR1) ||
6501            (Imm == DppCtrl::WAVE_ROR1) ||
6502            (Imm == DppCtrl::ROW_MIRROR) ||
6503            (Imm == DppCtrl::ROW_HALF_MIRROR) ||
6504            (Imm == DppCtrl::BCAST15) ||
6505            (Imm == DppCtrl::BCAST31) ||
6506            (Imm >= DppCtrl::ROW_SHARE_FIRST && Imm <= DppCtrl::ROW_SHARE_LAST) ||
6507            (Imm >= DppCtrl::ROW_XMASK_FIRST && Imm <= DppCtrl::ROW_XMASK_LAST);
6508   }
6509   return false;
6510 }
6511 
6512 //===----------------------------------------------------------------------===//
6513 // mAI
6514 //===----------------------------------------------------------------------===//
6515 
6516 bool AMDGPUOperand::isBLGP() const {
6517   return isImm() && getImmTy() == ImmTyBLGP && isUInt<3>(getImm());
6518 }
6519 
6520 bool AMDGPUOperand::isCBSZ() const {
6521   return isImm() && getImmTy() == ImmTyCBSZ && isUInt<3>(getImm());
6522 }
6523 
6524 bool AMDGPUOperand::isABID() const {
6525   return isImm() && getImmTy() == ImmTyABID && isUInt<4>(getImm());
6526 }
6527 
6528 bool AMDGPUOperand::isS16Imm() const {
6529   return isImm() && (isInt<16>(getImm()) || isUInt<16>(getImm()));
6530 }
6531 
6532 bool AMDGPUOperand::isU16Imm() const {
6533   return isImm() && isUInt<16>(getImm());
6534 }
6535 
6536 OperandMatchResultTy AMDGPUAsmParser::parseDim(OperandVector &Operands) {
6537   if (!isGFX10())
6538     return MatchOperand_NoMatch;
6539 
6540   SMLoc S = Parser.getTok().getLoc();
6541 
6542   if (getLexer().isNot(AsmToken::Identifier))
6543     return MatchOperand_NoMatch;
6544   if (getLexer().getTok().getString() != "dim")
6545     return MatchOperand_NoMatch;
6546 
6547   Parser.Lex();
6548   if (getLexer().isNot(AsmToken::Colon))
6549     return MatchOperand_ParseFail;
6550 
6551   Parser.Lex();
6552 
6553   // We want to allow "dim:1D" etc., but the initial 1 is tokenized as an
6554   // integer.
6555   std::string Token;
6556   if (getLexer().is(AsmToken::Integer)) {
6557     SMLoc Loc = getLexer().getTok().getEndLoc();
6558     Token = std::string(getLexer().getTok().getString());
6559     Parser.Lex();
6560     if (getLexer().getTok().getLoc() != Loc)
6561       return MatchOperand_ParseFail;
6562   }
6563   if (getLexer().isNot(AsmToken::Identifier))
6564     return MatchOperand_ParseFail;
6565   Token += getLexer().getTok().getString();
6566 
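  // Accept both the short suffix (e.g. "1D") and the full "SQ_RSRC_IMG_*"
  // name by stripping the prefix before the lookup.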
6567   StringRef DimId = Token;
6568   if (DimId.startswith("SQ_RSRC_IMG_"))
6569     DimId = DimId.substr(12);
6570 
6571   const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByAsmSuffix(DimId);
6572   if (!DimInfo)
6573     return MatchOperand_ParseFail;
6574 
6575   Parser.Lex();
6576 
6577   Operands.push_back(AMDGPUOperand::CreateImm(this, DimInfo->Encoding, S,
6578                                               AMDGPUOperand::ImmTyDim));
6579   return MatchOperand_Success;
6580 }
6581 
6582 OperandMatchResultTy AMDGPUAsmParser::parseDPP8(OperandVector &Operands) {
6583   SMLoc S = Parser.getTok().getLoc();
6584   StringRef Prefix;
6585 
6586   if (getLexer().getKind() == AsmToken::Identifier) {
6587     Prefix = Parser.getTok().getString();
6588   } else {
6589     return MatchOperand_NoMatch;
6590   }
6591 
6592   if (Prefix != "dpp8")
6593     return parseDPPCtrl(Operands);
6594   if (!isGFX10())
6595     return MatchOperand_NoMatch;
6596 
6597   // dpp8:[%d,%d,%d,%d,%d,%d,%d,%d]
6598 
6599   int64_t Sels[8];
6600 
6601   Parser.Lex();
6602   if (getLexer().isNot(AsmToken::Colon))
6603     return MatchOperand_ParseFail;
6604 
6605   Parser.Lex();
6606   if (getLexer().isNot(AsmToken::LBrac))
6607     return MatchOperand_ParseFail;
6608 
6609   Parser.Lex();
6610   if (getParser().parseAbsoluteExpression(Sels[0]))
6611     return MatchOperand_ParseFail;
6612   if (0 > Sels[0] || 7 < Sels[0])
6613     return MatchOperand_ParseFail;
6614 
6615   for (size_t i = 1; i < 8; ++i) {
6616     if (getLexer().isNot(AsmToken::Comma))
6617       return MatchOperand_ParseFail;
6618 
6619     Parser.Lex();
6620     if (getParser().parseAbsoluteExpression(Sels[i]))
6621       return MatchOperand_ParseFail;
6622     if (0 > Sels[i] || 7 < Sels[i])
6623       return MatchOperand_ParseFail;
6624   }
6625 
6626   if (getLexer().isNot(AsmToken::RBrac))
6627     return MatchOperand_ParseFail;
6628   Parser.Lex();
6629 
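  // Pack the eight 3-bit lane selectors into a single immediate;
  // selector i occupies bits [3*i+2 : 3*i].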
6630   unsigned DPP8 = 0;
6631   for (size_t i = 0; i < 8; ++i)
6632     DPP8 |= (Sels[i] << (i * 3));
6633 
6634   Operands.push_back(AMDGPUOperand::CreateImm(this, DPP8, S, AMDGPUOperand::ImmTyDPP8));
6635   return MatchOperand_Success;
6636 }
6637 
6638 OperandMatchResultTy
6639 AMDGPUAsmParser::parseDPPCtrl(OperandVector &Operands) {
6640   using namespace AMDGPU::DPP;
6641 
6642   SMLoc S = Parser.getTok().getLoc();
6643   StringRef Prefix;
6644   int64_t Int;
6645 
6646   if (getLexer().getKind() == AsmToken::Identifier) {
6647     Prefix = Parser.getTok().getString();
6648   } else {
6649     return MatchOperand_NoMatch;
6650   }
6651 
6652   if (Prefix == "row_mirror") {
6653     Int = DppCtrl::ROW_MIRROR;
6654     Parser.Lex();
6655   } else if (Prefix == "row_half_mirror") {
6656     Int = DppCtrl::ROW_HALF_MIRROR;
6657     Parser.Lex();
6658   } else {
6659     // Check to prevent parseDPPCtrlOps from eating invalid tokens
6660     if (Prefix != "quad_perm"
6661         && Prefix != "row_shl"
6662         && Prefix != "row_shr"
6663         && Prefix != "row_ror"
6664         && Prefix != "wave_shl"
6665         && Prefix != "wave_rol"
6666         && Prefix != "wave_shr"
6667         && Prefix != "wave_ror"
6668         && Prefix != "row_bcast"
6669         && Prefix != "row_share"
6670         && Prefix != "row_xmask") {
6671       return MatchOperand_NoMatch;
6672     }
6673 
6674     if (!isGFX10() && (Prefix == "row_share" || Prefix == "row_xmask"))
6675       return MatchOperand_NoMatch;
6676 
6677     if (!isVI() && !isGFX9() &&
6678         (Prefix == "wave_shl" || Prefix == "wave_shr" ||
6679          Prefix == "wave_rol" || Prefix == "wave_ror" ||
6680          Prefix == "row_bcast"))
6681       return MatchOperand_NoMatch;
6682 
6683     Parser.Lex();
6684     if (getLexer().isNot(AsmToken::Colon))
6685       return MatchOperand_ParseFail;
6686 
6687     if (Prefix == "quad_perm") {
6688       // quad_perm:[%d,%d,%d,%d]
6689       Parser.Lex();
6690       if (getLexer().isNot(AsmToken::LBrac))
6691         return MatchOperand_ParseFail;
6692       Parser.Lex();
6693 
6694       if (getParser().parseAbsoluteExpression(Int) || !(0 <= Int && Int <= 3))
6695         return MatchOperand_ParseFail;
6696 
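      // Pack the remaining three 2-bit lane selectors above the first one
      // (bits [3:2], [5:4] and [7:6]).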
6697       for (int i = 0; i < 3; ++i) {
6698         if (getLexer().isNot(AsmToken::Comma))
6699           return MatchOperand_ParseFail;
6700         Parser.Lex();
6701 
6702         int64_t Temp;
6703         if (getParser().parseAbsoluteExpression(Temp) || !(0 <= Temp && Temp <= 3))
6704           return MatchOperand_ParseFail;
6705         const int shift = i * 2 + 2;
6706         Int += (Temp << shift);
6707       }
6708 
6709       if (getLexer().isNot(AsmToken::RBrac))
6710         return MatchOperand_ParseFail;
6711       Parser.Lex();
6712     } else {
6713       // sel:%d
6714       Parser.Lex();
6715       if (getParser().parseAbsoluteExpression(Int))
6716         return MatchOperand_ParseFail;
6717 
6718       if (Prefix == "row_shl" && 1 <= Int && Int <= 15) {
6719         Int |= DppCtrl::ROW_SHL0;
6720       } else if (Prefix == "row_shr" && 1 <= Int && Int <= 15) {
6721         Int |= DppCtrl::ROW_SHR0;
6722       } else if (Prefix == "row_ror" && 1 <= Int && Int <= 15) {
6723         Int |= DppCtrl::ROW_ROR0;
6724       } else if (Prefix == "wave_shl" && 1 == Int) {
6725         Int = DppCtrl::WAVE_SHL1;
6726       } else if (Prefix == "wave_rol" && 1 == Int) {
6727         Int = DppCtrl::WAVE_ROL1;
6728       } else if (Prefix == "wave_shr" && 1 == Int) {
6729         Int = DppCtrl::WAVE_SHR1;
6730       } else if (Prefix == "wave_ror" && 1 == Int) {
6731         Int = DppCtrl::WAVE_ROR1;
6732       } else if (Prefix == "row_bcast") {
6733         if (Int == 15) {
6734           Int = DppCtrl::BCAST15;
6735         } else if (Int == 31) {
6736           Int = DppCtrl::BCAST31;
6737         } else {
6738           return MatchOperand_ParseFail;
6739         }
6740       } else if (Prefix == "row_share" && 0 <= Int && Int <= 15) {
6741         Int |= DppCtrl::ROW_SHARE_FIRST;
6742       } else if (Prefix == "row_xmask" && 0 <= Int && Int <= 15) {
6743         Int |= DppCtrl::ROW_XMASK_FIRST;
6744       } else {
6745         return MatchOperand_ParseFail;
6746       }
6747     }
6748   }
6749 
6750   Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, AMDGPUOperand::ImmTyDppCtrl));
6751   return MatchOperand_Success;
6752 }
6753 
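// Default DPP control operands: row_mask and bank_mask default to 0xf
// (all rows/banks enabled); bound_ctrl and fi default to 0.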
6754 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultRowMask() const {
6755   return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppRowMask);
6756 }
6757 
6758 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultEndpgmImmOperands() const {
6759   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyEndpgm);
6760 }
6761 
6762 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBankMask() const {
6763   return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppBankMask);
6764 }
6765 
6766 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBoundCtrl() const {
6767   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppBoundCtrl);
6768 }
6769 
6770 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultFI() const {
6771   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppFi);
6772 }
6773 
6774 void AMDGPUAsmParser::cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8) {
6775   OptionalImmIndexMap OptionalIdx;
6776 
6777   unsigned I = 1;
6778   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
6779   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
6780     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
6781   }
6782 
6783   int Fi = 0;
6784   for (unsigned E = Operands.size(); I != E; ++I) {
6785     auto TiedTo = Desc.getOperandConstraint(Inst.getNumOperands(),
6786                                             MCOI::TIED_TO);
6787     if (TiedTo != -1) {
6788       assert((unsigned)TiedTo < Inst.getNumOperands());
6789       // Handle the tied 'old' or src2 operand for MAC instructions.
6790       Inst.addOperand(Inst.getOperand(TiedTo));
6791     }
6792     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
6793     // Add the register arguments
6794     if (Op.isReg() && validateVccOperand(Op.getReg())) {
6795       // VOP2b (v_add_u32, v_sub_u32, ...) DPP uses the "vcc" token.
6796       // Skip it.
6797       continue;
6798     }
6799 
6800     if (IsDPP8) {
6801       if (Op.isDPP8()) {
6802         Op.addImmOperands(Inst, 1);
6803       } else if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
6804         Op.addRegWithFPInputModsOperands(Inst, 2);
6805       } else if (Op.isFI()) {
6806         Fi = Op.getImm();
6807       } else if (Op.isReg()) {
6808         Op.addRegOperands(Inst, 1);
6809       } else {
6810         llvm_unreachable("Invalid operand type");
6811       }
6812     } else {
6813       if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
6814         Op.addRegWithFPInputModsOperands(Inst, 2);
6815       } else if (Op.isDPPCtrl()) {
6816         Op.addImmOperands(Inst, 1);
6817       } else if (Op.isImm()) {
6818         // Handle optional arguments
6819         OptionalIdx[Op.getImmTy()] = I;
6820       } else {
6821         llvm_unreachable("Invalid operand type");
6822       }
6823     }
6824   }
6825 
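  // dpp8 carries fi as an extra trailing immediate; regular dpp appends the
  // optional row_mask/bank_mask/bound_ctrl/fi operands (with defaults) instead.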
6826   if (IsDPP8) {
6827     using namespace llvm::AMDGPU::DPP;
6828     Inst.addOperand(MCOperand::createImm(Fi ? DPP8_FI_1 : DPP8_FI_0));
6829   } else {
6830     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppRowMask, 0xf);
6831     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBankMask, 0xf);
6832     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBoundCtrl);
6833     if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::fi) != -1) {
6834       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppFi);
6835     }
6836   }
6837 }
6838 
6839 //===----------------------------------------------------------------------===//
6840 // sdwa
6841 //===----------------------------------------------------------------------===//
6842 
6843 OperandMatchResultTy
6844 AMDGPUAsmParser::parseSDWASel(OperandVector &Operands, StringRef Prefix,
6845                               AMDGPUOperand::ImmTy Type) {
6846   using namespace llvm::AMDGPU::SDWA;
6847 
6848   SMLoc S = Parser.getTok().getLoc();
6849   StringRef Value;
6850   OperandMatchResultTy res;
6851 
6852   res = parseStringWithPrefix(Prefix, Value);
6853   if (res != MatchOperand_Success) {
6854     return res;
6855   }
6856 
6857   int64_t Int;
6858   Int = StringSwitch<int64_t>(Value)
6859         .Case("BYTE_0", SdwaSel::BYTE_0)
6860         .Case("BYTE_1", SdwaSel::BYTE_1)
6861         .Case("BYTE_2", SdwaSel::BYTE_2)
6862         .Case("BYTE_3", SdwaSel::BYTE_3)
6863         .Case("WORD_0", SdwaSel::WORD_0)
6864         .Case("WORD_1", SdwaSel::WORD_1)
6865         .Case("DWORD", SdwaSel::DWORD)
6866         .Default(0xffffffff);
6867   Parser.Lex(); // eat last token
6868 
6869   if (Int == 0xffffffff) {
6870     return MatchOperand_ParseFail;
6871   }
6872 
6873   Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, Type));
6874   return MatchOperand_Success;
6875 }
6876 
6877 OperandMatchResultTy
6878 AMDGPUAsmParser::parseSDWADstUnused(OperandVector &Operands) {
6879   using namespace llvm::AMDGPU::SDWA;
6880 
6881   SMLoc S = Parser.getTok().getLoc();
6882   StringRef Value;
6883   OperandMatchResultTy res;
6884 
6885   res = parseStringWithPrefix("dst_unused", Value);
6886   if (res != MatchOperand_Success) {
6887     return res;
6888   }
6889 
6890   int64_t Int;
6891   Int = StringSwitch<int64_t>(Value)
6892         .Case("UNUSED_PAD", DstUnused::UNUSED_PAD)
6893         .Case("UNUSED_SEXT", DstUnused::UNUSED_SEXT)
6894         .Case("UNUSED_PRESERVE", DstUnused::UNUSED_PRESERVE)
6895         .Default(0xffffffff);
6896   Parser.Lex(); // eat last token
6897 
6898   if (Int == 0xffffffff) {
6899     return MatchOperand_ParseFail;
6900   }
6901 
6902   Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, AMDGPUOperand::ImmTySdwaDstUnused));
6903   return MatchOperand_Success;
6904 }
6905 
6906 void AMDGPUAsmParser::cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands) {
6907   cvtSDWA(Inst, Operands, SIInstrFlags::VOP1);
6908 }
6909 
6910 void AMDGPUAsmParser::cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands) {
6911   cvtSDWA(Inst, Operands, SIInstrFlags::VOP2);
6912 }
6913 
6914 void AMDGPUAsmParser::cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands) {
6915   cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, true, true);
6916 }
6917 
6918 void AMDGPUAsmParser::cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands) {
6919   cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, false, true);
6920 }
6921 
6922 void AMDGPUAsmParser::cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands) {
6923   cvtSDWA(Inst, Operands, SIInstrFlags::VOPC, isVI());
6924 }
6925 
6926 void AMDGPUAsmParser::cvtSDWA(MCInst &Inst, const OperandVector &Operands,
6927                               uint64_t BasicInstType,
6928                               bool SkipDstVcc,
6929                               bool SkipSrcVcc) {
6930   using namespace llvm::AMDGPU::SDWA;
6931 
6932   OptionalImmIndexMap OptionalIdx;
6933   bool SkipVcc = SkipDstVcc || SkipSrcVcc;
6934   bool SkippedVcc = false;
6935 
6936   unsigned I = 1;
6937   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
6938   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
6939     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
6940   }
6941 
6942   for (unsigned E = Operands.size(); I != E; ++I) {
6943     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
6944     if (SkipVcc && !SkippedVcc && Op.isReg() &&
6945         (Op.getReg() == AMDGPU::VCC || Op.getReg() == AMDGPU::VCC_LO)) {
6946       // VOP2b (v_add_u32, v_sub_u32, ...) SDWA uses the "vcc" token as dst.
6947       // Skip it if it is the 2nd (e.g. v_add_i32_sdwa v1, vcc, v2, v3)
6948       // or 4th (v_addc_u32_sdwa v1, vcc, v2, v3, vcc) operand.
6949       // Skip VCC only if we didn't skip it on the previous iteration.
6950       // Note that src0 and src1 occupy 2 slots each because of modifiers.
6951       if (BasicInstType == SIInstrFlags::VOP2 &&
6952           ((SkipDstVcc && Inst.getNumOperands() == 1) ||
6953            (SkipSrcVcc && Inst.getNumOperands() == 5))) {
6954         SkippedVcc = true;
6955         continue;
6956       } else if (BasicInstType == SIInstrFlags::VOPC &&
6957                  Inst.getNumOperands() == 0) {
6958         SkippedVcc = true;
6959         continue;
6960       }
6961     }
6962     if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
6963       Op.addRegOrImmWithInputModsOperands(Inst, 2);
6964     } else if (Op.isImm()) {
6965       // Handle optional arguments
6966       OptionalIdx[Op.getImmTy()] = I;
6967     } else {
6968       llvm_unreachable("Invalid operand type");
6969     }
6970     SkippedVcc = false;
6971   }
6972 
6973   if (Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx10 &&
6974       Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx9 &&
6975       Inst.getOpcode() != AMDGPU::V_NOP_sdwa_vi) {
6976     // V_NOP_sdwa_vi/gfx9/gfx10 have no optional SDWA arguments.
6977     switch (BasicInstType) {
6978     case SIInstrFlags::VOP1:
6979       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
6980       if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) {
6981         addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
6982       }
6983       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD);
6984       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE);
6985       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
6986       break;
6987 
6988     case SIInstrFlags::VOP2:
6989       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
6990       if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) {
6991         addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
6992       }
6993       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD);
6994       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE);
6995       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
6996       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD);
6997       break;
6998 
6999     case SIInstrFlags::VOPC:
7000       if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::clamp) != -1)
7001         addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
7002       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
7003       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD);
7004       break;
7005 
7006     default:
7007       llvm_unreachable("Invalid instruction type. Only VOP1, VOP2 and VOPC allowed");
7008     }
7009   }
7010 
7011   // Special case v_mac_{f16, f32}:
7012   // they have a src2 register operand that is tied to the dst operand.
7013   if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
7014       Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi)  {
7015     auto it = Inst.begin();
7016     std::advance(
7017       it, AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::src2));
7018     Inst.insert(it, Inst.getOperand(0)); // src2 = dst
7019   }
7020 }
7021 
7022 //===----------------------------------------------------------------------===//
7023 // mAI
7024 //===----------------------------------------------------------------------===//
7025 
7026 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBLGP() const {
7027   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyBLGP);
7028 }
7029 
7030 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultCBSZ() const {
7031   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyCBSZ);
7032 }
7033 
7034 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultABID() const {
7035   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyABID);
7036 }
7037 
7038 /// Force static initialization.
7039 extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUAsmParser() {
7040   RegisterMCAsmParser<AMDGPUAsmParser> A(getTheAMDGPUTarget());
7041   RegisterMCAsmParser<AMDGPUAsmParser> B(getTheGCNTarget());
7042 }
7043 
7044 #define GET_REGISTER_MATCHER
7045 #define GET_MATCHER_IMPLEMENTATION
7046 #define GET_MNEMONIC_SPELL_CHECKER
7047 #include "AMDGPUGenAsmMatcher.inc"
7048 
7049 // This function must be defined after the auto-generated include so that the
7050 // MatchClassKind enum is defined.
7051 unsigned AMDGPUAsmParser::validateTargetOperandClass(MCParsedAsmOperand &Op,
7052                                                      unsigned Kind) {
7053   // Tokens like "glc" would be parsed as immediate operands in ParseOperand().
7054   // But MatchInstructionImpl() expects a token and fails to validate the
7055   // operand. This method checks if we were given an immediate operand but
7056   // expected the corresponding token.
7057   AMDGPUOperand &Operand = (AMDGPUOperand&)Op;
7058   switch (Kind) {
7059   case MCK_addr64:
7060     return Operand.isAddr64() ? Match_Success : Match_InvalidOperand;
7061   case MCK_gds:
7062     return Operand.isGDS() ? Match_Success : Match_InvalidOperand;
7063   case MCK_lds:
7064     return Operand.isLDS() ? Match_Success : Match_InvalidOperand;
7065   case MCK_glc:
7066     return Operand.isGLC() ? Match_Success : Match_InvalidOperand;
7067   case MCK_idxen:
7068     return Operand.isIdxen() ? Match_Success : Match_InvalidOperand;
7069   case MCK_offen:
7070     return Operand.isOffen() ? Match_Success : Match_InvalidOperand;
7071   case MCK_SSrcB32:
7072     // When operands have expression values, they will return true for isToken,
7073     // because it is not possible to distinguish between a token and an
7074     // expression at parse time. MatchInstructionImpl() will always try to
7075     // match an operand as a token, when isToken returns true, and when the
7076     // name of the expression is not a valid token, the match will fail,
7077     // so we need to handle it here.
7078     return Operand.isSSrcB32() ? Match_Success : Match_InvalidOperand;
7079   case MCK_SSrcF32:
7080     return Operand.isSSrcF32() ? Match_Success : Match_InvalidOperand;
7081   case MCK_SoppBrTarget:
7082     return Operand.isSoppBrTarget() ? Match_Success : Match_InvalidOperand;
7083   case MCK_VReg32OrOff:
7084     return Operand.isVReg32OrOff() ? Match_Success : Match_InvalidOperand;
7085   case MCK_InterpSlot:
7086     return Operand.isInterpSlot() ? Match_Success : Match_InvalidOperand;
7087   case MCK_Attr:
7088     return Operand.isInterpAttr() ? Match_Success : Match_InvalidOperand;
7089   case MCK_AttrChan:
7090     return Operand.isAttrChan() ? Match_Success : Match_InvalidOperand;
7091   case MCK_SReg_64:
7092   case MCK_SReg_64_XEXEC:
7093     // Null is defined as a 32-bit register but
7094     // it should also be enabled with 64-bit operands.
7095     // The following code enables it for SReg_64 operands
7096     // used as source and destination. Remaining source
7097     // operands are handled in isInlinableImm.
7098     return Operand.isNull() ? Match_Success : Match_InvalidOperand;
7099   default:
7100     return Match_InvalidOperand;
7101   }
7102 }
7103 
7104 //===----------------------------------------------------------------------===//
7105 // endpgm
7106 //===----------------------------------------------------------------------===//
7107 
7108 OperandMatchResultTy AMDGPUAsmParser::parseEndpgmOp(OperandVector &Operands) {
7109   SMLoc S = Parser.getTok().getLoc();
7110   int64_t Imm = 0;
7111 
7112   if (!parseExpr(Imm)) {
7113     // The operand is optional, if not present default to 0
7114     Imm = 0;
7115   }
7116 
7117   if (!isUInt<16>(Imm)) {
7118     Error(S, "expected a 16-bit value");
7119     return MatchOperand_ParseFail;
7120   }
7121 
7122   Operands.push_back(
7123       AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyEndpgm));
7124   return MatchOperand_Success;
7125 }
7126 
7127 bool AMDGPUOperand::isEndpgm() const { return isImmTy(ImmTyEndpgm); }
7128